awk to extract tag and add to each line


 
Thread Tools Search this Thread
Top Forums Shell Programming and Scripting awk to extract tag and add to each line
# 1  
Old 07-25-2017
awk to extract tag and add to each line

The awk below executes as is. I am trying to add a condition that will extract the FR= tag
(the text or value after the FR=) from each line of file1 and file2 and add it to the matching output line.
As is, each line of the two files is reported as either a Match, Missing in file1, or Missing in file2,
but I can not work out how to add the condition that extracts FR= up to the ; (semi-colon). There may
not always be text after the FR=, but it always ends with a ;. Thank you :).


file1
Code:
chr1    43814978    COSM27286    G    A    86.92679999999999    PASS    AF=0;AO=1;DP=5535;FAO=0;FDP=2000;FR=.,REALIGNEDx0.008;FRO=2000;FSAF=0;FSAR=0;FSRF=1213;FSRR=787;FWDB=0.0456208;FXX=0;HRUN=1;LEN=1;MLLD=67.7782;OALT=A;OID=COSM27286;OMAPALT=A;OPOS=43814979;OREF=G;PB=.;PBP=.;QD=0.173854;RBI=0.0521772;REFB=2.05804E-5;REVB=0.0253221;RO=5523;SAF=1;SAR=0;SRF=3401;SRR=2122;SSEN=0;SSEP=0;SSSB=0.00321242;STB=0.5;STBP=1;TYPE=snp;VARB=0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:86:5535:2000:5523:2000:1:0:0:0:1:3401:2122:0:0:1213:787:0
chr1    43814981    COSM27287    G    A    86.83350000000002    PASS    AF=0;AO=2;DP=5556;FAO=0;FDP=1999;FR=.;FRO=1999;FSAF=0;FSAR=0;FSRF=1239;FSRR=760;FWDB=-0.0139666;FXX=4.99998E-4;HRUN=1;LEN=1;MLLD=230.086;OALT=A;OID=COSM27287;OMAPALT=A;OPOS=43814981;OREF=G;PB=.;PBP=.;QD=0.173754;RBI=0.0152778;REFB=-9.34219E-7;REVB=-0.00619229;RO=5525;SAF=2;SAR=0;SRF=3391;SRR=2134;SSEN=0;SSEP=0;SSSB=0.00642119;STB=0.5;STBP=1;TYPE=snp;VARB=0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:86:5556:1999:5525:1999:2:0:0:0:2:3391:2134:0:0:1239:760:0
chr1    43815008    COSM29008;COSM43212;COSM19193;COSM27289;COSM28487    TGG    AAA,AAG,AGG,CGG,GCG    70.3099    PASS    AF=0,0,0,0,0;AO=0,0,1,3,0;DP=5528;FAO=0,0,0,0,0;FDP=1675;FR=.,.,.,.,.;FRO=1675;FSAF=0,0,0,0,0;FSAR=0,0,0,0,0;FSRF=1010;FSRR=665;FWDB=-0.0326268,-0.0568148,-0.0658984,-0.0526516,-0.0545445;FXX=0.162499;HRUN=1,1,1,1,1;LEN=3,2,1,1,2;MLLD=155.105,120.652,107.82,60.137,93.9951;OALT=AAA,AA,A,C,GC;OID=COSM28487,COSM19193,COSM29008,COSM43212,COSM27289;OMAPALT=AAA,AAG,AGG,CGG,GCG;OPOS=43815008,43815008,43815008,43815008,43815008;OREF=TGG,TG,T,T,TG;PB=.,.,.,.,.;PBP=.,.,.,.,.;QD=0.167904;RBI=0.0432053,0.0581938,0.0673244,0.0594109,0.0551872;REFB=0.00195961,3.64629E-5,0.00186523,0.00842498,0.00169447;REVB=-0.028323,-0.0125932,-0.0137832,-0.0275221,-0.00839827;RO=4345;SAF=0,0,1,0,0;SAR=0,0,0,3,0;SRF=2598;SRR=1747;SSEN=0,0,0,0,0;SSEP=0,0,0,0,0;SSSB=-6.91127E-8,-6.91127E-8,0.00419704,-0.0184325,-6.91127E-8;STB=0.5,0.5,0.5,0.5,0.5;STBP=1,1,1,1,1;TYPE=mnp,mnp,snp,snp,mnp;VARB=0,0,0,0,0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:73:5528:1675:4345:1675:0,0,1,3,0:0,0,0,0,0:0,0,0,0,0:0,0,0,3,0:0,0,1,0,0:2598:1747:0,0,0,0,0:0,0,0,0,0:1010:665:0

file2
Code:
chr1    43814979    COSM27286    G    A    86.92679999999999    PASS    AF=0;AO=1;DP=5535;FAO=0;FDP=2000;FR=.,REALIGNEDx0.004;FRO=2000;FSAF=0;FSAR=0;FSRF=1213;FSRR=787;FWDB=0.0456208;FXX=0;HRUN=1;LEN=1;MLLD=67.7782;OALT=A;OID=COSM27286;OMAPALT=A;OPOS=43814979;OREF=G;PB=.;PBP=.;QD=0.173854;RBI=0.0521772;REFB=2.05804E-5;REVB=0.0253221;RO=5523;SAF=1;SAR=0;SRF=3401;SRR=2122;SSEN=0;SSEP=0;SSSB=0.00321242;STB=0.5;STBP=1;TYPE=snp;VARB=0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:86:5535:2000:5523:2000:1:0:0:0:1:3401:2122:0:0:1213:787:0
chr1    43814981    COSM27287    G    A    86.83350000000002    PASS    AF=0;AO=2;DP=5556;FAO=0;FDP=1999;FR=.;FRO=1999;FSAF=0;FSAR=0;FSRF=1239;FSRR=760;FWDB=-0.0139666;FXX=4.99998E-4;HRUN=1;LEN=1;MLLD=230.086;OALT=A;OID=COSM27287;OMAPALT=A;OPOS=43814981;OREF=G;PB=.;PBP=.;QD=0.173754;RBI=0.0152778;REFB=-9.34219E-7;REVB=-0.00619229;RO=5525;SAF=2;SAR=0;SRF=3391;SRR=2134;SSEN=0;SSEP=0;SSSB=0.00642119;STB=0.5;STBP=1;TYPE=snp;VARB=0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:86:5556:1999:5525:1999:2:0:0:0:2:3391:2134:0:0:1239:760:0
chr1    43815008    COSM29008;COSM43212;COSM19193;COSM27289;COSM28487    TGG    AAA,AAG,AGG,CGG,GCG    70.3099    PASS    AF=0,0,0,0,0;AO=0,0,1,3,0;DP=5528;FAO=0,0,0,0,0;FDP=1675;FR=.,.,.,.,.;FRO=1675;FSAF=0,0,0,0,0;FSAR=0,0,0,0,0;FSRF=1010;FSRR=665;FWDB=-0.0326268,-0.0568148,-0.0658984,-0.0526516,-0.0545445;FXX=0.162499;HRUN=1,1,1,1,1;LEN=3,2,1,1,2;MLLD=155.105,120.652,107.82,60.137,93.9951;OALT=AAA,AA,A,C,GC;OID=COSM28487,COSM19193,COSM29008,COSM43212,COSM27289;OMAPALT=AAA,AAG,AGG,CGG,GCG;OPOS=43815008,43815008,43815008,43815008,43815008;OREF=TGG,TG,T,T,TG;PB=.,.,.,.,.;PBP=.,.,.,.,.;QD=0.167904;RBI=0.0432053,0.0581938,0.0673244,0.0594109,0.0551872;REFB=0.00195961,3.64629E-5,0.00186523,0.00842498,0.00169447;REVB=-0.028323,-0.0125932,-0.0137832,-0.0275221,-0.00839827;RO=4345;SAF=0,0,1,0,0;SAR=0,0,0,3,0;SRF=2598;SRR=1747;SSEN=0,0,0,0,0;SSEP=0,0,0,0,0;SSSB=-6.91127E-8,-6.91127E-8,0.00419704,-0.0184325,-6.91127E-8;STB=0.5,0.5,0.5,0.5,0.5;STBP=1,1,1,1,1;TYPE=mnp,mnp,snp,snp,mnp;VARB=0,0,0,0,0;HS;FUNC=[{'transcript':'NM_005373.2','gene':'MPL','location':'exonic','exon':'10'}]    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/0:73:5528:1675:4345:1675:0,0,1,3,0:0,0,0,0,0:0,0,0,0,0:0,0,0,3,0:0,0,1,0,0:2598:1747:0,0,0,0,0:0,0,0,0,0:1010:665:0

awk
Code:
# Compare file1 and file2 on fields 1, 2 and 7 and write three sections
# ("Match:", "Missing in file1:", "Missing in file2:") to the file "out".
# Every reported line is fields 1, 2, 6 and 7 joined by single spaces.
awk '# Skip the first line of a file only when it is a header, i.e. when
     # field 2 is not an all-digit position. A file that starts directly
     # with a data record keeps that record instead of silently losing it.
     FNR == 1 && $2 !~ /^[0-9]+$/ { next }
     # FNR == NR holds only while the first file is being read.
     FNR == NR { file1[$1,$2,$7] = $1 " " $2 " " $6 " " $7 }
     FNR != NR { file2[$1,$2,$7] = $1 " " $2 " " $6 " " $7 }
     END {
           # file2[k] could be printed instead; the two texts differ only
           # when field 6 differs between the files.
           print "Match:"
           for (k in file1) if (k in file2) print file1[k]
           print "Missing in file1:"
           for (k in file2) if (!(k in file1)) print file2[k]
           print "Missing in file2:"
           for (k in file1) if (!(k in file2)) print file1[k]
     }' file1 file2 > out

desired output
Code:
Match:
chr1    43814981    86.83350000000002    PASS    FR=.
Missing in file1:
chr1    43814979    86.92679999999999    PASS    FR=.,REALIGNEDx0.004    
Missing in file2:
chr1    43814978    86.92679999999999    PASS    FR=.,REALIGNEDx0.008


Last edited by cmccabe; 07-25-2017 at 09:36 PM.. Reason: fixed format
# 2  
Old 07-26-2017
Hello cmccabe,

Could you please try the following and let me know if this helps you.
Code:
# Classify the records of Input_file1 and Input_file2 as common to both files
# or present in only one of them. Records are keyed on fields 1, 2 and 7; each
# report line is fields 1, 2, 6 and 7 joined by FS. The report goes to stdout.
awk '
     # Append item to a list of report lines joined by ORS (no leading ORS).
     function append(list, item) {
             return (list == "" ? item : list ORS item)
     }
     # First file: store the report line of every record under its key.
     FNR==NR{
             a[$1,$2,$7]=$1 FS $2 FS $6 FS $7;
             next
            }
     # Second file, key also present in the first file: record the match and
     # remove the key, so that END sees only the unmatched first-file records.
     (($1,$2,$7) in a){
             val_match=append(val_match, a[$1,$2,$7]);
             matched[$1,$2,$7]=1;
             delete a[$1,$2,$7];
             next
                      }
     # A repeated second-file record whose key already matched is not missing.
     (($1,$2,$7) in matched){
             next
                      }
     # Second file, key absent from the first file. Every such record is
     # appended, so earlier ones are no longer overwritten by later ones.
     {
             val_mismatch_in_file1=append(val_mismatch_in_file1, $1 FS $2 FS $6 FS $7)
     }
     END{
             # Whatever is left in a was never seen in the second file.
             for(i in a){
                     val_missing_in_file2=append(val_missing_in_file2, a[i])
             }
             print "Match:" ORS val_match ORS "Missing in File1:" ORS val_mismatch_in_file1 ORS "Missing in File2:" ORS val_missing_in_file2
     }
    '  Input_file1  Input_file2

Output will be as follows.
Code:
Match:
chr1 43814981 86.83350000000002 PASS
chr1 43815008 70.3099 PASS
Missing in File1:
chr1 43814979 86.92679999999999 PASS
Missing in File2:
chr1 43814978 86.92679999999999 PASS

Thanks,
R. Singh
This User Gave Thanks to RavinderSingh13 For This Post:
Login or Register to Ask a Question

Previous Thread | Next Thread

10 More Discussions You Might Find Interesting

1. Shell Programming and Scripting

awk to match file1 and extract specific tag values

File2 is tab-delimited and I am trying to use $2 in file1 (space-delimited) as a search term in file2. If it is found then the AF= and the FDP= values from file2 are extracted and printed next to the file1 line. I commented the awk; before I added the lines in bold the current output resulted. I... (7 Replies)
Discussion started by: cmccabe
7 Replies

2. Shell Programming and Scripting

Extract values in a line using awk

hello all, I need your help in extracting values of some parameter within a line using awk. for example: i have the below line available in a file and i want to extract the values of only CustomerId, s_PackageId and s_HZINumbers in order the result to be as ... (13 Replies)
Discussion started by: nael_najib
13 Replies

3. Shell Programming and Scripting

Add the html tag first and last line the file

Hi, i have 30 html files and i want to add the html tag first (<html>) and end of the line </html> tag..How to do it in script. Thanks, (7 Replies)
Discussion started by: bmk
7 Replies

4. Shell Programming and Scripting

Add markup tag and sequential number after specific line

Hello, This one has me a bit stumped. I have data the looks like, M END > <PREDICTION_ACCURACY> PROBABLE > <NO_OF_PARENTS> 3 > <CLOGP> -13.373 > <SMILES> OCC(O)C(OC1OC(CO)C(OC2OC(CO)C > <MIMW> 1006.322419888 (3 Replies)
Discussion started by: LMHmedchem
3 Replies

5. Shell Programming and Scripting

Extract XML tag value from file

Hello, Hope you are doing fine. I have an log file which looks like as follows: Some junk text1 Date: Thu Mar 15 13:38:46 CDT 2012 DATA SENT SUCCESSFULL: Some jun text 2 Date: Thu Mar 15 13:38:46 CDT 2012 DATA SENT SUCCESSFULL: ... (3 Replies)
Discussion started by: srattani
3 Replies

6. Shell Programming and Scripting

How to retrieve the value from XML tag whose end tag is in next line

Hi All, Find the following code: <Universal>D38x82j1JJ </Universal> I want to retrieve the value of <Universal> tag as below: Please help me. (3 Replies)
Discussion started by: mjavalkar
3 Replies

7. Shell Programming and Scripting

how to extract part of xml line via awk?

Hi, I like to set a variable "name" automatically by reading an xml file. My code looks like this: set name = `awk '/<generationTime>/,/<\/generationTime>/ p' $xml_name` the "name" is thus set to <generationTime>2004-12-01T08:23:50.000000</generationTime> How can I separate this line,... (3 Replies)
Discussion started by: friend
3 Replies

8. Shell Programming and Scripting

sed, awk [TAG]$content[/TAG] How to get var in $content in textfile?

Hello, I got a Qstion. Im posting to a phpbb forum with bash and curl.. i have a text file with the following tags that i post to the forum: $var1 $var2 $var3 How can i with sed or awk put var content from shell script between the ... in the... (7 Replies)
Discussion started by: atmosroll
7 Replies

9. UNIX for Dummies Questions & Answers

how extract certain value within a line using awk

hi if would like to get the phone number as an output, can you guide me here please <A>213444555</A><B><B>ABCDEFG</B> I just want to get the phone number from the file in between <A> and </A> Thanks (9 Replies)
Discussion started by: imran721
9 Replies

10. Shell Programming and Scripting

awk: need to extract a line before a pattern

Hello , I need your help to extract a line in a big file , and this line is always 11 lines before a specific pattern . Do you know a way via Awk ? Thanks in advance npn35 (17 Replies)
Discussion started by: npn35
17 Replies
Login or Register to Ask a Question