#!/bin/bash
# VERSION2 FIXED INTRON BUG, ADDED NGS_SANGER MODULE, UPDATED MATRIX IMPORT
#
# Start-up housekeeping: empties the work files left behind by a previous run
# before any function is called. Failures are reported on stderr but do not
# stop the script.

# clear batch entries (target.txt lives in the annovar directory)
if cd 'C:\Users\cmccabe\Desktop\annovar'; then
  : > target.txt
else
  printf 'warning: cannot enter the annovar directory; target.txt not cleared\n' >&2
fi

# clear the shared batch file. cd accepts a single directory operand, so the
# directory is given directly instead of as a second word after 'C:'.
cd C:/Users/cmccabe/Desktop/Python27/ ||
  printf 'warning: cannot enter the Python27 directory\n' >&2
: > C:/Users/cmccabe/Desktop/Python27/out.txt

# clear Sanger files. A redirection target must expand to exactly one word, so
# each file is truncated in its own redirection.
# NOTE(review): the *_sanger.txt pattern is taken from the files ngs() reads -
# confirm that these are the files meant here.
for sanger_file in C:/Users/cmccabe/Desktop/Python27/*_sanger.txt; do
  [ -e "$sanger_file" ] || continue   # pattern matched nothing
  : > "$sanger_file"
done
unset sanger_file
menu() {
  # Interactive main loop: prints the menu, reads one selection and dispatches
  # to the matching module function, then shows the menu again.
  # Arguments: none
  # Input:     one selection per line on standard input
  # Returns:   0 once standard input is exhausted
  local menu_choice
  while true; do
    # The format string spans several physical lines on purpose: the embedded
    # line breaks are part of the layout, so those lines stay unindented.
    printf "\n Welcome to target gene annotation, NGS sanger, and mutalyzer (v2), please make a selection from the MENU \n
==================================\n\n
\t 1 GJB2 analysis\n
\t 2 MECP2 analysis\n
\t 3 Phox2B analysis\n
\t 4 NGS sanger analysis\n
\t 5 Syntax checker\n
\t 6 Name checker\n
\t 7 Position converter\n\n
==================================\n\n"
    printf "\t Your choice: "
    # read fails at end-of-file. Without this guard the empty selection falls
    # into the "Invalid choice" arm and the loop never terminates. A final
    # line without a trailing newline is still processed.
    if ! read -r menu_choice && [[ -z "$menu_choice" ]]; then
      return 0
    fi
    case "$menu_choice" in
      1) gjb2 ;;
      2) mecp2 ;;
      3) phox2b ;;
      4) ngs ;;
      5) syntax ;;
      6) name ;;
      7) position ;;
      *) printf "\n Invalid choice."; sleep 2 ;;
    esac
  done
}
gjb2() {
  # Collects a patient id and a comma-separated list of coding variants and
  # appends one "NM_004004.5:<variant>" line per variant to the batch file
  # out.txt, then records "<id>.txt" through add2text and calls additional.
  # Globals:   id (written), variant (written)
  # Input:     two lines on standard input: the id, then the variant list
  # Returns:   early, without writing anything, when the id is empty or "end"
  local entry
  printf "\n\n"
  printf "What is the id of the patient getting GJB2 analysis : "; read -r id
  # Validate the id before asking for variants so nothing is typed in vain.
  if [ -z "$id" ]; then
    printf "\n No ID supplied. Leaving match function."
    sleep 2
    return
  fi
  if [ "$id" = "end" ]; then
    printf "\n Leaving match function."
    sleep 2
    return
  fi
  printf "Please enter the coding variant the following is an example"
  echo " c.274G>T"
  printf "variant(s), use a comma between multiple: "
  # Split on commas and on blanks so "a, b" and "a,b" give the same entries.
  IFS=$', \t' read -r -a variant
  for entry in "${variant[@]}"; do
    # An empty field (for example from ",,") must not become a bare
    # "NM_004004.5:" line in the batch file.
    [ -n "$entry" ] || continue
    printf 'NM_004004.5:%s\n' "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
  done
  add2text "${id}.txt"
  additional
}
mecp2() {
  # Collects a patient id and a comma-separated list of coding variants and
  # appends one "NM_004992.3:<variant>" line per variant to the batch file
  # out.txt, then records "<id>.txt" through add2text and calls additional.
  # Globals:   id (written), variant (written)
  # Input:     two lines on standard input: the id, then the variant list
  # Returns:   early, without writing anything, when the id is empty or "end"
  local entry
  printf "\n\n"
  printf "What is the id of the patient getting MECP2 analysis : "; read -r id
  # Validate the id before asking for variants so nothing is typed in vain.
  if [ -z "$id" ]; then
    printf "\n No ID supplied. Leaving match function."
    sleep 2
    return
  fi
  if [ "$id" = "end" ]; then
    printf "\n Leaving match function."
    sleep 2
    return
  fi
  printf "Please enter the coding variant the following is an example"
  echo " c.274G>T"
  printf "variant(s), use a comma between multiple: "
  # Split on commas and on blanks so "a, b" and "a,b" give the same entries.
  IFS=$', \t' read -r -a variant
  for entry in "${variant[@]}"; do
    # An empty field (for example from ",,") must not become a bare
    # "NM_004992.3:" line in the batch file.
    [ -n "$entry" ] || continue
    printf 'NM_004992.3:%s\n' "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
  done
  add2text "${id}.txt"
  additional
}
phox2b() {
  # Collects a patient id and a comma-separated list of coding variants and
  # appends one "NM_003924.3:<variant>" line per variant to the batch file
  # out.txt, then records "<id>.txt" through add2text and calls additional.
  # Globals:   id (written), variant (written)
  # Input:     two lines on standard input: the id, then the variant list
  # Returns:   early, without writing anything, when the id is empty or "end"
  local entry
  printf "\n\n"
  printf "What is the id of the patient getting Phox2B analysis : "; read -r id
  # Validate the id before asking for variants so nothing is typed in vain.
  if [ -z "$id" ]; then
    printf "\n No ID supplied. Leaving match function."
    sleep 2
    return
  fi
  if [ "$id" = "end" ]; then
    printf "\n Leaving match function."
    sleep 2
    return
  fi
  printf "Please enter the coding variant the following is an example"
  echo " c.274G>T"
  printf "variant(s), use a comma between multiple: "
  # Split on commas and on blanks so "a, b" and "a,b" give the same entries.
  IFS=$', \t' read -r -a variant
  for entry in "${variant[@]}"; do
    # An empty field (for example from ",,") must not become a bare
    # "NM_003924.3:" line in the batch file.
    [ -n "$entry" ] || continue
    printf 'NM_003924.3:%s\n' "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
  done
  add2text "${id}.txt"
  additional
}
additional() {
  # Asks whether another target-gene patient should be entered.
  #   y / Y -> back to menu
  #   n / N -> nomenclature
  # Any other answer does nothing and control returns to the caller.
  # Globals: id (re-assigned to its own value), match_choice (written)
  printf "\n\n"
  printf "Are there additonal target gene patients to be analyzed? Y/N "
  read match_choice
  if [[ $match_choice == [yY] ]]; then
    id=$id
    menu
  elif [[ $match_choice == [nN] ]]; then
    id=$id
    nomenclature
  fi
}
ngs() {
  # Opens the NGS_Sanger workbook through cmd, appends every *_sanger.txt file
  # found in the Python27 directory to the batch file out.txt, then calls
  # convert.
  # Returns: 1 if the Python27 directory cannot be entered, otherwise the
  #          status of convert
  local sanger_file
  cmd /C "start c:\\Users\\cmccabe\\Desktop\\Sanger\\NGS_Sanger.xlsm"
  # cd accepts a single directory operand; the former two-word form is an
  # error ("too many arguments") on current bash releases.
  cd C:/Users/cmccabe/Desktop/Python27/ || return 1
  for sanger_file in C:/Users/cmccabe/Desktop/Python27/*_sanger.txt; do
    # When nothing matches, the loop sees the literal pattern; skip it rather
    # than handing a non-existent name to cat.
    [ -f "$sanger_file" ] || continue
    cat -- "$sanger_file" >> c:/Users/cmccabe/Desktop/Python27/out.txt
  done
  convert
}
syntax() {
  # Reads a comma-separated list of HGVS descriptions, writes them one per
  # line to syntax.txt, runs run_batch_job.py in SyntaxChecker mode and shows
  # the result (minus its first line) for confirmation.
  #   y / Y -> clears id and calls other
  #   n / N -> deletes syntax_verify.txt and calls menu
  # Globals: hgvs (written), match_choice (written), id (cleared on "y")
  # Returns: early, before any job is run, when nothing was entered
  local entry
  local -a cleaned=()
  printf "\n\n"
  # clear old entries (cd accepts a single directory operand)
  cd C:/Users/cmccabe/Desktop/Python27/ || return 1
  : > C:/Users/cmccabe/Desktop/Python27/syntax.txt
  printf "Please enter HGVS description of variant(s), use a comma between multiple: "
  IFS="," read -r -a hgvs
  for entry in "${hgvs[@]}"; do
    # strip leading and trailing whitespace, drop empty fields
    entry="${entry#"${entry%%[![:space:]]*}"}"
    entry="${entry%"${entry##*[![:space:]]}"}"
    [ -n "$entry" ] && cleaned+=("$entry")
  done
  # The previous test compared the literal word "hgvs" and so never fired.
  if (( ${#cleaned[@]} == 0 )); then
    printf "\n Nothing entered. Leaving match function."
    sleep 2
    return
  fi
  # '%s\n' keeps the input out of the format string and puts each
  # description on its own line instead of running them together.
  printf '%s\n' "${cleaned[@]}" >> c:/Users/cmccabe/Desktop/Python27/syntax.txt
  # run python SyntaxChecker
  cd 'C:' || return 1
  C:/Users/cmccabe/Desktop/Python27/python.exe C:/Users/cmccabe/Desktop/Python27/run_batch_job.py C:/Users/cmccabe/Desktop/Python27/syntax.txt C:/Users/cmccabe/Desktop/Python27/syntax_verify.txt SyntaxChecker
  awk 'NR>1' c:/Users/cmccabe/Desktop/Python27/syntax_verify.txt
  printf "Is this correct? Y/N "; read -r match_choice
  case "$match_choice" in
    [yY]) id=""; other ;;
    [nN]) rm -f -- c:/Users/cmccabe/Desktop/Python27/syntax_verify.txt; menu ;;
  esac
}
name() {
  # Name-checker module. Asks for a gene name and a comma-separated variant
  # list, appends the entries to the batch file out.txt, runs
  # run_batch_job.py in NameChecker mode and shows a summary of out_name.txt.
  #   y / Y -> clears id and calls other
  #   n / N -> drops the entries reported with "variantchecker" from out.txt,
  #            then continues with annotation if such a function is defined,
  #            otherwise with menu
  # Globals: gene, code, variants, match_choice (written); id (cleared on "y")
  local entry tmp_file restore_flag
  printf "\n\n"
  printf "Please enter the gene name: "; read -r gene
  # Enable case-insensitive matching for the gene lookup only and put the
  # option back afterwards, so later pattern matches are not affected.
  if shopt -q nocasematch; then restore_flag=-s; else restore_flag=-u; fi
  shopt -s nocasematch
  case "$gene" in
    GJB2)
      code="NM_004004.5"
      ;;
    MECP2)
      code="NM_004992.3"
      ;;
    Phox2B)
      code="NM_003924.3"
      ;;
    *) # everything else is not a target gene: no transcript prefix is added
      # Reset code so a prefix chosen in an earlier call is not reused.
      code=""
      printf "%s \n" "This is not a target gene, or is a intronic variant, Please use one of the following formats to enter variant"
      echo "
NM_003002.3:c.274G>T (reference transcript:coding change w/ nucleotide change)
chr11:g.111959693G>T (chromosome:genomic position w/ nucleotide change)
NC_000011.9:g.111959693G>T (Genebank accession #:genomic position w/ nucleotide change)"
      ;;
  esac
  shopt "$restore_flag" nocasematch
  printf "%s \n" "Please enter variant(s), use a comma between multiple: "
  # Split on commas and blanks; IFS is changed for this read only.
  IFS=$', \t' read -r -a variants
  for entry in "${variants[@]}"; do
    [ -n "$entry" ] || continue
    if [ -n "$code" ]; then
      # transcript and change are joined with ":" like the entries written by
      # the GJB2 / MECP2 / Phox2B modules to the same file
      printf "%s:%s\n" "$code" "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
    else
      printf "%s\n" "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
    fi
  done
  # run python NameChecker
  printf "\n\n"
  cd 'C:' || return 1
  C:/Users/cmccabe/Desktop/Python27/python.exe C:/Users/cmccabe/Desktop/Python27/run_batch_job.py C:/Users/cmccabe/Desktop/Python27/out.txt C:/Users/cmccabe/Desktop/Python27/out_name.txt NameChecker
  # confirm name and additional?
  awk 'NR>1 { if ($2 ~ /^\(/ ) {$1=""; print "Found error: ", $0} else { sub(/.*:/, "", $1); sub(/.*:/, "", $7); print "No error: " $1 "," $7}}' C:/Users/cmccabe/Desktop/Python27/out_name.txt
  printf "Is the variant correct? Y/N "; read -r match_choice
  case "$match_choice" in
    [yY]) id=""; other ;;
    [nN])
      cd C:/Users/cmccabe/Desktop/Python27/ || return 1
      # The entries to drop are identified in out_name.txt (as in check).
      # The result goes through a temporary file: redirecting straight onto
      # out.txt would truncate it before awk has read it.
      tmp_file=$(mktemp) || return 1
      if awk '/variantchecker/ {r[$1]} FNR==NR {next} !($1 in r)' C:/Users/cmccabe/Desktop/Python27/out_name.txt C:/Users/cmccabe/Desktop/Python27/out.txt > "$tmp_file"; then
        cat -- "$tmp_file" > C:/Users/cmccabe/Desktop/Python27/out.txt
      fi
      rm -f -- "$tmp_file"
      # annotation is not defined in this file; fall back to the menu, which
      # is what check does after the same clean-up.
      if declare -F annotation > /dev/null; then
        annotation
      else
        menu
      fi
      ;;
  esac
}
position() {
  # Position-converter module. Reads a comma-separated list of variant
  # descriptions, appends them as typed to the batch file out.txt, runs
  # run_batch_job.py in PositionConverter mode, turns the result into
  # tab-separated "chr start end ref alt" rows and prints them.
  #   y / Y -> empties out.txt, removes the intermediate files, calls menu
  #   n / N -> same clean-up, then exits the script
  # Globals: variants, match_choice (written)
  local entry
  printf "\n\n"
  printf "%s \n" "Please use one of the following formats to enter variant"
  echo "
NM_003002.3:c.274G>T (reference transcript:coding change w/ nucleotide change)
chr11:g.111959693G>T (chromosome:genomic position w/ nucleotide change)
NC_000011.9:g.111959693G>T (Genebank accession #:genomic position w/ nucleotide change)"
  printf "%s \n" "Please enter variant(s), use a comma between multiple: "
  # Split on commas and blanks; IFS is changed for this read only.
  IFS=$', \t' read -r -a variants
  for entry in "${variants[@]}"; do
    [ -n "$entry" ] || continue
    # The user types complete descriptions here, so nothing is prepended
    # (this function never sets a transcript prefix of its own).
    printf "%s\n" "$entry" >> c:/Users/cmccabe/Desktop/Python27/out.txt
  done
  # run python PositionConverter
  printf "\n\n"
  cd 'C:' || return 1
  C:/Users/cmccabe/Desktop/Python27/python.exe C:/Users/cmccabe/Desktop/Python27/run_batch_job.py C:/Users/cmccabe/Desktop/Python27/out.txt C:/Users/cmccabe/Desktop/Python27/out_position.txt PositionConverter
  # parse output (the first line of the result file is skipped)
  perl -ne 'next if $. == 1;
if (/.*del([A-Z]+)ins([A-Z]+).*NC_0{4}([0-9]+).*g\.([0-9]+)_([0-9]+)/) { # indel
  print join("\t", $3, $4, $5, $1, $2), "\n";
}
else {
  while (/\t*NC_(\d+)\.\S+g\.(\d+)(\S+)/g) { # conditional parse
    # fresh variables for every match, so an unrecognised change cannot
    # inherit ref/alt from the previous match
    my ($num1, $num2, $common) = ($1, $2, $3);
    my ($num3, $ch1, $ch2) = ($num2, "", "");
    if    ($common =~ /^([A-Z])>([A-Z])$/)    { ($ch1, $ch2) = ($1, $2) }             # SNP
    elsif ($common =~ /^del([A-Z])$/)         { ($ch1, $ch2) = ($1, "-") }            # deletion
    elsif ($common =~ /^ins([A-Z])$/)         { ($ch1, $ch2) = ("-", $1) }            # insertion
    elsif ($common =~ /^_(\d+)del([A-Z]+)$/)  { ($num3, $ch1, $ch2) = ($1, $2, "-") } # multi deletion
    # insertion given as a range: ref is "-", alt is the inserted bases and
    # start/end stay equal, as in the single-base insertion branch
    # NOTE(review): confirm this is the coordinate convention wanted here
    elsif ($common =~ /^_(\d+)ins([A-Z]+)$/)  { ($ch1, $ch2) = ("-", $2) }            # multi insertion
    printf("%d\t%d\t%d\t%s\t%s\n", $num1, $num2, $num3, $ch1, $ch2); # output
  }
}
' C:/Users/cmccabe/Desktop/Python27/out_position.txt > C:/Users/cmccabe/Desktop/Python27/out_parse.txt
  # display coordinates
  printf "The genomic coordinates are: "
  cat C:/Users/cmccabe/Desktop/Python27/out_parse.txt
  # additional?
  printf "Are there additional things to do? Y/N "; read -r match_choice
  case "$match_choice" in
    [yY])
      : > C:/Users/cmccabe/Desktop/Python27/out.txt
      rm -f -- C:/Users/cmccabe/Desktop/Python27/out_parse.txt
      rm -f -- C:/Users/cmccabe/Desktop/Python27/out_position.txt
      menu
      ;;
    [nN])
      : > C:/Users/cmccabe/Desktop/Python27/out.txt
      rm -f -- C:/Users/cmccabe/Desktop/Python27/out_parse.txt
      rm -f -- C:/Users/cmccabe/Desktop/Python27/out_position.txt
      printf "\n Goodbye! "
      sleep 2 && exit
      ;;
  esac
}
nomenclature() {
  # Runs run_batch_job.py in NameChecker mode (input out.txt, output
  # out_name.txt, both in the Python27 directory) and then calls check.
  local py_home="C:/Users/cmccabe/Desktop/Python27"
  local -a job=(
    "${py_home}/python.exe"
    "${py_home}/run_batch_job.py"
    "${py_home}/out.txt"
    "${py_home}/out_name.txt"
    NameChecker
  )
  printf '\n\n'
  cd 'C:'
  "${job[@]}"
  check
}
other() {
  # Asks whether anything else should be done.
  #   y / Y -> clears id and returns to menu
  #   n / N -> says goodbye, waits two seconds and exits the script
  # Any other answer does nothing and control returns to the caller.
  # Globals: id (written), match_choice (written)
  printf "\n\n"
  printf "Are there other things to do? Y/N "
  read match_choice
  if [[ $match_choice == [yY] ]]; then
    id=""
    menu
  elif [[ $match_choice == [nN] ]]; then
    # id receives a literal dollar sign here; the script exits right after
    id='$'
    printf "\n Goodbye! "
    sleep 2 && exit
  fi
}
check() {
  # Shows a summary of out_name.txt and asks whether the variants are right.
  #   y / Y -> calls convert
  #   n / N -> removes from out.txt every entry whose out_name.txt line
  #            mentions "variantchecker", deletes out_name.txt, calls menu
  # Globals: match_choice (written), id (re-assigned to its own value)
  local batch_file=c:/Users/cmccabe/Desktop/Python27/out.txt
  local name_file=c:/Users/cmccabe/Desktop/Python27/out_name.txt
  local tmp_file
  printf "\n\n"
  awk 'NR>1 { if ($2 ~ /^\(/ ) {$1=""; print "Found error: ", $0} else { sub(/.*:/, "", $1); sub(/.*:/, "", $7); print "No error: " $1 "," $7}}' C:/Users/cmccabe/Desktop/Python27/out_name.txt
  printf "Is the variant correct? Y/N "; read -r match_choice
  case "$match_choice" in
    [yY]) id="${id}"; convert ;;
    [nN])
      # cd accepts a single directory operand
      cd C:/Users/cmccabe/Desktop/Python27/ || return 1
      # Filter through a temporary file: redirecting awk straight onto
      # out.txt truncates the file before awk reads it, which emptied it.
      tmp_file=$(mktemp) || return 1
      if awk '/variantchecker/ {r[$1]} FNR==NR {next} !($1 in r)' "$name_file" "$batch_file" > "$tmp_file"; then
        cat -- "$tmp_file" > "$batch_file"
      fi
      rm -f -- "$tmp_file"
      rm -f -- "$name_file"
      menu
      ;;
  esac
}
convert() {
  # Runs run_batch_job.py in PositionConverter mode: input is out.txt in the
  # Python27 directory, output is out_position.txt in the annovar directory.
  # Calls parse afterwards.
  local py_home="C:/Users/cmccabe/Desktop/Python27"
  local -a job=(
    "${py_home}/python.exe"
    "${py_home}/run_batch_job.py"
    "${py_home}/out.txt"
    "C:/Users/cmccabe/Desktop/annovar/out_position.txt"
    PositionConverter
  )
  printf '\n\n'
  cd 'C:'
  "${job[@]}"
  parse
}
parse() {
  # Converts out_position.txt (annovar directory) into tab-separated
  # "chr start end ref alt" rows in out_parse.txt, then calls annovar.
  # The first line of the input file is skipped.
  # Returns: 1 if the annovar directory cannot be entered, otherwise the
  #          status of annovar
  printf "\n\n"
  cd 'C:\Users\cmccabe\Desktop\annovar' || return 1
  perl -ne 'next if $. == 1;
if (/.*del([A-Z]+)ins([A-Z]+).*NC_0{4}([0-9]+).*g\.([0-9]+)_([0-9]+)/) { # indel
  print join("\t", $3, $4, $5, $1, $2), "\n";
}
else {
  while (/\t*NC_(\d+)\.\S+g\.(\d+)(\S+)/g) { # conditional parse
    # fresh variables for every match, so an unrecognised change cannot
    # inherit ref/alt from the previous match
    my ($num1, $num2, $common) = ($1, $2, $3);
    my ($num3, $ch1, $ch2) = ($num2, "", "");
    if    ($common =~ /^([A-Z])>([A-Z])$/)    { ($ch1, $ch2) = ($1, $2) }             # SNP
    elsif ($common =~ /^del([A-Z])$/)         { ($ch1, $ch2) = ($1, "-") }            # deletion
    elsif ($common =~ /^ins([A-Z])$/)         { ($ch1, $ch2) = ("-", $1) }            # insertion
    elsif ($common =~ /^_(\d+)del([A-Z]+)$/)  { ($num3, $ch1, $ch2) = ($1, $2, "-") } # multi deletion
    # insertion given as a range: ref is "-", alt is the inserted bases and
    # start/end stay equal, as in the single-base insertion branch
    # NOTE(review): confirm against the input format table_annovar.pl expects
    elsif ($common =~ /^_(\d+)ins([A-Z]+)$/)  { ($ch1, $ch2) = ("-", $2) }            # multi insertion
    printf("%d\t%d\t%d\t%s\t%s\n", $num1, $num2, $num3, $ch1, $ch2); # output
  }
}
' C:/Users/cmccabe/Desktop/annovar/out_position.txt > C:/Users/cmccabe/Desktop/annovar/out_parse.txt
  annovar
}
annovar() {
  # For every file name listed in target.txt: copies out_parse.txt to that
  # name and runs table_annovar.pl on it. Then asks whether more patients
  # follow.
  #   y / Y -> menu
  #   n / N -> remove
  # Globals: match_choice (written), id (re-assigned to its own value)
  # Returns: 1 if the annovar directory cannot be entered
  local target
  # combine id and position files
  cd 'C:\Users\cmccabe\Desktop\annovar' || return 1
  # target.txt is read on descriptor 3 so the commands inside the loop keep
  # the normal standard input.
  while IFS= read -r target <&3 || [ -n "$target" ]; do
    target=${target%$'\r'}          # tolerate CRLF line endings
    [ -n "$target" ] || continue
    # One copy per listed name; a single cp with the whole file content as
    # its destination only works while target.txt holds exactly one line.
    cp -- out_parse.txt "$target"
    # run annotation. The command is run directly: wrapping it in $( ) would
    # execute whatever it prints as a further command, and building a shell
    # string around the file name would break on special characters.
    perl table_annovar.pl "$target" humandb/ -buildver hg19 -protocol refGene,popfreq_all,common,clinvar,clinvarsubmit,clinvarreference -operation g,f,f,f,f,f
  done 3< target.txt
  printf "The annotation is complete, would you like analyze additional target gene patients? Y/N "; read -r match_choice
  case "$match_choice" in
    [yY]) id="${id}"; menu ;;
    [nN]) id="${id}"; remove ;;
  esac
}
add2text() {
  # Appends one line to target.txt after changing into the annovar directory.
  # Arguments: $1 - the text to append (callers pass "<id>.txt")
  local entry=$1
  cd 'C:\Users\cmccabe\Desktop\annovar'
  {
    echo "$entry"
  } >> target.txt
}
remove() {
  # Deletes the intermediate files of the current run: the two parser files
  # in the annovar directory, the per-patient files named after $id (listed
  # below; the hg19_multianno.txt result is not among them) and out_name.txt
  # in the Python27 directory. Calls matrix afterwards.
  # Globals: id (read)
  local suffix
  local -a annovar_suffixes=(
    .txt
    .txt.refGene.log
    .txt.hg19_clinvar_dropped
    .txt.hg19_clinvar_filtered
    .txt.hg19_clinvarreference_dropped
    .txt.hg19_clinvarreference_filtered
    .txt.hg19_clinvarsubmit_dropped
    .txt.hg19_clinvarsubmit_filtered
    .txt.hg19_common_dropped
    .txt.hg19_common_filtered
    .txt.hg19_popfreq_all_dropped
    .txt.hg19_popfreq_all_filtered
    .txt.refGene.variant_function
    .txt.refGene.exonic_variant_function
    .txt.log
  )
  printf '\n\n'
  printf 'Removing old files, please wait '
  # parser input and output
  rm 'C:\Users\cmccabe\Desktop\annovar\out_position.txt'
  rm 'C:\Users\cmccabe\Desktop\annovar\out_parse.txt'
  # per-patient files, one rm per file in the listed order
  cd 'C:\Users\cmccabe\Desktop\annovar'
  for suffix in "${annovar_suffixes[@]}"; do
    rm "${id}${suffix}"
  done
  # name-checker result
  cd 'C:'
  rm c:/Users/cmccabe/Desktop/Python27/out_name.txt
  printf '\n Old files removed, formatting for matrix, please wait '
  matrix
}
matrix() {
  # Runs matrix.pl on "<id>.txt.hg19_multianno.txt" in the annovar directory,
  # writing matrix_<id>.txt under L:/NGS/3_BUSINESS/Matrix/Torrent, then asks
  # whether more patients follow.
  #   y / Y -> deletes the multianno file and calls menu
  #   n / N -> deletes the multianno file, says goodbye and exits the script
  # Any other answer leaves the file in place and returns to the caller.
  # Globals: id (read), match_choice (written)
  local multianno="${id}.txt.hg19_multianno.txt"
  cd 'C:\Users\cmccabe\Desktop\annovar'
  perl matrix.pl "$multianno" "L:/NGS/3_BUSINESS/Matrix/Torrent/matrix_${id}.txt"
  printf "Process complete and new file saved in L:/NGS/3_BUSINESS/Matrix/Torrent/matrix_${id}.txt, Are there additional target gene patients? Y/N "
  read match_choice
  if [[ $match_choice == [yY] ]]; then
    id=$id
    rm "C:/Users/cmccabe/Desktop/annovar/${multianno}"
    menu
  elif [[ $match_choice == [nN] ]]; then
    id=$id
    rm "C:/Users/cmccabe/Desktop/annovar/${multianno}"
    printf "\n Goodbye! "
    sleep 2 && exit
  fi
}
# actual start of this program: everything above this point only defines
# functions (plus the start-up clean-up); this call starts the interactive loop
menu # run menu function