Repetitive scripts within a bash shell


 
Thread Tools Search this Thread
Top Forums UNIX for Dummies Questions & Answers Repetitive scripts within a bash shell
# 1  
Old 09-04-2011
Repetitive scripts within a bash shell

I have a bash script that, even though it does not look pretty, works very well. Some of the steps are repetitive, something like this:
Code:
	# Round 1 ($e.4 -> $e.8): run muscle on ${e}.4 and capture its stdout in $e.5.
	muscle -in ${e}.4 > $e.5 
	# read -t1 gives up after 1 s if no line arrives on stdin; presumably used as a pause -- confirm.
	read -t1
	# Merge consecutive lines that do not start with ">" into a single line.
	sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' $e.5 > $e.6 
	read -t2
	# First awk: for every line containing ">" take field 3 as a weight, read the next line and
	# add the weight per (position, character); at END print, per position, the character with
	# the largest total (ties depend on awk's array iteration order).
	# Second awk: load that list from stdin ("-"), then replace every "-" in each line of $e.6
	# with the character listed for its position.
	awk '/>/{fr=$3;getline;n=split ($0,a,""); for (i=1;i<=n;i++) b[i"-"a[i]]+=fr}\
	END{for (i in b) {split (i,c,"-"); if (d[c[1]]<=b[i]){e[c[1]]=c[2];d[c[1]]=b[i]}}\
	for (i in e) print i" "e[i]}' $e.6 | awk 'NR==FNR{a[$1]=$2;next}\
	{n=split($0,b,"");for (i=1;i<=n;i++) if (b[i]=="-") b[i]=a[i]; for (i=1;i<=n;i++) printf b[i];\
	printf "\n"}' - $e.6 > $e.7 
	read -t2
	# Records are split on ">": sum field 3 per distinct field 4 and print one
	# ">name Freq total" record per distinct field 4 (name = field 1 of the last record seen).
	awk -vRS=">" 'length($0)>0{a[$4]+=$3;b[$4]=$1}END{for (i in a) printf ">"b[i]" Freq "a[i]"\n"i"\n"}' $e.7 > $e.8 
	# Round 2 ($e.8 -> $e.12): same steps, but with no pause between muscle and sed.
	muscle -in ${e}.8 > $e.9 
	sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' $e.9 > $e.10 
	read -t2
	awk '/>/{fr=$3;getline;n=split ($0,a,""); for (i=1;i<=n;i++) b[i"-"a[i]]+=fr}\
	END{for (i in b) {split (i,c,"-"); if (d[c[1]]<=b[i]){e[c[1]]=c[2];d[c[1]]=b[i]}}\
	for (i in e) print i" "e[i]}' $e.10 | awk 'NR==FNR{a[$1]=$2;next}\
	{n=split($0,b,"");for (i=1;i<=n;i++) if (b[i]=="-") b[i]=a[i]; for (i=1;i<=n;i++) printf b[i];\
	printf "\n"}' - $e.10 > $e.11 
	read -t2
	awk -vRS=">" 'length($0)>0{a[$4]+=$3;b[$4]=$1}END{for (i in a) printf ">"b[i]" Freq "a[i]"\n"i"\n"}' $e.11 > $e.12 
	# Round 3 ($e.12 -> $e.16): same steps as round 2.
	muscle -in ${e}.12 > $e.13 
	sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' $e.13 > $e.14 
	read -t2
	awk '/>/{fr=$3;getline;n=split ($0,a,""); for (i=1;i<=n;i++) b[i"-"a[i]]+=fr}\
	END{for (i in b) {split (i,c,"-"); if (d[c[1]]<=b[i]){e[c[1]]=c[2];d[c[1]]=b[i]}}\
	for (i in e) print i" "e[i]}' $e.14 | awk 'NR==FNR{a[$1]=$2;next}\
	{n=split($0,b,"");for (i=1;i<=n;i++) if (b[i]=="-") b[i]=a[i]; for (i=1;i<=n;i++) printf b[i];\
	printf "\n"}' - $e.14 > $e.15 
	read -t2
	awk -vRS=">" 'length($0)>0{a[$4]+=$3;b[$4]=$1}END{for (i in a) printf ">"b[i]" Freq "a[i]"\n"i"\n"}' $e.15 > $e.16 
	# Round 4 ($e.16 -> $e.20): same steps as round 2.
	muscle -in ${e}.16 > $e.17 
	sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' $e.17 > $e.18 
	read -t2
	awk '/>/{fr=$3;getline;n=split ($0,a,""); for (i=1;i<=n;i++) b[i"-"a[i]]+=fr}\
	END{for (i in b) {split (i,c,"-"); if (d[c[1]]<=b[i]){e[c[1]]=c[2];d[c[1]]=b[i]}}\
	for (i in e) print i" "e[i]}' $e.18 | awk 'NR==FNR{a[$1]=$2;next}\
	{n=split($0,b,"");for (i=1;i<=n;i++) if (b[i]=="-") b[i]=a[i]; for (i=1;i<=n;i++) printf b[i];\
	printf "\n"}' - $e.18 > $e.19 
	read -t2
	awk -vRS=">" 'length($0)>0{a[$4]+=$3;b[$4]=$1}END{for (i in a) printf ">"b[i]" Freq "a[i]"\n"i"\n"}' $e.19 > $e.20

Which in reality is 4 steps repeated 4 times:
Code:
	# Step 1: run muscle on ${e}.4 and capture its stdout in $e.5.
	muscle -in ${e}.4 > $e.5 
	# read -t1 gives up after 1 s if no line arrives on stdin; presumably used as a pause -- confirm.
	read -t1
	# Step 2: merge consecutive lines that do not start with ">" into a single line.
	sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' $e.5 > $e.6 
	read -t2
	# Step 3: the first awk prints, per character position, the character with the largest summed
	# weight (field 3 of the preceding ">" line); the second awk reads that list from stdin ("-")
	# and replaces every "-" in each line of $e.6 with the character listed for its position.
	awk '/>/{fr=$3;getline;n=split ($0,a,""); for (i=1;i<=n;i++) b[i"-"a[i]]+=fr}\
	END{for (i in b) {split (i,c,"-"); if (d[c[1]]<=b[i]){e[c[1]]=c[2];d[c[1]]=b[i]}}\
	for (i in e) print i" "e[i]}' $e.6 | awk 'NR==FNR{a[$1]=$2;next}\
	{n=split($0,b,"");for (i=1;i<=n;i++) if (b[i]=="-") b[i]=a[i]; for (i=1;i<=n;i++) printf b[i];\
	printf "\n"}' - $e.6 > $e.7 
	read -t2
	# Step 4: records are split on ">"; sum field 3 per distinct field 4 and print one
	# ">name Freq total" record per distinct field 4 (name = field 1 of the last record seen).
	awk -vRS=">" 'length($0)>0{a[$4]+=$3;b[$4]=$1}END{for (i in a) printf ">"b[i]" Freq "a[i]"\n"i"\n"}' $e.7 > $e.8

I am trying to simplify it but I am not quite sure how to go about it.
Any help will be very much appreciated
# 2  
Old 09-04-2011
You can try creating a function that holds those repetitive commands. Check if it will work for you:
Code:
#######################################
# Run one round of the pipeline on the numbered work files of prefix $e:
#   $e.N   --muscle-->            $e.N+1
#   $e.N+1 --merge wrapped lines--> $e.N+2
#   $e.N+2 --fill "-" gaps-->     $e.N+3
#   $e.N+3 --merge duplicates-->  $e.N+4
# Globals:   e (read) - path prefix shared by all work files
# Arguments: $1 - number N of the input file ($e.N)
# Outputs:   writes $e.(N+1) through $e.(N+4)
# Returns:   exit status of the final awk
#######################################
function f {
        # Locals keep the caller's variables (notably a loop counter "i") intact.
        local start=$1
        local src="${e}.${start}"
        local aligned="${e}.$(( start + 1 ))"
        local unwrapped="${e}.$(( start + 2 ))"
        local filled="${e}.$(( start + 3 ))"
        local merged="${e}.$(( start + 4 ))"

        muscle -in "$src" > "$aligned"

        # read -t gives up after the timeout when nothing arrives on stdin;
        # presumably these act as pauses between steps -- confirm they are needed.
        read -t 1

        # Merge consecutive lines that do not start with ">" into one line.
        sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' "$aligned" > "$unwrapped"

        read -t 2

        # Stage 1: weight (field 3 of each ">" line) is summed per
        # (position, character) of the following line; the heaviest character
        # per position is printed as "position character".
        # Stage 2: reads that list from stdin ("-"), then replaces every "-"
        # in each line of the file with the character listed for its position.
        awk '
                />/ {
                        weight = $3
                        getline
                        n = split($0, ch, "")
                        for (pos = 1; pos <= n; pos++)
                                total[pos "-" ch[pos]] += weight
                }
                END {
                        for (key in total) {
                                split(key, part, "-")
                                if (best[part[1]] <= total[key]) {
                                        winner[part[1]] = part[2]
                                        best[part[1]] = total[key]
                                }
                        }
                        for (pos in winner)
                                print pos " " winner[pos]
                }
        ' "$unwrapped" | awk '
                NR == FNR { fill[$1] = $2; next }
                {
                        n = split($0, ch, "")
                        for (pos = 1; pos <= n; pos++) {
                                if (ch[pos] == "-")
                                        ch[pos] = fill[pos]
                                # "%s" keeps a "%" in the data from being read as a format.
                                printf "%s", ch[pos]
                        }
                        printf "\n"
                }
        ' - "$unwrapped" > "$filled"

        read -t 2

        # Records are split on ">": field 3 is summed per distinct field 4, and
        # one record is printed per distinct field 4 using field 1 of the last
        # record seen as its name.
        awk -v RS='>' '
                length($0) > 0 { count[$4] += $3; name[$4] = $1 }
                END {
                        for (seq in count)
                                printf ">%s Freq %s\n%s\n", name[seq], count[seq], seq
                }
        ' "$filled" > "$merged"
}

Then call it in your script like this:
Code:
# Four rounds; each round starts from the file number passed to f.
for first_file in 4 8 12 16; do
  f "$first_file"
done

# 3  
Old 09-05-2011
Standard refactoring steps:
1. Format your code - http://www.linux-kheops.com/doc/perl...ert/fmt.script and gawk --profile can help (here you only need gawk's pretty-printing facilities).
2. Rename variables.
3. Extract functions,
4. Comment.
5. Do everything in small steps, test continuously and save your changes in git (it's really easy and very, very useful).

In addition to bartus11:
Code:
# Prints file $1 with consecutive lines that do not start with ">" merged
# into a single line.
_f_unwrap() {
  sed ':a /^>/!N;s/\n\([^>]\)/\1/;ta' "$1"
}

# Prints "position character" lines for file $1: field 3 of every line
# containing ">" is summed per (position, character) of the line after it,
# and the character with the largest total wins each position.
_f_consensus() {
  awk '
    />/ {
      weight = $3
      getline
      n = split($0, ch, "")
      for (pos = 1; pos <= n; pos++) {
        total[(pos "-" ch[pos])] += weight
      }
    }
    END {
      for (key in total) {
        split(key, part, "-")
        # "<=" lets a later key with an equal total replace the earlier one.
        if (best[part[1]] <= total[key]) {
          winner[part[1]] = part[2]
          best[part[1]] = total[key]
        }
      }
      for (pos in winner) {
        print (pos " " winner[pos])
      }
    }
  ' "$1"
}

# Prints file $1 with every "-" replaced by the consensus character for its
# position. The consensus list arrives on stdin ("-") and is read first.
_f_fill_gaps() {
  _f_consensus "$1" | awk '
    NR == FNR { fill[$1] = $2; next }
    {
      n = split($0, ch, "")
      for (pos = 1; pos <= n; pos++) {
        if (ch[pos] == "-") {
          ch[pos] = fill[pos]
        }
        printf "%s", ch[pos]
      }
      printf "\n"
    }
  ' - "$1"
}

# Prints one ">name Freq total" record per distinct field 4 of file $1
# (records split on ">"); total is the sum of field 3, name is field 1 of
# the last record seen with that field 4.
_f_merge_duplicates() {
  awk -v RS='>' '
    length($0) > 0 {
      count[$4] += $3
      name[$4] = $1
    }
    END {
      for (seq in count) {
        printf ">%s Freq %s\n%s\n", name[seq], count[seq], seq
      }
    }
  ' "$1"
}

#######################################
# Run one round on the numbered work files PREFIX.N .. PREFIX.N+4:
# muscle, merge wrapped lines, fill "-" gaps, merge duplicate records.
# Arguments: $1 - number N of the input file (PREFIX.N)
#            $2 - PREFIX, the path prefix of the work files
# Outputs:   writes PREFIX.(N+1) through PREFIX.(N+4)
# Returns:   exit status of the last step
#######################################
function f {
  local start=$1
  local prefix=$2

  muscle -in "${prefix}.${start}" > "${prefix}.$(( start + 1 ))"

  # NOTE(review): read -t only waits for stdin until the timeout; these
  # look like pauses between steps -- confirm whether any is really needed.
  read -t 1
  _f_unwrap "${prefix}.$(( start + 1 ))" > "${prefix}.$(( start + 2 ))"

  read -t 2
  _f_fill_gaps "${prefix}.$(( start + 2 ))" > "${prefix}.$(( start + 3 ))"

  read -t 2
  _f_merge_duplicates "${prefix}.$(( start + 3 ))" > "${prefix}.$(( start + 4 ))"
}

You can split this function further (into aliases, functions, command variables) to define small steps that are easy to understand and test.

===

Well, after refactoring you should change this function to something like this (names should be renamed, of course):

Code:
# Target shape for one round: each step is a command that reads stdin and
# writes stdout, chained in a single pipeline from INPUTFILE to OUTPUTFILE.
# The step names and INPUTFILE/OUTPUTFILE are placeholders to be renamed.
# NOTE(review): the gap-filling awk step reads its input file twice, so as a
# pure filter it would presumably need to buffer stdin -- confirm.
step1_muscle_command INPUTFILE | 
step2_sed_processing  | 
step3_awk1_processing | 
step4_awk2_processing >OUTPUTFILE

and wrap this in a loop.

Last edited by yazu; 09-05-2011 at 01:25 AM..
 
Login or Register to Ask a Question

Previous Thread | Next Thread

9 More Discussions You Might Find Interesting

1. Shell Programming and Scripting

Usage of #!/bin/sh vs #!/bin/bash shell scripts?

Some question about the usage of shell scripts: 1.) Are the commands of the base shell scripts a subset of bash commands? 2.) Assume I got a long, long script WITHOUT the first line. How can I find out if the script was originally designed für "sh" or "bash"? 3.) How can I check a given... (3 Replies)
Discussion started by: pstein
3 Replies

2. Shell Programming and Scripting

Difference between kshell and bash shell scripts Example cited

Hi All, I need some urgent help regarding some info. I have a cluster of servers for which I have two scripts for management. control.sh is a bash script meant for restarting/stopping the servers. manger.ksh is a kshell script. It is a master script to manage restarting/stoppping and... (3 Replies)
Discussion started by: ankur328
3 Replies

3. Homework & Coursework Questions

Bash shell scripts

Use and complete the template provided. The entire template must be completed. If you don't, your post may be deleted! 1. The problem statement, all variables and given/known data: Test that exactly one command line argrument is enter from the command line. If not, display the usage... (1 Reply)
Discussion started by: jcoop12
1 Replies

4. Shell Programming and Scripting

Paid job for bash shell scripts

Job ad removed (1 Reply)
Discussion started by: starmation
1 Replies

5. Shell Programming and Scripting

Writing Bash shell scripts corresponding to windows bat files

Experts, I am newbie in shell scripting. I want to write Bash shell scripts corresponding to windows bat files. I have installed cygwin at c:\cygwin and i am trying to crate the sh file using vi editor. i am not able to understand how to use linux/unix convention for the code. following is my... (15 Replies)
Discussion started by: rajuchacha007
15 Replies

6. Shell Programming and Scripting

Create Bash shell scripts corresponding to windows bat files

Experts, I am newbie in shell scripting. I want to write Bash shell scripts corresponding to windows bat files. I have installed cygwin at c:\cygwin and i am trying to crate the sh file using vi editor. i am not able to understand how to use linux/unix convention for the code. following is my... (1 Reply)
Discussion started by: rajuchacha007
1 Replies

7. Shell Programming and Scripting

best way for removing comment from shell scripts -- bash

Again a comment removal requirement from me, refer my previous problem & solution for removing comment from ruby scripts: https://www.unix.com/shell-programming-scripting/118296-best-way-removing-comment-ruby-program.html This time, it is for stripping of comments from Shell Script. I search for... (2 Replies)
Discussion started by: thegeek
2 Replies

8. Shell Programming and Scripting

Changing the Bash Scripts to Bourne Scripts:URGENT

Hi, I have to write a program to compute the checksums of files ./script.sh I wrote the program using bash and it took me forever since I am a beginner but it works very well. I'm getting so close to the deadline and I realised today that actually I have to use normal Bourne shell... (3 Replies)
Discussion started by: pgarg1989
3 Replies

9. UNIX for Dummies Questions & Answers

Bash Shell Scripts

Hi all, plz can anybody tell me that if a script written for Bash shell also work for other shells and if yes , how ??? Thanks and Regards SONAL (2 Replies)
Discussion started by: sonbag_pspl
2 Replies
Login or Register to Ask a Question