Code:
#!/usr/bin/env perl
# reformat.pl
use strict;
use warnings;
# Lookup table keyed by versioned RNA accession. The main loop uses the
# transcript parsed from each input row as the key and writes the matching
# value into the "Inheritance" column.
# NOTE(review): the codes presumably stand for autosomal recessive (AR),
# X-linked dominant (XLD) and autosomal dominant (AD) -- confirm.
my %nms = (
    'NM_004004.5' => 'AR',
    'NM_004992.3' => 'XLD',
    'NM_003924.3' => 'AD',
);
# Both paths are required positional arguments: input file first, output file
# second. A missing argument is a usage error, not a system error, so the
# message carries a usage hint instead of $! (which holds no relevant errno
# here). Testing defined/length rather than truth also accepts a file that is
# literally named "0".
my $readf = shift;
die "Missing input file\nUsage: $0 INPUT_FILE OUTPUT_FILE\n"
    unless defined $readf && length $readf;
my $writef = shift;
die "Missing output file\nUsage: $0 INPUT_FILE OUTPUT_FILE\n"
    unless defined $writef && length $writef;
# Fixed column titles of the output table. The first line of the input file
# is later spliced in at index 18, i.e. between the two groups below.
my @header = (

    # Leading columns (indices 0-17).
    'Index', 'Chromosome Position', 'Gene', 'Inheritance', 'RNA Accession',
    'Chr', 'Coverage', 'Score',
    'A(#F,#R)', 'C(#F,#R)', 'G(#F,#R)', 'T(#F,#R)',
    'Ins(#F,#R)', 'Del(#F,#R)',
    'SNP db_xref', 'Mutation Call', 'Mutant Allele Frequency',
    'Amino Acid Change',

    # Trailing columns (indices 18-25 before the input header is inserted).
    'HP', 'SPLICE', 'Pseudogene', 'Classification',
    'HGMD', 'Disease', 'Sanger', 'References',
);
# Open the input and take its first line, which becomes part of the output
# header. An empty input has no such line; stop with a clear message instead
# of carrying undef into the header.
open my $in, '<', $readf or die "Cannot open $readf: $!\n";
my $add2header = <$in>;
die "Input file $readf is empty\n" unless defined $add2header;
chomp $add2header;

# Create the output only after the input has proven usable, so a bad input
# does not leave a freshly created or truncated output file behind.
open my $out, '>', $writef or die "Cannot create $writef: $!\n";

# Insert the input's own header line after the 18 leading column titles and
# write the combined header as the first output record.
splice @header, 18, 0, $add2header;
save(@header);

# Restart the input line counter so that the header line is not counted and
# the first data row is numbered 1.
$. = 0;
# Turn every remaining input line into one tab-separated output row.
#
# Each row starts from a 50-cell template: 17 "Null" cells, 25 empty cells
# that receive the input fields, and 8 trailing "Null" cells. Cells derived
# by this script are then overwritten individually.
while ( my $line = <$in> ) {
    chomp $line;
    my @ruler  = ( ("Null") x 17, ("") x 25, ("Null") x 8 );
    my @fields = split /\t/, $line;

    # Drop the input fields into the template starting at index 17. split
    # discards trailing empty fields, so for rows with fewer than 25 fields
    # the remaining "" cells keep the row at its full width.
    splice @ruler, 17, scalar(@fields), @fields;

    # The tenth input field is split on ":" into gene, transcript, exon,
    # coding change and amino-acid change; the exon part is not used.
    # Short or blank lines have no tenth field, so fall back to "" rather
    # than splitting an undefined value.
    my $annotation = defined $fields[9] ? $fields[9] : "";
    my ( $gene, $transcript, undef, $coding, $aa ) = split /:/, $annotation;

    # Transcripts that are absent from %nms have no inheritance entry.
    my $inheritance = defined $transcript ? $nms{$transcript} : undef;

    # Column index => derived value. A value that could not be derived is
    # written as the "Null" placeholder used by the template instead of
    # being printed as an undefined value (which also triggers a warning).
    # NOTE(review): index 17 is where the splice above put the first input
    # field, so that field is replaced by the amino-acid change --
    # presumably intentional; confirm against the input layout.
    my @derived = (
        [ 2,  $gene ],
        [ 3,  $inheritance ],
        [ 4,  $transcript ],
        [ 15, $coding ],
        [ 17, $aa ],
    );
    for my $cell (@derived) {
        my ( $col, $value ) = @{$cell};
        $ruler[$col] = defined $value ? $value : "Null";
    }

    # $. is the 1-based data-row number because the counter was reset after
    # the header line was read.
    $ruler[0] = $.;

    # Fourth of the eight trailing cells.
    # NOTE(review): lines up with "Classification" only if the input header
    # contributes 24 columns -- confirm.
    $ruler[45] = "VUS";

    save(@ruler);
}
# Write the given values to the output file as one record: values joined by
# tabs and terminated by a newline. Relies on the file-scope handle $out.
sub save {
    my @cells = @_;
    print {$out} join( "\t", @cells ) . "\n";
}
# Release both handles. Errors from buffered writes can surface as late as
# close, so a failed close of the output file is fatal: the result may be
# incomplete. A failed close of the read-only input is only reported.
close $in  or warn "Cannot close $readf: $!\n";
close $out or die "Cannot close $writef: $!\n";