Entering edit mode
5.3 years ago
MAPK
★
2.1k
Hi All, I need some help with this perl script as I am an absolute beginner of perl. I have my code below which gives me mismatch at specific position from bowtie output file (bowtie.out) for the given read size (here I am searching within read size 22bp). This gives me the number of mismatch at specific position for each mismatch type. However, I also want to add position_ref for each mismatch type and get the result as shown below. What do I need to do to this code to get the result as shown below? Thanks for your help in advance.
# usage ./script.pl bowtie.out 22
my $sread = "ACGT-read";
my $strand ="-";
my $name = "bill";
my $position = 1;
my $sequence = "ACGT";
my $quality = "good";
my $d2 = "d2";
my $d3 = "d3";
my $class= $ARGV[1];
my @mray;
my @lines;
my $min = 10;
for (my $i=0; $i < $class; $i++) {
$mray[$i][0]=0;
$mray[$i][1]=0;
$mray[$i][2]=0;
$mray[$i][3]=0;
$mray[$i][4]=0;
$mray[$i][5]=0;
$mray[$i][6]=0;
$mray[$i][7]=0;
$mray[$i][8]=0;
$mray[$i][9]=0;
$mray[$i][10]=0;
$mray[$i][11]=0;
}
open (INFILE, "<$ARGV[0]") || die "couldn't open the 1 infile!";
while ($rln = <INFILE>){
chomp $rln;
($sread, $strand, $name, $position, $sequence, $quality, $d2,$d3) = split("\t",$rln);
# print "what is d3 \n $d3\n";
$seq_len = length ($sequence);
if ($seq_len == $class && $d3){
$position_ref = $position;
# This is where I need help. I want to paste these values (from $position_ref) to the corresponding columns as shown in the result below.
print "what is position_ref \n $position_ref\n";
# print "what is d3 \n $d3\n";
($position, $d2) = split(":",$d3);
# print "what is d2 \n $d2\n";
$var = substr $d2, -1;
$var2 = substr $d2, 0;
# print "what is substring d2 \n (substr $d2)\n";
# print "what is var \n $var\n";
# print "what is var2 \n $var2\n";
# print "what is class \n $class\n";
# print "what is sequence \n $sequence\n";
if ($var2 eq "A>T" ) { $mray[$position][0]++ }
if ($var2 eq "A>G" ) { $mray[$position][1]++ }
if ($var2 eq "A>C" ) { $mray[$position][2]++ }
if ($var2 eq "C>T" ) { $mray[$position][3]++ }
if ($var2 eq "C>G" ) { $mray[$position][4]++ }
if ($var2 eq "C>A" ) { $mray[$position][5]++ }
if ($var2 eq "G>T" ) { $mray[$position][6]++ }
if ($var2 eq "G>A" ) { $mray[$position][7]++ }
if ($var2 eq "G>C" ) { $mray[$position][8]++ }
if ($var2 eq "T>A" ) { $mray[$position][9]++ }
if ($var2 eq "T>G" ) { $mray[$position][10]++ }
if ($var2 eq "T>C" ) { $mray[$position][11]++ }
# print "what is mray \n $mray[$position][3]\n";
if ($position == ($class-1)) {
$read = substr $sequence, 0, $class-1;
push (@lines, $read);
# print "what is read \n $read\n";
# print "what is position \n $position\n";
}
}
}
close (INFILE);
print "Pos\tA>T\tA>G\tA>C\tC>T\tC>G\tC>A\tG>T\tG>A\tG>C\tT>A\tT>G\tT>C\n";
for (my $i=0; $i < $class; $i++) {
$pnum = $i + 1;
print "$pnum\t$mray[$i][0]\t$mray[$i][1]\t$mray[$i][2]\t$mray[$i][3]\t$mray[$i][4]\t$mray[$i][5]\t$mray[$i][6]\t$mray[$i][7]\t$mray[$i][8]\t$mray[$i][9]\t$mray[$i][10]\t$mray[$i][11]\n";
}
@lines = sort(@lines); # sort the list
$count = 0;
foreach my $line(@lines) # loop thru list
{
if ($line eq $oldline)
{
$count++;
}
else
{
if ($count >= $min) {print "$oldline\t$count\n";}
# print "$count\n";
$count=1;
$oldline=$line;
}
}
exit;
bowtie.out
K00363:128:HV3CJBBXX:3:1101:9668:1894 1:N:0:TAATCG - reverseKF898354_1_14561 12006 TCACCAGGAAGATAAAACACGA JJJJJJJJJJJJJJJJJFFFAA 0 12:T>A
K00363:128:HV3CJBBXX:3:1101:3363:1894 1:N:0:TAATCG - reverseKF898354_1_14561 1108 GCACCAGGAAGATAAAACACGT JJJJJJJJJJJJJJJJJFFFAA 0 12:T>A
K00363:128:HV3CJBBXX:3:1101:8521:1912 1:N:0:TAATCG - reverseKF898354_1_14561 13807 CACACAAATCATGGACGAAGATGA JJJJJJJJJJJJJJJJJJJFFFAA 0 23:G>C
K00363:128:HV3CJBBXX:3:1101:11343:1912 1:N:0:TAATCG - reverseKF898354_1_14561 11823 TTTACAATCGTTTGCAGTCTATCA JJJJJJJJJJJJJJJJJJJFFFAA 0
K00363:128:HV3CJBBXX:3:1101:17056:1930 1:N:0:TAATCG + reverseKF898354_1_14561 1970 TGCGCAGGACAAGAACTGAATG AAFFFJJJJJJJJJJJJJJJJJ 0 19:C>A
K00363:128:HV3CJBBXX:3:1101:11515:1965 1:N:0:TAATCG - reverseKF898354_1_14561 9030 TGTCTGAAAATAACACGTCCAA JJJJJJJJJJJJJJJJJFFFAA 0 5:A>G
result (tab delimited):
Pos A>T position_ref A>G position_ref A>C position_ref C>T position_ref C>G position_ref C>A position_ref G>T position_ref G>A position_ref G>C position_ref T>A position_ref T>G position_ref T>C position_ref
1 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0
6 0 1 9030 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 2 12006 1108 0
14 0 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0 0
20 0 0 0 0 0 1 11823 0 0 0 0 0
21 0 0 0 0 0 0 0 0 0 0 0
22 0 0 0 0 0 0 0 0 0 0 0