Closed:Help with the perl script to extract mismatches
0
0
Entering edit mode
5.2 years ago
MAPK ★ 2.1k

Hi All, I need some help with this perl script as I am an absolute beginner of perl. I have my code below which gives me mismatch at specific position from bowtie output file (bowtie.out) for the given read size (here I am searching within read size 22bp). This gives me the number of mismatch at specific position for each mismatch type. However, I also want to add position_ref for each mismatch type and get the result as shown below. What do I need to do to this code to get the result as shown below? Thanks for your help in advance.

# usage ./script.pl bowtie.out 22

my $sread = "ACGT-read";
my $strand ="-";
my $name = "bill";
my $position = 1;
my $sequence = "ACGT";
my $quality = "good";
my $d2 = "d2";
my $d3 = "d3";
my $class= $ARGV[1];
my @mray;
my @lines;
my $min = 10;

for (my $i=0; $i < $class; $i++) {
    $mray[$i][0]=0;
    $mray[$i][1]=0;
    $mray[$i][2]=0;
    $mray[$i][3]=0;
    $mray[$i][4]=0;
    $mray[$i][5]=0;
    $mray[$i][6]=0;
    $mray[$i][7]=0;
    $mray[$i][8]=0;
    $mray[$i][9]=0;
    $mray[$i][10]=0;
    $mray[$i][11]=0;
}

open (INFILE, "<$ARGV[0]") || die "couldn't open the 1 infile!";

while ($rln = <INFILE>){
    chomp $rln;
    ($sread, $strand, $name, $position, $sequence, $quality, $d2,$d3) = split("\t",$rln);
#   print "what is d3 \n $d3\n";
    $seq_len = length ($sequence);
    if ($seq_len == $class && $d3){
        $position_ref = $position; 
# This is where I need help. I want to paste these values (from $position_ref) to the corresponding columns as shown in the result below.
        print "what is position_ref \n $position_ref\n";
#   print "what is d3 \n $d3\n";
       ($position, $d2) = split(":",$d3);
#   print "what is d2 \n $d2\n";
       $var = substr $d2, -1;
       $var2 = substr $d2, 0;
#   print "what is substring d2 \n (substr $d2)\n";
#   print "what is var \n $var\n";
#   print "what is var2 \n $var2\n";
#   print "what is class \n $class\n";
#   print "what is sequence \n $sequence\n";

       if ($var2 eq "A>T" ) { $mray[$position][0]++ }
       if ($var2 eq "A>G" ) { $mray[$position][1]++ }
       if ($var2 eq "A>C" ) { $mray[$position][2]++ }
       if ($var2 eq "C>T" ) { $mray[$position][3]++ }
       if ($var2 eq "C>G" ) { $mray[$position][4]++ }
       if ($var2 eq "C>A" ) { $mray[$position][5]++ }
       if ($var2 eq "G>T" ) { $mray[$position][6]++ }
       if ($var2 eq "G>A" ) { $mray[$position][7]++ }
       if ($var2 eq "G>C" ) { $mray[$position][8]++ }
       if ($var2 eq "T>A" ) { $mray[$position][9]++ }
       if ($var2 eq "T>G" ) { $mray[$position][10]++ }
       if ($var2 eq "T>C" ) { $mray[$position][11]++ }


#   print "what is mray \n $mray[$position][3]\n";
       if ($position == ($class-1)) {
           $read = substr $sequence, 0, $class-1;
           push (@lines, $read);
#   print "what is read \n $read\n";
#   print "what is position \n $position\n";
       }
    }
}
close (INFILE);
print "Pos\tA>T\tA>G\tA>C\tC>T\tC>G\tC>A\tG>T\tG>A\tG>C\tT>A\tT>G\tT>C\n";
for (my $i=0; $i < $class; $i++) {
    $pnum = $i + 1;
    print "$pnum\t$mray[$i][0]\t$mray[$i][1]\t$mray[$i][2]\t$mray[$i][3]\t$mray[$i][4]\t$mray[$i][5]\t$mray[$i][6]\t$mray[$i][7]\t$mray[$i][8]\t$mray[$i][9]\t$mray[$i][10]\t$mray[$i][11]\n";
}

@lines = sort(@lines); # sort the list
$count = 0;
foreach my $line(@lines) # loop thru list
 {
    if ($line eq $oldline)
        {
          $count++;
        }  
        else 
        { 
          if ($count >= $min) {print "$oldline\t$count\n";}
#          print "$count\n";
          $count=1;
          $oldline=$line;
        }
 }

exit;

bowtie.out

K00363:128:HV3CJBBXX:3:1101:9668:1894 1:N:0:TAATCG  -   reverseKF898354_1_14561 12006   TCACCAGGAAGATAAAACACGA  JJJJJJJJJJJJJJJJJFFFAA  0   12:T>A
K00363:128:HV3CJBBXX:3:1101:3363:1894 1:N:0:TAATCG  -   reverseKF898354_1_14561 1108    GCACCAGGAAGATAAAACACGT  JJJJJJJJJJJJJJJJJFFFAA  0   12:T>A
K00363:128:HV3CJBBXX:3:1101:8521:1912 1:N:0:TAATCG  -   reverseKF898354_1_14561 13807   CACACAAATCATGGACGAAGATGA    JJJJJJJJJJJJJJJJJJJFFFAA    0   23:G>C
K00363:128:HV3CJBBXX:3:1101:11343:1912 1:N:0:TAATCG -   reverseKF898354_1_14561 11823   TTTACAATCGTTTGCAGTCTATCA    JJJJJJJJJJJJJJJJJJJFFFAA    0   
K00363:128:HV3CJBBXX:3:1101:17056:1930 1:N:0:TAATCG +   reverseKF898354_1_14561 1970    TGCGCAGGACAAGAACTGAATG  AAFFFJJJJJJJJJJJJJJJJJ  0   19:C>A
K00363:128:HV3CJBBXX:3:1101:11515:1965 1:N:0:TAATCG -   reverseKF898354_1_14561 9030    TGTCTGAAAATAACACGTCCAA  JJJJJJJJJJJJJJJJJFFFAA  0   5:A>G

result (tab delimited):

Pos A>T position_ref    A>G position_ref    A>C position_ref    C>T position_ref    C>G position_ref    C>A position_ref    G>T position_ref    G>A position_ref    G>C position_ref    T>A position_ref    T>G position_ref    T>C position_ref
1   0       0       0       0       0       0       0       0       0       0               0   
2   0       0       0       0       0       0       0       0       0       0               0   
3   0       0       0       0       0       0       0       0       0       0               0   
4   0       0       0       0       0       0       0       0       0       0               0   
5   0       0       0       0       0       0       0       0       0       0               0   
6   0       1   9030    0       0       0       0       0       0       0       0               0   
7   0       0       0       0       0       0       0       0       0       0               0   
8   0       0       0       0       0       0       0       0       0       0               0   
9   0       0       0       0       0       0       0       0       0       0               0   
10  0       0       0       0       0       0       0       0       0       0               0   
11  0       0       0       0       0       0       0       0       0       0               0   
12  0       0       0       0       0       0       0       0       0       0               0   
13  0       0       0       0       0       0       0       0       0       2   12006   1108        0   
14  0       0       0       0       0       0       0       0       0       0               0   
15  0       0       0       0       0       0       0       0       0       0               0   
16  0       0       0       0       0       0       0       0       0       0               0   
17  0       0       0       0       0       0       0       0       0       0               0   
18  0       0       0       0       0       0       0       0       0       0               0   
19  0       0       0       0       0       0       0       0       0       0               0   
20  0       0       0       0       0       1   11823   0       0       0       0               0   
21  0       0       0       0       0       0       0       0       0       0               0   
22  0       0       0       0       0       0       0       0       0       0               0
perl • 154 views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 2044 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6