Question

GBS pipeline POD Error: does not exist or is unreadable

0

Entering edit mode

6.1 years ago

csjlee08 ▴ 10

Hello, I am trying to use GBS-POD pipeline, having trouble with the system to find my files although I did specify the file paths in the script, and they are in the current working directory.

This is the command and error:

  greg@Genomics:/media/greg/StorageD/gbs3/GBSpipeline$ sudo ./GBS_pipeline.pl trim_reads /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar illuminaadaptersFR.fa
    [sudo] password for greg: 
    given is experimental at ./GBS_pipeline.pl line 114.
    when is experimental at ./GBS_pipeline.pl line 117.
    when is experimental at ./GBS_pipeline.pl line 161.
    when is experimental at ./GBS_pipeline.pl line 196.
    when is experimental at ./GBS_pipeline.pl line 232.
    Calling trim_reads ...
    ERROR:  /media/greg/StorageD/gbs3/GBSpipeline/indexshortcol.txt does not exist or is unreadable.

This is the .pl file: (note: I put my ###christine where I tried to remove the code to see if it would run, it does not..)

#!/usr/bin/perl -w

##### STEP 2 : TRIM DEMULTIPLEXED READS #####
##### Usage: trim_reads [TRIMMOMATIC_PATH] [TRIM_FILE]
##### Required input:
#####   TRIMMOMATIC_PATH : The full pathname to the trimmomatic jar file
#####   TRIM_FILE : A list of sequences to filter out the reads (ie. Illumina adaptors)
##### Output:
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-s.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-p.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-s.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-p.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_output.log

use strict;
use warnings;

sub f2
{
    #############################################
    ##### DEFAULT VARIABLES FOR TRIMMOMATIC #####
    #############################################
    my %trim_options = (
    # Version
        'VERSION' => '0.33',
    # THREADS
        'TRIM_THREADS' => '1',
    # ILLUMINACLIP:
        'SEED_MISMATCHES' => '2',
        'PALINDROME_CLIP_THRESHOLD' => '30',
        'SIMPLE_CLIP_THRESHOLD' => '10',
    # SLIDINGWINDOW:
        'WINDOW_SIZE' => '4',
        'REQUIRED_QUALITY' => '15',
    # LEADING:
        'LEADING_QUALITY' => '3',
    # TRAILING:
        'TRAILING_QUALITY' => '3',
    # MINLEN:
        'MINLEN' => '36',
    );
    #############################################

    # Collect function-specific parameters
    my $trimmomatic_path = $_[0];
    my $trim_file = $_[1];
    my $population = $_[2];
    my $index_file = $_[3];
    my $output_dir = $_[4];

    # Collect trimmomatic-specific options
    if ($_[5])
    {
        my %options = %{$_[5]};
        while ( my ($key, $value) = each %options )
        {
            if ($options{ $key })
            {
                $trim_options{ $key } = $value;
            }
        }
    }

    # Ensure index_file exists in the cwd
    ################################## christine: unless ( -f $index_file && -r $index_file )
    ################################## christine:{   die "ERROR: $index_file does not exist or is unreadable.\n";    }

    # @TODO: Lookup the version of the user's trimmomatic
    unless ( -s "$trimmomatic_path" )
    {
        print "WARNING: Can't locate Trimmomatic at $trimmomatic_path.\n",
              "Would you like to attempt to install v$trim_options{'VERSION'} there now? (yes/no) ";
        chomp (my $usr_input = <STDIN>);
        if ($usr_input =~ /yes/i)
        {
            system("curl -O http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-$trim_options{'VERSION'}.zip");
            system("unzip Trimmomatic-$trim_options{'VERSION'}.zip; rm -f Trimmomatic-$trim_options{'VERSION'}.zip; mv Trimmomatic-$trim_options{'VERSION'}/ $trimmomatic_path/");
            $trimmomatic_path = "Trimmomatic-$trim_options{'VERSION'}/trimmomatic-$trim_options{'VERSION'}.jar";
            my $path = `pwd`;
            if (-e "$trimmomatic_path")
            {
                print "$trimmomatic_path now located in $path\n";
            } else {
                die "ERROR: $trimmomatic_path was not successfully installed in $path\n";
            }
        }
        else {   die "Exiting\n";    }
    }

    # Check for trim_file
     ################################## christine: unless ( -r "$trim_file" && -f "$trim_file" )
     ################################## christine: {   die "ERROR: $trim_file does not exist or is not readable.\n";   }

    system("mkdir -p $output_dir/trim/");
    system("mkdir -p logs/");

    # Create summary file
    system("mkdir -p summary_files/");
    my $summary_file = "summary_files/$population\_trim\_summary.txt";
    open SUMMARY, ">$summary_file" or die "ERROR: Could not create $summary_file\n";
    print SUMMARY "Sample\tInput Read Pairs\tSurviving Read Pairs\t% Both Surviving\t",
        "Only Forward Surviving\t% Forward Surviving\tOnly Reverse Surviving\t% Reverse Surviving\t",
        "Dropped Reads\t% Dropped\n";
    close SUMMARY or die "Unable to close $summary_file\n";

    # The sample names are in the first column if the index_file is in 2-column format
    # Otherwise, we only have a list of indices so the following cut command should still work
    my @samples = `cut -f1 $index_file | sort -u`;
    my $num_samples = $#samples + 1;
    if ($num_samples == 0){    die "ERROR: $index_file exists but appears to be empty.\n";    }
    my $sample_count = 0;

    # BEGIN trimming by sample
    foreach my $sample (@samples)
    {
        chomp($sample);
        $sample =~ s/ //g;
        print_progress($sample_count, $num_samples, "  Current sample: $sample       ");

        my $R1_reads = "$output_dir/demultiplex/$sample\_$population\_R1.fastq";
        my $R2_reads = "$output_dir/demultiplex/$sample\_$population\_R2.fastq";

       ##### unless ( -f "$R1_reads" && -r "$R1_reads" )
        #####{   die "ERROR: $R1_reads does not exist or is unreadable.\n"; }
        ######unless ( -f "$R2_reads" && -r "$R2_reads" )
        ######{   die "ERROR: $R2_reads does not exist or is unreadable.\n"; }

        # Run Trimmomatic
         my $cmd = "java -classpath $trimmomatic_path org.usadellab.trimmomatic.TrimmomaticPE ";
         $cmd .= "-threads $trim_options{'TRIM_THREADS'} -phred33 ";
         $cmd .= "-trimlog logs/$sample.trim.log ";
         $cmd .= "$R1_reads $R2_reads ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R1-p.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R1-s.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R2-p.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R2-s.fastq ";
         $cmd .= "ILLUMINACLIP:$trim_file:$trim_options{'SEED_MISMATCHES'}:";
         $cmd .= "$trim_options{'PALINDROME_CLIP_THRESHOLD'}:$trim_options{'SIMPLE_CLIP_THRESHOLD'} ";
         $cmd .= "LEADING:$trim_options{'LEADING_QUALITY'} ";
         $cmd .= "TRAILING:$trim_options{'TRAILING_QUALITY'} ";
         $cmd .= "SLIDINGWINDOW:$trim_options{'WINDOW_SIZE'}:$trim_options{'REQUIRED_QUALITY'} ";
         $cmd .= "MINLEN:$trim_options{'MINLEN'} ";

         my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
             run( command => $cmd, verbose => 0 );
         if ($success)
         {
             my $trimlog = "logs/$sample\_$population\_trimmomatic\_output.log";
             open TRIMLOG, ">$trimlog";
             print TRIMLOG join " ", @$full_buf;
         } else
         {
             print "ERROR: Trimmomatic did not complete successfully:\n";
             die "$error_message\n@$stderr_buf\n";
         }

         $sample_count++;
         summarize_trim($sample, $population, $output_dir, \@$stderr_buf);
         print_progress($sample_count, $num_samples, "");
    }
    print "\n",
        " Processed reads located in:  $output_dir/trim/ \n",
        " Summary file:  $summary_file\n";
}

#################################
##### Summarize step 2 using trimmomatic output:
##### Sample    Input read pairs    Both surviving   Both surviving %    Forward only surviving  Forward surviving %
#####   Reverse only surviving  Reverse surviving %     Dropped    Dropped %
#################################
sub summarize_trim
{
    my $sample = $_[0];
    my $population = $_[1];
    my $output_dir = $_[2];
    my @trimmomatic_output = @{$_[3]};

    my $summary_file = "summary_files/$population\_trim\_summary.txt";
    open SUMMARY, ">>$summary_file" or die "ERROR: Could not open $summary_file\n";

    # Grep for the line containing stats about the trimming process
    # Save values of interest into variables
    my ($line_of_interest) = grep /Input Read Pairs/, @trimmomatic_output;
    my @trim_info = split(/ /, $line_of_interest);
    my $input_read_pairs = $trim_info[3];
    my $both_surviving = $trim_info[6];
    my $both_surviving_percent = $trim_info[7];
    my $forward_surviving = $trim_info[11];
    my $forward_surviving_percent = $trim_info[12];
    my $reverse_surviving = $trim_info[16];
    my $reverse_surviving_percent = $trim_info[17];
    my $dropped = $trim_info[19];
    chomp( my $dropped_percent = $trim_info[20] );

    print SUMMARY "$sample\t$input_read_pairs\t$both_surviving\t$both_surviving_percent\t",
        "$forward_surviving\t$forward_surviving_percent\t$reverse_surviving\t",
        "$reverse_surviving_percent\t$dropped\t$dropped_percent\n";

    close SUMMARY or die "ERROR: Could not close $summary_file\n";
}

1;

and the configuration file (where to specify parameters):

    #######################################
### GBS Pipeline Configuration File ###
#######################################
############ INSTRUCTIONS #############
# Fill out the following information which will be used for GBS analysis.
# Please ONLY make changes after an equals sign (=).
# Do not add any other text to the file, and do not remove any existing text.
# Do NOT include whitespace, instead use underscore: '_'
# Include the full pathname to files, directories and programs if they exist
#   outside of the current working directory (do NOT use '~' for home directory)
#######################################

#######################################
##### REQUIRED FOR MULTIPLE STEPS #####
#######################################

# The generic population name (used in naming files during processing)
# Example: LC-01
POPULATION = Adzuki

# The filename of indices (aka barcodes) used for this GBS run.
# This file can be in one of 2 formats. Either:
#   1) A single column list of indices
#   2) 2 columns where the first column consists of sample names, and the second column
#      of the sample's associated index
# Example: indices.txt
INDEX_FILE = /media/greg/Overflow/gbs3/GBSpipeline/indexshortcol.txt

# The location where output of processed reads are placed
# Example: /storage/lens_GBS/processed_reads
output_dir = /media/greg/Overflow/gbs3/GBSpipeline/processedreads

# The filename of the reference genome sequence
# Example: ../NC_008253.fna
REFERENCE = /media/greg/StorageD/gbs3/GBSpipeline/adzukireferencegenome.fasta

##################################################
### DEMULTIPLEX - STEP 1 SPECIFIC REQUIREMENTS ###
##################################################

# The rare-cutter restriction enzyme site. This is only necessary if the sequences of the
# indices include the RE site (to prevent the RE site from being clipped from the reads).
# If the RE site is not attached to your indices, leave this blank.
# Example: TGCA
RE_SITE = TGCA

# The filename of the 1st set of multiplexed read pairs
# Example: S001EA3_R1.fastq
R1_FILE = R1plate1.fastq

# The filename of the 2nd set of multiplexed read pairs
# Example: S001EA3_R2.fastq
R2_FILE = R2plate1.fastq

###########################################
### TRIM - STEP 2 SPECIFIC REQUIREMENTS ###

# The filename of the list of adaptor sequences to check for and remove
# Hint: If you do not have a list of adaptor sequences, Trimmomatic provides multiple lists
# of sequences in the adaptors folder in your Trimmomatic directory
# Example: trimfile.txt
TRIM_FILE = /media/greg/StorageD/gbs3/GBSpipeline/illuminaadaptersFR.fa

# The relative or full pathname to your compiled copy of Trimmomatic
# Note: This MUST include the executable .jar!
# Example: ../Tools/Trimmomatic-0.32/trimmomatic-0.32.jar
TRIMMOMATIC_PATH = /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar

### TRIMMOMATIC OPTIONS - Refer to Trimmomatic manual ###
### These are OPTIONAL. Trimmomatic will run the defaults specified if left blank ###
  # The number of threads to use on an HPC cluster
  # DEFAULT: 1
  TRIM_THREADS =
  # The maximum mismatch count to allow a full match
  # DEFAULT: 2
  SEED_MISMATCHES = 0
  # Specifies accuracy needed for a match between two adapter ligated reads
  # DEFAULT: 30
  PALINDROME_CLIP_THRESHOLD =
  # Specifies accuracy needed for a match between any adapter and read
  # DEFAULT: 20
  SIMPLE_CLIP_THRESHOLD =
  # The number of bases to average across in sliding window trimming
  # DEFAULT: 4
  WINDOW_SIZE =
  # The average quality required in a sliding window trimming procedure
  # DEFAULT : 15
  REQUIRED_QUALITY =
  # Threshold for minimum quality of leading bases
  # DEFAULT: 3
  LEADING_QUALITY =
  # Threshold for minimum quality of trailing bases
  # DEFAULT: 3
  TRAILING_QUALITY =
  # The minimum length for trimmed reads to be kept
  # DEFAULT: 36
  MINLEN =

Help is appreciated. Thanks.

GBS Pipeline • 1.3k views

ADD COMMENT • link 6.1 years ago by csjlee08 ▴ 10