GBS pipeline POD Error: does not exist or is unreadable
0
0
Entering edit mode
6.1 years ago
csjlee08 ▴ 10

Hello, I am trying to use GBS-POD pipeline, having trouble with the system to find my files although I did specify the file paths in the script, and they are in the current working directory.

This is the command and error:

  greg@Genomics:/media/greg/StorageD/gbs3/GBSpipeline$ sudo ./GBS_pipeline.pl trim_reads /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar illuminaadaptersFR.fa
    [sudo] password for greg: 
    given is experimental at ./GBS_pipeline.pl line 114.
    when is experimental at ./GBS_pipeline.pl line 117.
    when is experimental at ./GBS_pipeline.pl line 161.
    when is experimental at ./GBS_pipeline.pl line 196.
    when is experimental at ./GBS_pipeline.pl line 232.
    Calling trim_reads ...
    ERROR:  /media/greg/StorageD/gbs3/GBSpipeline/indexshortcol.txt does not exist or is unreadable.

This is the .pl file: (note: I put my ###christine where I tried to remove the code to see if it would run, it does not..)

#!/usr/bin/perl -w

##### STEP 2 : TRIM DEMULTIPLEXED READS #####
##### Usage: trim_reads [TRIMMOMATIC_PATH] [TRIM_FILE]
##### Required input:
#####   TRIMMOMATIC_PATH : The full pathname to the trimmomatic jar file
#####   TRIM_FILE : A list of sequences to filter out the reads (ie. Illumina adaptors)
##### Output:
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-s.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-p.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-s.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-p.fastq
#####   [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_output.log

use strict;
use warnings;

sub f2
{
    #############################################
    ##### DEFAULT VARIABLES FOR TRIMMOMATIC #####
    #############################################
    my %trim_options = (
    # Version
        'VERSION' => '0.33',
    # THREADS
        'TRIM_THREADS' => '1',
    # ILLUMINACLIP:
        'SEED_MISMATCHES' => '2',
        'PALINDROME_CLIP_THRESHOLD' => '30',
        'SIMPLE_CLIP_THRESHOLD' => '10',
    # SLIDINGWINDOW:
        'WINDOW_SIZE' => '4',
        'REQUIRED_QUALITY' => '15',
    # LEADING:
        'LEADING_QUALITY' => '3',
    # TRAILING:
        'TRAILING_QUALITY' => '3',
    # MINLEN:
        'MINLEN' => '36',
    );
    #############################################

    # Collect function-specific parameters
    my $trimmomatic_path = $_[0];
    my $trim_file = $_[1];
    my $population = $_[2];
    my $index_file = $_[3];
    my $output_dir = $_[4];

    # Collect trimmomatic-specific options
    if ($_[5])
    {
        my %options = %{$_[5]};
        while ( my ($key, $value) = each %options )
        {
            if ($options{ $key })
            {
                $trim_options{ $key } = $value;
            }
        }
    }

    # Ensure index_file exists in the cwd
    ################################## christine: unless ( -f $index_file && -r $index_file )
    ################################## christine:{   die "ERROR: $index_file does not exist or is unreadable.\n";    }

    # @TODO: Lookup the version of the user's trimmomatic
    unless ( -s "$trimmomatic_path" )
    {
        print "WARNING: Can't locate Trimmomatic at $trimmomatic_path.\n",
              "Would you like to attempt to install v$trim_options{'VERSION'} there now? (yes/no) ";
        chomp (my $usr_input = <STDIN>);
        if ($usr_input =~ /yes/i)
        {
            system("curl -O http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-$trim_options{'VERSION'}.zip");
            system("unzip Trimmomatic-$trim_options{'VERSION'}.zip; rm -f Trimmomatic-$trim_options{'VERSION'}.zip; mv Trimmomatic-$trim_options{'VERSION'}/ $trimmomatic_path/");
            $trimmomatic_path = "Trimmomatic-$trim_options{'VERSION'}/trimmomatic-$trim_options{'VERSION'}.jar";
            my $path = `pwd`;
            if (-e "$trimmomatic_path")
            {
                print "$trimmomatic_path now located in $path\n";
            } else {
                die "ERROR: $trimmomatic_path was not successfully installed in $path\n";
            }
        }
        else {   die "Exiting\n";    }
    }

    # Check for trim_file
     ################################## christine: unless ( -r "$trim_file" && -f "$trim_file" )
     ################################## christine: {   die "ERROR: $trim_file does not exist or is not readable.\n";   }

    system("mkdir -p $output_dir/trim/");
    system("mkdir -p logs/");

    # Create summary file
    system("mkdir -p summary_files/");
    my $summary_file = "summary_files/$population\_trim\_summary.txt";
    open SUMMARY, ">$summary_file" or die "ERROR: Could not create $summary_file\n";
    print SUMMARY "Sample\tInput Read Pairs\tSurviving Read Pairs\t% Both Surviving\t",
        "Only Forward Surviving\t% Forward Surviving\tOnly Reverse Surviving\t% Reverse Surviving\t",
        "Dropped Reads\t% Dropped\n";
    close SUMMARY or die "Unable to close $summary_file\n";

    # The sample names are in the first column if the index_file is in 2-column format
    # Otherwise, we only have a list of indices so the following cut command should still work
    my @samples = `cut -f1 $index_file | sort -u`;
    my $num_samples = $#samples + 1;
    if ($num_samples == 0){    die "ERROR: $index_file exists but appears to be empty.\n";    }
    my $sample_count = 0;

    # BEGIN trimming by sample
    foreach my $sample (@samples)
    {
        chomp($sample);
        $sample =~ s/ //g;
        print_progress($sample_count, $num_samples, "  Current sample: $sample       ");

        my $R1_reads = "$output_dir/demultiplex/$sample\_$population\_R1.fastq";
        my $R2_reads = "$output_dir/demultiplex/$sample\_$population\_R2.fastq";

       ##### unless ( -f "$R1_reads" && -r "$R1_reads" )
        #####{   die "ERROR: $R1_reads does not exist or is unreadable.\n"; }
        ######unless ( -f "$R2_reads" && -r "$R2_reads" )
        ######{   die "ERROR: $R2_reads does not exist or is unreadable.\n"; }

        # Run Trimmomatic
         my $cmd = "java -classpath $trimmomatic_path org.usadellab.trimmomatic.TrimmomaticPE ";
         $cmd .= "-threads $trim_options{'TRIM_THREADS'} -phred33 ";
         $cmd .= "-trimlog logs/$sample.trim.log ";
         $cmd .= "$R1_reads $R2_reads ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R1-p.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R1-s.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R2-p.fastq ";
         $cmd .= "$output_dir/trim/$sample\_$population\_R2-s.fastq ";
         $cmd .= "ILLUMINACLIP:$trim_file:$trim_options{'SEED_MISMATCHES'}:";
         $cmd .= "$trim_options{'PALINDROME_CLIP_THRESHOLD'}:$trim_options{'SIMPLE_CLIP_THRESHOLD'} ";
         $cmd .= "LEADING:$trim_options{'LEADING_QUALITY'} ";
         $cmd .= "TRAILING:$trim_options{'TRAILING_QUALITY'} ";
         $cmd .= "SLIDINGWINDOW:$trim_options{'WINDOW_SIZE'}:$trim_options{'REQUIRED_QUALITY'} ";
         $cmd .= "MINLEN:$trim_options{'MINLEN'} ";

         my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
             run( command => $cmd, verbose => 0 );
         if ($success)
         {
             my $trimlog = "logs/$sample\_$population\_trimmomatic\_output.log";
             open TRIMLOG, ">$trimlog";
             print TRIMLOG join " ", @$full_buf;
         } else
         {
             print "ERROR: Trimmomatic did not complete successfully:\n";
             die "$error_message\n@$stderr_buf\n";
         }

         $sample_count++;
         summarize_trim($sample, $population, $output_dir, \@$stderr_buf);
         print_progress($sample_count, $num_samples, "");
    }
    print "\n",
        " Processed reads located in:  $output_dir/trim/ \n",
        " Summary file:  $summary_file\n";
}

#################################
##### Summarize step 2 using trimmomatic output:
##### Sample    Input read pairs    Both surviving   Both surviving %    Forward only surviving  Forward surviving %
#####   Reverse only surviving  Reverse surviving %     Dropped    Dropped %
#################################
sub summarize_trim
{
    my $sample = $_[0];
    my $population = $_[1];
    my $output_dir = $_[2];
    my @trimmomatic_output = @{$_[3]};

    my $summary_file = "summary_files/$population\_trim\_summary.txt";
    open SUMMARY, ">>$summary_file" or die "ERROR: Could not open $summary_file\n";

    # Grep for the line containing stats about the trimming process
    # Save values of interest into variables
    my ($line_of_interest) = grep /Input Read Pairs/, @trimmomatic_output;
    my @trim_info = split(/ /, $line_of_interest);
    my $input_read_pairs = $trim_info[3];
    my $both_surviving = $trim_info[6];
    my $both_surviving_percent = $trim_info[7];
    my $forward_surviving = $trim_info[11];
    my $forward_surviving_percent = $trim_info[12];
    my $reverse_surviving = $trim_info[16];
    my $reverse_surviving_percent = $trim_info[17];
    my $dropped = $trim_info[19];
    chomp( my $dropped_percent = $trim_info[20] );

    print SUMMARY "$sample\t$input_read_pairs\t$both_surviving\t$both_surviving_percent\t",
        "$forward_surviving\t$forward_surviving_percent\t$reverse_surviving\t",
        "$reverse_surviving_percent\t$dropped\t$dropped_percent\n";

    close SUMMARY or die "ERROR: Could not close $summary_file\n";
}

1;

and the configuration file (where to specify parameters):

    #######################################
### GBS Pipeline Configuration File ###
#######################################
############ INSTRUCTIONS #############
# Fill out the following information which will be used for GBS analysis.
# Please ONLY make changes after an equals sign (=).
# Do not add any other text to the file, and do not remove any existing text.
# Do NOT include whitespace, instead use underscore: '_'
# Include the full pathname to files, directories and programs if they exist
#   outside of the current working directory (do NOT use '~' for home directory)
#######################################

#######################################
##### REQUIRED FOR MULTIPLE STEPS #####
#######################################

# The generic population name (used in naming files during processing)
# Example: LC-01
POPULATION = Adzuki

# The filename of indices (aka barcodes) used for this GBS run.
# This file can be in one of 2 formats. Either:
#   1) A single column list of indices
#   2) 2 columns where the first column consists of sample names, and the second column
#      of the sample's associated index
# Example: indices.txt
INDEX_FILE = /media/greg/Overflow/gbs3/GBSpipeline/indexshortcol.txt

# The location where output of processed reads are placed
# Example: /storage/lens_GBS/processed_reads
output_dir = /media/greg/Overflow/gbs3/GBSpipeline/processedreads

# The filename of the reference genome sequence
# Example: ../NC_008253.fna
REFERENCE = /media/greg/StorageD/gbs3/GBSpipeline/adzukireferencegenome.fasta

##################################################
### DEMULTIPLEX - STEP 1 SPECIFIC REQUIREMENTS ###
##################################################

# The rare-cutter restriction enzyme site. This is only necessary if the sequences of the
# indices include the RE site (to prevent the RE site from being clipped from the reads).
# If the RE site is not attached to your indices, leave this blank.
# Example: TGCA
RE_SITE = TGCA

# The filename of the 1st set of multiplexed read pairs
# Example: S001EA3_R1.fastq
R1_FILE = R1plate1.fastq

# The filename of the 2nd set of multiplexed read pairs
# Example: S001EA3_R2.fastq
R2_FILE = R2plate1.fastq

###########################################
### TRIM - STEP 2 SPECIFIC REQUIREMENTS ###

# The filename of the list of adaptor sequences to check for and remove
# Hint: If you do not have a list of adaptor sequences, Trimmomatic provides multiple lists
# of sequences in the adaptors folder in your Trimmomatic directory
# Example: trimfile.txt
TRIM_FILE = /media/greg/StorageD/gbs3/GBSpipeline/illuminaadaptersFR.fa

# The relative or full pathname to your compiled copy of Trimmomatic
# Note: This MUST include the executable .jar!
# Example: ../Tools/Trimmomatic-0.32/trimmomatic-0.32.jar
TRIMMOMATIC_PATH = /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar

### TRIMMOMATIC OPTIONS - Refer to Trimmomatic manual ###
### These are OPTIONAL. Trimmomatic will run the defaults specified if left blank ###
  # The number of threads to use on an HPC cluster
  # DEFAULT: 1
  TRIM_THREADS =
  # The maximum mismatch count to allow a full match
  # DEFAULT: 2
  SEED_MISMATCHES = 0
  # Specifies accuracy needed for a match between two adapter ligated reads
  # DEFAULT: 30
  PALINDROME_CLIP_THRESHOLD =
  # Specifies accuracy needed for a match between any adapter and read
  # DEFAULT: 20
  SIMPLE_CLIP_THRESHOLD =
  # The number of bases to average across in sliding window trimming
  # DEFAULT: 4
  WINDOW_SIZE =
  # The average quality required in a sliding window trimming procedure
  # DEFAULT : 15
  REQUIRED_QUALITY =
  # Threshold for minimum quality of leading bases
  # DEFAULT: 3
  LEADING_QUALITY =
  # Threshold for minimum quality of trailing bases
  # DEFAULT: 3
  TRAILING_QUALITY =
  # The minimum length for trimmed reads to be kept
  # DEFAULT: 36
  MINLEN =

Help is appreciated. Thanks.

GBS Pipeline • 1.3k views
ADD COMMENT

Login before adding your answer.

Traffic: 3260 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6