Entering edit mode
6.1 years ago
csjlee08
▴
10
Hello, I am trying to use GBS-POD pipeline, having trouble with the system to find my files although I did specify the file paths in the script, and they are in the current working directory.
This is the command and error:
greg@Genomics:/media/greg/StorageD/gbs3/GBSpipeline$ sudo ./GBS_pipeline.pl trim_reads /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar illuminaadaptersFR.fa
[sudo] password for greg:
given is experimental at ./GBS_pipeline.pl line 114.
when is experimental at ./GBS_pipeline.pl line 117.
when is experimental at ./GBS_pipeline.pl line 161.
when is experimental at ./GBS_pipeline.pl line 196.
when is experimental at ./GBS_pipeline.pl line 232.
Calling trim_reads ...
ERROR: /media/greg/StorageD/gbs3/GBSpipeline/indexshortcol.txt does not exist or is unreadable.
This is the .pl file: (note: I put my ###christine where I tried to remove the code to see if it would run, it does not..)
#!/usr/bin/perl -w
##### STEP 2 : TRIM DEMULTIPLEXED READS #####
##### Usage: trim_reads [TRIMMOMATIC_PATH] [TRIM_FILE]
##### Required input:
##### TRIMMOMATIC_PATH : The full pathname to the trimmomatic jar file
##### TRIM_FILE : A list of sequences to filter out the reads (ie. Illumina adaptors)
##### Output:
##### [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-s.fastq
##### [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R1-p.fastq
##### [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-s.fastq
##### [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_R2-p.fastq
##### [OUTPUT_DIR]/trim/[SAMPLE]_[POPULATION]_output.log
use strict;
use warnings;
sub f2
{
#############################################
##### DEFAULT VARIABLES FOR TRIMMOMATIC #####
#############################################
my %trim_options = (
# Version
'VERSION' => '0.33',
# THREADS
'TRIM_THREADS' => '1',
# ILLUMINACLIP:
'SEED_MISMATCHES' => '2',
'PALINDROME_CLIP_THRESHOLD' => '30',
'SIMPLE_CLIP_THRESHOLD' => '10',
# SLIDINGWINDOW:
'WINDOW_SIZE' => '4',
'REQUIRED_QUALITY' => '15',
# LEADING:
'LEADING_QUALITY' => '3',
# TRAILING:
'TRAILING_QUALITY' => '3',
# MINLEN:
'MINLEN' => '36',
);
#############################################
# Collect function-specific parameters
my $trimmomatic_path = $_[0];
my $trim_file = $_[1];
my $population = $_[2];
my $index_file = $_[3];
my $output_dir = $_[4];
# Collect trimmomatic-specific options
if ($_[5])
{
my %options = %{$_[5]};
while ( my ($key, $value) = each %options )
{
if ($options{ $key })
{
$trim_options{ $key } = $value;
}
}
}
# Ensure index_file exists in the cwd
################################## christine: unless ( -f $index_file && -r $index_file )
################################## christine:{ die "ERROR: $index_file does not exist or is unreadable.\n"; }
# @TODO: Lookup the version of the user's trimmomatic
unless ( -s "$trimmomatic_path" )
{
print "WARNING: Can't locate Trimmomatic at $trimmomatic_path.\n",
"Would you like to attempt to install v$trim_options{'VERSION'} there now? (yes/no) ";
chomp (my $usr_input = <STDIN>);
if ($usr_input =~ /yes/i)
{
system("curl -O http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-$trim_options{'VERSION'}.zip");
system("unzip Trimmomatic-$trim_options{'VERSION'}.zip; rm -f Trimmomatic-$trim_options{'VERSION'}.zip; mv Trimmomatic-$trim_options{'VERSION'}/ $trimmomatic_path/");
$trimmomatic_path = "Trimmomatic-$trim_options{'VERSION'}/trimmomatic-$trim_options{'VERSION'}.jar";
my $path = `pwd`;
if (-e "$trimmomatic_path")
{
print "$trimmomatic_path now located in $path\n";
} else {
die "ERROR: $trimmomatic_path was not successfully installed in $path\n";
}
}
else { die "Exiting\n"; }
}
# Check for trim_file
################################## christine: unless ( -r "$trim_file" && -f "$trim_file" )
################################## christine: { die "ERROR: $trim_file does not exist or is not readable.\n"; }
system("mkdir -p $output_dir/trim/");
system("mkdir -p logs/");
# Create summary file
system("mkdir -p summary_files/");
my $summary_file = "summary_files/$population\_trim\_summary.txt";
open SUMMARY, ">$summary_file" or die "ERROR: Could not create $summary_file\n";
print SUMMARY "Sample\tInput Read Pairs\tSurviving Read Pairs\t% Both Surviving\t",
"Only Forward Surviving\t% Forward Surviving\tOnly Reverse Surviving\t% Reverse Surviving\t",
"Dropped Reads\t% Dropped\n";
close SUMMARY or die "Unable to close $summary_file\n";
# The sample names are in the first column if the index_file is in 2-column format
# Otherwise, we only have a list of indices so the following cut command should still work
my @samples = `cut -f1 $index_file | sort -u`;
my $num_samples = $#samples + 1;
if ($num_samples == 0){ die "ERROR: $index_file exists but appears to be empty.\n"; }
my $sample_count = 0;
# BEGIN trimming by sample
foreach my $sample (@samples)
{
chomp($sample);
$sample =~ s/ //g;
print_progress($sample_count, $num_samples, " Current sample: $sample ");
my $R1_reads = "$output_dir/demultiplex/$sample\_$population\_R1.fastq";
my $R2_reads = "$output_dir/demultiplex/$sample\_$population\_R2.fastq";
##### unless ( -f "$R1_reads" && -r "$R1_reads" )
#####{ die "ERROR: $R1_reads does not exist or is unreadable.\n"; }
######unless ( -f "$R2_reads" && -r "$R2_reads" )
######{ die "ERROR: $R2_reads does not exist or is unreadable.\n"; }
# Run Trimmomatic
my $cmd = "java -classpath $trimmomatic_path org.usadellab.trimmomatic.TrimmomaticPE ";
$cmd .= "-threads $trim_options{'TRIM_THREADS'} -phred33 ";
$cmd .= "-trimlog logs/$sample.trim.log ";
$cmd .= "$R1_reads $R2_reads ";
$cmd .= "$output_dir/trim/$sample\_$population\_R1-p.fastq ";
$cmd .= "$output_dir/trim/$sample\_$population\_R1-s.fastq ";
$cmd .= "$output_dir/trim/$sample\_$population\_R2-p.fastq ";
$cmd .= "$output_dir/trim/$sample\_$population\_R2-s.fastq ";
$cmd .= "ILLUMINACLIP:$trim_file:$trim_options{'SEED_MISMATCHES'}:";
$cmd .= "$trim_options{'PALINDROME_CLIP_THRESHOLD'}:$trim_options{'SIMPLE_CLIP_THRESHOLD'} ";
$cmd .= "LEADING:$trim_options{'LEADING_QUALITY'} ";
$cmd .= "TRAILING:$trim_options{'TRAILING_QUALITY'} ";
$cmd .= "SLIDINGWINDOW:$trim_options{'WINDOW_SIZE'}:$trim_options{'REQUIRED_QUALITY'} ";
$cmd .= "MINLEN:$trim_options{'MINLEN'} ";
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
run( command => $cmd, verbose => 0 );
if ($success)
{
my $trimlog = "logs/$sample\_$population\_trimmomatic\_output.log";
open TRIMLOG, ">$trimlog";
print TRIMLOG join " ", @$full_buf;
} else
{
print "ERROR: Trimmomatic did not complete successfully:\n";
die "$error_message\n@$stderr_buf\n";
}
$sample_count++;
summarize_trim($sample, $population, $output_dir, \@$stderr_buf);
print_progress($sample_count, $num_samples, "");
}
print "\n",
" Processed reads located in: $output_dir/trim/ \n",
" Summary file: $summary_file\n";
}
#################################
##### Summarize step 2 using trimmomatic output:
##### Sample Input read pairs Both surviving Both surviving % Forward only surviving Forward surviving %
##### Reverse only surviving Reverse surviving % Dropped Dropped %
#################################
sub summarize_trim
{
my $sample = $_[0];
my $population = $_[1];
my $output_dir = $_[2];
my @trimmomatic_output = @{$_[3]};
my $summary_file = "summary_files/$population\_trim\_summary.txt";
open SUMMARY, ">>$summary_file" or die "ERROR: Could not open $summary_file\n";
# Grep for the line containing stats about the trimming process
# Save values of interest into variables
my ($line_of_interest) = grep /Input Read Pairs/, @trimmomatic_output;
my @trim_info = split(/ /, $line_of_interest);
my $input_read_pairs = $trim_info[3];
my $both_surviving = $trim_info[6];
my $both_surviving_percent = $trim_info[7];
my $forward_surviving = $trim_info[11];
my $forward_surviving_percent = $trim_info[12];
my $reverse_surviving = $trim_info[16];
my $reverse_surviving_percent = $trim_info[17];
my $dropped = $trim_info[19];
chomp( my $dropped_percent = $trim_info[20] );
print SUMMARY "$sample\t$input_read_pairs\t$both_surviving\t$both_surviving_percent\t",
"$forward_surviving\t$forward_surviving_percent\t$reverse_surviving\t",
"$reverse_surviving_percent\t$dropped\t$dropped_percent\n";
close SUMMARY or die "ERROR: Could not close $summary_file\n";
}
1;
and the configuration file (where to specify parameters):
#######################################
### GBS Pipeline Configuration File ###
#######################################
############ INSTRUCTIONS #############
# Fill out the following information which will be used for GBS analysis.
# Please ONLY make changes after an equals sign (=).
# Do not add any other text to the file, and do not remove any existing text.
# Do NOT include whitespace, instead use underscore: '_'
# Include the full pathname to files, directories and programs if they exist
# outside of the current working directory (do NOT use '~' for home directory)
#######################################
#######################################
##### REQUIRED FOR MULTIPLE STEPS #####
#######################################
# The generic population name (used in naming files during processing)
# Example: LC-01
POPULATION = Adzuki
# The filename of indices (aka barcodes) used for this GBS run.
# This file can be in one of 2 formats. Either:
# 1) A single column list of indices
# 2) 2 columns where the first column consists of sample names, and the second column
# of the sample's associated index
# Example: indices.txt
INDEX_FILE = /media/greg/Overflow/gbs3/GBSpipeline/indexshortcol.txt
# The location where output of processed reads are placed
# Example: /storage/lens_GBS/processed_reads
output_dir = /media/greg/Overflow/gbs3/GBSpipeline/processedreads
# The filename of the reference genome sequence
# Example: ../NC_008253.fna
REFERENCE = /media/greg/StorageD/gbs3/GBSpipeline/adzukireferencegenome.fasta
##################################################
### DEMULTIPLEX - STEP 1 SPECIFIC REQUIREMENTS ###
##################################################
# The rare-cutter restriction enzyme site. This is only necessary if the sequences of the
# indices include the RE site (to prevent the RE site from being clipped from the reads).
# If the RE site is not attached to your indices, leave this blank.
# Example: TGCA
RE_SITE = TGCA
# The filename of the 1st set of multiplexed read pairs
# Example: S001EA3_R1.fastq
R1_FILE = R1plate1.fastq
# The filename of the 2nd set of multiplexed read pairs
# Example: S001EA3_R2.fastq
R2_FILE = R2plate1.fastq
###########################################
### TRIM - STEP 2 SPECIFIC REQUIREMENTS ###
# The filename of the list of adaptor sequences to check for and remove
# Hint: If you do not have a list of adaptor sequences, Trimmomatic provides multiple lists
# of sequences in the adaptors folder in your Trimmomatic directory
# Example: trimfile.txt
TRIM_FILE = /media/greg/StorageD/gbs3/GBSpipeline/illuminaadaptersFR.fa
# The relative or full pathname to your compiled copy of Trimmomatic
# Note: This MUST include the executable .jar!
# Example: ../Tools/Trimmomatic-0.32/trimmomatic-0.32.jar
TRIMMOMATIC_PATH = /media/greg/StorageD/gbs3/GBSpipeline/Trimmomatic-0.36/trimmomatic-0.36.jar
### TRIMMOMATIC OPTIONS - Refer to Trimmomatic manual ###
### These are OPTIONAL. Trimmomatic will run the defaults specified if left blank ###
# The number of threads to use on an HPC cluster
# DEFAULT: 1
TRIM_THREADS =
# The maximum mismatch count to allow a full match
# DEFAULT: 2
SEED_MISMATCHES = 0
# Specifies accuracy needed for a match between two adapter ligated reads
# DEFAULT: 30
PALINDROME_CLIP_THRESHOLD =
# Specifies accuracy needed for a match between any adapter and read
# DEFAULT: 20
SIMPLE_CLIP_THRESHOLD =
# The number of bases to average across in sliding window trimming
# DEFAULT: 4
WINDOW_SIZE =
# The average quality required in a sliding window trimming procedure
# DEFAULT : 15
REQUIRED_QUALITY =
# Threshold for minimum quality of leading bases
# DEFAULT: 3
LEADING_QUALITY =
# Threshold for minimum quality of trailing bases
# DEFAULT: 3
TRAILING_QUALITY =
# The minimum length for trimmed reads to be kept
# DEFAULT: 36
MINLEN =
Help is appreciated. Thanks.