I am trying to write a nextflow script to check the adapter content and trim the sequences that contain adapter using Trimmomatic. Here's my nextflow script:
#!/usr/bin/env nextflow
inputFile = file(params.infile)
logParams(params, "nextflow_parameters.txt")
// Header log info
log.info ""
log.info "========================================="
log.info "Nextflow Version: $workflow.nextflow.version"
log.info "Command Line: $workflow.commandLine"
log.info "========================================="
log.info ""
// Send FASTQ files to FASTQC, Trimmomatic, and STAR /////////////////////////////////////////
Channel.from(inputFile)
.splitCsv(sep: '\t', header: true)
.into {
readInput_to_runFastQC
readInput_to_runTrimmomatic
}
process runFastQC {
tag "Checking adapter content for ${sampleID}"
publishDir "${params.output_dir}/Samples/${sampleID}/FastQC"
module params.modules.fastqc
input:
path params.output_dir
set indivID, sampleID, libraryID, rgID, platform_unit, platform,
platform_model, run_date, center, R1, R2 \
from readInput_to_runFastQC
output:
file("${sampleID}_001.R1_fastqc/summary.txt") into runFastQC_to_runTrimmomatic_R1
file("${sampleID}_001.R2_fastqc/summary.txt") into runFastQC_to_runTrimmomatic_R2
script:
fastqcDir = "${params.output_dir}/Samples/${sampleID}/FastQC"
"""
module list
mkdir -p ${fastqcDir}
# Run FastQC to check adapter content
fastqc -o ${fastqcDir} --extract "${R1}" "${R2}" 2> /dev/null
"""
}
process runTrimmomatic {
tag "Trimming adapter for ${sampleID}"
publishDir "${params.output_dir}/Output/Trimmomatic"
module params.modules.java
// Define input parameters
input:
set indivID, sampleID, libraryID, rgID, platform_unit, platform,
platform_model, run_date, center, R1, R2 \
from readInput_to_runTrimmomatic
file("${sampleID}_001.R1_fastqc/summary.txt") from runFastQC_to_runTrimmomatic_R1
file("${sampleID}_001.R2_fastqc/summary.txt") from runFastQC_to_runTrimmomatic_R2
// Define output parameters
output:
file("${trimmed_R1}") into trimmed_R1
file("${trimmed_R2}") into trimmed_R2
script:
trimmed_R1="${params.output_dir}/Output/Trimmomatic/trimmed_fastq/${sampleID}_R1_trimmed.fastq"
trimmed_R2="${params.output_dir}/Output/Trimmomatic/trimmed_fastq/${sampleID}_R2_trimmed.fastq"
"""
module list
mkdir -p ${params.output_dir}/Output/Trimmomatic
// Check if the last line contains "fail" or "warn" in either R1 or R2 summary
def lastLineR1 = file("${params.output_dir}/Samples/${sampleID}/fastqc/*_fastqc/summary.txt").text.readLines().last()
if (lastLineR1 ==~ /fail|warn/) {
echo "Adapter content detected in ${sampleID}, running Trimmomatic"
java -jar ${params.PATH.adapter_path}/trimmomatic-0.39.jar PE -threads 27 \
${R1} ${R2} \
${trimmed_R1} ${trimmed_R2} \
ILLUMINACLIP:${params.PATH.adapter}:2:30:10:8:true \
LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
} else {
echo "No adapter content detected in ${sampleID}, no need to run Trimmomatic"
ln -s ${R1} ${trimmed_R1}
ln -s ${R2} ${trimmed_R2}
}
"""
}
workflow.onComplete {
log.info ""
log.info "========================================="
log.info "Duration: $workflow.duration"
log.info "========================================="
}
// FUNCTIONS ///////////////////////////////////////////////////////////////////
// Read input file and save it into a list of lists //////////////////////////////
def logParams(p, n) {
File file = new File(n)
file.write "Parameter:\tValue\n"
for (s in p) {
file << "${s.key}:\t${s.value}\n"
}
}
I keep getting this error:
Caused by:
Missing output file(s) `${sampleID}_001.R1_fastqc/summary.txt` expected by process `runFastQC (Checking adapter content for ${sampleID)`
The fastqc was successfully run and the summary.txt exists in the designated directory. However, it says it is missing. Any thought or better approach? Thank you so much!