Nextflow script to check adapter content and run trimmomatic

104 views Asked by At

I am trying to write a nextflow script to check the adapter content and trim the sequences that contain adapter using Trimmomatic. Here's my nextflow script:

#!/usr/bin/env nextflow

inputFile = file(params.infile)

logParams(params, "nextflow_parameters.txt")

// Header log info
log.info ""
log.info "========================================="
log.info "Nextflow Version: $workflow.nextflow.version"
log.info "Command Line:     $workflow.commandLine"
log.info "========================================="
log.info ""

// Send FASTQ files to FASTQC, Trimmomatic, and STAR /////////////////////////////////////////

Channel.from(inputFile)
  .splitCsv(sep: '\t', header: true)
  .into {
    readInput_to_runFastQC
    readInput_to_runTrimmomatic
  }

process runFastQC {
  tag "Checking adapter content for ${sampleID}"
  publishDir "${params.output_dir}/Samples/${sampleID}/FastQC"

  module params.modules.fastqc

  input:
  path params.output_dir
  set indivID, sampleID, libraryID, rgID, platform_unit, platform,
    platform_model, run_date, center, R1, R2 \
    from readInput_to_runFastQC

  output:
  file("${sampleID}_001.R1_fastqc/summary.txt") into runFastQC_to_runTrimmomatic_R1
  file("${sampleID}_001.R2_fastqc/summary.txt") into runFastQC_to_runTrimmomatic_R2

  script:
  fastqcDir = "${params.output_dir}/Samples/${sampleID}/FastQC"
 
  """
  module list 

  mkdir -p ${fastqcDir}
  
  # Run FastQC to check adapter content
  fastqc -o ${fastqcDir} --extract "${R1}" "${R2}" 2> /dev/null
 
  """
}

process runTrimmomatic {
   tag "Trimming adapter for ${sampleID}"
   publishDir "${params.output_dir}/Output/Trimmomatic"
   
   module params.modules.java    
   
   // Define input parameters
   input:
   set indivID, sampleID, libraryID, rgID, platform_unit, platform,
   platform_model, run_date, center, R1, R2 \
   from readInput_to_runTrimmomatic 
   file("${sampleID}_001.R1_fastqc/summary.txt") from runFastQC_to_runTrimmomatic_R1
   file("${sampleID}_001.R2_fastqc/summary.txt") from runFastQC_to_runTrimmomatic_R2

   
    // Define output parameters
    output:
    file("${trimmed_R1}") into trimmed_R1 
    file("${trimmed_R2}") into trimmed_R2

    script:
    
    trimmed_R1="${params.output_dir}/Output/Trimmomatic/trimmed_fastq/${sampleID}_R1_trimmed.fastq"
    trimmed_R2="${params.output_dir}/Output/Trimmomatic/trimmed_fastq/${sampleID}_R2_trimmed.fastq"
   
    """
    module list
    
    mkdir -p ${params.output_dir}/Output/Trimmomatic 
    
    // Check if the last line contains "fail" or "warn" in either R1 or R2 summary
    def lastLineR1 = file("${params.output_dir}/Samples/${sampleID}/fastqc/*_fastqc/summary.txt").text.readLines().last()
    
    if (lastLineR1 ==~ /fail|warn/) {
        echo "Adapter content detected in ${sampleID}, running Trimmomatic"
        java -jar ${params.PATH.adapter_path}/trimmomatic-0.39.jar PE -threads 27 \
        ${R1} ${R2} \
        ${trimmed_R1} ${trimmed_R2} \
        ILLUMINACLIP:${params.PATH.adapter}:2:30:10:8:true \
        LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
    } else {
        echo "No adapter content detected in ${sampleID}, no need to run Trimmomatic"
        ln -s ${R1} ${trimmed_R1}
        ln -s ${R2} ${trimmed_R2}
    }
    """
}

workflow.onComplete {
  log.info ""
  log.info "========================================="
  log.info "Duration:       $workflow.duration"
  log.info "========================================="
}

// FUNCTIONS ///////////////////////////////////////////////////////////////////

// Read input file and save it into a list of lists //////////////////////////////
def logParams(p, n) {
  File file = new File(n)
  file.write "Parameter:\tValue\n"

  for (s in p) {
    file << "${s.key}:\t${s.value}\n"
  }
}

I keep getting this error:

Caused by:
  Missing output file(s) `${sampleID}_001.R1_fastqc/summary.txt` expected by process `runFastQC (Checking adapter content for ${sampleID)`

The fastqc was successfully run and the summary.txt exists in the designated directory. However, it says it is missing. Any thought or better approach? Thank you so much!

0

There are 0 answers