From 6a6b967e17cd8f12677d7aef6958eaaf7b27543c Mon Sep 17 00:00:00 2001
From: nick-youngblut
Date: Tue, 13 May 2025 14:05:52 -0700
Subject: [PATCH 1/2] Added chopper; gzip'd split fastq files

---
 .gitignore                        |  1 +
 CITATIONS.md                      |  4 +-
 README.md                         | 13 ++++-
 bin/cat_fastq.py                  | 81 +++++++++++++++++++++++++++++++
 conf/modules.config               |  8 +--
 docs/output.md                    |  8 +--
 modules/local/chopper.nf          | 50 +++++++++++++++++++
 modules/local/nanofilt.nf         | 13 +++--
 modules/local/split_file.nf       |  2 +-
 modules/local/split_seq.nf        | 65 +++++++++++++++++++++++++
 modules/nf-core/cat/fastq/main.nf | 38 +++++----------
 workflows/scnanoseq.nf            | 45 +++++++++--------
 12 files changed, 266 insertions(+), 62 deletions(-)
 create mode 100644 bin/cat_fastq.py
 create mode 100644 modules/local/chopper.nf
 create mode 100644 modules/local/split_seq.nf

diff --git a/.gitignore b/.gitignore
index b74c0fd..c845c55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ samplesheet.csv
 *.swp
 input*
 null/
+tmp/
\ No newline at end of file
diff --git a/CITATIONS.md b/CITATIONS.md
index c04b8d1..a574a3c 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -38,9 +38,9 @@

 > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics 2018 Aug 1; 34(15):2666-9 doi:10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794.

-- [Nanofilt](https://pubmed.ncbi.nlm.nih.gov/29547981/)
+- [Chopper](https://doi.org/10.1093/bioinformatics/btad311)

-  > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics 2018 Aug 1; 34(15):2666-9 doi:10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794.
+  > De Coster W, Rademakers R. NanoPack2: population-scale evaluation of long-read sequencing data. Bioinformatics 2023 May; 39(5):btad311. doi:10.1093/bioinformatics/btad311.

 - [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/)

diff --git a/README.md b/README.md
index a60dcdc..e4ce05a 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,17 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool

 On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scnanoseq/results).

+## Test
+
+Example command for a local test run (the `tmp/` paths are developer scratch inputs):
+
+```bash
+nextflow run main.nf \
+    -profile test,apptainer \
+    --input tmp/samples.csv \
+    --outdir tmp/outdir
+```
+
 ## Pipeline summary

 ![scnanoseq diagram](assets/scnanoseq_tube_map.png)

 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
 2. Unzip and split FASTQ ([`pigz`](https://github.com/madler/pigz))
    1. Optional: Split FASTQ for faster processing ([`split`](https://linux.die.net/man/1/split))
-3. Trim and filter reads ([`Nanofilt`](https://github.com/wdecoster/nanofilt))
+3. Trim and filter reads ([`Chopper`](https://github.com/wdecoster/chopper))
 4. Post trim QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
 5. Barcode detection using a custom whitelist or 10X whitelist. ([`BLAZE`](https://github.com/shimlab/BLAZE))
 6. Extract barcodes. Consists of the following steps:
diff --git a/bin/cat_fastq.py b/bin/cat_fastq.py
new file mode 100644
index 0000000..7e60fa2
--- /dev/null
+++ b/bin/cat_fastq.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import argparse
+import gzip
+import shutil
+import os
+import sys
+from pathlib import Path
+
+def cat_files(input_files: list[str], output_file: str) -> None:
+    """Concatenate input files into a single gzipped output.
+
+    Gzip members are concatenable, so valid gzip inputs are appended
+    byte-for-byte; plain-text inputs are compressed on the fly so that
+    the output is always a valid gzip file.
+    """
+    with open(output_file, 'wb') as f_out:
+        for f_path in input_files:
+            try:
+                # Probe the file so invalid gzip input is caught up front
+                with gzip.open(f_path, 'rb') as f_probe:
+                    f_probe.read(1)
+                # Valid gzip: append the raw bytes (gzip members concatenate)
+                with open(f_path, 'rb') as f_in:
+                    shutil.copyfileobj(f_in, f_out)
+            except gzip.BadGzipFile:
+                print(f"Warning: {f_path} is not a valid gzip file. Compressing it as plain text.")
+                try:
+                    with open(f_path, 'rb') as f_in_plain, gzip.GzipFile(fileobj=f_out, mode='wb') as gz_out:
+                        shutil.copyfileobj(f_in_plain, gz_out)
+                except Exception as e:
+                    print(f"Error processing file {f_path}: {e}")
+            except Exception as e:
+                print(f"Error processing file {f_path}: {e}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Concatenate FASTQ files.")
+    parser.add_argument("--prefix", type=str, required=True, help="Output file prefix.")
+    parser.add_argument("--single_end", action="store_true", help="Input files are single-end.")
+    parser.add_argument("--reads", nargs='+', required=True, help="List of input FASTQ files.")
+
+    args = parser.parse_args()
+
+    output_dir = Path(".")
+
+    read_paths = [Path(f) for f in args.reads]
+
+    if args.single_end:
+        output_file = output_dir / f"{args.prefix}.merged.fastq.gz"
+        if len(read_paths) == 1:
+            print(f"Symlinking {read_paths[0]} to {output_file}...")
+            os.symlink(read_paths[0], output_file)
+            print("Symlink complete.")
+        elif len(read_paths) > 1:
+            print(f"Concatenating {len(read_paths)} single-end files to {output_file}...")
+            cat_files([str(p) for p in read_paths], str(output_file))
+            print("Concatenation complete.")
+        else:
+            print("Warning: No input files provided for single-end processing.")
+    else:  # Paired-end
+        output_file_1 = output_dir / f"{args.prefix}_1.merged.fastq.gz"
+        output_file_2 = output_dir / f"{args.prefix}_2.merged.fastq.gz"
+
+        if len(read_paths) == 2:
+            print(f"Symlinking {read_paths[0]} to {output_file_1}...")
+            os.symlink(read_paths[0], output_file_1)
+            print("R1 symlink complete.")
+            print(f"Symlinking {read_paths[1]} to {output_file_2}...")
+            os.symlink(read_paths[1], output_file_2)
+            print("R2 symlink complete.")
+        elif len(read_paths) > 2:
+            if len(read_paths) % 2 != 0:
+                sys.exit("Error: Paired-end reads require an even number of files.")
+
+            # Inputs are assumed to be ordered R1, R2, R1, R2, ...
+            read1_paths = [str(read_paths[i]) for i in range(0, len(read_paths), 2)]
+            read2_paths = [str(read_paths[i]) for i in range(1, len(read_paths), 2)]
+
+            print(f"Concatenating {len(read1_paths)} R1 files to {output_file_1}...")
+            cat_files(read1_paths, str(output_file_1))
+            print("R1 concatenation complete.")
+
+            print(f"Concatenating {len(read2_paths)} R2 files to {output_file_2}...")
+            cat_files(read2_paths, str(output_file_2))
+            print("R2 concatenation complete.")
+        else:  # len(read_paths) < 2
+            print("Warning: Fewer than 2 input files provided for paired-end processing.")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 16d2016..b1b8e2d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -295,7 +295,7 @@ if (!params.skip_trimming) {

     if (params.split_amount > 0){
         process {
-            withName: '.*:SPLIT_FILE' {
+            withName: '.*:SPLIT_SEQ' {
                 publishDir = [
                     enabled: false
                 ]
@@ -312,13 +312,13 @@ if (!params.skip_trimming) {
         }
     }

-    // NANOFILT
+    // CHOPPER
     if ( !params.skip_trimming ){
         process {
-            withName:'.*:NANOFILT' {
+            withName:'.*:CHOPPER' {
                 ext.args = {
                     [
-                        params.min_length ? "--length ${params.min_length}" : "",
+                        params.min_length ? "--minlength ${params.min_length}" : "",
                         params.min_q_score ? "--quality ${params.min_q_score}" : ""
                     ].join(' ').trim()
                 }
diff --git a/docs/output.md b/docs/output.md
index 0f7ecb8..6822856 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -11,7 +11,7 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

 - [Preprocessing](#preprocessing)
-  - [Nanofilt](#nanofilt) - Read Quality Filtering and Trimming
+  - [Chopper](#chopper) - Read Quality Filtering and Trimming
 - [Barcode Calling](#barcode-calling)
   - [BLAZE](#blaze) - Barcode caller
 - [Alignment](#alignment)
@@ -39,19 +39,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

 ## Preprocessing

-### Nanofilt
+### Chopper
 Output files

 - `/`
   - `fastq/`
-    - `trimmed_nanofilt/`
+    - `trimmed_chopper/`
       - `*.filtered.fastq.gz`: The post-trim FASTQ. With default settings, reads are filtered primarily on quality.
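+
+As a quick sanity check on filtering (not a pipeline output; the file name below is a placeholder), each FASTQ record spans four lines, so the surviving read count is:
+
+```bash
+# Count reads remaining after Chopper filtering
+echo $(( $(zcat sample.filtered.fastq.gz | wc -l) / 4 ))
+```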
-[Nanofilt](https://github.com/wdecoster/nanofilt) is a tool used for filtering and trimming of long read sequencing data.
+[Chopper](https://github.com/wdecoster/chopper) is a tool used for filtering and trimming of long read sequencing data.

 ## Barcode Calling

diff --git a/modules/local/chopper.nf b/modules/local/chopper.nf
new file mode 100644
index 0000000..5febac2
--- /dev/null
+++ b/modules/local/chopper.nf
@@ -0,0 +1,50 @@
+process CHOPPER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::chopper=0.10.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/chopper:0.10.0--hcdda2d0_0':
+        'biocontainers/chopper:0.10.0--hcdda2d0_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.filtered.fastq.gz"), emit: reads
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    FILE_PREFIX=${prefix}
+    if [ ${params.split_amount} -gt 0 ]; then
+        IDX=\$(basename ${reads} | cut -f2 -d'.')
+        FILE_PREFIX=\${FILE_PREFIX}.\${IDX}
+    fi
+
+    chopper -t ${task.cpus} $args --input $reads | \\
+        gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        chopper: \$( chopper --version | sed -e "s/chopper //g" )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.filtered.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        chopper: \$( chopper --version | sed -e "s/chopper //g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/nanofilt.nf b/modules/local/nanofilt.nf
index 70f0175..af34bad 100644
--- a/modules/local/nanofilt.nf
+++ b/modules/local/nanofilt.nf
@@ -11,7 +11,7 @@ process NANOFILT {
     tuple val(meta), path(reads)

     output:
-    tuple val(meta), path("*.filtered.fastq")   , emit: reads
+    tuple val(meta), path("*.filtered.fastq.gz"), emit: reads
     path "versions.yml"                         , emit: versions

     when:
@@ -27,7 +27,13 @@ process NANOFILT {
         IDX=\$(basename ${reads} | cut -f2 -d'.')
         FILE_PREFIX=\${FILE_PREFIX}.\${IDX}
     fi
-    cat $reads | NanoFilt $args > \${FILE_PREFIX}.filtered.fastq
+
+    # If the input is gzipped, stream-decompress it first
+    if [[ "${reads}" == *.gz ]]; then
+        gunzip -c $reads | NanoFilt $args | gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+    else
+        cat $reads | NanoFilt $args | gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+    fi

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -36,8 +42,9 @@ process NANOFILT {
     """

     stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.filtered.fastq
+    touch ${prefix}.filtered.fastq.gz

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/split_file.nf b/modules/local/split_file.nf
index ee0748b..b8e8a76 100644
--- a/modules/local/split_file.nf
+++ b/modules/local/split_file.nf
@@ -41,4 +41,4 @@ process SPLIT_FILE {
             split: \$(echo \$(split --version 2>&1 | head -n1 | sed 's#split (GNU coreutils) ##g'))
     END_VERSIONS
     """
-}
+}
\ No newline at end of file
diff --git a/modules/local/split_seq.nf b/modules/local/split_seq.nf
new file mode 100644
index 0000000..c9ff6b0
--- /dev/null
+++ b/modules/local/split_seq.nf
@@ -0,0 +1,65 @@
+process SPLIT_SEQ {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::seqkit=2.10.0 conda-forge::sed=4.7"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqkit:2.10.0--h9ee0642_0' :
+        'quay.io/biocontainers/seqkit:2.10.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(unsplit_file)
+    val file_ext
+    val split_amount
+
+    output:
+    tuple val(meta), path("output/*$file_ext"), emit: split_files
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Split the file by number of reads
+    seqkit -j ${task.cpus} split2 ${args} \\
+        -s ${split_amount} --out-dir output --force ${unsplit_file}
+
+    # Rename the split files to have the correct extension
+    for file in ./output/*.part_*; do
+        if [[ -f "\$file" ]]; then
+            base_name=\$(basename "\$file")
+            # Remove .gz suffix if present
+            if [[ "\$base_name" == *.gz ]]; then
+                base_name_no_gz="\${base_name%.gz}"
+            else
+                base_name_no_gz="\$base_name"
+            fi
+            # Remove the remaining extension (after the last dot)
+            base_name_final="\${base_name_no_gz%.*}"
+            # Remove everything up to and including ".part_"
+            base_name_final="\${base_name_final#*.part_}"
+            # Rename the file to <prefix>.<part number><file_ext>
+            mv "\$file" "output/${prefix}.\${base_name_final}${file_ext}"
+        fi
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(echo \$(seqkit version | head -n1 | sed 's/seqkit version //g'))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.part_001${file_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(echo \$(seqkit version | head -n1 | sed 's/seqkit version //g'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index f132b2a..d93005e 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -20,34 +20,20 @@ process CAT_FASTQ {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
-    if (meta.single_end) {
-        if (readList.size >= 1) {
-            """
-            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+    def read_list_str = reads instanceof List ? reads.join(' ') : reads.toString()
+    def single_end_flag = meta.single_end ? '--single_end' : ''

-            cat <<-END_VERSIONS > versions.yml
-            "${task.process}":
-                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
-            END_VERSIONS
-            """
-        }
-    } else {
-        if (readList.size >= 2) {
-            def read1 = []
-            def read2 = []
-            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
-            """
-            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
-            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+    """
+    cat_fastq.py \\
+        --prefix ${prefix} \\
+        ${single_end_flag} \\
+        --reads ${read_list_str}

-            cat <<-END_VERSIONS > versions.yml
-            "${task.process}":
-                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
-            END_VERSIONS
-            """
-        }
-    }
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //')
+    END_VERSIONS
+    """

     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/workflows/scnanoseq.nf b/workflows/scnanoseq.nf
index 6b793c0..b8f3342 100644
--- a/workflows/scnanoseq.nf
+++ b/workflows/scnanoseq.nf
@@ -63,9 +63,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? f
 // MODULE: Loaded from modules/local/
 //
-include { NANOFILT } from "../modules/local/nanofilt"
-include { SPLIT_FILE } from "../modules/local/split_file"
-include { SPLIT_FILE as SPLIT_FILE_BC_FASTQ } from "../modules/local/split_file"
+include { CHOPPER } from "../modules/local/chopper"
+include { SPLIT_SEQ } from "../modules/local/split_seq"
+include { SPLIT_SEQ as SPLIT_SEQ_BC_FASTQ } from "../modules/local/split_seq"
 include { SPLIT_FILE as SPLIT_FILE_BC_CSV } from "../modules/local/split_file"
 include { BLAZE } from "../modules/local/blaze"
 include { PREEXTRACT_FASTQ } from "../modules/local/preextract_fastq.nf"
@@ -154,7 +154,7 @@ workflow SCNANOSEQ {
     //
     // SUBWORKFLOW: Fastq QC with Nanoplot, ToulligQC and FastQC - pre-trim QC
     //
-
+
     ch_fastqc_multiqc_pretrim = Channel.empty()
     if (!params.skip_qc){
@@ -229,9 +229,9 @@ workflow SCNANOSEQ {
     //
     // MODULE: Unzip fastq
     //
-    GUNZIP_FASTQ( ch_cat_fastq )
-    ch_unzipped_fastqs = GUNZIP_FASTQ.out.file
-    ch_versions = ch_versions.mix( GUNZIP_FASTQ.out.versions )
+    //GUNZIP_FASTQ( ch_cat_fastq )
+    //ch_unzipped_fastqs = GUNZIP_FASTQ.out.file
+    //ch_versions = ch_versions.mix( GUNZIP_FASTQ.out.versions )

     //
     // MODULE: Trim and filter reads
@@ -243,40 +243,39 @@ workflow SCNANOSEQ {
     //
     // MODULE: Split fastq
     //
-    ch_fastqs = ch_unzipped_fastqs
+    //ch_fastqs = ch_unzipped_fastqs
     if (params.split_amount > 0) {
-        SPLIT_FILE( ch_unzipped_fastqs, '.fastq', params.split_amount )
+        SPLIT_SEQ( ch_cat_fastq, '.fastq.gz', params.split_amount )

         // Temporarily change the meta object so that the id is present on the
         // fastq to prevent duplicated names
-        SPLIT_FILE.out.split_files
+        SPLIT_SEQ.out.split_files
             .transpose()
             .set { ch_fastqs }

-        ch_versions = ch_versions.mix(SPLIT_FILE.out.versions)
+        ch_versions = ch_versions.mix(SPLIT_SEQ.out.versions)
     }

     ch_trimmed_reads = ch_fastqs
     if (!params.skip_trimming) {
-
-        NANOFILT ( ch_fastqs )
-        ch_trimmed_reads = NANOFILT.out.reads
-        ch_versions = ch_versions.mix(NANOFILT.out.versions)
+        CHOPPER ( ch_fastqs )
+        ch_trimmed_reads = CHOPPER.out.reads
+        ch_versions = ch_versions.mix(CHOPPER.out.versions)
     }

     // If the fastqs were split, combine them together
-    ch_trimmed_reads_combined = ch_trimmed_reads
     if (params.split_amount > 0){
         CAT_CAT(ch_trimmed_reads.groupTuple())
         ch_trimmed_reads_combined = CAT_CAT.out.file_out
+    } else {
+        ch_trimmed_reads_combined = ch_trimmed_reads
     }

     //
     // SUBWORKFLOW: Fastq QC with Nanoplot and FastQC - post-trim QC
     //
     if (!params.skip_qc){
-
         //
         // MODULE: Run qc on the post trimmed reads
         //
@@ -289,9 +288,13 @@ workflow SCNANOSEQ {
             ch_versions = ch_versions.mix(FASTQC_NANOPLOT_POST_TRIM.out.fastqc_version.first().ifEmpty(null))
         }
     } else {
-        ch_trimmed_reads_combined = ch_unzipped_fastqs
+        ch_trimmed_reads_combined = ch_cat_fastq
     }

     //
     // MODULE: Unzip whitelist
     //
@@ -327,13 +330,13 @@ workflow SCNANOSEQ {
     ch_split_bc_fastqs = ch_trimmed_reads_combined
     ch_split_bc = ch_putative_bc
     if (params.split_amount > 0) {
-        SPLIT_FILE_BC_FASTQ( ch_trimmed_reads_combined, '.fastq', params.split_amount )
+        SPLIT_SEQ_BC_FASTQ( ch_trimmed_reads_combined, '.fastq.gz', params.split_amount )

-        SPLIT_FILE_BC_FASTQ.out.split_files
+        SPLIT_SEQ_BC_FASTQ.out.split_files
             .transpose()
             .set { ch_split_bc_fastqs }

-        ch_versions = ch_versions.mix(SPLIT_FILE_BC_FASTQ.out.versions)
+        ch_versions = ch_versions.mix(SPLIT_SEQ_BC_FASTQ.out.versions)

         SPLIT_FILE_BC_CSV ( ch_putative_bc, '.csv', (params.split_amount / 4) )
         SPLIT_FILE_BC_CSV.out.split_files
From bfe3388e82f11bd33c108d8ee5a20a779ccffd4e Mon Sep 17 00:00:00 2001
From: nick-youngblut
Date: Tue, 13 May 2025 14:31:01 -0700
Subject: [PATCH 2/2] fixed '--split_amount 0' bug for CHOPPER

---
 .nf-core.yml           | 3 ---
 workflows/scnanoseq.nf | 8 ++++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.nf-core.yml b/.nf-core.yml
index 6db149f..885eee5 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -6,9 +6,6 @@ lint:
     - docs/images/nf-core-scnanoseq_logo_dark.png
     - docs/images/nf-core-scnanoseq_logo_light.png
     - .gitignore
-  pipeline_todos:
-    - README.md
-    - main.nf
   template_strings: false
 nf_core_version: 3.0.2
 org_path: null
diff --git a/workflows/scnanoseq.nf b/workflows/scnanoseq.nf
index b8f3342..7f6408d 100644
--- a/workflows/scnanoseq.nf
+++ b/workflows/scnanoseq.nf
@@ -141,6 +141,7 @@ workflow SCNANOSEQ {
         }
         .set { ch_fastqs }

+
     //
     // MODULE: Combine fastqs from the same sample
     //
@@ -255,10 +256,13 @@ workflow SCNANOSEQ {
             .set { ch_fastqs }

         ch_versions = ch_versions.mix(SPLIT_SEQ.out.versions)
+    } else {
+        ch_fastqs = ch_cat_fastq
     }

-    ch_trimmed_reads = ch_fastqs
-    if (!params.skip_trimming) {
+    if (params.skip_trimming) {
+        ch_trimmed_reads = ch_fastqs
+    } else {
         CHOPPER ( ch_fastqs )
         ch_trimmed_reads = CHOPPER.out.reads
         ch_versions = ch_versions.mix(CHOPPER.out.versions)
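A note on the concatenation strategy in `cat_fastq.py` (patch 1): the gzip format permits multiple members in one stream, so appending gzipped FASTQ files byte-for-byte yields a valid archive, which is what the plain `cat` in the replaced nf-core module relied on. A minimal sketch with hypothetical file names:

```bash
# Two gzip files appended back-to-back form a valid multi-member archive;
# zcat (and Python's gzip module) read through all members transparently.
cat a.fastq.gz b.fastq.gz > merged.fastq.gz
zcat merged.fastq.gz | wc -l
```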