From 6a6b967e17cd8f12677d7aef6958eaaf7b27543c Mon Sep 17 00:00:00 2001
From: nick-youngblut
Date: Tue, 13 May 2025 14:05:52 -0700
Subject: [PATCH 1/2] Added chopper; gzip'd split fastq files

---
 .gitignore                        |  1 +
 CITATIONS.md                      |  4 +-
 README.md                         | 13 ++++-
 bin/cat_fastq.py                  | 81 +++++++++++++++++++++++++++++++
 conf/modules.config               |  8 +--
 docs/output.md                    |  8 +--
 modules/local/chopper.nf          | 50 +++++++++++++++++++
 modules/local/nanofilt.nf         | 13 +++--
 modules/local/split_file.nf       |  2 +-
 modules/local/split_seq.nf        | 65 +++++++++++++++++++++++++
 modules/nf-core/cat/fastq/main.nf | 38 +++++----------
 workflows/scnanoseq.nf            | 45 +++++++++--------
 12 files changed, 266 insertions(+), 62 deletions(-)
 create mode 100644 bin/cat_fastq.py
 create mode 100644 modules/local/chopper.nf
 create mode 100644 modules/local/split_seq.nf

diff --git a/.gitignore b/.gitignore
index b74c0fd..c845c55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ samplesheet.csv
 *.swp
 input*
 null/
+tmp/
\ No newline at end of file
diff --git a/CITATIONS.md b/CITATIONS.md
index c04b8d1..a574a3c 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -38,9 +38,9 @@

 > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics 2018 Aug 1; 34(15):2666-9 doi:10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794.

-- [Nanofilt](https://pubmed.ncbi.nlm.nih.gov/29547981/)
+- [Chopper](https://doi.org/10.1093/bioinformatics/btad311)

-  > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics 2018 Aug 1; 34(15):2666-9 doi:10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794.
+  > De Coster W, Rademakers R. NanoPack2: population-scale evaluation of long-read sequencing data. Bioinformatics 2023 May; 39(5):btad311. doi:10.1093/bioinformatics/btad311.

 - [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/)

diff --git a/README.md b/README.md
index a60dcdc..e4ce05a 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,17 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool

 On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scnanoseq/results).

+## Test
+
+Example command for a local test run (the `tmp/` paths are developer scratch inputs):
+
+```bash
+nextflow run main.nf \
+    -profile test,apptainer \
+    --input tmp/samples.csv \
+    --outdir tmp/outdir
+```
+
 ## Pipeline summary

 ![scnanoseq diagram](assets/scnanoseq_tube_map.png)

 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
 2. Unzip and split FASTQ ([`pigz`](https://github.com/madler/pigz))
    1. Optional: Split FASTQ for faster processing ([`split`](https://linux.die.net/man/1/split))
-3. Trim and filter reads ([`Nanofilt`](https://github.com/wdecoster/nanofilt))
+3. Trim and filter reads ([`Chopper`](https://github.com/wdecoster/chopper))
 4. Post trim QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
 5. Barcode detection using a custom whitelist or 10X whitelist. ([`BLAZE`](https://github.com/shimlab/BLAZE))
 6. Extract barcodes. Consists of the following steps:
diff --git a/bin/cat_fastq.py b/bin/cat_fastq.py
new file mode 100644
index 0000000..7e60fa2
--- /dev/null
+++ b/bin/cat_fastq.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import argparse
+import gzip
+import shutil
+import os
+import sys
+from pathlib import Path
+
+def cat_files(input_files: list[str], output_file: str) -> None:
+    """Concatenate input files into a single gzipped output.
+
+    Gzip members are concatenable, so valid gzip inputs are appended
+    byte-for-byte; plain-text inputs are compressed on the fly so that
+    the output is always a valid gzip file.
+    """
+    with open(output_file, 'wb') as f_out:
+        for f_path in input_files:
+            try:
+                # Probe the file so invalid gzip input is caught up front
+                with gzip.open(f_path, 'rb') as f_probe:
+                    f_probe.read(1)
+                # Valid gzip: append the raw bytes (gzip members concatenate)
+                with open(f_path, 'rb') as f_in:
+                    shutil.copyfileobj(f_in, f_out)
+            except gzip.BadGzipFile:
+                print(f"Warning: {f_path} is not a valid gzip file. Compressing it as plain text.")
+                try:
+                    with open(f_path, 'rb') as f_in_plain, gzip.GzipFile(fileobj=f_out, mode='wb') as gz_out:
+                        shutil.copyfileobj(f_in_plain, gz_out)
+                except Exception as e:
+                    print(f"Error processing file {f_path}: {e}")
+            except Exception as e:
+                print(f"Error processing file {f_path}: {e}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Concatenate FASTQ files.")
+    parser.add_argument("--prefix", type=str, required=True, help="Output file prefix.")
+    parser.add_argument("--single_end", action="store_true", help="Input files are single-end.")
+    parser.add_argument("--reads", nargs='+', required=True, help="List of input FASTQ files.")
+
+    args = parser.parse_args()
+
+    output_dir = Path(".")
+
+    read_paths = [Path(f) for f in args.reads]
+
+    if args.single_end:
+        output_file = output_dir / f"{args.prefix}.merged.fastq.gz"
+        if len(read_paths) == 1:
+            print(f"Symlinking {read_paths[0]} to {output_file}...")
+            os.symlink(read_paths[0], output_file)
+            print("Symlink complete.")
+        elif len(read_paths) > 1:
+            print(f"Concatenating {len(read_paths)} single-end files to {output_file}...")
+            cat_files([str(p) for p in read_paths], str(output_file))
+            print("Concatenation complete.")
+        else:
+            print("Warning: No input files provided for single-end processing.")
+    else:  # Paired-end
+        output_file_1 = output_dir / f"{args.prefix}_1.merged.fastq.gz"
+        output_file_2 = output_dir / f"{args.prefix}_2.merged.fastq.gz"
+
+        if len(read_paths) == 2:
+            print(f"Symlinking {read_paths[0]} to {output_file_1}...")
+            os.symlink(read_paths[0], output_file_1)
+            print("R1 symlink complete.")
+            print(f"Symlinking {read_paths[1]} to {output_file_2}...")
+            os.symlink(read_paths[1], output_file_2)
+            print("R2 symlink complete.")
+        elif len(read_paths) > 2:
+            if len(read_paths) % 2 != 0:
+                sys.exit("Error: Paired-end reads require an even number of files.")
+
+            # Inputs are assumed to be ordered R1, R2, R1, R2, ...
+            read1_paths = [str(read_paths[i]) for i in range(0, len(read_paths), 2)]
+            read2_paths = [str(read_paths[i]) for i in range(1, len(read_paths), 2)]
+
+            print(f"Concatenating {len(read1_paths)} R1 files to {output_file_1}...")
+            cat_files(read1_paths, str(output_file_1))
+            print("R1 concatenation complete.")
+
+            print(f"Concatenating {len(read2_paths)} R2 files to {output_file_2}...")
+            cat_files(read2_paths, str(output_file_2))
+            print("R2 concatenation complete.")
+        else:  # len(read_paths) < 2
+            print("Warning: Fewer than 2 input files provided for paired-end processing.")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 16d2016..b1b8e2d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -295,7 +295,7 @@ if (!params.skip_trimming) {

     if (params.split_amount > 0){
         process {
-            withName: '.*:SPLIT_FILE' {
+            withName: '.*:SPLIT_SEQ' {
                 publishDir = [
                     enabled: false
                 ]
@@ -312,13 +312,13 @@ if (!params.skip_trimming) {
         }
     }

-    // NANOFILT
+    // CHOPPER
     if ( !params.skip_trimming ){
         process {
-            withName:'.*:NANOFILT' {
+            withName:'.*:CHOPPER' {
                 ext.args = {
                     [
-                        params.min_length ? "--length ${params.min_length}" : "",
+                        params.min_length ? "--minlength ${params.min_length}" : "",
                         params.min_q_score ? "--quality ${params.min_q_score}" : ""
                     ].join(' ').trim()
                 }
diff --git a/docs/output.md b/docs/output.md
index 0f7ecb8..6822856 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -11,7 +11,7 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

 - [Preprocessing](#preprocessing)
-  - [Nanofilt](#nanofilt) - Read Quality Filtering and Trimming
+  - [Chopper](#chopper) - Read Quality Filtering and Trimming
 - [Barcode Calling](#barcode-calling)
   - [BLAZE](#blaze) - Barcode caller
 - [Alignment](#alignment)
@@ -39,19 +39,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

 ## Preprocessing

-### Nanofilt
+### Chopper
 Output files

 - `/`
   - `fastq/`
-    - `trimmed_nanofilt/`
+    - `trimmed_chopper/`
       - `*.filtered.fastq.gz`: The post-trim FASTQ. With default settings, reads are filtered primarily on quality.
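+
+As a quick sanity check on filtering (not a pipeline output; the file name below is a placeholder), each FASTQ record spans four lines, so the surviving read count is:
+
+```bash
+# Count reads remaining after Chopper filtering
+echo $(( $(zcat sample.filtered.fastq.gz | wc -l) / 4 ))
+```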
-[Nanofilt](https://github.com/wdecoster/nanofilt) is a tool used for filtering and trimming of long read sequencing data.
+[Chopper](https://github.com/wdecoster/chopper) is a tool used for filtering and trimming of long read sequencing data.

 ## Barcode Calling

diff --git a/modules/local/chopper.nf b/modules/local/chopper.nf
new file mode 100644
index 0000000..5febac2
--- /dev/null
+++ b/modules/local/chopper.nf
@@ -0,0 +1,50 @@
+process CHOPPER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::chopper=0.10.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/chopper:0.10.0--hcdda2d0_0':
+        'biocontainers/chopper:0.10.0--hcdda2d0_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.filtered.fastq.gz"), emit: reads
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    FILE_PREFIX=${prefix}
+    if [ ${params.split_amount} -gt 0 ]; then
+        IDX=\$(basename ${reads} | cut -f2 -d'.')
+        FILE_PREFIX=\${FILE_PREFIX}.\${IDX}
+    fi
+
+    chopper -t ${task.cpus} $args --input $reads | \\
+        gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        chopper: \$( chopper --version | sed -e "s/chopper //g" )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.filtered.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        chopper: \$( chopper --version | sed -e "s/chopper //g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/nanofilt.nf b/modules/local/nanofilt.nf
index 70f0175..af34bad 100644
--- a/modules/local/nanofilt.nf
+++ b/modules/local/nanofilt.nf
@@ -11,7 +11,7 @@ process NANOFILT {
     tuple val(meta), path(reads)

     output:
-    tuple val(meta), path("*.filtered.fastq")   , emit: reads
+    tuple val(meta), path("*.filtered.fastq.gz"), emit: reads
     path "versions.yml"                         , emit: versions

     when:
@@ -27,7 +27,13 @@ process NANOFILT {
         IDX=\$(basename ${reads} | cut -f2 -d'.')
         FILE_PREFIX=\${FILE_PREFIX}.\${IDX}
     fi
-    cat $reads | NanoFilt $args > \${FILE_PREFIX}.filtered.fastq
+
+    # If the input is gzipped, stream-decompress it first
+    if [[ "${reads}" == *.gz ]]; then
+        gunzip -c $reads | NanoFilt $args | gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+    else
+        cat $reads | NanoFilt $args | gzip -c > \${FILE_PREFIX}.filtered.fastq.gz
+    fi

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -36,8 +42,9 @@ process NANOFILT {
     """

     stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.filtered.fastq
+    touch ${prefix}.filtered.fastq.gz

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/split_file.nf b/modules/local/split_file.nf
index ee0748b..b8e8a76 100644
--- a/modules/local/split_file.nf
+++ b/modules/local/split_file.nf
@@ -41,4 +41,4 @@ process SPLIT_FILE {
             split: \$(echo \$(split --version 2>&1 | head -n1 | sed 's#split (GNU coreutils) ##g'))
     END_VERSIONS
     """
-}
+}
\ No newline at end of file
diff --git a/modules/local/split_seq.nf b/modules/local/split_seq.nf
new file mode 100644
index 0000000..c9ff6b0
--- /dev/null
+++ b/modules/local/split_seq.nf
@@ -0,0 +1,65 @@
+process SPLIT_SEQ {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::seqkit=2.10.0 conda-forge::sed=4.7"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqkit:2.10.0--h9ee0642_0' :
+        'quay.io/biocontainers/seqkit:2.10.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(unsplit_file)
+    val file_ext
+    val split_amount
+
+    output:
+    tuple val(meta), path("output/*$file_ext"), emit: split_files
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Split the file by number of reads
+    seqkit -j ${task.cpus} split2 ${args} \\
+        -s ${split_amount} --out-dir output --force ${unsplit_file}
+
+    # Rename the split files to have the correct extension
+    for file in ./output/*.part_*; do
+        if [[ -f "\$file" ]]; then
+            base_name=\$(basename "\$file")
+            # Remove .gz suffix if present
+            if [[ "\$base_name" == *.gz ]]; then
+                base_name_no_gz="\${base_name%.gz}"
+            else
+                base_name_no_gz="\$base_name"
+            fi
+            # Remove the remaining extension (after the last dot)
+            base_name_final="\${base_name_no_gz%.*}"
+            # Remove everything up to and including ".part_"
+            base_name_final="\${base_name_final#*.part_}"
+            # Rename the file to <prefix>.<part number><file_ext>
+            mv "\$file" "output/${prefix}.\${base_name_final}${file_ext}"
+        fi
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(echo \$(seqkit version | head -n1 | sed 's/seqkit version //g'))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.part_001${file_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(echo \$(seqkit version | head -n1 | sed 's/seqkit version //g'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index f132b2a..d93005e 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -20,34 +20,20 @@ process CAT_FASTQ {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
-    if (meta.single_end) {
-        if (readList.size >= 1) {
-            """
-            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+    def read_list_str = reads instanceof List ? reads.join(' ') : reads.toString()
+    def single_end_flag = meta.single_end ? '--single_end' : ''

-            cat <<-END_VERSIONS > versions.yml
-            "${task.process}":
-                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
-            END_VERSIONS
-            """
-        }
-    } else {
-        if (readList.size >= 2) {
-            def read1 = []
-            def read2 = []
-            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
-            """
-            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
-            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+    """
+    cat_fastq.py \\
+        --prefix ${prefix} \\
+        ${single_end_flag} \\
+        --reads ${read_list_str}

-            cat <<-END_VERSIONS > versions.yml
-            "${task.process}":
-                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
-            END_VERSIONS
-            """
-        }
-    }
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //')
+    END_VERSIONS
+    """

     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/workflows/scnanoseq.nf b/workflows/scnanoseq.nf
index 6b793c0..b8f3342 100644
--- a/workflows/scnanoseq.nf
+++ b/workflows/scnanoseq.nf
@@ -63,9 +63,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? f
 // MODULE: Loaded from modules/local/
 //
-include { NANOFILT } from "../modules/local/nanofilt"
-include { SPLIT_FILE } from "../modules/local/split_file"
-include { SPLIT_FILE as SPLIT_FILE_BC_FASTQ } from "../modules/local/split_file"
+include { CHOPPER } from "../modules/local/chopper"
+include { SPLIT_SEQ } from "../modules/local/split_seq"
+include { SPLIT_SEQ as SPLIT_SEQ_BC_FASTQ } from "../modules/local/split_seq"
 include { SPLIT_FILE as SPLIT_FILE_BC_CSV } from "../modules/local/split_file"
 include { BLAZE } from "../modules/local/blaze"
 include { PREEXTRACT_FASTQ } from "../modules/local/preextract_fastq.nf"
@@ -154,7 +154,7 @@ workflow SCNANOSEQ {
     //
     // SUBWORKFLOW: Fastq QC with Nanoplot, ToulligQC and FastQC - pre-trim QC
     //
-
+
     ch_fastqc_multiqc_pretrim = Channel.empty()
     if (!params.skip_qc){
@@ -229,9 +229,9 @@ workflow SCNANOSEQ {
     //
     // MODULE: Unzip fastq
     //
-    GUNZIP_FASTQ( ch_cat_fastq )
-    ch_unzipped_fastqs = GUNZIP_FASTQ.out.file
-    ch_versions = ch_versions.mix( GUNZIP_FASTQ.out.versions )
+    //GUNZIP_FASTQ( ch_cat_fastq )
+    //ch_unzipped_fastqs = GUNZIP_FASTQ.out.file
+    //ch_versions = ch_versions.mix( GUNZIP_FASTQ.out.versions )

     //
     // MODULE: Trim and filter reads
@@ -243,40 +243,39 @@ workflow SCNANOSEQ {
     //
     // MODULE: Split fastq
     //
-    ch_fastqs = ch_unzipped_fastqs
+    //ch_fastqs = ch_unzipped_fastqs
     if (params.split_amount > 0) {
-        SPLIT_FILE( ch_unzipped_fastqs, '.fastq', params.split_amount )
+        SPLIT_SEQ( ch_cat_fastq, '.fastq.gz', params.split_amount )

         // Temporarily change the meta object so that the id is present on the
         // fastq to prevent duplicated names
-        SPLIT_FILE.out.split_files
+        SPLIT_SEQ.out.split_files
             .transpose()
             .set { ch_fastqs }

-        ch_versions = ch_versions.mix(SPLIT_FILE.out.versions)
+        ch_versions = ch_versions.mix(SPLIT_SEQ.out.versions)
     }

     ch_trimmed_reads = ch_fastqs
     if (!params.skip_trimming) {
-
-        NANOFILT ( ch_fastqs )
-        ch_trimmed_reads = NANOFILT.out.reads
-        ch_versions = ch_versions.mix(NANOFILT.out.versions)
+        CHOPPER ( ch_fastqs )
+        ch_trimmed_reads = CHOPPER.out.reads
+        ch_versions = ch_versions.mix(CHOPPER.out.versions)
     }

     // If the fastqs were split, combine them together
-    ch_trimmed_reads_combined = ch_trimmed_reads
     if (params.split_amount > 0){
         CAT_CAT(ch_trimmed_reads.groupTuple())
         ch_trimmed_reads_combined = CAT_CAT.out.file_out
+    } else {
+        ch_trimmed_reads_combined = ch_trimmed_reads
     }

     //
     // SUBWORKFLOW: Fastq QC with Nanoplot and FastQC - post-trim QC
     //
     if (!params.skip_qc){
-
         //
         // MODULE: Run qc on the post trimmed reads
         //
@@ -289,9 +288,13 @@ workflow SCNANOSEQ {
             ch_versions = ch_versions.mix(FASTQC_NANOPLOT_POST_TRIM.out.fastqc_version.first().ifEmpty(null))
         }
     } else {
-        ch_trimmed_reads_combined = ch_unzipped_fastqs
+        ch_trimmed_reads_combined = ch_cat_fastq
     }

     //
     // MODULE: Unzip whitelist
     //
@@ -327,13 +330,13 @@ workflow SCNANOSEQ {
     ch_split_bc_fastqs = ch_trimmed_reads_combined
     ch_split_bc = ch_putative_bc
     if (params.split_amount > 0) {
-        SPLIT_FILE_BC_FASTQ( ch_trimmed_reads_combined, '.fastq', params.split_amount )
+        SPLIT_SEQ_BC_FASTQ( ch_trimmed_reads_combined, '.fastq.gz', params.split_amount )

-        SPLIT_FILE_BC_FASTQ.out.split_files
+        SPLIT_SEQ_BC_FASTQ.out.split_files
             .transpose()
             .set { ch_split_bc_fastqs }

-        ch_versions = ch_versions.mix(SPLIT_FILE_BC_FASTQ.out.versions)
+        ch_versions = ch_versions.mix(SPLIT_SEQ_BC_FASTQ.out.versions)

         SPLIT_FILE_BC_CSV ( ch_putative_bc, '.csv', (params.split_amount / 4) )
         SPLIT_FILE_BC_CSV.out.split_files
From bfe3388e82f11bd33c108d8ee5a20a779ccffd4e Mon Sep 17 00:00:00 2001
From: nick-youngblut
Date: Tue, 13 May 2025 14:31:01 -0700
Subject: [PATCH 2/2] fixed '--split_amount 0' bug for CHOPPER

---
 .nf-core.yml           | 3 ---
 workflows/scnanoseq.nf | 8 ++++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.nf-core.yml b/.nf-core.yml
index 6db149f..885eee5 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -6,9 +6,6 @@ lint:
     - docs/images/nf-core-scnanoseq_logo_dark.png
     - docs/images/nf-core-scnanoseq_logo_light.png
     - .gitignore
-  pipeline_todos:
-    - README.md
-    - main.nf
   template_strings: false
 nf_core_version: 3.0.2
 org_path: null
diff --git a/workflows/scnanoseq.nf b/workflows/scnanoseq.nf
index b8f3342..7f6408d 100644
--- a/workflows/scnanoseq.nf
+++ b/workflows/scnanoseq.nf
@@ -141,6 +141,7 @@ workflow SCNANOSEQ {
         }
         .set { ch_fastqs }

+
     //
     // MODULE: Combine fastqs from the same sample
     //
@@ -255,10 +256,13 @@ workflow SCNANOSEQ {
             .set { ch_fastqs }

         ch_versions = ch_versions.mix(SPLIT_SEQ.out.versions)
+    } else {
+        ch_fastqs = ch_cat_fastq
     }

-    ch_trimmed_reads = ch_fastqs
-    if (!params.skip_trimming) {
+    if (params.skip_trimming) {
+        ch_trimmed_reads = ch_fastqs
+    } else {
         CHOPPER ( ch_fastqs )
         ch_trimmed_reads = CHOPPER.out.reads
         ch_versions = ch_versions.mix(CHOPPER.out.versions)
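A note on the concatenation strategy in `cat_fastq.py` (patch 1): the gzip format permits multiple members in one stream, so appending gzipped FASTQ files byte-for-byte yields a valid archive, which is what the plain `cat` in the replaced nf-core module relied on. A minimal sketch with hypothetical file names:

```bash
# Two gzip files appended back-to-back form a valid multi-member archive;
# zcat (and Python's gzip module) read through all members transparently.
cat a.fastq.gz b.fastq.gz > merged.fastq.gz
zcat merged.fastq.gz | wc -l
```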