Skip to content

STARSolo: add --outBAMsortingBinsN and add a param to force samtools sort #7051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion tools/rgrnastar/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->
<!-- STAR version to be used -->
<token name="@TOOL_VERSION@">2.7.11b</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">21.01</token>
<!-- STAR index version compatible with this version of STAR
This is the STAR version that introduced the index structure expected
Expand Down Expand Up @@ -584,4 +584,30 @@
#end if
]]></token>

<xml name="perf_params">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no way a user will set reasonable values here. I would suggest calculating reasonable values for these instead.

<!-- Collapsed-by-default section holding the two tuning parameters that the
     command template reads as $perf.outBAMsortingBinsN and
     $perf.winAnchorMultimapNmax. -->
<section name="perf" title="Performance tweaks / Troubleshooting" expanded="false">
    <!-- Defaults to 50; any integer of 1 or more is accepted. -->
    <param argument="--outBAMsortingBinsN"
           type="integer"
           value="50"
           min="1"
           label="Number of genome bins for coordinate-sorting"
           help="Higher values result in lower RAM requirements during the sorting step. The default value is 50. Tweak this if you are facing memory-related errors." />
    <!-- Defaults to 50 and cannot be set lower than 50. -->
    <param argument="--winAnchorMultimapNmax"
           type="integer"
           value="50"
           min="50"
           label="Maximum number of loci anchors are allowed to map to"
           help="Higher values can increase the runtime significantly. This value should be set greater than or equal to --outFilterMultimapNmax" />
</section>
</xml>
<!-- Command-line fragment that forwards the two values of the "perf" input
     section (outBAMsortingBinsN and winAnchorMultimapNmax) as options.
     A tool using this token must define a section named "perf" containing
     both parameters, otherwise the $perf lookups cannot be resolved. -->
<token name="@PERF@"><![CDATA[
--outBAMsortingBinsN $perf.outBAMsortingBinsN
--winAnchorMultimapNmax $perf.winAnchorMultimapNmax
]]></token>

<!-- Shell fragment that runs "samtools sort" on Aligned.out.bam and writes
     the result as BAM to $output_BAM.
     The fragment starts with a shell AND operator, so it must be placed
     directly after a preceding command in the command template.
     Additional sort threads: GALAXY_SLOTS (default 2) minus one.
     Memory passed via -m: 75 percent of GALAXY_MEMORY_MB_PER_SLOT
     (default 768), expressed in megabytes.
     The -T argument is TMPDIR, falling back to the current directory. -->
<token name="@SAMTOOLS_SORT@"><![CDATA[
## BAM sorting (logic copied from samtools_sort wrapper)
## choosing BAM SortedByCoord appeared once to give fewer reads
## than BAM Unsorted followed by a samtools sort
## so better go with the latter?

&&
##compute the number of ADDITIONAL threads to be used by samtools (-@)
addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
##compute the number of memory available to samtools sort (-m)
##use only 75% of available: https://github.com/samtools/samtools/issues/831
addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
((addmemory=addmemory*75/100)) &&
samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
]]></token>
</macros>
8 changes: 2 additions & 6 deletions tools/rgrnastar/rg_rnaStar.xml
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,7 @@
@ALGO_DEFAULT@
#end if
--outBAMsortingThreadN \${GALAXY_SLOTS:-4}
--outBAMsortingBinsN $perf.outBAMsortingBinsN
--winAnchorMultimapNmax $perf.winAnchorMultimapNmax
@PERF@
--limitBAMsortRAM \$((\${GALAXY_MEMORY_MB:-0}*1000000))

#if $oformat.wasp_conditional.waspOutputMode == "wasp_mode":
Expand Down Expand Up @@ -402,10 +401,7 @@ with Cufflinks if your sequences come from an unstranded library preparation.">
</when>
</conditional>
</section>
<!-- Collapsed-by-default section holding the two tuning parameters that the
     command template reads as $perf.outBAMsortingBinsN and
     $perf.winAnchorMultimapNmax. -->
<section name="perf" title="Performance tweaks / Troubleshooting" expanded="false">
    <!-- Defaults to 50; any integer of 1 or more is accepted. -->
    <param argument="--outBAMsortingBinsN"
           type="integer"
           value="50"
           min="1"
           label="Number of genome bins for coordinate-sorting"
           help="Higher values result in lower RAM requirements during the sorting step. The default value is 50. Tweak this if you are facing memory-related errors." />
    <!-- Defaults to 50 and cannot be set lower than 50. -->
    <param argument="--winAnchorMultimapNmax"
           type="integer"
           value="50"
           min="50"
           label="Maximum number of loci anchors are allowed to map to"
           help="Higher values can increase the runtime significantly. This value should be set greater than or equal to --outFilterMultimapNmax" />
</section>
<expand macro="perf_params"/>
<expand macro="outWig"/>
</inputs>

Expand Down
30 changes: 15 additions & 15 deletions tools/rgrnastar/rg_rnaStarSolo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@
--quantMode TranscriptomeSAM $solo.quantModeGene
#set $tag_names = str($solo.outSAMattributes).replace(',', ' ')
--outSAMattributes $tag_names
#if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':

## force unsorted BAM output and later use samtools sort
#if $use_samtools_sort
--outSAMtype BAM Unsorted
#elif "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
--outSAMtype BAM SortedByCoordinate
#else:
--outSAMtype BAM Unsorted
Expand Down Expand Up @@ -137,6 +141,9 @@
#else:
@ALGO_DEFAULT@
#end if
--outBAMsortingThreadN \${GALAXY_SLOTS:-4}
@PERF@
--limitBAMsortRAM \$((\${GALAXY_MEMORY_MB:-0}*1000000))

##outWig:
@OUTWIG@
Expand All @@ -149,23 +156,14 @@
## put the barcodes and features stats into a single file
&& cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}'

#if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
## force samtools sort regardless of other params
#if $use_samtools_sort
@SAMTOOLS_SORT@
#elif "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None'
## recompress BAM output for smaller file size
&& samtools view -b -o '$output_BAM' Aligned.sortedByCoord.out.bam
#else:
## BAM sorting (logic copied from samtools_sort wrapper)
## choosing BAM SortedByCoord appeared once to give fewer reads
## than BAM Unsorted followed by a samtools sort
## so better go with the latter?

&&
##compute the number of ADDITIONAL threads to be used by samtools (-@)
addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
##compute the number of memory available to samtools sort (-m)
##use only 75% of available: https://github.com/samtools/samtools/issues/831
addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
((addmemory=addmemory*75/100)) &&
samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
@SAMTOOLS_SORT@
#end if
##outWig:
@OUTWIGOUTPUTS@
Expand Down Expand Up @@ -432,7 +430,9 @@
</conditional>
</section>
<expand macro="chim_params"/>
<expand macro="perf_params"/>
<expand macro="outWig"/>
<!-- Unchecked by default. When checked, the command template requests
     unsorted BAM output and appends the @SAMTOOLS_SORT@ step, regardless of
     the other output-related parameters. -->
<param name="use_samtools_sort" type="boolean" checked="false" label="Use Samtools to sort the BAM file" help="Otherwise, use options in 'Performance tweaks / Troubleshooting' section"/>
</inputs>
<outputs>
<data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
Expand Down