|
34 | 34 | subs=["all", "high_ident"] |
35 | 35 |
|
36 | 36 |
|
37 | | -localrules: all, AnnotateResolvedTandemDups, GetUniqueGencodeUnresolvedDupGenes, IntersectGenesWithFullSDList, FullDupToBed12, FullDupToLinks, MakeWMBed, MaskFile, ConvertHMMCopyNumberToCollapsedDuplications, SortSedef, FilterSedef, CountMaskedSedef, RemoveSedefTooMasked, MakeSedefGraph, MakeSedefGraphTable, FilterByGraphClusters, FullDupToBed12, FiltDupToBed12, GetUniqueGencodeUnresolvedDupGenesCN, GetUniqueGencodeUnresolvedDupGenes, GetGencodeMulticopy, GetGencodeMappedInDup, GetSupportedMulticopy,FindResolvedDuplicatedGenes, Bed12ToBed6, CombineGenesWithCollapsedDups, CombineDuplicatedGenes, MinimapGeneModelBed, FilterGencodeBed12, FindGenesInResolvedDups, SelectOneIsoform, SplitSplicedAndSingleExon, AnnotateLowCoverageFlanks, UnionMasked,GetNamedFasta, SelectDups, SortDups, GetDepthOverDups, FilterLowDepthDups, GetFullGeneCountTable, AddCollapsedGenes, GetCombinedTable, SelectDupsOneIsoform, GetFinalMerged, DupsPerContig, GetAllMultiGenes, AnnotateHighIdentity, GetTotalMasked, AnnotateResolvedTandemDups, GeneCountFact, GetFullGeneCountTable, FilterMultiExonBed, MappedSamIdentityDups, RemoveOriginal, RemoveBams, MakeSedefIntv, HighestIdentPairs, SelectHighIdent, GetCollapseByRange, GetCollapsedMask, GetCN |
| 37 | +localrules: all, AnnotateResolvedTandemDups, GetUniqueGencodeUnresolvedDupGenes, IntersectGenesWithFullSDList, FullDupToBed12, FullDupToLinks, MakeWMBed, MaskFile, ConvertHMMCopyNumberToCollapsedDuplications, SortSedef, FilterSedef, CountMaskedSedef, RemoveSedefTooMasked, MakeSedefGraph, MakeSedefGraphTable, FilterByGraphClusters, FullDupToBed12, FiltDupToBed12, GetUniqueGencodeUnresolvedDupGenesCN, GetUniqueGencodeUnresolvedDupGenes, GetGencodeMulticopy, GetGencodeMappedInDup, GetSupportedMulticopy,FindResolvedDuplicatedGenes, Bed12ToBed6, CombineGenesWithCollapsedDups, CombineDuplicatedGenes, MinimapGeneModelBed, FilterGencodeBed12, FindGenesInResolvedDups, SelectOneIsoform, SplitSplicedAndSingleExon, AnnotateLowCoverageFlanks, UnionMasked,GetNamedFasta, SelectDups, SortDups, GetDepthOverDups, FilterLowDepthDups, GetFullGeneCountTable, AddCollapsedGenes, GetCombinedTable, SelectDupsOneIsoform, GetFinalMerged, DupsPerContig, GetAllMultiGenes, AnnotateHighIdentity, GetTotalMasked, AnnotateResolvedTandemDups, GeneCountFact, GetGeneCountTableAbbreviatedNames, FilterMultiExonBed, MappedSamIdentityDups, RemoveOriginal, RemoveBams, MakeSedefIntv, HighestIdentPairs, SelectHighIdent, GetCollapseByRange, GetCollapsedMask, GetCN |
38 | 38 |
|
39 | 39 |
|
40 | 40 |
|
@@ -64,7 +64,8 @@ rule all: |
64 | 64 | comb_with_depth="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth", |
65 | 65 | fact="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.fact", |
66 | 66 | gene_count="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count", |
67 | | - gene_count_2column="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count_multi_single", |
| 67 | + gene_count_2column="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count_multi_single", |
| 68 | + gene_count_abbrvNames="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count.abbrv_names", |
68 | 69 | asmMask=expand("{asm}.count_masked", asm=["assembly.orig.fasta", "assembly.masked.fasta", "assembly.repeat_masked.fasta", "assembly.union_masked.fasta"]), |
69 | 70 | uniqueDupGenes="gencode.mapped.bam.bed12.dups.unique", |
70 | 71 | sedef_high_uniq="sedef_out/all/final.sorted.bed.uniq.high", |
@@ -1466,7 +1467,8 @@ rule AnnotateOriginal: |
1466 | 1467 | output: |
1467 | 1468 | annot="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.sam.bed.dups.annot_orig", |
1468 | 1469 | params: |
1469 | | - sd=SD |
| 1470 | + sd=SD, |
| 1471 | + grid_opts=config["grid_medium"] # KEON |
1470 | 1472 | shell:""" |
1471 | 1473 | {params.sd}/AnnotateOriginal.py {input.mappedsambeddups} > {output.annot} |
1472 | 1474 | """ |
@@ -1681,6 +1683,19 @@ cat {input.depth_filt} | awk '{{ if (NR == 1) {{ print "gene\\tresolved\\tcollap |
1681 | 1683 | cat {output.gene_count_2column} | awk '{{ if (NR == 1) {{ print "gene\\tcopies";}} else {{ print $1"\\t"$2+$3;}} }}' > {output.gene_count} |
1682 | 1684 | """ |
1683 | 1685 |
|
| 1686 | +rule GetGeneCountTableAbbreviatedNames: # Copy the gene_count table, keeping the header row as-is and reducing every later row to fields 1 and 3 |
| 1687 | + input: |
| 1688 | + gene_count="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count", |
| 1689 | + output: |
| 1690 | + gene_count_abbrvNames="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count.abbrv_names", |
| 1691 | + params: # NOTE(review): sd is not referenced by the shell command below; kept for parity with the other rules |
| 1692 | + sd=SD |
| 1693 | + shell:""" |
| 1694 | +awk 'BEGIN {{ FS = "[ |\t]"}} \ |
| 1695 | +NR == 1 {{print}} \ |
| 1696 | +NR > 1 {{print $1 "\t" $3}}' {input.gene_count} > {output.gene_count_abbrvNames} |
| 1697 | +""" |
| 1698 | + |
1684 | 1699 | rule cramBam: |
1685 | 1700 | input: |
1686 | 1701 | bam=config['bam'], |
|
0 commit comments