|
34 | 34 | subs=["all", "high_ident"] |
35 | 35 |
|
36 | 36 |
|
37 | | -localrules: all, AnnotateResolvedTandemDups, GetUniqueGencodeUnresolvedDupGenes, IntersectGenesWithFullSDList, FullDupToBed12, FullDupToLinks, MakeWMBed, MaskFile, ConvertHMMCopyNumberToCollapsedDuplications, SortSedef, FilterSedef, CountMaskedSedef, RemoveSedefTooMasked, MakeSedefGraph, MakeSedefGraphTable, FilterByGraphClusters, FullDupToBed12, FiltDupToBed12, GetUniqueGencodeUnresolvedDupGenesCN, GetUniqueGencodeUnresolvedDupGenes, GetGencodeMulticopy, GetGencodeMappedInDup, GetSupportedMulticopy,FindResolvedDuplicatedGenes, Bed12ToBed6, CombineGenesWithCollapsedDups, CombineDuplicatedGenes, MinimapGeneModelBed, FilterGencodeBed12, FindGenesInResolvedDups, SelectOneIsoform, SplitSplicedAndSingleExon, AnnotateLowCoverageFlanks, UnionMasked,GetNamedFasta, SelectDups, SortDups, GetDepthOverDups, FilterLowDepthDups, GetFullGeneCountTable, AddCollapsedGenes, GetCombinedTable, SelectDupsOneIsoform, GetFinalMerged, DupsPerContig, GetAllMultiGenes, AnnotateHighIdentity, GetTotalMasked, AnnotateResolvedTandemDups, GeneCountFact, GetFullGeneCountTable, FilterMultiExonBed, MappedSamIdentityDups, RemoveOriginal, RemoveBams, MakeSedefIntv, HighestIdentPairs, SelectHighIdent, GetCollapseByRange, GetCollapsedMask, GetCN |
| 37 | +localrules: all, AnnotateResolvedTandemDups, GetUniqueGencodeUnresolvedDupGenes, IntersectGenesWithFullSDList, FullDupToBed12, FullDupToLinks, MakeWMBed, MaskFile, ConvertHMMCopyNumberToCollapsedDuplications, SortSedef, FilterSedef, CountMaskedSedef, RemoveSedefTooMasked, MakeSedefGraph, MakeSedefGraphTable, FilterByGraphClusters, FullDupToBed12, FiltDupToBed12, GetUniqueGencodeUnresolvedDupGenesCN, GetUniqueGencodeUnresolvedDupGenes, GetGencodeMulticopy, GetGencodeMappedInDup, GetSupportedMulticopy,FindResolvedDuplicatedGenes, Bed12ToBed6, CombineGenesWithCollapsedDups, CombineDuplicatedGenes, MinimapGeneModelBed, FilterGencodeBed12, FindGenesInResolvedDups, SelectOneIsoform, SplitSplicedAndSingleExon, AnnotateLowCoverageFlanks, UnionMasked,GetNamedFasta, SelectDups, SortDups, GetDepthOverDups, FilterLowDepthDups, GetFullGeneCountTable, AddCollapsedGenes, GetCombinedTable, SelectDupsOneIsoform, GetFinalMerged, DupsPerContig, GetAllMultiGenes, AnnotateHighIdentity, GetTotalMasked, AnnotateResolvedTandemDups, GeneCountFact, GetGeneCountTableAbbreviatedNames, FilterMultiExonBed, MappedSamIdentityDups, RemoveOriginal, RemoveBams, MakeSedefIntv, HighestIdentPairs, SelectHighIdent, GetCollapseByRange, GetCollapsedMask, GetCN |
38 | 38 |
|
39 | 39 |
|
40 | 40 |
|
@@ -64,7 +64,8 @@ rule all: |
64 | 64 | comb_with_depth="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth", |
65 | 65 | fact="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.fact", |
66 | 66 | gene_count="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count", |
67 | | - gene_count_2column="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count_multi_single", |
| 67 | + gene_count_2column="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count_multi_single", |
| 68 | + gene_count_abbrvNames="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count.abbrv_names", |
68 | 69 | asmMask=expand("{asm}.count_masked", asm=["assembly.orig.fasta", "assembly.masked.fasta", "assembly.repeat_masked.fasta", "assembly.union_masked.fasta"]), |
69 | 70 | uniqueDupGenes="gencode.mapped.bam.bed12.dups.unique", |
70 | 71 | sedef_high_uniq="sedef_out/all/final.sorted.bed.uniq.high", |
@@ -1428,7 +1429,7 @@ rule MappedSamIdentity: |
1428 | 1429 | grid_opts=config["grid_small"], |
1429 | 1430 | sd=SD, |
1430 | 1431 | shell:""" |
1431 | | - {params.sd}/hmcnc/HMM/samToBed {input.mappedsam} --reportAccuracy > {output.mappedsambed} |
| 1432 | + {params.sd}/hmcnc/src/samToBed {input.mappedsam} --reportAccuracy > {output.mappedsambed} |
1432 | 1433 | """ |
1433 | 1434 |
|
1434 | 1435 | rule AddDepthCopyNumber: |
@@ -1466,7 +1467,8 @@ rule AnnotateOriginal: |
1466 | 1467 | output: |
1467 | 1468 | annot="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.sam.bed.dups.annot_orig", |
1468 | 1469 | params: |
1469 | | - sd=SD |
| 1470 | + sd=SD, |
| 1471 | + grid_opts=config["grid_medium"] # KEON |
1470 | 1472 | shell:""" |
1471 | 1473 | {params.sd}/AnnotateOriginal.py {input.mappedsambeddups} > {output.annot} |
1472 | 1474 | """ |
@@ -1688,6 +1690,19 @@ cat {input.depth_filt} | awk '{{ if (NR == 1) {{ print "gene\\tresolved\\tcollap |
1688 | 1690 | cat {output.gene_count_2column} | awk '{{ if (NR == 1) {{ print "gene\\tcopies";}} else {{ print $1"\\t"$2+$3;}} }}' > {output.gene_count} |
1689 | 1691 | """ |
1690 | 1692 |
|
| 1693 | +rule GetGeneCountTableAbbreviatedNames: # Writes an abbreviated copy of the gene_count table: the header line is copied unchanged, every later line is reduced to its 1st and 3rd fields |
| 1694 | + input: |
| 1695 | + gene_count="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count", |
| 1696 | + output: # data rows are written as field1<TAB>field3; the fields are concatenated (no comma in the awk print) so awk does not insert its default space separator before the tab |
| 1697 | + gene_count_abbrvNames="gencode.mapped.bam.bed12.multi_exon.fasta.named.mm2.dups.one_isoform.txt.combined.depth.filt.gene_count.abbrv_names", |
| 1698 | + params: # input lines are split on any single space, pipe or tab character (awk FS below) |
| 1699 | + sd=SD # not referenced by the shell command below |
| 1700 | + shell:""" |
| 1701 | +awk 'BEGIN {{ FS = "[ |\t]"}} \ |
| 1702 | +NR == 1 {{print}} \ |
| 1703 | +NR > 1 {{print $1 "\t" $3}}' {input.gene_count} > {output.gene_count_abbrvNames} |
| 1704 | +""" |
| 1705 | + |
1691 | 1706 | rule cramBam: |
1692 | 1707 | input: |
1693 | 1708 | bam=config['bam'], |
|
0 commit comments