Skip to content
8 changes: 8 additions & 0 deletions browser/src/GenePage/GeneFlags.spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,12 @@ describe('GeneFlags', () => {

expect(tree).toMatchSnapshot()
})

// Snapshot test: an RNU4ATAC gene on GRCh38 is rendered through GeneFlags and
// compared against the stored snapshot for this test name.
test('renders VEP 115 warning for RNU4ATAC', () => {
  const rnu4atacGene = geneFactory.build({ symbol: 'RNU4ATAC', reference_genome: 'GRCh38' })
  const rendered = renderer.create(<GeneFlags gene={rnu4atacGene} />)

  expect(rendered).toMatchSnapshot()
})
})
12 changes: 12 additions & 0 deletions browser/src/GenePage/GeneFlags.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ type Props = {
}

// Gene symbols that get the CMRG warning when the gene is on GRCh38.
const allOfUsCMRGGenes = ['CBS', 'KCNE1', 'CRYAA']
// Gene symbols that get the VEP 115 annotation warning when the gene is on GRCh38.
const vep115Genes = ['RNU4ATAC']

const GeneFlags = ({ gene }: Props) => {
const shouldDisplayCMRGWarning =
gene.reference_genome === 'GRCh38' && allOfUsCMRGGenes.includes(gene.symbol)

const shouldDisplayVEP115Warning =
gene.reference_genome === 'GRCh38' && vep115Genes.includes(gene.symbol)

return (
<>
{shouldDisplayCMRGWarning && (
Expand All @@ -35,6 +39,14 @@ const GeneFlags = ({ gene }: Props) => {
) callset to remedy this issue in the future.
</p>
)}
{shouldDisplayVEP115Warning && (
<p>
<Badge level="warning">Warning</Badge> MANE Select and variant consequence information in
this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more
information, see our{' '}
<ExternalLink href="https://gnomad.broadinstitute.org/help/vep">help page</ExternalLink>.
</p>
)}
{gene.flags.includes('chip') && (
<p>
<Badge level="warning">Note</Badge> Analysis of allele balance and age data indicates that
Expand Down
53 changes: 30 additions & 23 deletions browser/src/GenePage/GeneInfo.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,40 @@ import Link from '../Link'
import GeneReferences from './GeneReferences'

type ManeSelectTranscriptIdProps = {
gene: {
mane_select_transcript: {
ensembl_id: string
ensembl_version: string
refseq_id: string
refseq_version: string
}
transcripts: {
transcript_id: string
transcript_version: string
}[]
mane_select_transcript: {
ensembl_id: string
ensembl_version: string
refseq_id: string
refseq_version: string
}
transcripts: {
transcript_id: string
transcript_version: string
}[]
}

const ManeSelectTranscriptId = ({ gene }: ManeSelectTranscriptIdProps) => {
const gencodeVersionOfManeSelectTransript = gene.transcripts.find(
(transcript: any) => transcript.transcript_id === gene.mane_select_transcript.ensembl_id
const ManeSelectTranscriptId = ({
mane_select_transcript,
transcripts,
}: ManeSelectTranscriptIdProps) => {
const gencodeVersionOfManeSelectTranscript = transcripts.find(
(transcript) => transcript.transcript_id === mane_select_transcript.ensembl_id
)
const shouldLinkToTranscriptPage =
gencodeVersionOfManeSelectTransript &&
gencodeVersionOfManeSelectTransript.transcript_version ===
gene.mane_select_transcript.ensembl_version
gencodeVersionOfManeSelectTranscript &&
gencodeVersionOfManeSelectTranscript.transcript_version ===
mane_select_transcript.ensembl_version

return (
<React.Fragment>
{shouldLinkToTranscriptPage ? (
<Link to={`/transcript/${gene.mane_select_transcript.ensembl_id}`}>
{gene.mane_select_transcript.ensembl_id}.{gene.mane_select_transcript.ensembl_version}
<Link to={`/transcript/${mane_select_transcript.ensembl_id}`}>
{mane_select_transcript.ensembl_id}.{mane_select_transcript.ensembl_version}
</Link>
) : (
`${gene.mane_select_transcript.ensembl_id}.${gene.mane_select_transcript.ensembl_version}`
`${mane_select_transcript.ensembl_id}.${mane_select_transcript.ensembl_version}`
)}{' '}
/ {gene.mane_select_transcript.refseq_id}.{gene.mane_select_transcript.refseq_version}
/ {mane_select_transcript.refseq_id}.{mane_select_transcript.refseq_version}
</React.Fragment>
)
}
Expand Down Expand Up @@ -109,8 +110,14 @@ const GeneInfo = ({ gene }: GeneInfoProps) => {
</React.Fragment>
}
>
{/* @ts-expect-error TS(2322) FIXME: Type '{ gene_id: string; gene_version: string; sym... Remove this comment to see the full error message */}
{gene.mane_select_transcript ? <ManeSelectTranscriptId gene={gene} /> : 'Not available'}
{gene.mane_select_transcript ? (
<ManeSelectTranscriptId
mane_select_transcript={gene.mane_select_transcript}
transcripts={gene.transcripts}
/>
) : (
'Not available'
)}
</AttributeListItem>
)}

Expand Down
21 changes: 21 additions & 0 deletions browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ exports[`GeneFlags renders CMRG flag if one of 3 relevant genes 1`] = `
</p>
`;

exports[`GeneFlags renders VEP 115 warning for RNU4ATAC 1`] = `
<p>
<span
className="Badge__BadgeWrapper-sc-j4izdp-1 gRPPXC"
>
Warning
</span>
MANE Select and variant consequence information in this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more information, see our

<a
className="Link-sc-14lgydv-0 Link__ExternalLink-sc-14lgydv-1 kswbwW"
href="https://gnomad.broadinstitute.org/help/vep"
rel="noopener noreferrer"
target="_blank"
>
help page
</a>
.
</p>
`;

exports[`GeneFlags renders chip flag if present on gene 1`] = `
<p>
<span
Expand Down
60 changes: 60 additions & 0 deletions data-pipeline/src/data_pipeline/data_types/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,66 @@ def reject_par_y_genes(genes_path=None):
return genes


def patch_rnu4atac(genes_path=None):
    """Build a patched gene record for RNU4ATAC.

    Reads the genes table at ``genes_path``, keeps only the row whose symbol is
    RNU4ATAC, and overwrites its coordinates, exons, transcripts and MANE Select
    transcript with the hard-coded GRCh38 values below. Identifier-like fields of
    the replacement transcript (ids, chrom, strand, reference genome, GTEx
    expression) are copied from the first transcript already present on the gene.

    Args:
        genes_path: Path of the Hail table of genes to read.

    Returns:
        The genes table filtered to RNU4ATAC with the patched annotations.

    Raises:
        IndexError: If the table contains no RNU4ATAC row, or that row has no
            transcripts to copy identifier fields from.
    """
    gene_symbol = "RNU4ATAC"

    genes = hl.read_table(genes_path)
    genes = genes.filter(genes.symbol == gene_symbol)

    correct_start = 121530880
    correct_stop = 121531009
    correct_start_locus = hl.locus(contig="chr2", pos=correct_start, reference_genome="GRCh38")
    correct_stop_locus = hl.locus(contig="chr2", pos=correct_stop, reference_genome="GRCh38")
    correct_xstart = x_position(correct_start_locus)
    correct_xstop = x_position(correct_stop_locus)

    correct_interval = hl.interval(correct_start_locus, correct_stop_locus, includes_start=True, includes_end=True)

    # The patched gene has a single exon spanning the whole corrected interval.
    correct_exon = hl.struct(
        feature_type="exon", start=correct_start, stop=correct_stop, xstart=correct_xstart, xstop=correct_xstop
    )

    # Fail with a descriptive message instead of an opaque "list index out of range"
    # when the gene (or its transcript list) is missing from the input table.
    matching_rows = genes.take(1)
    if not matching_rows or not matching_rows[0].transcripts:
        raise IndexError(f"No {gene_symbol} gene with at least one transcript found in {genes_path}")

    incorrect_transcript = matching_rows[0].transcripts[0]
    correct_transcript = hl.struct(
        interval=correct_interval,
        transcript_version="2",
        gene_version="2",
        start=correct_start,
        stop=correct_stop,
        xstart=correct_xstart,
        xstop=correct_xstop,
        exons=hl.array([correct_exon]),
        transcript_id=incorrect_transcript.transcript_id,
        gene_id=incorrect_transcript.gene_id,
        chrom=incorrect_transcript.chrom,
        strand=incorrect_transcript.strand,
        reference_genome=incorrect_transcript.reference_genome,
        gtex_tissue_expression=incorrect_transcript.gtex_tissue_expression,
        refseq_id="NR_023343",
        refseq_version="3",
    )

    correct_mane_select_transcript = hl.struct(
        matched_gene_version="2",
        ensembl_id="ENST00000580972",
        ensembl_version="2",
        refseq_id="NR_023343",
        refseq_version="3",
    )

    genes = genes.annotate(
        # Version fields are strings everywhere else in this patch (transcript-level
        # gene_version, matched_gene_version); an int here would change the column type.
        gene_version="2",
        start=correct_start,
        stop=correct_stop,
        xstart=correct_xstart,
        xstop=correct_xstop,
        exons=[correct_exon],
        transcripts=[correct_transcript],
        mane_select_transcript=correct_mane_select_transcript,
    )
    return genes


###############################################
# Transcripts #
###############################################
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import hail as hl

from data_pipeline.data_types.variant.transcript_consequence.annotate_transcript_consequences import (
annotate_transcript_consequences_in_table,
)


def patch_rnu4atac_variants(vepped_path=None, freq_path=None, transcripts_data=None):
    """Build patched transcript consequences for variants in RNU4ATAC.

    Reads the VEP-annotated table at ``vepped_path``, keeps only variants with a
    consequence on transcript ENST00000580972 of gene RNU4ATAC, reformats those
    consequences with ``annotate_transcript_consequences_in_table``, stamps them
    with hard-coded version/MANE/RefSeq fields, and joins the result with the
    table at ``freq_path`` to pick up the variant index fields.

    Args:
        vepped_path: Path of the Hail table holding the ``vep`` annotations.
        freq_path: Path of the Hail table joined in for the variant index fields;
            its own ``transcript_consequences`` field is dropped before the join.
        transcripts_data: Unused; kept so existing callers that pass it keep
            working. It is no longer forwarded, because
            ``annotate_transcript_consequences_in_table`` has no such parameter
            and forwarding it raised ``TypeError``. The default is ``None``
            rather than a shared mutable ``{}``.

    Returns:
        A table with ``variant_id``, ``rsids``, ``caid``, ``vrs`` and the patched
        ``transcript_consequences``.
    """
    veps = hl.read_table(vepped_path)
    freqs = hl.read_table(freq_path)
    # Drop all consequences except for gene RNU4ATAC and transcript ENST00000580972
    veps = veps.filter(veps.vep.transcript_consequences.any(lambda tc: tc.gene_symbol == "RNU4ATAC"))
    veps = veps.annotate(
        vep=veps.vep.annotate(
            transcript_consequences=veps.vep.transcript_consequences.filter(
                lambda tc: tc.transcript_id == "ENST00000580972"
            )
        )
    )
    veps = veps.filter(veps.vep.transcript_consequences.length() > 0)
    # No transcripts path is passed: transcript_version and gene_version are set
    # explicitly below instead of being looked up from a transcripts table.
    veps = annotate_transcript_consequences_in_table(veps)

    # Filter again: annotate_transcript_consequences_in_table removes consequences
    # whose consequence terms are not considered important, which can leave a
    # variant with an empty list.
    veps = veps.filter(veps.transcript_consequences.length() > 0)
    # NOTE(review): patch_rnu4atac in data_types/gene.py records ENST00000580972.2 as
    # the MANE Select transcript with RefSeq NR_023343.3, yet the consequences here
    # are flagged is_mane_select=False with null RefSeq fields — confirm this is intended.
    veps = veps.annotate(
        transcript_consequences=veps.transcript_consequences.map(
            lambda tc: tc.annotate(
                transcript_version="2",
                gene_version="2",
                is_mane_select=False,
                is_mane_select_version=False,
                refseq_id=hl.null(hl.tstr),
                refseq_version=hl.null(hl.tstr),
            )
        )
    )
    veps = veps.annotate(
        transcript_consequences=veps.transcript_consequences.map(
            lambda tc: tc.drop("polyphen_prediction", "sift_prediction")
        )
    )

    # Avoid a field-name clash in the join; the patched consequences replace these.
    freqs = freqs.drop("transcript_consequences")
    veps = veps.join(freqs)

    # Include just consequences and index fields
    veps = veps.select(veps.variant_id, veps.rsids, veps.caid, veps.vrs, veps.transcript_consequences)
    return veps
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
from .hgvs import hgvsp_from_consequence_amino_acids
from .vep import consequence_term_rank


OMIT_CONSEQUENCE_TERMS = hl.set(["upstream_gene_variant", "downstream_gene_variant"])

# ruff doesn't like explicit comparisons to None, but we need them in here, so:
# ruff: noqa: E711


def annotate_transcript_consequences(variants_path, transcripts_path, mane_transcripts_path=None):
def annotate_transcript_consequences(variants_path, transcripts_path=None, mane_transcripts_path=None):
    """Read the variants table at ``variants_path`` and annotate its transcript consequences.

    Path-based wrapper: the table is loaded with ``hl.read_table`` and handed,
    together with the two optional paths, to
    ``annotate_transcript_consequences_in_table``, whose result is returned.
    """
    return annotate_transcript_consequences_in_table(
        hl.read_table(variants_path), transcripts_path, mane_transcripts_path
    )


def annotate_transcript_consequences_in_table(ds, transcripts_path=None, mane_transcripts_path=None):
most_severe_consequence = ds.vep.most_severe_consequence

transcript_consequences = ds.vep.transcript_consequences
Expand Down Expand Up @@ -62,26 +67,25 @@ def annotate_transcript_consequences(variants_path, transcripts_path, mane_trans

transcript_consequences = transcript_consequences.map(lambda c: c.select(*consequences))

transcripts = hl.read_table(transcripts_path)

# TODO: This can potentially be improved by removing Table.collect
# See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset
# and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18
transcript_info = hl.dict(
[
(row.transcript_id, row.transcript_info)
for row in transcripts.select(
transcript_info=hl.struct(
transcript_version=transcripts.transcript_version,
gene_version=transcripts.gene.gene_version,
)
).collect()
]
)

transcript_consequences = transcript_consequences.map(
lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id))
)
if transcripts_path != None:
transcripts = hl.read_table(transcripts_path)
# TODO: This can potentially be improved by removing Table.collect
# See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset
# and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18
transcript_info = hl.dict(
[
(row.transcript_id, row.transcript_info)
for row in transcripts.select(
transcript_info=hl.struct(
transcript_version=transcripts.transcript_version,
gene_version=transcripts.gene.gene_version,
)
).collect()
]
)
transcript_consequences = transcript_consequences.map(
lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id))
)

if mane_transcripts_path:
mane_transcripts = hl.read_table(mane_transcripts_path)
Expand Down
Loading