Skip to content
11 changes: 0 additions & 11 deletions v03_pipeline/bin/pipeline_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from v03_pipeline.lib.logger import get_logger
from v03_pipeline.lib.misc.runs import get_oldest_queue_path
from v03_pipeline.lib.model import FeatureFlag
from v03_pipeline.lib.paths import project_pedigree_path
from v03_pipeline.lib.tasks.trigger_hail_backend_reload import TriggerHailBackendReload
from v03_pipeline.lib.tasks.write_success_file import WriteSuccessFileTask

Expand All @@ -24,22 +23,12 @@ def main():
continue
with open(latest_queue_path) as f:
lpr = LoadingPipelineRequest.model_validate_json(f.read())
project_pedigree_paths = [
project_pedigree_path(
lpr.reference_genome,
lpr.dataset_type,
lpr.sample_type,
project_guid,
)
for project_guid in lpr.projects_to_run
]
run_id = re.search(
r'request_(\d{8}-\d{6}_\d+)\.json',
os.path.basename(latest_queue_path),
).group(1)
loading_run_task_params = {
'project_guids': lpr.projects_to_run,
'project_pedigree_paths': project_pedigree_paths,
'run_id': run_id,
**{k: v for k, v in lpr.model_dump().items() if k != 'projects_to_run'},
}
Expand Down
1 change: 0 additions & 1 deletion v03_pipeline/lib/methods/sex_check_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
TEST_SEX_AND_RELATEDNESS_CALLSET_MT = (
'v03_pipeline/var/test/callsets/sex_and_relatedness_1.mt'
)
TEST_PEDIGREE = 'v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv'


class SexCheckTest(unittest.TestCase):
Expand Down
1 change: 0 additions & 1 deletion v03_pipeline/lib/tasks/base/base_loading_run_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class BaseLoadingRunParams(luigi.Task):
sample_type = luigi.EnumParameter(enum=SampleType)
callset_path = luigi.Parameter()
project_guids = luigi.ListParameter(default=[])
project_pedigree_paths = luigi.ListParameter(default=[])
skip_check_sex_and_relatedness = luigi.BoolParameter(
default=False,
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
Expand Down
3 changes: 0 additions & 3 deletions v03_pipeline/lib/tasks/dataproc/misc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def test_to_kebab_str_args(self, _: Mock):
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='a_misc_run',
)
self.assertListEqual(
Expand All @@ -37,8 +36,6 @@ def test_to_kebab_str_args(self, _: Mock):
'test_callset',
'--project-guids',
'["R0113_test_project"]',
'--project-pedigree-paths',
'["test_pedigree"]',
'--skip-check-sex-and-relatedness',
'False',
'--skip-expect-tdr-metrics',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def test_rsync_to_seqr_app_dirs_no_sync(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-01',
)
worker.add(task)
Expand Down Expand Up @@ -76,7 +75,6 @@ def test_rsync_to_seqr_app_dirs_sync(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-02',
)
worker.add(task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def test_job_already_exists_failed(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-03',
)
worker.add(task)
Expand Down Expand Up @@ -78,7 +77,6 @@ def test_job_already_exists_success(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-04',
)
worker.add(task)
Expand Down Expand Up @@ -109,7 +107,6 @@ def test_job_failed(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-05',
)
worker.add(task)
Expand Down Expand Up @@ -149,7 +146,6 @@ def test_job_success(
sample_type=SampleType.WGS,
callset_path='test_callset',
project_guids=['R0113_test_project'],
project_pedigree_paths=['test_pedigree'],
run_id='manual__2024-04-06',
)
worker.add(task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
from v03_pipeline.lib.tasks.exports.write_new_entries_parquet import (
WriteNewEntriesParquetTask,
)
from v03_pipeline.lib.test.misc import convert_ndarray_to_list
from v03_pipeline.lib.test.misc import (
convert_ndarray_to_list,
copy_project_pedigree_to_mocked_dir,
)
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase

TEST_PEDIGREE_3_REMAP = 'v03_pipeline/var/test/pedigrees/test_pedigree_3_remap.tsv'
Expand Down Expand Up @@ -77,14 +80,27 @@ def setUp(self) -> None:
)

def test_write_new_entries_parquet(self):
copy_project_pedigree_to_mocked_dir(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this refactor was a bit more painful than necessary, pointing to a need to better align the test pedigrees with their actual function.

TEST_PEDIGREE_3_REMAP,
ReferenceGenome.GRCh38,
DatasetType.SNV_INDEL,
SampleType.WGS,
'R0113_test_project',
)
copy_project_pedigree_to_mocked_dir(
TEST_PEDIGREE_4_REMAP,
ReferenceGenome.GRCh38,
DatasetType.SNV_INDEL,
SampleType.WGS,
'R0114_project4',
)
worker = luigi.worker.Worker()
task = WriteNewEntriesParquetTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
callset_path=TEST_SNV_INDEL_VCF,
project_guids=['R0113_test_project', 'R0114_project4'],
project_pedigree_paths=[TEST_PEDIGREE_3_REMAP, TEST_PEDIGREE_4_REMAP],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -171,14 +187,20 @@ def test_write_new_entries_parquet(self):
)

def test_mito_write_new_entries_parquet(self):
copy_project_pedigree_to_mocked_dir(
TEST_MITO_EXPORT_PEDIGREE,
ReferenceGenome.GRCh38,
DatasetType.MITO,
SampleType.WGS,
'R0116_test_project3',
)
worker = luigi.worker.Worker()
task = WriteNewEntriesParquetTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.MITO,
sample_type=SampleType.WGS,
callset_path=TEST_MITO_CALLSET,
project_guids=['R0116_test_project3'],
project_pedigree_paths=[TEST_MITO_EXPORT_PEDIGREE],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -220,14 +242,20 @@ def test_mito_write_new_entries_parquet(self):
)

def test_sv_write_new_entries_parquet(self):
copy_project_pedigree_to_mocked_dir(
TEST_PEDIGREE_5,
ReferenceGenome.GRCh38,
DatasetType.SV,
SampleType.WGS,
'R0115_test_project2',
)
worker = luigi.worker.Worker()
task = WriteNewEntriesParquetTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SV,
sample_type=SampleType.WGS,
callset_path=TEST_SV_VCF_2,
project_guids=['R0115_test_project2'],
project_pedigree_paths=[TEST_PEDIGREE_5],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -296,14 +324,20 @@ def test_sv_write_new_entries_parquet(self):
)

def test_gcnv_write_new_entries_parquet(self):
copy_project_pedigree_to_mocked_dir(
TEST_PEDIGREE_5,
ReferenceGenome.GRCh38,
DatasetType.GCNV,
SampleType.WES,
'R0115_test_project2',
)
worker = luigi.worker.Worker()
task = WriteNewEntriesParquetTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.GCNV,
sample_type=SampleType.WES,
callset_path=TEST_GCNV_BED_FILE,
project_guids=['R0115_test_project2'],
project_pedigree_paths=[TEST_PEDIGREE_5],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def test_write_new_transcripts_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -188,7 +187,6 @@ def test_grch37_write_new_transcripts_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def test_write_new_variants_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -232,7 +231,6 @@ def test_grch37_write_new_variants_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -344,7 +342,6 @@ def test_mito_write_new_variants_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -439,7 +436,6 @@ def test_sv_write_new_variants_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down Expand Up @@ -517,7 +513,6 @@ def test_gcnv_write_new_variants_parquet_test(
project_guids=[
'fake_project',
],
project_pedigree_paths=['fake_pedigree'],
skip_validation=True,
run_id=TEST_RUN_ID,
)
Expand Down
10 changes: 8 additions & 2 deletions v03_pipeline/lib/tasks/trigger_hail_backend_reload_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.paths import variant_annotations_table_path
from v03_pipeline.lib.tasks.trigger_hail_backend_reload import TriggerHailBackendReload
from v03_pipeline.lib.test.misc import copy_project_pedigree_to_mocked_dir
from v03_pipeline.lib.test.mock_complete_task import MockCompleteTask
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase

Expand Down Expand Up @@ -47,6 +48,13 @@ def setUp(self) -> None:
DatasetType.SNV_INDEL,
),
)
copy_project_pedigree_to_mocked_dir(
TEST_PEDIGREE_3_REMAP,
ReferenceGenome.GRCh38,
DatasetType.SNV_INDEL,
SampleType.WES,
'R0113_test_project',
)

@mock.patch.object(requests, 'post')
@mock.patch(
Expand All @@ -70,7 +78,6 @@ def test_success(
callset_path=TEST_VCF,
project_guids=['R0113_test_project'],
run_id='manual__2024-09-20',
project_pedigree_paths=[TEST_PEDIGREE_3_REMAP],
)
worker.add(task)
worker.run()
Expand Down Expand Up @@ -98,7 +105,6 @@ def test_failure(
callset_path=TEST_VCF,
project_guids=['R0113_test_project'],
run_id='manual__2024-09-20',
project_pedigree_paths=[TEST_PEDIGREE_3_REMAP],
)
worker.add(task)
self.assertFalse(task.complete())
30 changes: 24 additions & 6 deletions v03_pipeline/lib/tasks/update_lookup_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
remove_family_guids,
)
from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP
from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
from v03_pipeline.lib.paths import (
project_pedigree_path,
remapped_and_subsetted_callset_path,
)
from v03_pipeline.lib.tasks.base.base_loading_run_params import (
BaseLoadingRunParams,
)
Expand All @@ -33,11 +36,16 @@ def complete(self) -> bool:
callset=self.callset_path,
project_guid=project_guid,
remap_pedigree_hash=remap_pedigree_hash(
self.project_pedigree_paths[i],
project_pedigree_path(
self.reference_genome,
self.dataset_type,
self.sample_type,
project_guid,
),
),
),
)
for i, project_guid in enumerate(self.project_guids)
for project_guid in self.project_guids
],
),
hl.read_table(self.output().path).updates,
Expand All @@ -54,15 +62,20 @@ def requires(self) -> list[luigi.Task]:
def update_table(self, ht: hl.Table) -> hl.Table:
# NB: there's a chance this many hail operations blows the DAG compute stack
# in an unfortunate way. Please keep an eye out!
for i, project_guid in enumerate(self.project_guids):
for project_guid in self.project_guids:
if project_guid in PROJECTS_EXCLUDED_FROM_LOOKUP:
ht = ht.annotate_globals(
updates=ht.updates.add(
hl.Struct(
callset=self.callset_path,
project_guid=project_guid,
remap_pedigree_hash=remap_pedigree_hash(
self.project_pedigree_paths[i],
project_pedigree_path(
self.reference_genome,
self.dataset_type,
self.sample_type,
project_guid,
),
),
),
),
Expand Down Expand Up @@ -100,7 +113,12 @@ def update_table(self, ht: hl.Table) -> hl.Table:
callset=self.callset_path,
project_guid=project_guid,
remap_pedigree_hash=remap_pedigree_hash(
self.project_pedigree_paths[i],
project_pedigree_path(
self.reference_genome,
self.dataset_type,
self.sample_type,
project_guid,
),
),
),
),
Expand Down
Loading