From 3105038bf0a62bf57fc10a1f28d0fc4722f8bc65 Mon Sep 17 00:00:00 2001 From: Mustafa Nural Date: Fri, 22 Aug 2025 16:41:38 -0400 Subject: [PATCH 1/6] Add missing filter to fix slow running query. --- Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 1 + 1 file changed, 1 insertion(+) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index 1fdf47504..d2cb73ce9 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -55,5 +55,6 @@ and ds.taxon_id = o.taxon_id and o.is_reference_strain = 1 and o.taxon_id = :TAXON_ID + and o.taxon_id = taxon.taxon_id AND oterm.name = 'EST'; :DECLARE_PARTITION; From 3236d14eb2c3cbac9ef4c6df36cec29043af100f Mon Sep 17 00:00:00 2001 From: Mustafa Nural Date: Fri, 22 Aug 2025 16:42:46 -0400 Subject: [PATCH 2/6] Filter sequences at the species level instead of checking all sequences. --- .../psql/webready/orgSpecific/EstAlignmentGeneSummary.psql | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql index 34ac471c7..a292b2f3b 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql @@ -50,11 +50,14 @@ /* (because they overlap no genes) */ SELECT ba.blat_alignment_id FROM dots.BlatAlignment ba, dots.NaSequence query_sequence, - sres.OntologyTerm so, dots.NaSequence target_sequence + sres.OntologyTerm so, dots.NaSequence target_sequence, + webready.taxonspecies tsq, webready.taxonspecies tst WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id AND query_sequence.sequence_ontology_id = so.ontology_term_id AND ba.target_na_sequence_id = target_sequence.na_sequence_id - AND target_sequence.taxon_id = :TAXON_ID + AND query_sequence.taxon_id = tsq.taxon_id + AND tst.species_taxon_id = tsq.species_taxon_id + AND tst.taxon_id = :TAXON_ID AND so.name = 'EST' EXCEPT SELECT blat_alignment_id FROM :SCHEMA.:CLEAN_ORG_ABBREVEstAlignmentGeneTmp) From 8b240baefbb8a9f72bbd24f9d39b4f6615209589 Mon Sep 17 00:00:00 2001 From: Steve <43149795+steve-fischer-200@users.noreply.github.com> Date: Fri, 22 Aug 2025 18:46:56 -0400 Subject: [PATCH 3/6] Update EstAlignmentGeneSummary.psql add a comment --- Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql | 1 + 1 file changed, 1 insertion(+) diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql index a292b2f3b..cf517fe05 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql @@ -55,6 +55,7 @@ WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id AND query_sequence.sequence_ontology_id = so.ontology_term_id AND ba.target_na_sequence_id = target_sequence.na_sequence_id + /* we filter the query sequence (at the species level) because doing so provides a dramatic performance boost */ AND query_sequence.taxon_id = tsq.taxon_id AND tst.species_taxon_id = tsq.species_taxon_id AND tst.taxon_id = :TAXON_ID From d897014277e09057d66e9c4334eb0363b09a0745 Mon Sep 17 00:00:00 2001 From: Steve <43149795+steve-fischer-200@users.noreply.github.com> Date: Sat, 23 Aug 2025 10:52:08 -0400 Subject: [PATCH 4/6] Update EstAlignmentGeneSummary.psql use :SCHEMA macro --- .../lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql index cf517fe05..d3b3e142c 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql @@ -51,7 +51,7 @@ SELECT ba.blat_alignment_id FROM dots.BlatAlignment ba, dots.NaSequence query_sequence, sres.OntologyTerm so, dots.NaSequence target_sequence, - webready.taxonspecies tsq, webready.taxonspecies tst + :SCHEMA.taxonspecies tsq, :SCHEMA.taxonspecies tst WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id AND query_sequence.sequence_ontology_id = so.ontology_term_id AND ba.target_na_sequence_id = target_sequence.na_sequence_id From f95b1573055478a5c804568259f7a5124ea3a20f Mon Sep 17 00:00:00 2001 From: Steve <43149795+steve-fischer-200@users.noreply.github.com> Date: Sat, 23 Aug 2025 11:01:31 -0400 Subject: [PATCH 5/6] Update EstAttributes.psql use species filter --- Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index d2cb73ce9..552ac4e85 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -26,6 +26,8 @@ FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm, sres.TaxonName tn, sres.ExternalDatabase ed, apidb.datasource ds, apidb.organism o, + :SCHEMA.taxonspecies ts_r, -- ref organism + :SCHEMA.taxonspecies ts_e, -- est organism sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens LEFT JOIN (select query_na_sequence_id,max(ct) as best_alignment_count @@ -52,9 +54,11 @@ AND edr.external_database_id = ed.external_database_id AND ens.sequence_ontology_id = oterm.ontology_term_id AND ed.name = ds.name + AND ens.taxon_id = ts_e.taxon_id + AND o.taxon_id = ts_r.taxon_id + AND ts_e.species_taxon_id = ts_r.species_taxon_id and ds.taxon_id = o.taxon_id and o.is_reference_strain = 1 and o.taxon_id = :TAXON_ID - and o.taxon_id = taxon.taxon_id AND oterm.name = 'EST'; :DECLARE_PARTITION; From 83ba1a6c8df1c7aa46e2a8913ea1f03a78691b1f Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Tue, 26 Aug 2025 14:12:59 -0400 Subject: [PATCH 6/6] remove unused column from estattributes and minor fix to where clause for estgenealignment --- .../orgSpecific/EstAlignmentGeneSummary.psql | 3 ++- .../webready/orgSpecific/EstAttributes.psql | 19 +------------------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql index d3b3e142c..a09bedda6 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql @@ -18,7 +18,8 @@ dots.NaSequence query_sequence WHERE e.na_sequence_id = ba.query_na_sequence_id AND ga.na_sequence_id = ba.target_na_sequence_id - AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0 + AND ba.target_end >= ga.gene_start_min + AND ga.gene_end_max >= ba.target_start AND query_sequence.na_sequence_id = ba.query_na_sequence_id AND ga.org_abbrev = ':ORG_ABBREV' diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index 552ac4e85..09cd03863 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -21,7 +21,6 @@ END, 1, 100) AS organism, taxon.ncbi_tax_id, ed.name AS external_db_name, - coalesce(best.best_alignment_count, 0) AS best_alignment_count, l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm, sres.TaxonName tn, sres.ExternalDatabase ed, @@ -29,22 +28,6 @@ :SCHEMA.taxonspecies ts_r, -- ref organism :SCHEMA.taxonspecies ts_e, -- est organism sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens - LEFT JOIN - (select query_na_sequence_id,max(ct) as best_alignment_count - from ( - SELECT query_na_sequence_id, COUNT(*) AS ct - FROM dots.BlatAlignment ba, apidb.datasource ds, apidb.organism o, - sres.externaldatabase d, sres.externaldatabaserelease r - WHERE is_best_alignment = 1 - AND ba.query_external_db_release_id = r.external_database_release_id - AND r.external_database_id = d.external_database_id - AND d.name = ds.name - AND ds.taxon_id = o.taxon_id - AND o.is_reference_strain = 1 - AND o.taxon_id = :TAXON_ID - GROUP BY target_external_db_release_id,query_na_sequence_id) t - group by query_na_sequence_id - ) best ON ens.na_sequence_id = best.query_na_sequence_id WHERE e.na_sequence_id = ens.na_sequence_id AND e.library_id = l.library_id AND ens.taxon_id = tn.taxon_id @@ -58,7 +41,7 @@ AND o.taxon_id = ts_r.taxon_id AND ts_e.species_taxon_id = ts_r.species_taxon_id and ds.taxon_id = o.taxon_id - and o.is_reference_strain = 1 + and o.is_reference_strain = 1 -- We want ESTs for the species of the reference strain and o.taxon_id = :TAXON_ID AND oterm.name = 'EST'; :DECLARE_PARTITION;