Skip to content

Commit 9dd0d8c

Browse files
authored
Merge pull request #114 from Ensembl/update/beta3_handover
Update/beta3 handover
2 parents fd94633 + acfa9c9 commit 9dd0d8c

File tree

25 files changed

+319
-526
lines changed

25 files changed

+319
-526
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
3.2.1
1+
3.3.0

src/ensembl/production/metadata/api/factories/datasets.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515

1616
import sqlalchemy.orm
1717
from ensembl.utils.database.dbconnection import DBConnection
18-
from sqlalchemy.engine import make_url
1918
from sqlalchemy.sql import func
2019

2120
from ensembl.production.metadata.api.exceptions import *
@@ -41,6 +40,7 @@ def create_all_child_datasets(self, dataset_uuid: str,
4140
topic: str = 'production_process',
4241
status: DatasetStatus = None,
4342
release: EnsemblRelease = None):
43+
# CURRENTLY BROKEN FOR STATUS AND RELEASE. Marc broke it with his last update. Trace back to fix.
4444
# Retrieve the top-level dataset
4545
# Will not work on datasets that are tied to multiple genomes!
4646
# !!!! WILL CREATE THE DATASETS EVEN IF THEY ALREADY EXIST
@@ -178,7 +178,7 @@ def __create_child_datasets_recursive(self, session, parent_dataset, topic=None,
178178
version = parent_dataset.version
179179
# Create the child dataset
180180
if not exist_ds:
181-
logger.debug(f"Creating dataset {dataset_type.name}/{dataset_source.name}/{status.value}/{release}")
181+
# logger.debug(f"Creating dataset {dataset_type.name}/{dataset_source.name}/{status.value}/{release}")
182182
child_uuid, dataset, attributes, g_dataset = self.create_dataset(session=session,
183183
genome_input=genome_uuid,
184184
dataset_source=dataset_source,

src/ensembl/production/metadata/api/factory.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def meta_factory(db_uri, metadata_uri, force=False):
3030
elif '_funcgen_' in db_url.database:
3131
raise Exception("funcgen not implemented yet")
3232
elif '_core_' in db_url.database:
33-
return CoreMetaUpdater(db_uri, metadata_uri, force=force)
33+
return CoreMetaUpdater(db_uri, metadata_uri)
3434
elif '_otherfeatures_' in db_url.database:
3535
raise Exception("otherfeatures not implemented yet")
3636
elif '_rnaseq_' in db_url.database:

src/ensembl/production/metadata/api/models/dataset.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
import sqlalchemy
1818
from sqlalchemy import Column, Integer, String, text, ForeignKey, Index, JSON
19-
from sqlalchemy.dialects.mysql import DATETIME
19+
from sqlalchemy.dialects.mysql import DATETIME, TINYINT
2020
from sqlalchemy.orm import relationship, backref
2121
from sqlalchemy.sql import func
2222
from sqlalchemy.types import Enum
@@ -49,6 +49,7 @@ class Attribute(LoadAble, Base):
4949
name = Column(String(128), nullable=False)
5050
label = Column(String(128), nullable=False)
5151
description = Column(String(255))
52+
required = Column(TINYINT(1), nullable=False, default=0)
5253
type = Column(Enum('string', 'percent', 'float', 'integer', 'bp', 'number'), server_default=text("'string'"))
5354
# One to many relationships
5455
# attribute_id within dataset attribute

src/ensembl/production/metadata/updater/base.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,8 @@
1818

1919

2020
class BaseMetaUpdater:
21-
def __init__(self, db_uri, metadata_uri, release=None, force=None):
21+
def __init__(self, db_uri, metadata_uri, release=None):
2222
self.db_uri = db_uri
23-
self.force = force
2423
self.metadata_uri = metadata_uri
2524
self.db = DBConnection(self.db_uri)
2625
self.metadata_db = DBConnection(metadata_uri)

src/ensembl/production/metadata/updater/core.py

Lines changed: 94 additions & 132 deletions
Large diffs are not rendered by default.

src/ensembl/production/metadata/updater/updater_utils.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,22 @@
1313
from ensembl.production.metadata.api.models import Attribute, DatasetAttribute
1414

1515

16-
def update_attributes(dataset, attributes, session):
16+
def update_attributes(dataset, attributes, session, replace=False):
1717
# TODO If attributes already exist, update them. Add option to replace all.
1818
dataset_attributes = []
19+
if replace:
20+
for dataset_attribute in dataset.dataset_attributes:
21+
session.delete(dataset_attribute)
22+
session.flush()
1923
for attribute, value in attributes.items():
2024
meta_attribute = session.query(Attribute).filter(Attribute.name == attribute).one_or_none()
2125
if meta_attribute is None:
2226
raise UpdaterException(f"{attribute} does not exist. Add it to the database and reload.")
23-
dataset_attributes.append(DatasetAttribute(
27+
new_dataset_attribute = DatasetAttribute(
2428
value=value,
2529
dataset=dataset,
2630
attribute=meta_attribute,
27-
))
31+
)
32+
session.add(new_dataset_attribute)
33+
dataset_attributes.append(new_dataset_attribute)
2834
return dataset_attributes

src/tests/databases/core_1/meta.txt

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,21 +3,22 @@
33
13 1 assembly.name jaber01
44
11 1 assembly.ucsc_alias SCARY
55
15 1 gencode.version 999
6-
3 1 species.common_name jabberwocky
7-
7 1 species.division Ensembl_TEST
8-
6 1 species.production_name Jabberwocky
9-
4 1 species.scientific_name carol_jabberwocky
10-
1 1 species.species_taxonomy_id 10029
11-
8 1 species.strain reference
12-
9 1 species.strain_group testing
13-
2 1 species.taxonomy_id 10029
14-
10 1 species.type monsters
15-
5 1 species.url Jabbe
6+
3 1 organism.common_name jabberwocky
7+
7 1 organism.division Ensembl_TEST
8+
6 1 organism.production_name Jabberwocky
9+
4 1 organism.scientific_name carol_jabberwocky
10+
1 1 organism.species_taxonomy_id 10029
11+
8 1 organism.strain reference
12+
9 1 organism.strain_group testing
13+
2 1 organism.taxonomy_id 10029
14+
10 1 organism.type monsters
15+
5 1 organism.url Jabbe
1616
17 1 genebuild.version ENS01
1717
18 1 genebuild.sample_gene ENSAMXG00005000318
1818
19 1 genebuild.sample_location KB871578.1:9766653-9817473
1919
20 1 strain.type test
2020
23 1 genebuild.provider_name test
2121
24 1 genebuild.start_date 2023-07-Ensembl
2222
25 1 assembly.alt_accession GCA_0000012345.3
23-
26 \N schema_version 110
23+
26 \N schema_version 110
24+
27 1 genebuild.last_geneset_update 2023-01

src/tests/databases/core_2/meta.txt

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,21 +4,22 @@
44
11 1 assembly.ucsc_alias SCARY
55
15 1 gencode.version 999
66
16 1 genebuild.last_geneset_update 01
7-
3 1 species.common_name jabberwocky
8-
7 1 species.division Ensembl_TEST
9-
6 1 species.production_name Jabberwocky
10-
4 1 species.scientific_name carol_jabberwocky
11-
1 1 species.species_taxonomy_id 6666666
12-
8 1 species.strain reference
13-
9 1 species.strain_group testing
14-
2 1 species.taxonomy_id 666668
15-
10 1 species.type monsters
16-
5 1 species.url Jabbe
7+
3 1 organism.common_name jabberwocky
8+
7 1 organism.division Ensembl_TEST
9+
6 1 organism.production_name Jabberwocky
10+
4 1 organism.scientific_name carol_jabberwocky
11+
1 1 organism.species_taxonomy_id 6666666
12+
8 1 organism.strain reference
13+
9 1 organism.strain_group testing
14+
2 1 organism.taxonomy_id 666668
15+
10 1 organism.type monsters
16+
5 1 organism.url Jabbe
1717
17 1 genebuild.version ENS01
1818
18 1 genebuild.sample_gene ENSAMXG00005000318
1919
19 1 genebuild.sample_location KB871578.1:9766653-9817473
2020
20 1 strain.type test
2121
21 1 genome.genome_uuid test
2222
23 1 genebuild.provider_name test2
2323
24 1 genebuild.start_date 2023-07-Ensembl
24-
25 \N schema_version 110
24+
25 \N schema_version 110
25+
26 1 genebuild.last_geneset_update 2023-01

src/tests/databases/core_3/meta.txt

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,21 @@
33
11 1 assembly.ucsc_alias SCARYIER
44
14 1 gencode.version 999
55
15 1 genebuild.last_geneset_update 2024-02
6-
3 1 species.common_name jabberwocky
7-
7 1 species.division Ensembl_TEST
8-
6 1 species.production_name Jabberwocky
9-
4 1 species.scientific_name carol_jabberwocky
10-
1 1 species.species_taxonomy_id 6666666
11-
8 1 species.strain reference
12-
9 1 species.strain_group testing
13-
2 1 species.taxonomy_id 666668
14-
10 1 species.type monsters
15-
5 1 species.url Jabbe
6+
3 1 organism.common_name jabberwocky
7+
7 1 organism.division Ensembl_TEST
8+
6 1 organism.production_name Jabberwocky
9+
4 1 organism.scientific_name carol_jabberwocky
10+
1 1 organism.species_taxonomy_id 6666666
11+
8 1 organism.strain reference
12+
9 1 organism.strain_group testing
13+
2 1 organism.taxonomy_id 666668
14+
10 1 organism.type monsters
15+
5 1 organism.url Jabbe
1616
17 1 genebuild.version ENS01
1717
18 1 genebuild.sample_gene ENSAMXG00005000318
1818
19 1 genebuild.sample_location KB871578.1:9766653-9817473
1919
20 1 strain.type test
2020
23 1 genebuild.provider_name test
2121
24 1 genebuild.start_date 2023-07-Ensembl
22-
25 \N schema_version 110
22+
25 \N schema_version 110
23+
26 1 genebuild.last_geneset_update 2023-01

0 commit comments

Comments
 (0)