diff --git a/core/api/utils/samples.py b/core/api/utils/samples.py
index 98e3741a..bf002743 100644
--- a/core/api/utils/samples.py
+++ b/core/api/utils/samples.py
@@ -27,9 +27,14 @@ def prepare_fields_in_sample(s_data):
 
 def split_sample_data(data):
     """Split the json request into dictionnaries with the right fields"""
+    ALIASES = {
+        "collecting_institution_code_1": "lab_code_1",
+    }
     split_data = {"sample": {}, "author": {}, "gisaid": {}, "ena": {}}
-    for item, value in data.items():
+    normalized_items = {ALIASES.get(item, item): value for item, value in data.items()}
+
+    for item, value in normalized_items.items():
         if "author" in item:
             split_data["author"][item] = value
             continue
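
The alias pass is easiest to read in isolation; a minimal standalone sketch of the rename step above (dict contents as in the diff):

    ALIASES = {"collecting_institution_code_1": "lab_code_1"}

    data = {"collecting_institution_code_1": "1328000027", "sequencing_sample_id": "S1"}
    # Unknown keys pass through untouched; aliased keys are renamed.
    normalized = {ALIASES.get(key, key): value for key, value in data.items()}
    assert normalized == {"lab_code_1": "1328000027", "sequencing_sample_id": "S1"}
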
diff --git a/core/api/views.py b/core/api/views.py
index fedcbc5d..da829dca 100644
--- a/core/api/views.py
+++ b/core/api/views.py
@@ -29,6 +29,7 @@
 import core.api.utils.variants
 import core.api.utils.common_functions
 import core.config
+import core.utils.lab_catalog
 
 
 @extend_schema(
@@ -58,6 +59,7 @@
                 "study_title": "",
                 "study_type": "Whole Genome Sequencing",
                 "submitting_lab_sample_id": "LAB_856232",
+                "collecting_institution_code_1": "1328000027",
             },
         )
     ],
@@ -85,6 +87,7 @@
             "study_title": serializers.CharField(required=False),
             "study_type": serializers.CharField(required=False),
             "submitting_lab_sample_id": serializers.CharField(),
+            "collecting_institution_code_1": serializers.CharField(),
         },
     ),
     description="More descriptive text",
@@ -105,6 +108,7 @@
                 "collecting_lab_sample_id": "1000",
                 "submitting_lab_sample_id": "None",
                 "collecting_institution": "Instituto de Salud Carlos III",
+                "lab_code_1": "1328000027",
                 "submitting_institution": "Instituto de Salud Carlos III",
                 "sequence_file_R1": "SAMPLE1_R1.fastq.gz",
                 "sequence_file_R2": "SAMPLE1_R2.fastq.gz",
@@ -142,6 +146,7 @@
                 "collecting_lab_sample_id": "1000",
                 "submitting_lab_sample_id": "None",
                 "collecting_institution": "Instituto de Salud Carlos III",
+                "lab_code_1": "1328000027",
                 "submitting_institution": "Instituto de Salud Carlos III",
                 "sequence_file_R1": "SAMPLE1_R1.fastq.gz",
                 "sequence_file_R2": "SAMPLE1_R2.fastq.gz",
@@ -186,20 +191,58 @@ def create_sample_data(request):
             }
             return Response(error, status=status.HTTP_400_BAD_REQUEST)
         schema_id = schema_obj.get_schema_id()
-        # check if sample id field and collecting_institution are in the request
+        # Check mandatory identifiers (lab name is derived further below if needed)
         required_db_fields = [
             "sequencing_sample_id",
             "collecting_lab_sample_id",
             "submitting_institution",
-            "collecting_institution",
         ]
-        if any(field not in data for field in required_db_fields):
-            missing_fields = [f for f in required_db_fields if f not in data]
+        missing_fields = [f for f in required_db_fields if not data.get(f)]
+        if missing_fields:
             print(f"ERROR. Missing: {missing_fields}")
             return Response(
                 {"ERROR": f"Missing: {missing_fields}", "message": "", "data": {}},
                 status=status.HTTP_409_CONFLICT,
             )
+        lab_code_field = "collecting_institution_code_1"
+        lab_code_raw = data.get(lab_code_field)
+        lab_code_value = str(lab_code_raw).strip() if lab_code_raw else ""
+        if lab_code_value in core.config.FIELD_EMPTY_VALUES or not lab_code_value:
+            print(f"ERROR. Missing: [{lab_code_field}]")
+            return Response(
+                {
+                    "ERROR": f"Missing: [{lab_code_field}]",
+                    "message": "",
+                    "data": {},
+                },
+                status=status.HTTP_409_CONFLICT,
+            )
+        resolved_collecting_name = core.utils.lab_catalog.ensure_lab_display(
+            lab_code_value, fallback_name=data.get("collecting_institution")
+        )
+        provided_collecting_name = (data.get("collecting_institution") or "").strip()
+        if provided_collecting_name:
+            if (
+                resolved_collecting_name
+                and provided_collecting_name.lower() != resolved_collecting_name.lower()
+            ):
+                # Canonicalise to the catalog name
+                data["collecting_institution"] = resolved_collecting_name
+        else:
+            if resolved_collecting_name:
+                data["collecting_institution"] = resolved_collecting_name
+            else:
+                print("Unable to resolve collecting_institution from lab_code_1")
+                return Response(
+                    {
+                        "ERROR": "Missing: ['collecting_institution']",
+                        "message": "",
+                        "data": {},
+                    },
+                    status=status.HTTP_409_CONFLICT,
+                )
+        # Include collecting_institution in the required fields list for fingerprint
+        required_db_fields.append("collecting_institution")
         # check if sample is already defined
         temp_fingerprint = core.utils.samples.build_sample_fingerprint(
             *[data[field] for field in required_db_fields]
@@ -220,6 +263,8 @@ def create_sample_data(request):
         split_data = core.api.utils.samples.split_sample_data(data)
         # Add schema id to store in database
         split_data["sample"]["schema_obj"] = schema_id
+        split_data["sample"]["lab_code_1"] = lab_code_value
+        split_data["sample"]["collecting_institution"] = data["collecting_institution"]
         sample_serializer = core.api.serializers.CreateSampleSerializer(
             data=split_data["sample"]
         )
@@ -892,11 +937,19 @@ def check_sample_exists(request):
             status=status.HTTP_400_BAD_REQUEST,
         )
     data = request.query_params
+    lab_code_field = "collecting_institution_code_1"
+    lab_code_raw = data.get(lab_code_field)
+    lab_code_value = str(lab_code_raw).strip() if lab_code_raw else ""
+    collecting_institution = (data.get("collecting_institution") or "").strip()
+    resolved_collecting_name = core.utils.lab_catalog.ensure_lab_display(
+        lab_code_value, fallback_name=collecting_institution
+    )
+
     required_dict = {
         "sequencing_sample_id": data.get("sequencing_sample_id"),
         "collecting_lab_sample_id": data.get("collecting_lab_sample_id"),
         "submitting_institution": data.get("submitting_institution"),
-        "collecting_institution": data.get("collecting_institution"),
+        "collecting_institution": collecting_institution or resolved_collecting_name,
     }
     if not all(required_dict.values()):
         missing_fields = [x for x, v in required_dict.items() if not v]
         return Response(
             {
                 "ERROR": f"Missing: {missing_fields}",
                 "message": "",
                 "data": {},
             },
             status=status.HTTP_400_BAD_REQUEST,
         )
+    if (
+        collecting_institution
+        and resolved_collecting_name
+        and collecting_institution.lower() != resolved_collecting_name.lower()
+    ):
+        required_dict["collecting_institution"] = resolved_collecting_name
     temp_fingerprint = core.utils.samples.build_sample_fingerprint(
         *[value for value in required_dict.values()]
    )
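
The branchy canonicalisation in create_sample_data reduces to a single decision rule; a hedged sketch of it (the helper name resolve_collecting_institution is illustrative, not part of the diff):

    from typing import Optional

    def resolve_collecting_institution(provided: str, resolved: str) -> Optional[str]:
        """Prefer the catalog name; keep the payload name when they agree."""
        provided = (provided or "").strip()
        if provided:
            if resolved and provided.lower() != resolved.lower():
                return resolved  # canonicalise to the catalog name
            return provided
        return resolved or None  # None maps to the 409 "Missing" response
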
"lab_code_1", } diff --git a/core/models.py b/core/models.py index e07efad7..e34f5a81 100644 --- a/core/models.py +++ b/core/models.py @@ -1,5 +1,6 @@ # Generic imports import hashlib +import logging from django.db import models, IntegrityError, transaction from django.contrib.auth.models import User from django.db.models.signals import post_save @@ -25,8 +26,54 @@ def get_lab_name(self): # Mapped to submitting_institution field in Sample model return "%s" % (self.laboratory) - def get_lab_code(self): - return "%s" % (self.code_id) + def _resolve_lab_code(self): + """Return the `lab_code_1` associated with the laboratory name.""" + + if not self.laboratory: + return "" + try: + from core.utils import lab_catalog + except ImportError: + logging.getLogger(__name__).warning( + "Unable to import lab_catalog to resolve lab codes" + ) + return "" + resolved = lab_catalog.get_lab_code(self.laboratory) + return resolved or "" + + def get_lab_code(self, fallback_to_lookup=True): + """Return the stored lab code, looking it up if missing.""" + + if self.code_id: + return "%s" % (self.code_id) + if not fallback_to_lookup: + return "" + + resolved = self._resolve_lab_code() + if resolved: + if self.pk: + type(self).objects.filter(pk=self.pk).update(code_id=resolved) + else: + self.code_id = resolved + return resolved + + def save(self, *args, **kwargs): + resolved_code = self._resolve_lab_code() + if resolved_code: + self.code_id = resolved_code + update_fields = kwargs.get("update_fields") + if update_fields is not None: + if isinstance(update_fields, (list, tuple, set, frozenset)): + fields = [] + for item in update_fields: + if item not in fields: + fields.append(item) + else: + fields = [update_fields] + if "code_id" not in fields: + fields.append("code_id") + kwargs["update_fields"] = fields + super().save(*args, **kwargs) @receiver(post_save, sender=User) @@ -705,6 +752,7 @@ class Sample(models.Model): sequencing_sample_id = models.CharField(max_length=80, null=True, blank=True) submitting_lab_sample_id = models.CharField(max_length=80, null=True, blank=True) collecting_institution = models.CharField(max_length=120, null=True, blank=True) + lab_code_1 = models.CharField(max_length=80, null=True, blank=True) submitting_institution = models.CharField(max_length=120, null=True, blank=True) sequence_file_R1 = models.CharField(max_length=80, null=True, blank=True) sequence_file_R2 = models.CharField(max_length=80, null=True, blank=True) diff --git a/core/utils/bioinfo_analysis.py b/core/utils/bioinfo_analysis.py index 2b53c71f..4b696295 100644 --- a/core/utils/bioinfo_analysis.py +++ b/core/utils/bioinfo_analysis.py @@ -1,7 +1,10 @@ -from django.db.models import Count, QuerySet -from django.core.cache import cache +from collections import Counter +from itertools import islice from typing import Iterable, Union +from django.core.cache import cache +from django.db.models import QuerySet + import core.config import core.models import core.utils.samples @@ -9,6 +12,15 @@ SchemaLike = Union["core.models.Schema", Iterable["core.models.Schema"], QuerySet] +def _chunked(iterator, chunk_size): + """Yield lists pulled from ``iterator`` with up to ``chunk_size`` items.""" + while True: + chunk = list(islice(iterator, chunk_size)) + if not chunk: + break + yield chunk + + def get_bio_analysis_stats_from_lab( lab_name=None, institution_type="submitting_institution" ): @@ -71,53 +83,79 @@ def get_bioinfo_analyis_fields_utilization( schemas (all by default). Executes **one** heavy query + one light query. 
diff --git a/core/models.py b/core/models.py
index e07efad7..e34f5a81 100644
--- a/core/models.py
+++ b/core/models.py
@@ -1,5 +1,6 @@
 # Generic imports
 import hashlib
+import logging
 from django.db import models, IntegrityError, transaction
 from django.contrib.auth.models import User
 from django.db.models.signals import post_save
@@ -25,8 +26,54 @@ def get_lab_name(self):
         # Mapped to submitting_institution field in Sample model
         return "%s" % (self.laboratory)
 
-    def get_lab_code(self):
-        return "%s" % (self.code_id)
+    def _resolve_lab_code(self):
+        """Return the `lab_code_1` associated with the laboratory name."""
+
+        if not self.laboratory:
+            return ""
+        try:
+            from core.utils import lab_catalog
+        except ImportError:
+            logging.getLogger(__name__).warning(
+                "Unable to import lab_catalog to resolve lab codes"
+            )
+            return ""
+        resolved = lab_catalog.get_lab_code(self.laboratory)
+        return resolved or ""
+
+    def get_lab_code(self, fallback_to_lookup=True):
+        """Return the stored lab code, looking it up if missing."""
+
+        if self.code_id:
+            return "%s" % (self.code_id)
+        if not fallback_to_lookup:
+            return ""
+
+        resolved = self._resolve_lab_code()
+        if resolved:
+            if self.pk:
+                type(self).objects.filter(pk=self.pk).update(code_id=resolved)
+            else:
+                self.code_id = resolved
+        return resolved
+
+    def save(self, *args, **kwargs):
+        resolved_code = self._resolve_lab_code()
+        if resolved_code:
+            self.code_id = resolved_code
+            update_fields = kwargs.get("update_fields")
+            if update_fields is not None:
+                if isinstance(update_fields, (list, tuple, set, frozenset)):
+                    fields = []
+                    for item in update_fields:
+                        if item not in fields:
+                            fields.append(item)
+                else:
+                    fields = [update_fields]
+                if "code_id" not in fields:
+                    fields.append("code_id")
+                kwargs["update_fields"] = fields
+        super().save(*args, **kwargs)
 
 
 @receiver(post_save, sender=User)
@@ -705,6 +752,7 @@ class Sample(models.Model):
     sequencing_sample_id = models.CharField(max_length=80, null=True, blank=True)
     submitting_lab_sample_id = models.CharField(max_length=80, null=True, blank=True)
     collecting_institution = models.CharField(max_length=120, null=True, blank=True)
+    lab_code_1 = models.CharField(max_length=80, null=True, blank=True)
     submitting_institution = models.CharField(max_length=120, null=True, blank=True)
     sequence_file_R1 = models.CharField(max_length=80, null=True, blank=True)
     sequence_file_R2 = models.CharField(max_length=80, null=True, blank=True)
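
Usage sketch for the save() override above, assuming the model carrying laboratory/code_id is Profile (instance values are illustrative):

    from core.models import Profile

    profile = Profile.objects.get(pk=1)  # hypothetical instance
    profile.laboratory = "Instituto de Salud Carlos III"
    # code_id is resolved from the catalog and appended to update_fields,
    # so even a narrow update persists the refreshed code.
    profile.save(update_fields=["laboratory"])
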
diff --git a/core/utils/bioinfo_analysis.py b/core/utils/bioinfo_analysis.py
index 2b53c71f..4b696295 100644
--- a/core/utils/bioinfo_analysis.py
+++ b/core/utils/bioinfo_analysis.py
@@ -1,7 +1,10 @@
-from django.db.models import Count, QuerySet
-from django.core.cache import cache
+from collections import Counter
+from itertools import islice
 from typing import Iterable, Union
 
+from django.core.cache import cache
+from django.db.models import QuerySet
+
 import core.config
 import core.models
 import core.utils.samples
@@ -9,6 +12,15 @@
 SchemaLike = Union["core.models.Schema", Iterable["core.models.Schema"], QuerySet]
 
 
+def _chunked(iterator, chunk_size):
+    """Yield lists pulled from ``iterator`` with up to ``chunk_size`` items."""
+    while True:
+        chunk = list(islice(iterator, chunk_size))
+        if not chunk:
+            break
+        yield chunk
+
+
 def get_bio_analysis_stats_from_lab(
     lab_name=None, institution_type="submitting_institution"
 ):
@@ -71,53 +83,79 @@ def get_bioinfo_analyis_fields_utilization(
-    schemas (all by default). Executes **one** heavy query + one light query.
-    Results can be cached for `cache_seconds`.
+    schemas (all by default). Runs a chunked scan over the bioinfo values plus
+    one light query for the defined labels. Results can be cached for
+    `cache_seconds`.
     """
-    # -- 0. Pick or normalise schemas ------------------------------------
+    # -- 0. Normalise schema identifiers ---------------------------------
     if schema_qs is None:
-        schema_qs = core.models.Schema.objects.all()
+        schema_ids = list(core.models.Schema.objects.values_list("pk", flat=True))
     elif isinstance(schema_qs, QuerySet):
-        pass
+        schema_ids = list(schema_qs.values_list("pk", flat=True))
+    elif isinstance(schema_qs, (list, tuple, set)):
+        schema_ids = [getattr(item, "pk", item) for item in schema_qs]
     else:
-        schema_qs = schema_qs if isinstance(schema_qs, (list, tuple)) else [schema_qs]
+        schema_ids = [getattr(schema_qs, "pk", schema_qs)]
+
+    schema_ids = sorted({sid for sid in schema_ids if sid is not None})
+    if not schema_ids:
+        return {}
 
     # -- 1. Check cache ---------------------------------------------------
+    cache_key = None
     if use_cache:
-        cache_key = f"bioinfo_util_{hash(tuple(x.pk for x in schema_qs))}"
+        cache_key = f"bioinfo_util_{hash(tuple(schema_ids))}"
         cached = cache.get(cache_key)
         if cached:
            return cached
 
+    sample_filter = {"schema_obj_id__in": schema_ids}
+
     # -- 2. Total samples -------------------------------------------------
-    num_samples = (
-        core.models.Sample.objects.filter(schema_obj__in=schema_qs)
-        .only("id")  # lighter count(*)
-        .count()
-    )
+    num_samples = core.models.Sample.objects.filter(**sample_filter).count()
     if num_samples == 0:
         return {}
 
-    # -- 3. One grouped query: filled counts ------------------------------
-    FIELD_EMPTY = core.config.FIELD_EMPTY_VALUES
-    rows = (
-        core.models.BioinfoAnalysisValue.objects.filter(
-            bioinfo_analysis_fieldID__schemaID__in=schema_qs,
-            value__isnull=False,
-        )
-        .exclude(value__in=FIELD_EMPTY)
-        .values("bioinfo_analysis_fieldID__label_name")
-        .annotate(filled=Count("sample", distinct=True))
+    # -- 3. Chunked scan of bioinfo values --------------------------------
+    FIELD_EMPTY = set(core.config.FIELD_EMPTY_VALUES)
+    through_model = core.models.Sample.bio_analysis_values.through
+    batch_size = 500
+    fields_counter = Counter()
+
+    sample_iter = (
+        core.models.Sample.objects.filter(**sample_filter)
+        .values_list("pk", flat=True)
+        .iterator(chunk_size=batch_size)
     )
-    fields_value = {
-        r["bioinfo_analysis_fieldID__label_name"]: r["filled"] for r in rows
-    }
+
+    for batch in _chunked(sample_iter, batch_size):
+        if not batch:
+            continue
+
+        seen_pairs = set()
+        rows = (
+            through_model.objects.filter(sample_id__in=batch)
+            .filter(bioinfoanalysisvalue__value__isnull=False)
+            .exclude(bioinfoanalysisvalue__value__in=FIELD_EMPTY)
+            .values_list(
+                "bioinfoanalysisvalue__bioinfo_analysis_fieldID__label_name",
+                "sample_id",
+            )
+            .iterator(chunk_size=batch_size)
+        )
+
+        for label, sample_id in rows:
+            key = (label, sample_id)
+            if key in seen_pairs:
+                continue
+            seen_pairs.add(key)
+            fields_counter[label] += 1
+
+    fields_value = dict(fields_counter)
     fields_norm = {k: v / num_samples for k, v in fields_value.items()}
     labels_with_value = set(fields_value)
 
-    # -- 4. Single light query to fetch ALL labels ------------------------
+    # -- 4. Fetch defined labels -------------------------------------------
     defined_labels = set(
-        core.models.BioinfoAnalysisField.objects.filter(
-            schemaID__in=schema_qs
-        ).values_list("label_name", flat=True)
+        core.models.BioinfoAnalysisField.objects.filter(schemaID__pk__in=schema_ids)
+        .values_list("label_name", flat=True)
+        .distinct()
     )
     never_used = defined_labels - labels_with_value
 
@@ -130,7 +168,7 @@ def get_bioinfo_analyis_fields_utilization(
     }
 
     # -- 5. Cache for N seconds ------------------------------------------
-    if use_cache:
+    if cache_key:
         cache.set(cache_key, result, cache_seconds)
 
     return result
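
The chunked counting strategy can be checked without Django; a self-contained sketch (because batches partition sample ids, a (label, sample_id) pair can only repeat inside a single batch, so the per-batch seen_pairs set counts each pair once):

    from collections import Counter
    from itertools import islice

    def chunked(iterable, chunk_size):
        iterator = iter(iterable)
        while True:
            chunk = list(islice(iterator, chunk_size))
            if not chunk:
                break
            yield chunk

    # (label, sample_id) rows as the through-table query would stream them;
    # all rows for one sample land in the same batch.
    rows = [("lineage", 1), ("lineage", 1), ("depth", 1), ("lineage", 2)]
    counts = Counter()
    for batch in chunked(rows, 3):
        seen_pairs = set()
        for pair in batch:
            if pair not in seen_pairs:
                seen_pairs.add(pair)
                counts[pair[0]] += 1
    assert counts == Counter({"lineage": 2, "depth": 1})
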
str = "") -> str: + """Return the display name for the provided lab code.""" + + entry = get_lab_entry(code) + if not entry: + return default + name = entry.get("collecting_institution") + return name or default + + +@lru_cache(maxsize=1) +def _build_name_index() -> Dict[str, str]: + """Construct a case-insensitive index to resolve codes by name.""" + + index: Dict[str, str] = {} + for code, entry in _load_catalog().items(): + name = entry.get("collecting_institution") + if not name: + continue + index[name.lower()] = code + return index + + +def get_lab_code(name: Optional[str]) -> Optional[str]: + """Resolve a lab code from a display name (case-insensitive).""" + + if not name: + return None + return _build_name_index().get(name.lower()) + + +def ensure_lab_display(code: Optional[str], fallback_name: Optional[str] = None) -> str: + """Return the preferred display name for a lab code.""" + + if not code: + return fallback_name or "" + display = get_lab_name(code) + if display: + return display + return fallback_name or code diff --git a/core/utils/labs.py b/core/utils/labs.py index 4ccf6f27..c9696401 100644 --- a/core/utils/labs.py +++ b/core/utils/labs.py @@ -2,6 +2,7 @@ import core.models import core.utils.rest_api import core.utils.samples +import core.utils.lab_catalog def get_lab_contact_details(user_obj): @@ -14,9 +15,9 @@ def get_lab_contact_details(user_obj): if "ERROR" in data: return data["ERROR"] - if not data["DATA"]: + if not data["data"]: return "" - lab_data = data.get("DATA", {}).copy() + lab_data = data.get("data", {}).copy() return lab_data @@ -45,6 +46,24 @@ def get_collecting_insts_from_user(user_obj): return available_samples.values_list("collecting_institution", flat=True).distinct() +def get_lab_codes_from_user(user_obj): + """Return the set of lab_code_1 values associated with the user's profile.""" + + profile = core.models.Profile.objects.filter(user=user_obj).last() + if not profile: + return [] + code = profile.get_lab_code() + return [code] if code else [] + + +def get_display_name_from_code(lab_code): + """Resolve a human-readable name for the given lab code.""" + + if not lab_code: + return "" + return core.utils.lab_catalog.ensure_lab_display(lab_code) + + def update_contact_lab(data): """Update the contact information. If any field is empty it will set the old value. 
diff --git a/core/utils/labs.py b/core/utils/labs.py
index 4ccf6f27..c9696401 100644
--- a/core/utils/labs.py
+++ b/core/utils/labs.py
@@ -2,6 +2,7 @@
 import core.models
 import core.utils.rest_api
 import core.utils.samples
+import core.utils.lab_catalog
 
 
 def get_lab_contact_details(user_obj):
@@ -14,9 +15,9 @@ def get_lab_contact_details(user_obj):
     if "ERROR" in data:
         return data["ERROR"]
 
-    if not data["DATA"]:
+    if not data["data"]:
         return ""
-    lab_data = data.get("DATA", {}).copy()
+    lab_data = data.get("data", {}).copy()
     return lab_data
 
 
@@ -45,6 +46,24 @@ def get_collecting_insts_from_user(user_obj):
     return available_samples.values_list("collecting_institution", flat=True).distinct()
 
 
+def get_lab_codes_from_user(user_obj):
+    """Return the list of lab_code_1 values associated with the user's profile."""
+
+    profile = core.models.Profile.objects.filter(user=user_obj).last()
+    if not profile:
+        return []
+    code = profile.get_lab_code()
+    return [code] if code else []
+
+
+def get_display_name_from_code(lab_code):
+    """Resolve a human-readable name for the given lab code."""
+
+    if not lab_code:
+        return ""
+    return core.utils.lab_catalog.ensure_lab_display(lab_code)
+
+
 def update_contact_lab(data):
     """Update the contact information. If any field is empty it will set the old value.
     In case that all new_data are empty returns than no changes
diff --git a/core/utils/plotly_dash_graphics.py b/core/utils/plotly_dash_graphics.py
index f4266f6a..d5383ca3 100644
--- a/core/utils/plotly_dash_graphics.py
+++ b/core/utils/plotly_dash_graphics.py
@@ -5,6 +5,7 @@
 import plotly.express as px
 from dash.exceptions import PreventUpdate
 import plotly.graph_objects as go
+import pandas as pd
 
 COLOR_PALETTE = [
     "#448873",
@@ -49,9 +50,26 @@
 
 
 def dash_bar_lab(option_list, data):
-    option = []
-    for opt_list in option_list:
-        option.append({"label": opt_list, "value": opt_list})
+    """Build the Dash app that renders weekly sample counts per laboratory."""
+
+    options = []
+    seen_values = set()
+    for raw_option in option_list:
+        if isinstance(raw_option, dict):
+            label = raw_option.get("label") or raw_option.get("collecting_institution")
+            value = raw_option.get("value") or raw_option.get("lab_code_1")
+            if not value:
+                value = raw_option.get("legacy_name")
+            if not label:
+                label = raw_option.get("legacy_name") or value
+        else:
+            value = str(raw_option)
+            label = value
+        if not value or value in seen_values:
+            continue
+        seen_values.add(value)
+        options.append({"label": label or value, "value": value})
+
     app = DjangoDash(
         "samplePerLabGraphic",
         external_stylesheets=[
@@ -61,19 +79,19 @@ def dash_bar_lab(option_list, data):
     )
 
     empty_fig = px.bar(x=[0], y=[0], height=300)
+    default_value = options[0]["value"] if options else None
+
     app.layout = html.Div(
         [
-            html.H4(
-                "Select the collecting institution", style={"fontFamily": "Oxanium"}
-            ),
+            html.H4("Select the laboratory", style={"fontFamily": "Oxanium"}),
             html.Div(
                 [
                     dcc.Dropdown(
                         id="select_collecting_inst",
-                        options=option,
+                        options=options,
                         clearable=False,
                         multi=False,
-                        value=1,
+                        value=default_value,
                         style={"width": "400px"},
                     ),
                 ]
@@ -88,23 +106,37 @@ def dash_bar_lab(option_list, data):
         Input("select_collecting_inst", "value"),
     )
     def update_graph(select_collecting_inst):
-        if select_collecting_inst is None or select_collecting_inst == 1:
+        if not select_collecting_inst:
             raise PreventUpdate
-        sub_data = data[data.collecting_institution == select_collecting_inst]
-        sub_data = sub_data.drop_duplicates(subset=["iso_yearweek"]).reset_index(
-            drop=True
-        )
+        df = data.copy()
+        selected = str(select_collecting_inst)
+        if "lab_code_1" in df.columns:
+            mask = df["lab_code_1"].fillna("").astype(str) == selected
+            fallback_mask = pd.Series(False, index=df.index)
+            for column in [
+                col
+                for col in ["collecting_institution", "legacy_collecting_institution"]
+                if col in df.columns
+            ]:
+                fallback_mask = fallback_mask | (
+                    df[column].fillna("").astype(str) == selected
+                )
+            df = df[mask | fallback_mask]
+        elif "collecting_institution" in df.columns:
+            df = df[df["collecting_institution"].fillna("").astype(str) == selected]
+        else:
+            df = df.iloc[0:0]
+
+        sub_data = df.drop_duplicates(subset=["iso_yearweek"]).reset_index(drop=True)
         sub_data["iso_yearweek"] = sub_data["iso_yearweek"].str.replace(
             r"W(\d{1})$", r"W0\1", regex=True
         )  # Add padding: W5 -> W05
         sub_data["num_samples"] = sub_data["num_samples"].astype(int)
         sub_data = sub_data.sort_values("iso_yearweek")
         if sub_data.empty:
-            # Return an empty figure if no data is available
-            return (
-                empty_fig,
-                f"Laboratory selected: {select_collecting_inst} (No data available)",
-            )
+            fig = empty_fig
+            fig.update_layout(title="No data available for the selected laboratory")
+            return fig
         graph = px.bar(
             sub_data,
             x=sub_data["iso_yearweek"].astype(str),
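
The option normalisation accepts both dicts and plain strings; a self-contained sketch of the collapse to unique label/value pairs:

    def normalise_options(option_list):
        options, seen = [], set()
        for raw in option_list:
            if isinstance(raw, dict):
                value = raw.get("value") or raw.get("lab_code_1") or raw.get("legacy_name")
                label = raw.get("label") or raw.get("collecting_institution") or value
            else:
                value = label = str(raw)
            if value and value not in seen:
                seen.add(value)
                options.append({"label": label or value, "value": value})
        return options

    opts = normalise_options(["Lab A", {"lab_code_1": "1328000027", "label": "ISCIII"}, "Lab A"])
    assert opts == [
        {"label": "Lab A", "value": "Lab A"},
        {"label": "ISCIII", "value": "1328000027"},
    ]
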
diff --git a/core/utils/samples.py b/core/utils/samples.py
index e72c2ea9..06b3de59 100644
--- a/core/utils/samples.py
+++ b/core/utils/samples.py
@@ -309,23 +309,77 @@ def create_date_sample_bar(lab_sample, cust_data):
     return histogram
 
 
-def create_dash_bar_for_each_lab(labs_data, labs_list=[]):
-    """Create a dash_bar plot for the given labs_data and list of laboratories
+def create_dash_bar_for_each_lab(labs_data, labs_list=None):
+    """Create the Dash bar plot that compares the weekly reception per lab.
 
     Args:
-        labs_data (dict): Dictionary with the following structure:
-            {
-                'submitting_institution': subinst,
-                'collecting_institution': colinst,
-                'iso_yearweek': single_date,
-                'num_samples': num_samples
-            }
-        labs_list (list, optional): _description_. Defaults to [].
+        labs_data (Iterable[dict]): Entries containing at least
+            ``lab_code_1`` (optional), ``collecting_institution`` (display name)
+            and ``iso_yearweek`` + ``num_samples``.
+        labs_list (Iterable[dict|str], optional): Option definitions for the
+            dropdown. When omitted an option list is built from ``labs_data``.
     """
+
     df_data = pd.DataFrame(labs_data)
-    if not labs_list:
-        labs_list = get_all_collecting_insts()
-    core.utils.plotly_dash_graphics.dash_bar_lab(labs_list, df_data)
+    if df_data.empty:
+        return
+
+    if "lab_code_1" not in df_data.columns:
+        df_data["lab_code_1"] = None
+
+    legacy_column = df_data.get("legacy_collecting_institution")
+    if "collecting_institution" in df_data.columns:
+        df_data["collecting_institution"] = df_data["collecting_institution"].fillna(
+            legacy_column
+        )
+    else:
+        df_data["collecting_institution"] = legacy_column
+
+    if labs_list is None:
+        labs_list = []
+
+    normalised_options = []
+    seen_values = set()
+
+    def _add_option(value, label):
+        if not value or value in seen_values:
+            return
+        seen_values.add(value)
+        normalised_options.append({"label": label or value, "value": value})
+
+    for raw_option in labs_list:
+        if isinstance(raw_option, dict):
+            value = (
+                raw_option.get("value")
+                or raw_option.get("lab_code_1")
+                or raw_option.get("collecting_institution")
+                or raw_option.get("legacy_name")
+            )
+            label = (
+                raw_option.get("label")
+                or raw_option.get("collecting_institution")
+                or raw_option.get("legacy_name")
+                or raw_option.get("value")
+                or raw_option.get("lab_code_1")
+            )
+        else:
+            value = str(raw_option)
+            label = value
+        _add_option(value, label)
+
+    if not normalised_options:
+        for _, row in df_data.iterrows():
+            code = row.get("lab_code_1")
+            display = row.get("collecting_institution") or row.get(
+                "legacy_collecting_institution"
+            )
+            value = code or display
+            label = (
+                core.utils.labs.get_display_name_from_code(code) if code else display
+            )
+            _add_option(value, label)
+
+    core.utils.plotly_dash_graphics.dash_bar_lab(normalised_options, df_data)
     return
 
 
@@ -437,12 +491,32 @@ def get_sample_display_data(sample_id, user):
     # Allow to see information obut sample to relecovManager
     manager_group = Group.objects.get(name="RelecovManager")
     if manager_group not in user.groups.all():
+        user_role = core.utils.generic_functions.get_user_role(user)
         user_inst_field = core.utils.generic_functions.get_user_lab_field(user)
-        sample_lab = sample_obj.__dict__.get(user_inst_field)
-        if not core.models.Profile.objects.filter(
-            user=user, laboratory__iexact=sample_lab
-        ).exists():
-            return {"ERROR": core.config.ERROR_NOT_ALLOWED_TO_SEE_THE_SAMPLE}
+        user_lab_name = core.utils.labs.get_lab_name_from_user(user)
+
+        if user_role == "Collector":
+            allowed_codes = set(core.utils.labs.get_lab_codes_from_user(user))
+            sample_code = getattr(sample_obj, "lab_code_1", None)
+            sample_name = getattr(sample_obj, "collecting_institution", "")
+            if sample_code in allowed_codes:
+                pass
+            elif (
+                user_lab_name
+                and sample_name
+                and sample_name.lower() == user_lab_name.lower()
+            ):
+                pass
+            else:
+                return {"ERROR": core.config.ERROR_NOT_ALLOWED_TO_SEE_THE_SAMPLE}
+        else:
+            sample_lab = getattr(sample_obj, user_inst_field, None)
+            if (
+                not sample_lab
+                or not user_lab_name
+                or sample_lab.lower() != user_lab_name.lower()
+            ):
+                return {"ERROR": core.config.ERROR_NOT_ALLOWED_TO_SEE_THE_SAMPLE}
 
     s_data = {}
     s_data["basic"] = list(
@@ -580,7 +654,7 @@ def get_sample_per_date_per_all_lab(detailed=None):
 
 
 def get_search_table_for_user(user_obj):
     """Extract the data to fill the table of available samples to search
-    for the given lab_name, based on collecting_institution"""
+    for the given user, prioritizing lab_code_1 when available"""
     samples_to_search = dashboard.utils.generic_graphic_data.get_graphic_json_data(
         "search_samples_summary_table"
     )
@@ -594,20 +668,36 @@ def get_search_table_for_user(user_obj):
     )
     user_role = core.utils.generic_functions.get_user_role(user_obj)
     table_data = []
+    if user_role == "RelecovManager":
+        for subinst_labs_dict in samples_to_search.values():
+            for info in subinst_labs_dict.values():
+                table_data.extend(info.get("rows", []))
+        return table_data
     if user_role == "Submitter":
         user_lab = core.utils.labs.get_lab_name_from_user(user_obj)
-        for _, table_rows in samples_to_search.get(user_lab, {}).items():
-            table_data.extend(table_rows)
+        lab_entries = samples_to_search.get(user_lab, {})
+        for info in lab_entries.values():
+            table_data.extend(info.get("rows", []))
     else:
-        lab_list = core.utils.labs.get_collecting_insts_from_user(user_obj)
-        for lab in lab_list:
-            found = False
-            for subinst_labs_dict in samples_to_search.values():
-                if lab in subinst_labs_dict.keys():
-                    found = True
-                    table_data.extend(subinst_labs_dict[lab])
-            if not found:
-                print(f"Found no samples for lab {lab} in search_samples_summary")
+        lab_codes = set(core.utils.labs.get_lab_codes_from_user(user_obj))
+        user_lab = core.utils.labs.get_lab_name_from_user(user_obj)
+        for subinst_labs_dict in samples_to_search.values():
+            for info in subinst_labs_dict.values():
+                bucket_code = info.get("lab_code_1")
+                bucket_display = info.get("collecting_institution")
+                rows = info.get("rows", [])
+                matched = False
+                if lab_codes and bucket_code and bucket_code in lab_codes:
+                    matched = True
+                elif (
+                    (not lab_codes or not bucket_code)
+                    and user_lab
+                    and bucket_display
+                    and bucket_display.lower() == user_lab.lower()
+                ):
+                    matched = True
+                if matched:
+                    table_data.extend(rows)
     if not table_data:
         print(f"Found no sample for user {user_obj.username} in search_samples_summary")
     return table_data
@@ -695,13 +785,36 @@ def get_all_submitting_insts():
 
 
 def get_all_collecting_insts():
-    """Function to get all collecting_institutions in an ordered list"""
-    return list(
-        core.models.Sample.objects.values_list("collecting_institution", flat=True)
+    """Return the list of collecting institutions mapped to their lab codes."""
+
+    records = (
+        core.models.Sample.objects.order_by()
+        .values("lab_code_1", "collecting_institution")
         .distinct()
-        .order_by("collecting_institution")
     )
+    options = []
+    seen_values = set()
+    for entry in records:
+        lab_code = entry.get("lab_code_1")
+        legacy_name = entry.get("collecting_institution")
+        value = lab_code or legacy_name
+        if not value or value in seen_values:
+            continue
+        label = core.utils.labs.get_display_name_from_code(lab_code)
+        if not label:
+            label = legacy_name or value
+        options.append(
+            {
+                "value": value,
+                "label": label,
+                "lab_code_1": lab_code,
+                "legacy_name": legacy_name,
+            }
+        )
+        seen_values.add(value)
+    return options
 
 
 def get_all_recieved_samples_with_dates(accumulated=False):
     """Get all samples that are received in the platform. If accumulated is
@@ -857,7 +970,8 @@ def get_available_samples_for_user(user_obj):
     """Return the samples that the user should be able to see, wether its a
     Manager, Submitter or Collector. Based on its laboratory"""
     user_role = core.utils.generic_functions.get_user_role(user_obj)
-    user_lab = core.models.Profile.objects.filter(user=user_obj).last().get_lab_name()
+    profile_obj = core.models.Profile.objects.filter(user=user_obj).last()
+    user_lab = profile_obj.get_lab_name() if profile_obj else ""
     if user_role == "RelecovManager":
         return core.models.Sample.objects.all()
     elif user_role == "Submitter":
         return core.models.Sample.objects.filter(
             submitting_institution__iexact=user_lab
         )
     elif user_role == "Collector":
-        return core.models.Sample.objects.filter(
-            collecting_institution__iexact=user_lab
-        )
+        lab_codes = core.utils.labs.get_lab_codes_from_user(user_obj)
+        filters = Q()
+        if lab_codes:
+            filters |= Q(lab_code_1__in=lab_codes)
+        if user_lab:
+            filters |= Q(collecting_institution__iexact=user_lab)
+        if not filters.children:
+            return core.models.Sample.objects.none()
+        return core.models.Sample.objects.filter(filters).distinct()
     else:
         return core.models.Sample.objects.none()
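
The Collector branch of get_available_samples_for_user builds its filter incrementally; a hedged sketch of the same rule (standard Django Q objects, field names per this diff):

    from django.db.models import Q

    def collector_visibility_filter(lab_codes, user_lab):
        filters = Q()
        if lab_codes:
            filters |= Q(lab_code_1__in=lab_codes)  # primary match: catalog code
        if user_lab:
            # legacy rows that never got a code still match on the stored name
            filters |= Q(collecting_institution__iexact=user_lab)
        return filters  # empty Q().children -> caller falls back to .none()
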
diff --git a/core/views.py b/core/views.py
index 18802b3d..030dfd52 100644
--- a/core/views.py
+++ b/core/views.py
@@ -211,15 +211,36 @@ def intranet(request):
     if manager_group not in request.user.groups.all():
         start = time.time()
         intra_data = {}
+        user_role = core.utils.generic_functions.get_user_role(request.user)
         lab_name = core.utils.labs.get_lab_name_from_user(request.user)
+        lab_code = None
+        lab_codes = core.utils.labs.get_lab_codes_from_user(request.user)
+        if lab_codes:
+            lab_code = lab_codes[0]
         lab_field = core.utils.generic_functions.get_user_lab_field(request.user)
         if not lab_field:
             print(f"No institution field - group found for user: {str(request.user)}")
             return render(request, "core/intranet.html", {"intra_data": {}})
         counted_dates = defaultdict(int)
         for d in all_sample_per_date_detailed:
-            if d[lab_field] != lab_name:
-                continue
+            value = d.get(lab_field)
+            matches_lab = False
+            if user_role == "Collector":
+                if lab_code and value == lab_code:
+                    matches_lab = True
+                else:
+                    fallback_name = (
+                        d.get("legacy_collecting_institution")
+                        or d.get("collecting_institution")
+                        or ""
+                    )
+                    if lab_name and fallback_name:
+                        matches_lab = fallback_name.lower() == lab_name.lower()
+                if not matches_lab:
+                    continue
+            else:
+                if value != lab_name:
+                    continue
             # Adapt YYYY-WNN to datetime format so it can be converted to date object
             converted_date = datetime.strptime(d["iso_yearweek"] + "-1", "%G-W%V-%u")
             # Filter out old data
@@ -234,7 +255,12 @@ def intranet(request):
             key=lambda x: datetime.strptime(x + "-1", "%G-W%V-%u"),
         )
         date_lab_samples = OrderedDict({k: counted_dates[k] for k in dates_sorted})
-        intra_data["lab"] = lab_name
+        display_lab_name = (
+            core.utils.labs.get_display_name_from_code(lab_code)
+            if lab_code
+            else lab_name
+        )
+        intra_data["lab"] = display_lab_name
         print(f"Took {start - time.time()} seconds for date_lab_samples")
         if len(date_lab_samples) > 0:
             start = time.time()
@@ -242,7 +268,7 @@ def intranet(request):
             print(f"Took {start - time.time()} seconds for sample_lab_objs")
             analysis_percent = (
                 core.utils.bioinfo_analysis.get_bio_analysis_stats_from_lab(
-                    lab_name=lab_name, institution_type=lab_field
+                    lab_name=(lab_code or lab_name), institution_type=lab_field
                 )
             )
             print(f"Took {start - time.time()} seconds for analysis_percent")
@@ -265,9 +291,23 @@ def intranet(request):
                 else 0
             )
         )
-        lablist = set(
-            [x["collecting_institution"] for x in clean_samples_per_date_detailed]
-        )
+        lablist = []
+        seen_values = set()
+        for entry in clean_samples_per_date_detailed:
+            code = entry.get("lab_code_1")
+            display = entry.get("collecting_institution") or entry.get(
+                "legacy_collecting_institution"
+            )
+            value = code or display
+            if not value or value in seen_values:
+                continue
+            seen_values.add(value)
+            label = (
+                core.utils.labs.get_display_name_from_code(code)
+                if code
+                else display
+            )
+            lablist.append({"value": value, "label": label or value})
         if len(lablist) > 1:
             core.utils.samples.create_dash_bar_for_each_lab(
                 clean_samples_per_date_detailed, lablist
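
The per-row matching rule in the intranet loop, written as a pure function for clarity (key names per this diff):

    def row_matches_collector(row, lab_code, lab_name, lab_field="lab_code_1"):
        if lab_code and row.get(lab_field) == lab_code:
            return True
        fallback_name = (
            row.get("legacy_collecting_institution")
            or row.get("collecting_institution")
            or ""
        )
        return bool(lab_name and fallback_name and fallback_name.lower() == lab_name.lower())
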
diff --git a/dashboard/utils/generic_process_data.py b/dashboard/utils/generic_process_data.py
index 58582e0a..999af730 100644
--- a/dashboard/utils/generic_process_data.py
+++ b/dashboard/utils/generic_process_data.py
@@ -25,6 +25,7 @@
 import core.utils.generic_functions
 import core.utils.public_db
 import core.utils.bioinfo_analysis
+import core.utils.lab_catalog
 import dashboard.models
 import dashboard.dashboard_config
 import core.config
@@ -189,7 +190,7 @@ def pre_proc_variant_graphic():
     date_sample = {}
     date_variant = {}
 
-    for s_data in in_date_samples["DATA"]:
+    for s_data in in_date_samples["data"]:
         if s_data["collection_sample_date"] not in date_sample:
             date_sample[s_data["collection_sample_date"]] = []
         date_sample[s_data["collection_sample_date"]].append(s_data["Sample Name"])
@@ -778,7 +779,7 @@ def pre_proc_samples_per_date_all_lab(detailed=None):
             )  # Else just process date directly
         )
         for x in in_date_samples[
-            "DATA"
+            "data"
         ]  # each x is a dict of [{"Sample Name": name, "collection_sample_date": date}]
         if isinstance(
             x["collection_sample_date"], (datetime, str)
@@ -812,97 +813,92 @@ def pre_proc_samples_per_date_all_lab(detailed=None):
         return {"SUCCESS": "Success"}
     else:
         # Start processing samples per date and for each lab
-        lab_date_count = []
-        lab_list = list(
-            core.models.Sample.objects.values_list("collecting_institution", flat=True)
-            .distinct()
-            .order_by("collecting_institution")
-        )
         samples_dates_dict = {
             x["Sample Name"]: x["collection_sample_date"]
-            for x in in_date_samples["DATA"]
+            for x in in_date_samples["data"]
         }
         join_conditions = [
             When(sample_unique_id=sample_id, then=Value(collect_date))
             for sample_id, collect_date in samples_dates_dict.items()
         ]
-        all_sample_counts_by_lab = (
-            core.models.Sample.objects.filter(collecting_institution__in=lab_list)
-            .annotate(collecting_date=Case(*join_conditions, output_field=DateField()))
-            .values(
-                "submitting_institution", "collecting_institution", "collecting_date"
-            )
-        )
-        valid_insts = all_sample_counts_by_lab.exclude(
-            collecting_date__isnull=True,
-        )
+        relevant_samples = core.models.Sample.objects.filter(
+            sample_unique_id__in=samples_dates_dict.keys()
+        ).annotate(collecting_date=Case(*join_conditions, output_field=DateField()))
+
+        valid_insts = relevant_samples.exclude(collecting_date__isnull=True)
         # FIXME: This filter should not be necessary if database was correctly curated
         valid_insts = valid_insts.filter(collecting_date__gte=datetime(2019, 1, 1))
-        # Collect all the data https://stackoverflow.com/questions/56219162/annotate-response-of-query-based-on-week-number
         lab_date_count = (
             valid_insts.annotate(
                 iso_yearweek=Concat(
-                    Cast(
-                        ExtractIsoYear(F("collecting_date")), IntegerField()
-                    ),  # get ISO year
-                    Value("-W"),  # This just adds a W to match ISO format of YYYY-WW
-                    LPad(
-                        ExtractWeek(F("collecting_date")), 2, Value("0")
-                    ),  # get ISO week and add leading 0 padding
+                    Cast(ExtractIsoYear(F("collecting_date")), IntegerField()),
+                    Value("-W"),
+                    LPad(ExtractWeek(F("collecting_date")), 2, Value("0")),
                     output_field=CharField(),
-                ),
+                )
             )
-            .values("submitting_institution", "collecting_institution", "iso_yearweek")
-            .order_by("collecting_institution", "iso_yearweek")
+            .values(
+                "submitting_institution",
+                "lab_code_1",
+                "collecting_institution",
+                "iso_yearweek",
+            )
+            .order_by("lab_code_1", "collecting_institution", "iso_yearweek")
             .annotate(num_samples=Count("iso_yearweek"))
         )
-        # Create an auxiliar dict to get all registered events of collecting_institution+iso_yearweek
-        lab_date_count_dict = {
-            (
+
+        lab_date_count_dict = {}
+        for item in lab_date_count:
+            raw_display = item.get("collecting_institution")
+            lab_code = item.get("lab_code_1") or core.utils.lab_catalog.get_lab_code(
+                raw_display
+            )
+            display_name = core.utils.lab_catalog.ensure_lab_display(
+                lab_code, fallback_name=raw_display
+            )
+            key = (
                 item["submitting_institution"],
-                item["collecting_institution"],
-                item["iso_yearweek"],
-            ): item["num_samples"]
-            for item in lab_date_count
-        }
-        all_submitters = set([x[0] for x in lab_date_count_dict.keys()])
+                lab_code or display_name or "",
+            )
+            entry = lab_date_count_dict.setdefault(
+                key,
+                {
+                    "lab_code_1": lab_code,
+                    "display": display_name,
+                    "legacy_collecting_institution": raw_display,
+                    "dates": {},
+                },
+            )
+            entry["dates"][item["iso_yearweek"]] = item["num_samples"]
 
-        # Fill non-registered dates in lab_date_count_dict with num_samples=0
         final_lab_dates_count = []
-        for subinst in all_submitters:
-            subinst_date_counts = [x for x in lab_date_count_dict if x[0] == subinst]
-            related_colinsts = set([x[1] for x in subinst_date_counts])
-            for colinst in related_colinsts:
-                inst_date_counts = [x for x in subinst_date_counts if x[1] == colinst]
-                # For each institution, simulate all the possible weeks between the first and the last registered date
-                first_date = min(
-                    [
-                        datetime.strptime(x[2] + "-1", "%G-W%V-%u")
-                        for x in inst_date_counts
-                    ]
-                )
-                last_date = max(
-                    [
-                        datetime.strptime(x[2] + "-1", "%G-W%V-%u")
-                        for x in inst_date_counts
-                    ]
-                )
-                date_range = core.utils.generic_functions.list_all_possible_weeks(
-                    first_date, last_date, output_format="%G-W%V"
+        for (subinst, _lab_key), info in lab_date_count_dict.items():
+            dates_dict = info["dates"]
+            display_name = info["display"]
+            sorted_dates = sorted(
+                datetime.strptime(iso + "-1", "%G-W%V-%u") for iso in dates_dict.keys()
+            )
+            if not sorted_dates:
+                continue
+            first_date = sorted_dates[0]
+            last_date = sorted_dates[-1]
+            date_range = core.utils.generic_functions.list_all_possible_weeks(
+                first_date, last_date, output_format="%G-W%V"
+            )
+            for date in date_range:
+                num_samples = dates_dict.get(date, 0)
+                final_lab_dates_count.append(
+                    {
+                        "submitting_institution": subinst,
+                        "collecting_institution": display_name,
+                        "lab_code_1": info.get("lab_code_1"),
+                        "legacy_collecting_institution": info.get(
+                            "legacy_collecting_institution"
+                        ),
+                        "iso_yearweek": date,
+                        "num_samples": num_samples,
+                    }
                 )
-                for date in date_range:
-                    # Get the number of samples (defaulting to 0 if not found)
-                    num_samples = lab_date_count_dict.get((subinst, colinst, date), 0)
-
-                    # Append the result for this institution and week
-                    final_lab_dates_count.append(
-                        {
-                            "submitting_institution": subinst,
-                            "collecting_institution": colinst,
-                            "iso_yearweek": date,
-                            "num_samples": num_samples,
-                        }
-                    )
         dashboard.models.GraphicJsonFile.objects.create_new_graphic_json(
             {
                 "graphic_name": "samples_per_date_all_lab_detailed",
@@ -1013,7 +1009,7 @@ def pre_proc_search_samples_summary():
     in_date_samples = core.utils.rest_api.fetch_samples_on_condition(
         "collection_sample_date"
     )
-    logger.info(f"Fetched {len(in_date_samples['DATA'])} samples with collection_date")
+    logger.info(f"Fetched {len(in_date_samples['data'])} samples with collection_date")
     # Prefetch only lineage_values with the desired property
     filtered_lineages = Prefetch(
         "lineage_values",
@@ -1024,7 +1020,7 @@ def pre_proc_search_samples_summary():
     )
     processed_samples_qs = (
         core.models.Sample.objects.filter(
-            sample_unique_id__in=[x["Sample Name"] for x in in_date_samples["DATA"]]
+            sample_unique_id__in=[x["Sample Name"] for x in in_date_samples["data"]]
        )
         .prefetch_related(filtered_lineages)
         .order_by("id")
@@ -1039,36 +1035,62 @@ def pre_proc_search_samples_summary():
         for sample in chunk:
             sample_id = sample.sample_unique_id
             seq_id = sample.sequencing_sample_id
-            col_inst = sample.collecting_institution
+            display_name = sample.collecting_institution
+            lab_code = sample.lab_code_1 or core.utils.lab_catalog.get_lab_code(
+                display_name
+            )
             sub_inst = sample.submitting_institution
             sample_pk = sample.pk
             if sample.filt_lineages:
                 lineage = sample.filt_lineages[0].value
             else:
                 lineage = "Not Defined"
-            match_sampdict[sample_id] = (sample_pk, lineage, col_inst, sub_inst, seq_id)
+            match_sampdict[sample_id] = (
+                sample_pk,
+                lineage,
+                lab_code,
+                display_name,
+                sub_inst,
+                seq_id,
+            )
 
-    final_data = defaultdict(lambda: defaultdict(list))
-    for s_data in in_date_samples["DATA"]:
+    final_data = defaultdict(dict)
+    for s_data in in_date_samples["data"]:
         sample_name = s_data["Sample Name"]
         if sample_name not in match_sampdict.keys():
             errtxt = f"Could not find sample {sample_name} from iskylims. Skipped from search pre_proc_search_samples_summary()"
             logger.error(errtxt)
             continue
         col_date = s_data["collection_sample_date"]
-        sample_pk = match_sampdict[sample_name][0]
-        lineage_name = match_sampdict[sample_name][1]
-        col_inst = match_sampdict[sample_name][2]
-        sub_inst = match_sampdict[sample_name][3]
-        seq_id = match_sampdict[sample_name][4]
-        final_data[sub_inst][col_inst].append(
-            (sample_pk, seq_id, col_date, lineage_name, col_inst)
+        sample_pk, lineage_name, lab_code, display_name, sub_inst, seq_id = (
+            match_sampdict[sample_name]
+        )
+        lab_identifier = lab_code or display_name or ""
+        bucket = final_data[sub_inst].setdefault(
+            lab_identifier,
+            {
+                "lab_code_1": lab_code,
+                "collecting_institution": display_name,
+                "rows": [],
+            },
         )
+        bucket["rows"].append([sample_pk, seq_id, col_date, lineage_name, display_name])
+
+    serializable_data = {
+        sub_inst: {
+            lab_key: {
+                "lab_code_1": info.get("lab_code_1"),
+                "collecting_institution": info.get("collecting_institution"),
+                "rows": info.get("rows", []),
+            }
+            for lab_key, info in labs.items()
+        }
+        for sub_inst, labs in final_data.items()
+    }
     dashboard.models.GraphicJsonFile.objects.create_new_graphic_json(
         {
             "graphic_name": "search_samples_summary_table",
-            "graphic_data": final_data,
+            "graphic_data": serializable_data,
         }
     )
     return {"SUCCESS": "Success"}
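
The persisted search_samples_summary_table payload now buckets rows per submitting institution and lab code; illustrative shape (values invented for the example):

    serializable_data = {
        "Instituto de Salud Carlos III": {        # submitting_institution
            "1328000027": {                       # lab_code_1 (or legacy name)
                "lab_code_1": "1328000027",
                "collecting_institution": "Instituto de Salud Carlos III",
                "rows": [
                    [42, "SEQ_0001", "2023-05-02", "BA.2", "Instituto de Salud Carlos III"],
                ],
            },
        },
    }
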