API: add /api/v1/study/<id>/associations to retrieve comprehensive id, path, processing information for a study

wasade · wasade · commit 6bb87290c948 · 2025-11-06T16:20:28.000-07:00
diff --git a/qiita_pet/handlers/rest/__init__.py b/qiita_pet/handlers/rest/__init__.py
@@ -7,6 +7,7 @@
 # -----------------------------------------------------------------------------
 
 from .study import StudyHandler, StudyCreatorHandler, StudyStatusHandler
+from .study_association import StudyAssociationHandler
 from .study_samples import (StudySamplesHandler, StudySamplesInfoHandler,
                             StudySamplesCategoriesHandler,
                             StudySamplesDetailHandler,
@@ -25,6 +26,7 @@
 ENDPOINTS = (
     (r"/api/v1/study$", StudyCreatorHandler),
     (r"/api/v1/study/([0-9]+)$", StudyHandler),
+    (r"/api/v1/study/([0-9]+)/associations$", StudyAssociationHandler),
     (r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)",
         StudySamplesCategoriesHandler),
     (r"/api/v1/study/([0-9]+)/samples", StudySamplesHandler),
diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py
@@ -0,0 +1,195 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+import warnings
+
+from tornado.escape import json_decode
+
+from qiita_db.handlers.oauth2 import authenticate_oauth
+from qiita_db.study import StudyPerson, Study
+from qiita_db.user import User
+from .rest_handler import RESTHandler
+from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS
+
+
+# word fragments reused to compose the payload key names below
+_STUDY = 'study'
+_PREP = 'prep'
+_FILEPATH = 'filepath'
+_STATUS = 'status'
+_ARTIFACT = 'artifact'
+_SAMPLE = 'sample'
+_METADATA = 'metadata'
+_TEMPLATE = 'template'
+_ID = 'id'
+_PROCESSING = 'processing'
+_TYPE = 'type'
+
+# payload keys: names of the fields in the response built by this module
+STUDY_ID = f'{_STUDY}_{_ID}'
+STUDY_SAMPLE_METADATA_FILEPATH = f'{_STUDY}_{_SAMPLE}_{_METADATA}_{_FILEPATH}'
+PREP_TEMPLATES = f'{_PREP}_{_TEMPLATE}s'
+PREP_ID = f'{_PREP}_{_ID}'
+PREP_STATUS = f'{_PREP}_{_STATUS}'
+PREP_SAMPLE_METADATA_FILEPATH = f'{_PREP}_{_SAMPLE}_{_METADATA}_{_FILEPATH}'
+PREP_DATA_TYPE = f'{_PREP}_data_{_TYPE}'
+PREP_HUMAN_FILTERING = f'{_PREP}_human_filtering'
+PREP_ARTIFACTS = f'{_PREP}_{_ARTIFACT}s'
+ARTIFACT_ID = f'{_ARTIFACT}_{_ID}'
+ARTIFACT_STATUS = f'{_ARTIFACT}_{_STATUS}'
+ARTIFACT_PARENT_IDS = f'{_ARTIFACT}_parent_{_ID}s'
+ARTIFACT_BASAL_ID = f'{_ARTIFACT}_basal_{_ID}'
+ARTIFACT_PROCESSING_ID = f'{_ARTIFACT}_{_PROCESSING}_{_ID}'
+ARTIFACT_PROCESSING_NAME = f'{_ARTIFACT}_{_PROCESSING}_name'
+ARTIFACT_PROCESSING_ARGUMENTS = f'{_ARTIFACT}_{_PROCESSING}_arguments'
+ARTIFACT_FILEPATHS = f'{_ARTIFACT}_{_FILEPATH}s'
+ARTIFACT_FILEPATH = f'{_ARTIFACT}_{_FILEPATH}'
+ARTIFACT_FILEPATH_TYPE = f'{_ARTIFACT}_{_FILEPATH}_{_TYPE}'
+ARTIFACT_FILEPATH_ID = f'{_ARTIFACT}_{_FILEPATH}_{_ID}'
+
+
+def _most_recent_template_path(template):
+    """Return the newest filepath attached to a template, or None.
+
+    Entries of ``template.get_filepaths()`` are indexed as ``(id, filepath)``;
+    the entry that sorts highest (i.e. the highest file ID) is treated as
+    the most recent one.
+    """
+    candidates = template.get_filepaths()
+
+    # the test dataset shows that a prep can exist without a prep template
+    if len(candidates) == 0:
+        return None
+
+    # the maximum entry is the first element of a descending sort
+    newest = max(candidates)
+    return newest[1]
+
+
+def _set_study(payload, study):
+    """Add the study ID and its newest sample metadata filepath to payload.
+
+    ``payload`` is updated in place.
+    """
+    newest_path = _most_recent_template_path(study.sample_template)
+    payload.update({
+        STUDY_ID: study.id,
+        STUDY_SAMPLE_METADATA_FILEPATH: newest_path,
+    })
+
+
+def _set_prep_templates(payload, study):
+    """Add one summary dict per prep template of the study to payload.
+
+    The summaries are stored, in iteration order, under PREP_TEMPLATES.
+    """
+    summaries = []
+    for prep_template in study.prep_templates():
+        # appends the summary of prep_template to summaries
+        _set_prep_template(summaries, prep_template)
+    payload[PREP_TEMPLATES] = summaries
+
+
+def _get_human_filtering(prep_template):
+    """Return the human reads filter method of the prep's artifact.
+
+    None is returned when the prep template has no artifact.
+    """
+    # .current_human_filtering does not describe what the human filter is,
+    # so the method is read from the artifact instead
+    if prep_template.artifact is None:
+        return None
+    return prep_template.artifact.human_reads_filter_method
+
+
+def _set_prep_template(template_payload, prep_template):
+    """Append a summary dict describing prep_template to template_payload.
+
+    The summary holds the prep ID, status, newest metadata filepath, data
+    type, human filtering method and the summaries of the prep's artifacts.
+    """
+    newest_path = _most_recent_template_path(prep_template)
+
+    summary = {
+        PREP_ID: prep_template.id,
+        PREP_STATUS: prep_template.status,
+        PREP_SAMPLE_METADATA_FILEPATH: newest_path,
+        PREP_DATA_TYPE: prep_template.data_type(),
+        PREP_HUMAN_FILTERING: _get_human_filtering(prep_template),
+    }
+    # fills in summary[PREP_ARTIFACTS]
+    _set_artifacts(summary, prep_template)
+
+    template_payload.append(summary)
+
+
+def _get_artifacts(prep_template):
+    """Return every artifact reachable from the prep's artifact, sorted by ID.
+
+    The traversal starts at ``prep_template.artifact`` and follows
+    ``.children`` transitively. An empty list is returned when the prep
+    template has no artifact.
+    """
+    root = prep_template.artifact
+
+    # a prep without an artifact has nothing to traverse; without this guard
+    # the loop below would fail reading .children on None
+    if root is None:
+        return []
+
+    seen = {root}
+    pending = [root]
+    while pending:
+        for child in pending.pop().children:
+            # an artifact with several parents is reachable more than once;
+            # only expand it the first time it is seen
+            if child not in seen:
+                seen.add(child)
+                pending.append(child)
+
+    return sorted(seen, key=lambda artifact: artifact.id)
+
+
+def _set_artifacts(template_payload, prep_template):
+    """Store the summaries of the prep's artifacts under PREP_ARTIFACTS.
+
+    The ID of the prep's own artifact (None if the prep has no artifact) is
+    passed along as the basal ID recorded in every artifact summary.
+    """
+    root = prep_template.artifact
+    basal_id = root.id if root is not None else None
+
+    summaries = []
+    for artifact in _get_artifacts(prep_template):
+        # appends the summary of artifact to summaries
+        _set_artifact(summaries, artifact, basal_id)
+    template_payload[PREP_ARTIFACTS] = summaries
+
+
+def _set_artifact(prep_artifacts, artifact, basal_id):
+    """Append a summary dict describing artifact to prep_artifacts.
+
+    The summary holds the artifact ID, its visibility (reported as status),
+    the IDs of its parents, the given basal ID, and the processing and
+    filepath details added by the helpers called below.
+    """
+    summary = {
+        ARTIFACT_ID: artifact.id,
+        # Prep uses .status, artifact uses .visibility; the key favors
+        # "status" as "visibility" implies a UI
+        ARTIFACT_STATUS: artifact.visibility,
+        # an artifact without parents is reported as None, not []
+        ARTIFACT_PARENT_IDS: [parent.id for parent in artifact.parents] or None,
+        ARTIFACT_BASAL_ID: basal_id,
+    }
+
+    _set_artifact_processing(summary, artifact)
+    _set_artifact_filepaths(summary, artifact)
+
+    prep_artifacts.append(summary)
+
+
+def _set_artifact_processing(artifact_payload, artifact):
+    """Record the processing command details of artifact in artifact_payload.
+
+    The command ID, command name and argument values are read from
+    ``artifact.processing_parameters``; all three are None when the artifact
+    has no processing parameters.
+    """
+    parameters = artifact.processing_parameters
+
+    if parameters is not None:
+        command = parameters.command
+        details = (command.id, command.name, parameters.values)
+    else:
+        details = (None, None, None)
+
+    keys = (ARTIFACT_PROCESSING_ID, ARTIFACT_PROCESSING_NAME,
+            ARTIFACT_PROCESSING_ARGUMENTS)
+    artifact_payload.update(zip(keys, details))
+
+
+def _set_artifact_filepaths(artifact_payload, artifact):
+    """Store the artifact's filepath descriptions under ARTIFACT_FILEPATHS.
+
+    Each description holds the path, ID and type of one entry of
+    ``artifact.filepaths``; None is stored when there are no entries.
+    """
+    descriptions = [
+        {
+            ARTIFACT_FILEPATH: entry['fp'],
+            ARTIFACT_FILEPATH_ID: entry['fp_id'],
+            ARTIFACT_FILEPATH_TYPE: entry['fp_type'],
+        }
+        for entry in artifact.filepaths
+    ]
+
+    # the test study includes an artifact which does not have filepaths;
+    # it is reported as None rather than an empty list
+    artifact_payload[ARTIFACT_FILEPATHS] = descriptions or None
+
+
+class StudyAssociationHandler(RESTHandler):
+    """REST handler for /api/v1/study/<id>/associations."""
+
+    @authenticate_oauth
+    def get(self, study_id):
+        """Write the ID, path and processing associations of a study.
+
+        Parameters
+        ----------
+        study_id : str
+            The study ID captured from the request URL
+        """
+        study = self.safe_get_study(study_id)
+        if study is None:
+            # NOTE(review): safe_get_study presumably has already written the
+            # error response (the tests expect a 404 here) -- confirm
+            return
+
+        payload = {}
+        _set_study(payload, study)
+        _set_prep_templates(payload, study)
+        self.write(payload)
+        self.finish()
diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py
@@ -16,14 +16,17 @@
 
 class StudyAssociationTests(RESTHandlerTestCase):
     def test_get_valid(self):
-        IGNORE = IGNORE
-        exp = {'study': 1,
+        IGNORE = 'IGNORE'
+        exp = {'study_id': 1,
+               'study_sample_metadata_filepath': IGNORE,
                'prep_templates': [{'prep_id': 1,
-                   'prep_filepath': IGNORE,
-                   'prep_datatype': '18S',
+                   'prep_status': 'private',
+                   'prep_sample_metadata_filepath': IGNORE,
+                   'prep_data_type': '18S',
                    'prep_human_filtering': 'The greatest human filtering method',
                    'prep_artifacts': [{'artifact_id': 1,
-                     'artifact_parent_ids': [1],
+                     'artifact_status': 'private',
+                     'artifact_parent_ids': None,
                      'artifact_basal_id': 1,
                      'artifact_processing_id': None,
                      'artifact_processing_name': None,
@@ -35,7 +38,8 @@ def test_get_valid(self):
                        'artifact_filepath': IGNORE,
                        'artifact_filepath_type': 'raw_barcodes'}]},
                     {'artifact_id': 2,
-                     'artifact_parent_ids': None,
+                     'artifact_status': 'private',
+                     'artifact_parent_ids': [1],
                      'artifact_basal_id': 1,
                      'artifact_processing_id': 1,
                      'artifact_processing_name': 'Split libraries FASTQ',
@@ -60,6 +64,7 @@ def test_get_valid(self):
                        'artifact_filepath_id': 5,
                        'artifact_filepath_type': 'preprocessed_demux'}]},
                     {'artifact_id': 3,
+                     'artifact_status': 'private',
                      'artifact_parent_ids': [1],
                      'artifact_basal_id': 1,
                      'artifact_processing_id': 1,
@@ -69,14 +74,15 @@ def test_get_valid(self):
                       'min_per_read_length_fraction': '0.75',
                       'sequence_max_n': '0',
                       'rev_comp_barcode': 'False',
-                      'rev_comp_mapping_barcodes': 'False',
+                      'rev_comp_mapping_barcodes': 'True',
                       'rev_comp': 'False',
                       'phred_quality_threshold': '3',
                       'barcode_type': 'golay_12',
                       'max_barcode_errors': '1.5',
                       'phred_offset': 'auto'},
-                     'artifact_filepaths': []},
+                     'artifact_filepaths': None},
                     {'artifact_id': 4,
+                     'artifact_status': 'private',
                      'artifact_parent_ids': [2],
                      'artifact_basal_id': 1,
                      'artifact_processing_id': 3,
@@ -92,6 +98,7 @@ def test_get_valid(self):
                        'artifact_filepath': IGNORE,
                        'artifact_filepath_type': 'biom'}]},
                     {'artifact_id': 5,
+                     'artifact_status': 'private',
                      'artifact_parent_ids': [2],
                      'artifact_basal_id': 1,
                      'artifact_processing_id': 3,
@@ -107,6 +114,7 @@ def test_get_valid(self):
                        'artifact_filepath': IGNORE,
                        'artifact_filepath_type': 'biom'}]},
                     {'artifact_id': 6,
+                     'artifact_status': 'private',
                      'artifact_parent_ids': [2],
                      'artifact_basal_id': 1,
                      'artifact_processing_id': 3,
@@ -122,29 +130,52 @@ def test_get_valid(self):
                        'artifact_filepath': IGNORE,
                        'artifact_filepath_type': 'biom'}]}]},
                   {'prep_id': 2,
-                   'prep_filepath': IGNORE,
-                   'prep_datatype': '18S',
+                   'prep_status': 'private',
+                   'prep_sample_metadata_filepath': IGNORE,
+                   'prep_data_type': '18S',
                    'prep_human_filtering': None,
                    'prep_artifacts': [{'artifact_id': 7,
-                     'artifact_parent_ids': [],
+                     'artifact_parent_ids': None,
                      'artifact_basal_id': 7,
+                     'artifact_status': 'private',
                      'artifact_processing_id': None,
                      'artifact_processing_name': None,
                      'artifact_processing_arguments': None,
                      'artifact_filepaths': [{'artifact_filepath_id': 22,
                        'artifact_filepath': IGNORE,
                        'artifact_filepath_type': 'biom'}]}]}]}
 
-        response = self.get('/api/v1/study-association/1', headers=self.headers)
+        response = self.get('/api/v1/study/1/associations', headers=self.headers)
         self.assertEqual(response.code, 200)
         obs = json_decode(response.body)
+
+        def _process_dict(d):
+            return [(d, k) for k in d]
+
+        def _process_list(l):
+            if l is None:
+                return []
+
+            return [dk for d in l
+                    for dk in _process_dict(d)]
+
+        stack = _process_dict(obs)
+        while stack:
+            (d, k) = stack.pop()
+            if k.endswith('filepath'):
+                d[k] = IGNORE
+            elif k.endswith('filepaths'):
+                stack.extend(_process_list(d[k]))
+            elif k.endswith('templates'):
+                stack.extend(_process_list(d[k]))
+            elif k.endswith('artifacts'):
+                stack.extend(_process_list(d[k]))
+
         self.assertEqual(obs, exp)
 
     def test_get_invalid(self):
-        response = self.get('/api/v1/study-association/0', headers=self.headers)
+        response = self.get('/api/v1/study/0/associations', headers=self.headers)
         self.assertEqual(response.code, 404)
-        self.assertEqual(json_decode(response.body),
-                         {'message': 'Study not found'})
 
 
 if __name__ == '__main__':