Merge pull request #72 from broadinstitute/sn_add_tests

snovod · web-flow · commit 41f0cd0f024c · 2025-07-28T09:26:07.000-04:00
Sn add tests
diff --git a/VERSION.txt b/VERSION.txt
@@ -1,2 +1,2 @@
-11.3.1
-- Clean up float/int logic to be more concise
+11.3.2
+- Adding tests
diff --git a/ops_utils/request_util.py b/ops_utils/request_util.py
@@ -4,7 +4,7 @@
 import backoff
 
 from .token_util import Token
-from .vars import ARG_DEFAULTS
+from .vars import ARG_DEFAULTS, APPLICATION_JSON
 
 GET = "GET"
 """Method used for API GET endpoints"""
@@ -71,7 +71,7 @@ def run_request(
             files: Any = None,
             params: Optional[dict] = None,
             factor: int = 15,
-            accept: Optional[str] = "application/json",
+            accept: Optional[str] = APPLICATION_JSON,
             content_type: Optional[str] = None,
             accept_return_codes: list[int] = []
     ) -> requests.Response:
@@ -149,7 +149,7 @@ def _make_request() -> requests.Response:
 
         return _make_request()
 
-    def create_headers(self, content_type: Optional[str] = None, accept: Optional[str] = "application/json") -> dict:
+    def create_headers(self, content_type: Optional[str] = None, accept: Optional[str] = APPLICATION_JSON) -> dict:
         """
         Create headers for API calls.
 
diff --git a/ops_utils/tdr_utils/tdr_api_utils.py b/ops_utils/tdr_utils/tdr_api_utils.py
@@ -10,7 +10,7 @@
 from ..tdr_api_schema.create_dataset_schema import CreateDatasetSchema
 from ..tdr_api_schema.update_dataset_schema import UpdateSchema
 from .tdr_job_utils import MonitorTDRJob, SubmitAndMonitorMultipleJobs
-from ..vars import ARG_DEFAULTS, GCP
+from ..vars import ARG_DEFAULTS, GCP, APPLICATION_JSON
 
 
 class TDR:
@@ -204,7 +204,7 @@ def add_user_to_dataset(self, dataset_id: str, user: str, policy: str) -> reques
             uri=uri,
             method=POST,
             data=json.dumps(member_dict),
-            content_type="application/json"
+            content_type=APPLICATION_JSON
         )
 
     def remove_user_from_dataset(self, dataset_id: str, user: str, policy: str) -> requests.Response:
@@ -253,7 +253,7 @@ def make_snapshot_public(self, snapshot_id: str) -> requests.Response:
         """
         uri = f"{self.tdr_link}/snapshots/{snapshot_id}/public"
         logging.info(f"Making snapshot {snapshot_id} public")
-        return self.request_util.run_request(uri=uri, method=PUT, content_type="application/json", data="true")
+        return self.request_util.run_request(uri=uri, method=PUT, content_type=APPLICATION_JSON, data="true")
 
     def get_snapshot_info(
             self,
@@ -505,7 +505,7 @@ def ingest_to_dataset(self, dataset_id: str, data: dict) -> requests.Response:
         return self.request_util.run_request(
             uri=uri,
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=data
         )
 
@@ -539,7 +539,7 @@ def file_ingest_to_dataset(
         response = self.request_util.run_request(
             uri=uri,
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(data)
         )
         job_id = response.json()['id']
@@ -592,7 +592,7 @@ def _yield_dataset_metrics(self, dataset_id: str, target_table_name: str, query_
             response = self.request_util.run_request(
                 uri=uri,
                 method=POST,
-                content_type="application/json",
+                content_type=APPLICATION_JSON,
                 data=json.dumps(search_request)
             )
             if not response or not response.json()["result"]:
@@ -709,7 +709,7 @@ def soft_delete_entries(
             method=POST,
             uri=uri,
             data=json.dumps(payload),
-            content_type="application/json"
+            content_type=APPLICATION_JSON
         )
         job_id = response.json()["id"]
         return MonitorTDRJob(tdr=self, job_id=job_id, check_interval=check_intervals, return_json=False).run()
@@ -851,7 +851,7 @@ def create_dataset(  # type: ignore[return]
             method=POST,
             uri=uri,
             data=json.dumps(dataset_properties),
-            content_type="application/json"
+            content_type=APPLICATION_JSON
         )
         job_id = response.json()["id"]
         job_results = MonitorTDRJob(tdr=self, job_id=job_id, check_interval=30, return_json=True).run()
@@ -899,7 +899,7 @@ def update_dataset_schema(  # type: ignore[return]
         response = self.request_util.run_request(
             uri=uri,
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(request_body)
         )
         job_id = response.json()["id"]
@@ -1021,7 +1021,7 @@ def create_snapshot(
         response = self.request_util.run_request(
             uri=uri,
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(payload)
         )
         job_id = response.json()["id"]
diff --git a/ops_utils/terra_util.py b/ops_utils/terra_util.py
@@ -9,7 +9,7 @@
 import os
 
 from . import deprecated
-from .vars import GCP
+from .vars import GCP, APPLICATION_JSON
 from .gcp_utils import GCPCloudFunctions
 from .request_util import GET, POST, PATCH, PUT, DELETE, RunRequest
 
@@ -266,7 +266,7 @@ def _yield_all_entity_metrics(self, entity: str, total_entities_per_page: int =
         response = self.request_util.run_request(
             uri=url,
             method=GET,
-            content_type="application/json"
+            content_type=APPLICATION_JSON
         )
         raw_text = response.text
         first_page_json = json.loads(
@@ -283,7 +283,7 @@ def _yield_all_entity_metrics(self, entity: str, total_entities_per_page: int =
             next_page = self.request_util.run_request(
                 uri=url,
                 method=GET,
-                content_type="application/json",
+                content_type=APPLICATION_JSON,
                 params={"page": page}
             )
             raw_text = next_page.text
@@ -494,7 +494,7 @@ def update_user_acl(
         response = self.request_util.run_request(
             uri=url,
             method=PATCH,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data="[" + json.dumps(payload) + "]"
         )
 
@@ -549,7 +549,7 @@ def update_multiple_users_acl(
         response = self.request_util.run_request(
             uri=url,
             method=PATCH,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(acl_list)
         )
 
@@ -592,7 +592,7 @@ def create_workspace(
         response = self.request_util.run_request(
             uri=f"{self.terra_link}/workspaces",
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(payload),
             accept_return_codes=accept_return_codes
         )
@@ -681,7 +681,7 @@ def import_workflow(self, workflow_dict: dict, continue_if_exists: bool = False)
             uri=uri,
             method=POST,
             data=workflow_json,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             accept_return_codes=accept_return_codes
         )
 
@@ -711,7 +711,7 @@ def update_workspace_attributes(self, attributes: list[dict]) -> requests.Respon
             uri=f"{self.terra_link}/workspaces/{self.billing_project}/{self.workspace_name}/updateAttributes",
             method=PATCH,
             data=json.dumps(attributes),
-            content_type="application/json"
+            content_type=APPLICATION_JSON
         )
 
     def leave_workspace(
@@ -768,7 +768,7 @@ def change_workspace_public_setting(self, public: bool) -> requests.Response:
         return self.request_util.run_request(
             uri=f"{RAWLS_LINK}/workspaces/v2/{self.billing_project}/{self.workspace_name}/settings",
             method=PUT,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(body)
         )
 
@@ -876,7 +876,7 @@ def add_user_comment_to_submission(self, submission_id: str, user_comment: str)
         return self.request_util.run_request(
             uri=f"{RAWLS_LINK}/workspaces/{self.billing_project}/{self.workspace_name}/submissions/{submission_id}",
             method=PATCH,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps({"userComment": user_comment}),
         )
 
@@ -928,7 +928,7 @@ def initiate_submission(
         return self.request_util.run_request(
             uri=f"{self.terra_link}/workspaces/{self.billing_project}/{self.workspace_name}/submissions",
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(payload),
         )
       
@@ -950,7 +950,7 @@ def retry_failed_submission(self, submission_id: str) -> requests.Response:
         return self.request_util.run_request(
             uri=url,
             method=POST,
-            content_type="application/json",
+            content_type=APPLICATION_JSON,
             data=json.dumps(payload)
         )
 
diff --git a/ops_utils/tests/test_tdr_table_utils.py b/ops_utils/tests/test_tdr_table_utils.py
@@ -1,7 +1,7 @@
 from unittest import TestCase
 from unittest.mock import patch, MagicMock, call
 
-from ops_utils.tdr_utils.tdr_table_utils import SetUpTDRTables
+from ops_utils.tdr_utils.tdr_table_utils import SetUpTDRTables, MatchSchemas
 
 TARGET_TABLE = "sample"
 PRIMARY_KEY = f'{TARGET_TABLE}_id'
@@ -295,4 +295,154 @@ def test_compare_table_mis_matched_schemas(self):
         # Assertions
         self.assertEqual(
             columns_to_update, [{'name': 'participant', 'required': True, 'datatype': 'string', 'array_of': False, "action": "add"}]
-        )
+        )
+
+
+class TestMatchSchemas(TestCase):
+    """Tests for the MatchSchemas class"""
+
+    def setUp(self):
+        # Create mock TDR instance
+        self.mock_tdr = MagicMock()
+
+        # Define test data for original dataset
+        self.orig_dataset_info = {
+            "name": "original_dataset",
+            "schema": {
+                "tables": [
+                    {
+                        "name": "table_a",
+                        "columns": [
+                            {"name": "id", "datatype": "string", "mode": "required"},
+                            {"name": "value", "datatype": "string", "mode": "nullable"}
+                        ]
+                    },
+                    {
+                        "name": "table_b",
+                        "columns": [
+                            {"name": "id", "datatype": "string", "mode": "required"},
+                            {"name": "count", "datatype": "integer", "mode": "nullable"}
+                        ]
+                    }
+                ]
+            }
+        }
+
+        # Define test data for destination dataset
+        self.dest_dataset_info = {
+            "name": "destination_dataset",
+            "schema": {
+                "tables": [
+                    {
+                        "name": "table_a",
+                        "columns": [
+                            {"name": "id", "datatype": "string", "mode": "required"},
+                            {"name": "value", "datatype": "string", "mode": "nullable"}
+                        ]
+                    }
+                ]
+            }
+        }
+
+        self.dest_dataset_id = "dest-dataset-123"
+
+        # Create MatchSchemas instance
+        self.match_schemas = MatchSchemas(
+            orig_dataset_info=self.orig_dataset_info,
+            dest_dataset_info=self.dest_dataset_info,
+            dest_dataset_id=self.dest_dataset_id,
+            tdr=self.mock_tdr
+        )
+
+    def test_init(self):
+        """Test initialization of MatchSchemas"""
+        self.assertEqual(self.match_schemas.orig_dataset_info, self.orig_dataset_info)
+        self.assertEqual(self.match_schemas.dest_dataset_info, self.dest_dataset_info)
+        self.assertEqual(self.match_schemas.dest_dataset_id, self.dest_dataset_id)
+        self.assertEqual(self.match_schemas.tdr, self.mock_tdr)
+
+    def test_run_adds_missing_tables(self):
+        """Test that the run method adds tables that exist in the original dataset but not in the destination"""
+        # Run the matching process
+        self.match_schemas.run()
+
+        # Verify that update_dataset_schema was called with the correct parameters
+        self.mock_tdr.update_dataset_schema.assert_called_once()
+
+        # Get the arguments from the call
+        args, kwargs = self.mock_tdr.update_dataset_schema.call_args
+
+        # Check dataset_id
+        self.assertEqual(kwargs['dataset_id'], self.dest_dataset_id)
+
+        # Check that the tables_to_add contains table_b
+        self.assertEqual(len(kwargs['tables_to_add']), 1)
+        self.assertEqual(kwargs['tables_to_add'][0]['name'], 'table_b')
+
+        # Check that the update note is set
+        self.assertTrue('update_note' in kwargs)
+
+    def test_run_no_missing_tables(self):
+        """Test that the run method doesn't update anything when all tables already exist"""
+        # Modify destination dataset to include all tables from the original dataset
+        self.dest_dataset_info = {
+            "name": "destination_dataset",
+            "schema": {
+                "tables": [
+                    {
+                        "name": "table_a",
+                        "columns": [
+                            {"name": "id", "datatype": "string", "mode": "required"},
+                            {"name": "value", "datatype": "string", "mode": "nullable"}
+                        ]
+                    },
+                    {
+                        "name": "table_b",
+                        "columns": [
+                            {"name": "id", "datatype": "string", "mode": "required"},
+                            {"name": "count", "datatype": "integer", "mode": "nullable"}
+                        ]
+                    }
+                ]
+            }
+        }
+
+        # Create a new MatchSchemas instance with the updated destination dataset
+        match_schemas = MatchSchemas(
+            orig_dataset_info=self.orig_dataset_info,
+            dest_dataset_info=self.dest_dataset_info,
+            dest_dataset_id=self.dest_dataset_id,
+            tdr=self.mock_tdr
+        )
+
+        # Run the matching process
+        match_schemas.run()
+
+        # Verify that update_dataset_schema was not called
+        self.mock_tdr.update_dataset_schema.assert_not_called()
+
+    def test_run_multiple_missing_tables(self):
+        """Test that the run method adds multiple missing tables"""
+        # Add another table to the original dataset
+        self.orig_dataset_info["schema"]["tables"].append({
+            "name": "table_c",
+            "columns": [
+                {"name": "id", "datatype": "string", "mode": "required"},
+                {"name": "description", "datatype": "string", "mode": "nullable"}
+            ]
+        })
+
+        # Run the matching process
+        self.match_schemas.run()
+
+        # Verify that update_dataset_schema was called with the correct parameters
+        self.mock_tdr.update_dataset_schema.assert_called_once()
+
+        # Get the arguments from the call
+        args, kwargs = self.mock_tdr.update_dataset_schema.call_args
+
+        # Check that the tables_to_add contains both missing tables
+        self.assertEqual(len(kwargs['tables_to_add']), 2)
+        table_names = [table['name'] for table in kwargs['tables_to_add']]
+        self.assertIn('table_b', table_names)
+        self.assertIn('table_c', table_names)
diff --git a/ops_utils/tests/test_terra_utils.py b/ops_utils/tests/test_terra_utils.py
diff --git a/ops_utils/vars.py b/ops_utils/vars.py

@@ -1,2 +1,2 @@
-11.3.1
-- Clean up float/int logic to be more concise
+11.3.2
+- Adding tests