1
1
from unittest import TestCase
2
2
from unittest .mock import patch , MagicMock , call
3
3
4
- from ops_utils .tdr_utils .tdr_table_utils import SetUpTDRTables
4
+ from ops_utils .tdr_utils .tdr_table_utils import SetUpTDRTables , MatchSchemas
5
5
6
6
# Name of the table the tests in this module operate on.
TARGET_TABLE = "sample"
# Primary-key column name derived from the table name ("sample_id").
# The brace expression and the suffix must be adjacent: a space between them
# would become part of the key.
PRIMARY_KEY = f"{TARGET_TABLE}_id"
@@ -295,4 +295,154 @@ def test_compare_table_mis_matched_schemas(self):
295
295
# Assertions
296
296
self .assertEqual (
297
297
columns_to_update , [{'name' : 'participant' , 'required' : True , 'datatype' : 'string' , 'array_of' : False , "action" : "add" }]
298
- )
298
+ )
299
+
300
+
301
class TestMatchSchemas(TestCase):
    """Tests for the MatchSchemas class.

    Each test builds dataset-info dictionaries for an "original" and a
    "destination" dataset, runs ``MatchSchemas.run()`` against a mocked TDR
    client, and inspects the calls made to ``update_dataset_schema``.
    """

    @staticmethod
    def _table(name, second_column, second_datatype="string"):
        """Return a table definition with a required ``id`` column and one nullable column."""
        return {
            "name": name,
            "columns": [
                {"name": "id", "datatype": "string", "mode": "required"},
                {"name": second_column, "datatype": second_datatype, "mode": "nullable"},
            ],
        }

    @staticmethod
    def _dataset_info(name, tables):
        """Wrap a list of table definitions in the dataset-info layout used by these tests."""
        return {"name": name, "schema": {"tables": tables}}

    def _build_match_schemas(self, orig_dataset_info, dest_dataset_info):
        """Create a MatchSchemas instance wired to the shared mock TDR client and destination id."""
        return MatchSchemas(
            orig_dataset_info=orig_dataset_info,
            dest_dataset_info=dest_dataset_info,
            dest_dataset_id=self.dest_dataset_id,
            tdr=self.mock_tdr,
        )

    def setUp(self):
        # Mock TDR client; the tests only inspect calls made on it.
        self.mock_tdr = MagicMock()

        # Original dataset: two tables.
        self.orig_dataset_info = self._dataset_info(
            "original_dataset",
            [
                self._table("table_a", "value"),
                self._table("table_b", "count", "integer"),
            ],
        )

        # Destination dataset: only table_a, so table_b is missing from it.
        self.dest_dataset_info = self._dataset_info(
            "destination_dataset",
            [self._table("table_a", "value")],
        )

        self.dest_dataset_id = "dest-dataset-123"

        # Default instance used by tests that do not need a custom schema.
        self.match_schemas = self._build_match_schemas(
            self.orig_dataset_info, self.dest_dataset_info
        )

    def test_init(self):
        """Test initialization of MatchSchemas"""
        self.assertEqual(self.match_schemas.orig_dataset_info, self.orig_dataset_info)
        self.assertEqual(self.match_schemas.dest_dataset_info, self.dest_dataset_info)
        self.assertEqual(self.match_schemas.dest_dataset_id, self.dest_dataset_id)
        self.assertEqual(self.match_schemas.tdr, self.mock_tdr)

    def test_run_adds_missing_tables(self):
        """Test that the run method adds tables that exist in the original dataset but not in the destination"""
        self.match_schemas.run()

        # Exactly one schema update is expected.
        self.mock_tdr.update_dataset_schema.assert_called_once()

        # The assertions below read keyword arguments only; positional
        # arguments of the call are not inspected.
        _, kwargs = self.mock_tdr.update_dataset_schema.call_args

        self.assertEqual(kwargs['dataset_id'], self.dest_dataset_id)

        # Only table_b is absent from the destination dataset.
        self.assertEqual(len(kwargs['tables_to_add']), 1)
        self.assertEqual(kwargs['tables_to_add'][0]['name'], 'table_b')

        # assertIn reports the actual keys on failure, unlike assertTrue(x in y).
        self.assertIn('update_note', kwargs)

    def test_run_no_missing_tables(self):
        """Test that the run method doesn't update anything when all tables already exist"""
        # Destination already contains every table of the original dataset.
        full_dest_dataset_info = self._dataset_info(
            "destination_dataset",
            [
                self._table("table_a", "value"),
                self._table("table_b", "count", "integer"),
            ],
        )

        match_schemas = self._build_match_schemas(
            self.orig_dataset_info, full_dest_dataset_info
        )
        match_schemas.run()

        # Nothing is missing, so no schema update should be requested.
        self.mock_tdr.update_dataset_schema.assert_not_called()

    def test_run_multiple_missing_tables(self):
        """Test that the run method adds multiple missing tables"""
        # Build the three-table original schema up front and construct a fresh
        # instance from it, rather than mutating the fixture after the
        # instance exists. This keeps the test valid whether or not
        # MatchSchemas holds a reference to the dictionary it was given.
        orig_dataset_info = self._dataset_info(
            "original_dataset",
            [
                self._table("table_a", "value"),
                self._table("table_b", "count", "integer"),
                self._table("table_c", "description"),
            ],
        )

        match_schemas = self._build_match_schemas(
            orig_dataset_info, self.dest_dataset_info
        )
        match_schemas.run()

        # Both missing tables should be sent in a single update call.
        self.mock_tdr.update_dataset_schema.assert_called_once()

        _, kwargs = self.mock_tdr.update_dataset_schema.call_args

        # Order of the added tables is not asserted, only their names.
        table_names = [table['name'] for table in kwargs['tables_to_add']]
        self.assertCountEqual(table_names, ['table_b', 'table_c'])
0 commit comments