@@ -176,7 +176,7 @@ def _check_multitable_spec(ds_spec):
176176 )
177177
178178
def table_name_of_path(table_path):
    """Return the table name encoded in a data path.

    A data path is a "/"-separated string; the table name is its last
    fragment. A path that contains no "/" is returned unchanged.

    Parameters
    ----------
    table_path : str
        Data path of a table, e.g. ``"logs/errors"``.

    Returns
    -------
    str
        The text after the last "/" of ``table_path``. This is the empty
        string when the path is empty or ends with "/".
    """
    return table_path.split("/")[-1]
181181
182182
@@ -387,7 +387,6 @@ def __init__(self, X, y=None, categorical_target=True):
387387 # Initialize members
388388 self .main_table = None
389389 self .additional_data_tables = None
390- self .relations = None
391390 self .categorical_target = categorical_target
392391 self .target_column = None
393392 self .target_column_id = None
@@ -437,7 +436,8 @@ def __init__(self, X, y=None, categorical_target=True):
437436 # Index the tables by name
438437 self ._tables_by_name = {
439438 table .name : table
440- for table in [self .main_table ] + self .additional_data_tables
439+ for table in [self .main_table ]
440+ + [table for _ , table , _ in self .additional_data_tables ]
441441 }
442442
443443 # Post-conditions
@@ -513,32 +513,21 @@ def _init_tables_from_mapping(self, X):
513513 key = main_table_key ,
514514 )
515515 self .additional_data_tables = []
516- self .relations = []
517516 if "additional_data_tables" in X :
518517 for table_path , table_spec in X ["additional_data_tables" ].items ():
519518 table_source , table_key = table_spec [:2 ]
520- table_name = _table_name_of_path (table_path )
519+ table_name = table_name_of_path (table_path )
521520 table = PandasTable (
522521 table_name ,
523522 table_source ,
524- data_path = table_path ,
525523 key = table_key ,
526524 )
527- self .additional_data_tables .append (table )
528525 is_one_to_one_relation = False
529526 if len (table_spec ) == 3 and table_spec [2 ] is True :
530527 is_one_to_one_relation = True
531528
532- # Set relation parent: if no "/" in path, main_table is the parent
533- if not "/" in table_path :
534- parent_table_name = self .main_table .name
535- else :
536- table_path_fragments = table_path .split ("/" )
537- parent_table_name = _table_name_of_path (
538- "/" .join (table_path_fragments [:- 1 ])
539- )
540- self .relations .append (
541- (parent_table_name , table_name , is_one_to_one_relation )
529+ self .additional_data_tables .append (
530+ (table_path , table , is_one_to_one_relation )
542531 )
543532 # Initialize a sparse dataset (monotable)
544533 elif isinstance (main_table_source , sp .spmatrix ):
@@ -548,7 +537,6 @@ def _init_tables_from_mapping(self, X):
548537 key = main_table_key ,
549538 )
550539 self .additional_data_tables = []
551- self .relations = []
552540 # Initialize a numpyarray dataset (monotable)
553541 elif hasattr (main_table_source , "__array__" ):
554542 self .main_table = NumpyTable (
@@ -561,7 +549,6 @@ def _init_tables_from_mapping(self, X):
561549 "with pandas dataframe source tables"
562550 )
563551 self .additional_data_tables = []
564- self .relations = []
565552 else :
566553 raise TypeError (
567554 type_error_message (
@@ -680,11 +667,12 @@ def to_spec(self):
680667 ds_spec = {}
681668 ds_spec ["main_table" ] = (self .main_table .data_source , self .main_table .key )
682669 ds_spec ["additional_data_tables" ] = {}
683- for table in self .additional_data_tables :
684- assert table . data_path is not None
685- ds_spec ["additional_data_tables" ][table . data_path ] = (
670+ for table_path , table , is_one_to_one_relation in self .additional_data_tables :
671+ assert table_path is not None
672+ ds_spec ["additional_data_tables" ][table_path ] = (
686673 table .data_source ,
687674 table .key ,
675+ is_one_to_one_relation ,
688676 )
689677
690678 return ds_spec
@@ -748,31 +736,32 @@ def create_khiops_dictionary_domain(self):
748736 # Note: In general 'name' and 'object_type' fields of Variable can be different
749737 if self .additional_data_tables :
750738 main_dictionary .root = True
751- table_names = [table .name for table in self .additional_data_tables ]
752- tables_to_visit = [self .main_table .name ]
753- while tables_to_visit :
754- current_table = tables_to_visit .pop (0 )
755- for relation in self .relations :
756- parent_table , child_table , is_one_to_one_relation = relation
757- if parent_table == current_table :
758- tables_to_visit .append (child_table )
759- parent_table_name = parent_table
760- index_table = table_names .index (child_table )
761- table = self .additional_data_tables [index_table ]
762- parent_table_dictionary = dictionary_domain .get_dictionary (
763- parent_table_name
764- )
765- dictionary = table .create_khiops_dictionary ()
766- dictionary_domain .add_dictionary (dictionary )
767- table_variable = kh .Variable ()
768- if is_one_to_one_relation :
769- table_variable .type = "Entity"
770- else :
771- table_variable .type = "Table"
772- table_variable .name = table .name
773- table_variable .object_type = table .name
774- parent_table_dictionary .add_variable (table_variable )
739+ for (
740+ table_path ,
741+ table ,
742+ is_one_to_one_relation ,
743+ ) in self .additional_data_tables :
744+ if not "/" in table_path :
745+ parent_table_name = self .main_table .name
746+ else :
747+ table_path_fragments = table_path .split ("/" )
748+ parent_table_name = table_name_of_path (
749+ "/" .join (table_path_fragments [:- 1 ])
750+ )
751+ parent_table_dictionary = dictionary_domain .get_dictionary (
752+ parent_table_name
753+ )
775754
755+ dictionary = table .create_khiops_dictionary ()
756+ dictionary_domain .add_dictionary (dictionary )
757+ table_variable = kh .Variable ()
758+ if is_one_to_one_relation :
759+ table_variable .type = "Entity"
760+ else :
761+ table_variable .type = "Table"
762+ table_variable .name = table .name
763+ table_variable .object_type = table .name
764+ parent_table_dictionary .add_variable (table_variable )
776765 return dictionary_domain
777766
778767 def create_table_files_for_khiops (self , output_dir , sort = True ):
@@ -811,9 +800,9 @@ def create_table_files_for_khiops(self, output_dir, sort=True):
811800
812801 # Create a copy of each secondary table
813802 secondary_table_paths = {}
814- for table in self .additional_data_tables :
815- assert table . data_path is not None
816- secondary_table_paths [table . data_path ] = table .create_table_file_for_khiops (
803+ for table_path , table , _ in self .additional_data_tables :
804+ assert table_path is not None
805+ secondary_table_paths [table_path ] = table .create_table_file_for_khiops (
817806 output_dir , sort = sort
818807 )
819808
@@ -918,13 +907,11 @@ class PandasTable(DatasetTable):
918907 Name for the table.
919908 dataframe : `pandas.DataFrame`
920909 The data frame to be encapsulated. It must be non-empty.
921- data_path : str, optional
922- Data path of the table. Unset for main tables.
923910 key : list of str, optional
924911 The names of the columns composing the key.
925912 """
926913
927- def __init__ (self , name , dataframe , data_path = None , key = None ):
914+ def __init__ (self , name , dataframe , key = None ):
928915 # Call the parent method
929916 super ().__init__ (name = name , key = key )
930917
@@ -937,7 +924,6 @@ def __init__(self, name, dataframe, data_path=None, key=None):
937924 # Initialize the attributes
938925 self .data_source = dataframe
939926 self .n_samples = len (self .data_source )
940- self .data_path = data_path
941927
942928 # Initialize feature columns and verify their types
943929 self .column_ids = self .data_source .columns .values
0 commit comments