Skip to content

Commit d54cab6

Browse files
authored
Merge pull request #8 from emmanueljordy/add_data_processing
Add data processing
2 parents b8214cf + 7094ecb commit d54cab6

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

synthpop/metrics/efficacy_metrics.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,15 @@ def evaluate(self, real_df: pd.DataFrame, synthetic_df: pd.DataFrame) -> dict:
7676
y_real = real_df[self.target_column]
7777

7878
# Handle categorical encoding for all tasks (regression and classification)
79-
if self.task == 'classification':
80-
categorical_cols = X_syn.select_dtypes(include=['object', 'category']).columns.tolist()
79+
80+
categorical_cols = X_syn.select_dtypes(include=['object', 'category']).columns.tolist()
8181

82-
if categorical_cols:
83-
X_syn = pd.get_dummies(X_syn, columns=categorical_cols, drop_first=True)
84-
X_real = pd.get_dummies(X_real, columns=categorical_cols, drop_first=True)
82+
if categorical_cols:
83+
X_syn = pd.get_dummies(X_syn, columns=categorical_cols, drop_first=True)
84+
X_real = pd.get_dummies(X_real, columns=categorical_cols, drop_first=True)
8585

86-
# Align columns in case of different categorical levels between real and synthetic data
87-
X_syn, X_real = X_syn.align(X_real, join='left', axis=1, fill_value=0)
86+
# Align columns in case of different categorical levels between real and synthetic data
87+
X_syn, X_real = X_syn.align(X_real, join='left', axis=1, fill_value=0)
8888

8989
# Model Training and Evaluation
9090
if self.task == 'regression':

synthpop/processor/data_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def _preprocess(self, data: pd.DataFrame) -> pd.DataFrame:
6161
data = pd.concat([data, transformed_data], axis=1)
6262

6363
elif dtype == "numerical":
64-
scaler = StandardScaler()
64+
scaler = StandardScaler(with_mean= False, with_std= False)
6565
data[col] = scaler.fit_transform(data[[col]])
6666
self.scalers[col] = scaler
6767

0 commit comments

Comments
 (0)