Skip to content

Error while using it with CatBoost #131

@Diyacmenezes202

Description

@Diyacmenezes202

`TypeError Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:384, in BorutaPy._get_imp(self, X, y)
383 try:
--> 384 self.estimator.fit(X, y)
385 except Exception as e:

File ~\anaconda3\Lib\site-packages\catboost\core.py:5220, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5218 CatBoostClassifier._check_is_compatible_loss(params['loss_function'])
-> 5220 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
5221 eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period,
5222 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5223 return self

File ~\anaconda3\Lib\site-packages\catboost\core.py:2385, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
2383 raise CatBoostError("y may be None only when X is an instance of catboost.Pool or string")
-> 2385 train_params = self._prepare_train_params(
2386 X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
2387 pairs=pairs, sample_weight=sample_weight, group_id=group_id, group_weight=group_weight,
2388 subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline, use_best_model=use_best_model,
2389 eval_set=eval_set, verbose=verbose, logging_level=logging_level, plot=plot, plot_file=plot_file,
2390 column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
2391 silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
2392 snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model,
2393 callbacks=callbacks
2394 )
2395 params = train_params["params"]

File ~\anaconda3\Lib\site-packages\catboost\core.py:2311, in CatBoost._prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks)
2310 params = _params_type_cast(params)
-> 2311 _check_train_params(params)
2313 if params.get('eval_fraction', 0.0) != 0.0:

File _catboost.pyx:6393, in _catboost._check_train_params()

File _catboost.pyx:6414, in _catboost._check_train_params()

File _catboost.pyx:1830, in _catboost._PreprocessParams.__init__()

File ~\anaconda3\Lib\json\__init__.py:238, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
233 cls = JSONEncoder
234 return cls(
235 skipkeys=skipkeys, ensure_ascii=ensure_ascii,
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)

File ~\anaconda3\Lib\json\encoder.py:200, in JSONEncoder.encode(self, o)
197 # This doesn't pass the iterator directly to ''.join() because the
198 # exceptions aren't as detailed. The list call should be roughly
199 # equivalent to the PySequence_Fast that ''.join() would do.
--> 200 chunks = self.iterencode(o, _one_shot=True)
201 if not isinstance(chunks, (list, tuple)):

File ~\anaconda3\Lib\json\encoder.py:258, in JSONEncoder.iterencode(self, o, _one_shot)
254 _iterencode = _make_iterencode(
255 markers, self.default, _encoder, self.indent, floatstr,
256 self.key_separator, self.item_separator, self.sort_keys,
257 self.skipkeys, _one_shot)
--> 258 return _iterencode(o, 0)

File _catboost.pyx:158, in _catboost._NumpyAwareEncoder.default()

File ~\anaconda3\Lib\json\encoder.py:180, in JSONEncoder.default(self, o)
162 """Implement this method in a subclass such that it returns
163 a serializable object for o, or calls the base implementation
164 (to raise a TypeError).
(...)
178
179 """
--> 180 raise TypeError(f'Object of type {o.__class__.__name__} '
181 f'is not JSON serializable')

TypeError: Object of type RandomState is not JSON serializable

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
Cell In[12], line 46
43 boruta = BorutaPy(catboost, n_estimators='auto',random_state=random_state, verbose=2)
45 # Fit Boruta
---> 46 boruta.fit(X_scaled, y)
48 # Get selected features
49 selected_features = X.columns[boruta.support_].tolist()

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:201, in BorutaPy.fit(self, X, y)
188 def fit(self, X, y):
189 """
190 Fits the Boruta feature selection with the provided estimator.
191
(...)
198 The target values.
199 """
--> 201 return self._fit(X, y)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:285, in BorutaPy._fit(self, X, y)
282 self.estimator.set_params(random_state=self.random_state)
284 # add shadow attributes, shuffle them and train estimator, get imps
--> 285 cur_imp = self._add_shadows_get_imps(X, y, dec_reg)
287 # get the threshold of shadow importances we will use for rejection
288 imp_sha_max = np.percentile(cur_imp[1], self.perc)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:412, in BorutaPy._add_shadows_get_imps(self, X, y, dec_reg)
410 x_sha = np.apply_along_axis(self._get_shuffle, 0, x_sha)
411 # get importance of the merged matrix
--> 412 imp = self._get_imp(np.hstack((x_cur, x_sha)), y)
413 # separate importances of real and shadow features
414 imp_sha = imp[x_cur_w:]

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:386, in BorutaPy._get_imp(self, X, y)
384 self.estimator.fit(X, y)
385 except Exception as e:
--> 386 raise ValueError('Please check your X and y variable. The provided'
387 'estimator cannot be fitted to your data.\n' + str(e))
388 try:
389 imp = self.estimator.feature_importances_

ValueError: Please check your X and y variable. The providedestimator cannot be fitted to your data.
Object of type RandomState is not JSON serializable`

I'm getting this error when fitting BorutaPy with a CatBoost estimator. I tried serializing the random_state variable to JSON, but the same error persists.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions