Error while using it with CatBoost

`TypeError                                 Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:384, in BorutaPy._get_imp(self, X, y)
    383 try:
--> 384     self.estimator.fit(X, y)
    385 except Exception as e:

File ~\anaconda3\Lib\site-packages\catboost\core.py:5220, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   5218     CatBoostClassifier._check_is_compatible_loss(params['loss_function'])
-> 5220 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
   5221           eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period,
   5222           silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   5223 return self

File ~\anaconda3\Lib\site-packages\catboost\core.py:2385, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   2383     raise CatBoostError("y may be None only when X is an instance of catboost.Pool or string")
-> 2385 train_params = self._prepare_train_params(
   2386     X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
   2387     pairs=pairs, sample_weight=sample_weight, group_id=group_id, group_weight=group_weight,
   2388     subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline, use_best_model=use_best_model,
   2389     eval_set=eval_set, verbose=verbose, logging_level=logging_level, plot=plot, plot_file=plot_file,
   2390     column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
   2391     silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
   2392     snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model,
   2393     callbacks=callbacks
   2394 )
   2395 params = train_params["params"]

File ~\anaconda3\Lib\site-packages\catboost\core.py:2311, in CatBoost._prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks)
   2310 params = _params_type_cast(params)
-> 2311 _check_train_params(params)
   2313 if params.get('eval_fraction', 0.0) != 0.0:

File _catboost.pyx:6393, in _catboost._check_train_params()

File _catboost.pyx:6414, in _catboost._check_train_params()

File _catboost.pyx:1830, in _catboost._PreprocessParams.__init__()

File ~\anaconda3\Lib\json\__init__.py:238, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    233     cls = JSONEncoder
    234 return cls(
    235     skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236     check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237     separators=separators, default=default, sort_keys=sort_keys,
--> 238     **kw).encode(obj)

File ~\anaconda3\Lib\json\encoder.py:200, in JSONEncoder.encode(self, o)
    197 # This doesn't pass the iterator directly to ''.join() because the
    198 # exceptions aren't as detailed.  The list call should be roughly
    199 # equivalent to the PySequence_Fast that ''.join() would do.
--> 200 chunks = self.iterencode(o, _one_shot=True)
    201 if not isinstance(chunks, (list, tuple)):

File ~\anaconda3\Lib\json\encoder.py:258, in JSONEncoder.iterencode(self, o, _one_shot)
    254     _iterencode = _make_iterencode(
    255         markers, self.default, _encoder, self.indent, floatstr,
    256         self.key_separator, self.item_separator, self.sort_keys,
    257         self.skipkeys, _one_shot)
--> 258 return _iterencode(o, 0)

File _catboost.pyx:158, in _catboost._NumpyAwareEncoder.default()

File ~\anaconda3\Lib\json\encoder.py:180, in JSONEncoder.default(self, o)
    162 """Implement this method in a subclass such that it returns
    163 a serializable object for ``o``, or calls the base implementation
    164 (to raise a ``TypeError``).
   (...)
    178 
    179 """
--> 180 raise TypeError(f'Object of type {o.__class__.__name__} '
    181                 f'is not JSON serializable')

TypeError: Object of type RandomState is not JSON serializable

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[12], line 46
     43 boruta = BorutaPy(catboost, n_estimators='auto',random_state=random_state, verbose=2)
     45 # Fit Boruta
---> 46 boruta.fit(X_scaled, y)
     48 # Get selected features
     49 selected_features = X.columns[boruta.support_].tolist()

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:201, in BorutaPy.fit(self, X, y)
    188 def fit(self, X, y):
    189     """
    190     Fits the Boruta feature selection with the provided estimator.
    191 
   (...)
    198         The target values.
    199     """
--> 201     return self._fit(X, y)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:285, in BorutaPy._fit(self, X, y)
    282 self.estimator.set_params(random_state=self.random_state)
    284 # add shadow attributes, shuffle them and train estimator, get imps
--> 285 cur_imp = self._add_shadows_get_imps(X, y, dec_reg)
    287 # get the threshold of shadow importances we will use for rejection
    288 imp_sha_max = np.percentile(cur_imp[1], self.perc)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:412, in BorutaPy._add_shadows_get_imps(self, X, y, dec_reg)
    410 x_sha = np.apply_along_axis(self._get_shuffle, 0, x_sha)
    411 # get importance of the merged matrix
--> 412 imp = self._get_imp(np.hstack((x_cur, x_sha)), y)
    413 # separate importances of real and shadow features
    414 imp_sha = imp[x_cur_w:]

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:386, in BorutaPy._get_imp(self, X, y)
    384     self.estimator.fit(X, y)
    385 except Exception as e:
--> 386     raise ValueError('Please check your X and y variable. The provided'
    387                      'estimator cannot be fitted to your data.\n' + str(e))
    388 try:
    389     imp = self.estimator.feature_importances_

ValueError: Please check your X and y variable. The providedestimator cannot be fitted to your data.
Object of type RandomState is not JSON serializable`

I get this error when calling `BorutaPy.fit` with a `CatBoostClassifier` as the estimator. I have tried serializing the `random_state` variable to JSON myself, but the same error persists.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Error while using it with CatBoost #131

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Error while using it with CatBoost #131

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions