Describe the bug
Hi,
I have created two customized LightGBM estimators for AutoML:
```python
from flaml import tune

# automl = AutoML() is created earlier in my script;
# num_cores, randomseed, and monotone are also defined elsewhere.

class MyMonotonicLightGBMGBDTClassifier(BaseEstimator):
    def __init__(self, task='binary:logistic', n_jobs=num_cores, **params):
        super().__init__(task, **params)
        self.estimator_class = LGBMClassifier
        # convert to int for integer hyperparameters
        self.params = {
            'n_jobs': params['n_jobs'] if 'n_jobs' in params else num_cores,
            'boosting_type': params['boosting_type'] if 'boosting_type' in params else 'gbdt',
            'colsample_bytree': params['colsample_bytree'],
            'n_estimators': int(params['n_estimators']),
            'random_state': params['random_state'] if 'random_state' in params else randomseed,
            'monotone_constraints': params['monotone_constraints'] if 'monotone_constraints' in params else monotone,
        }

    @classmethod
    def search_space(cls, data_size, task):
        space = {
            'n_estimators': {'domain': tune.uniform(lower=50, upper=500),
                             'init_value': 200, 'low_cost_init_value': 200},
            'colsample_bytree': {'domain': tune.uniform(lower=0.5, upper=1),
                                 'init_value': 0.9, 'low_cost_init_value': 0.9},
        }
        return space

automl.add_learner(learner_name='MonotonicLightGBMGBDT',
                   learner_class=MyMonotonicLightGBMGBDTClassifier)
```
```python
class MyMonotonicLightGBMDartClassifier(BaseEstimator):
    def __init__(self, task='binary:logistic', n_jobs=num_cores, **params):
        super().__init__(task, **params)
        self.estimator_class = LGBMClassifier
        # convert to int for integer hyperparameters
        self.params = {
            'n_jobs': params['n_jobs'] if 'n_jobs' in params else num_cores,
            'boosting_type': params['boosting_type'] if 'boosting_type' in params else 'dart',
            'colsample_bytree': params['colsample_bytree'],
            'n_estimators': int(params['n_estimators']),
            'drop_rate': params['drop_rate'],
            'random_state': params['random_state'] if 'random_state' in params else randomseed,
            'monotone_constraints': params['monotone_constraints'] if 'monotone_constraints' in params else monotone,
        }

    @classmethod
    def search_space(cls, data_size, task):
        space = {
            'n_estimators': {'domain': tune.uniform(lower=50, upper=500),
                             'init_value': 200, 'low_cost_init_value': 200},
            'colsample_bytree': {'domain': tune.uniform(lower=0.5, upper=1),
                                 'init_value': 0.9, 'low_cost_init_value': 0.9},
            'drop_rate': {'domain': tune.uniform(lower=0.1, upper=0.4),
                          'init_value': 0.2, 'low_cost_init_value': 0.2},
        }
        return space

automl.add_learner(learner_name='MonotonicLightGBMDart',
                   learner_class=MyMonotonicLightGBMDartClassifier)
```
Then I run AutoML with these two estimators using the settings below:
```python
from flaml import AutoML
from flaml.automl.model import BaseEstimator, LRL1Classifier
from xgboost.sklearn import XGBClassifier
from lightgbm.sklearn import LGBMClassifier

estimator_list = ['MonotonicLightGBMDart', 'MonotonicLightGBMGBDT']
settings = {
    "keep_search_state": True,
    "time_budget": flaml_time_budget,
    "max_iter": 15,
    "mem_thres": flaml_mem_thres,
    "metric": 'roc_auc',
    "task": 'classification',
    "estimator_list": estimator_list,
    "log_file_name": logfilename,
    "log_type": 'all',
    "seed": randomseed,
    "model_history": True,
}
```
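(For completeness: the training call itself is essentially the following, where X_train and y_train stand for my training data, and flaml_time_budget, flaml_mem_thres, and logfilename are defined earlier in my script.)

```python
# X_train / y_train are placeholders for my actual training data
automl.fit(X_train=X_train, y_train=y_train, **settings)
```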
The process usually runs well; however, I have noticed one issue: sometimes the best result of an estimator that is not the overall optimum is not saved. For example, after the search I want to retrieve the best models of both MonotonicLightGBMDart and MonotonicLightGBMGBDT. If the overall optimal model returned was built by MonotonicLightGBMDart, then sometimes the best model from MonotonicLightGBMGBDT is not saved (automl.best_model_for_estimator('MonotonicLightGBMGBDT')._model returns an empty model).
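For illustration, this is roughly the check I run after the search (a simplified sketch, not my exact code):

```python
for name in ['MonotonicLightGBMDart', 'MonotonicLightGBMGBDT']:
    est = automl.best_model_for_estimator(name)
    # est._model should hold the fitted LGBMClassifier;
    # for the non-optimal learner it sometimes comes back empty
    print(name, None if est is None else est._model)
```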
What confuses me even more is that this does not happen every time and is not reliably reproducible. Sometimes, after restarting the kernel and re-running the process, the issue disappears.
Could anyone check my code and tell me the reason for this problem?
Thank you.
Steps to reproduce
No response
Model Used
No response
Expected Behavior
No response
Screenshots and logs
No response
Additional Information
No response