Message380220
Note: problem occurs only after performing the RandomizedSearchCV...
When a function is submitted to worker processes using concurrent.futures, it is not executed and the process freezes if the function contains anything other than a print() call.
Here is the code to reproduce.
from xgboost import XGBRegressor
from sklearn.model_selection import KFold
import concurrent.futures
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
# STEP 1
# ----------------------------------------------------------------------------
# Simulate a RandomizedSearchCV run. Per the report, the later steps only
# misbehave after this search has been performed.

# Candidate values sampled for each hyper-parameter.
params = {
    'min_child_weight': [0.1, 1, 5],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'eta': [0.005, 0.01, 0.1],
    'n_jobs': [-1],
}

# Synthetic regression problem: data[0] holds the features, data[1] the target.
data = make_regression(
    n_samples=500,
    n_features=100,
    n_informative=10,
    n_targets=1,
    random_state=5,
)
X = pd.DataFrame(data[0])
y = pd.Series(data[1])

kf = KFold(n_splits=3, shuffle=True, random_state=5)
model = XGBRegressor(n_jobs=-1)

random_search = RandomizedSearchCV(
    model,
    param_distributions=params,
    n_iter=50,
    n_jobs=-1,
    refit=True,  # necessary for random_search.best_estimator_
    cv=kf.split(X, y),
    verbose=1,
    random_state=5,
)
random_search.fit(X, np.array(y))
# STEP 2.0
# ----------------------------------------------------------------------------
# test if multiprocessing is working in the first place
def just_print():
    """Print a fixed message; used to check that the process pool runs at all."""
    print('Just printing')
# Only start the pool when run as a script: worker processes started with the
# "spawn" method re-import this module and must not submit work themselves.
if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results_temp = [executor.submit(just_print) for _ in range(12)]
        # Retrieve every result so that a failure inside a worker is raised
        # here instead of being dropped silently.
        for future in concurrent.futures.as_completed(results_temp):
            future.result()
# ----------------------------------------------------------------------------
# STEP 2.1
# ----------------------------------------------------------------------------
# test on a slightly more complex function
def fit_model():
    """Create a synthetic regression dataset and print a completion message.

    No model is fitted: per the report, building the dataset alone is enough
    to make the worker processes freeze.
    """
    # JUST CREATING A DATASET, NOT EVEN FITTING ANY MODEL!!! AND IT FREEZES
    data = make_regression(
        n_samples=500,
        n_features=100,
        n_informative=10,
        n_targets=1,
        random_state=5,
    )
    # model = XGBRegressor(n_jobs = -1)
    # model.fit(data[0],data[1])
    print('Fit complete')
# Only start the pool when run as a script: worker processes started with the
# "spawn" method re-import this module and must not submit work themselves.
if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results_temp = [executor.submit(fit_model) for _ in range(12)]
        # Retrieve every result so that a failure inside a worker is raised
        # here instead of being dropped silently.
        for future in concurrent.futures.as_completed(results_temp):
            future.result()
# ----------------------------------------------------------------------------
Attached this code in a .py file. |
|
Date |
User |
Action |
Args |
2020-11-02 13:33:56 | DanilZ | set | recipients:
+ DanilZ, bquinlan, ronaldoussoren, pitrou, ned.deily |
2020-11-02 13:33:55 | DanilZ | set | messageid: <1604324035.95.0.0561998847241.issue42245@roundup.psfhosted.org> |
2020-11-02 13:33:55 | DanilZ | link | issue42245 messages |
2020-11-02 13:33:55 | DanilZ | create | |
|