Author DanilZ
Recipients DanilZ, bquinlan, ned.deily, pitrou, ronaldoussoren
Date 2020-11-02.13:33:54
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1604324035.95.0.0561998847241.issue42245@roundup.psfhosted.org>
In-reply-to
Content
Note: the problem occurs only after running RandomizedSearchCV.

When a function is submitted to a process pool using concurrent.futures and that function does anything other than call print(), it is never executed and the process freezes.

Here is the code to reproduce.

from xgboost import XGBRegressor
from sklearn.model_selection import KFold
import concurrent.futures
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# STEP 1
# ----------------------------------------------------------------------------
# simulate RandomizedSearchCV

# Build a synthetic regression problem: 500 samples, 100 features of which
# 10 are informative, a single target, and a fixed seed for repeatability.
data = make_regression(n_samples=500, n_features=100, n_informative=10, n_targets=1, random_state=5)
# Wrap the two returned arrays in pandas containers: data[0] becomes the
# feature frame X and data[1] becomes the target series y.
X = pd.DataFrame(data[0])
y = pd.Series(data[1])
# 3-fold cross-validation splitter, shuffled with a fixed seed.
kf = KFold(n_splits = 3, shuffle = True, random_state = 5)
# NOTE(review): n_jobs = -1 presumably asks XGBoost to use every available
# core -- confirm against the xgboost documentation.
model = XGBRegressor(n_jobs = -1)
# Candidate values the randomized search samples from. 'n_jobs' has a single
# candidate, so it is the same for every sampled configuration.
params = {
        'min_child_weight':     [0.1, 1, 5],
        'subsample':            [0.5, 0.7, 1.0],
        'colsample_bytree':     [0.5, 0.7, 1.0],
        'eta':                  [0.005, 0.01, 0.1],
        'n_jobs':               [-1]
        }
# Randomized hyper-parameter search: 50 sampled settings (n_iter), evaluated
# on the splits produced by kf, with a fixed seed for the sampling.
# NOTE(review): n_jobs = -1 is set here as well as on the estimator, so the
# search and the estimator both request parallelism -- confirm this nesting
# is intended for the reproduction.
random_search = RandomizedSearchCV(
        model,
        param_distributions =   params,
        n_iter =                50,
        n_jobs =                -1,
        refit =                 True, # necessary for random_search.best_estimator_
        cv =                    kf.split(X,y),
        verbose =               1,
        random_state =          5
        )
# Run the search. The target is passed as a plain NumPy array rather than
# the pandas Series.
# NOTE(review): this executes at import time; with the "spawn" start method
# worker processes re-import the main module, so verify whether the search is
# expected to run again inside each worker.
random_search.fit(X, np.array(y))

# STEP 2.0
# ----------------------------------------------------------------------------
# test if multiprocessing is working in the first place

def just_print():
    """Write a fixed marker line to standard output.

    Takes no arguments and returns ``None``. It serves as the most trivial
    task that can be handed to a worker process.
    """
    marker = 'Just printing'
    print(marker)

# Create the pool only when this file is executed as a script. With the
# "spawn" start method every worker re-imports the main module, and starting
# new processes as a side effect of that import is exactly what the
# multiprocessing guidelines say must be avoided.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Submit the trivial task 12 times; results_temp holds the futures.
        results_temp = [executor.submit(just_print) for _ in range(12)]
        # Retrieve every result so that an exception raised in a worker (or a
        # broken pool) is re-raised here instead of being silently discarded.
        for future in results_temp:
            future.result()
# ----------------------------------------------------------------------------


# STEP 2.1
# ----------------------------------------------------------------------------
# test on a slightly more complex function

def fit_model():
    """Generate a synthetic regression dataset and print a completion line.

    Despite the name, no model is fitted: the function only builds the
    dataset (500 samples, 100 features, 10 informative, one target, seed 5)
    and discards it. The reporter observes that the worker freezes even with
    this reduced body.

    Takes no arguments and returns ``None``.
    """
    dataset_options = dict(
        n_samples=500,
        n_features=100,
        n_informative=10,
        n_targets=1,
        random_state=5,
    )
    # The dataset is intentionally unused; the fitting steps below were
    # disabled by the reporter to narrow down the freeze:
    #     model = XGBRegressor(n_jobs = -1)
    #     model.fit(data[0], data[1])
    data = make_regression(**dataset_options)
    print('Fit complete')

# Create the pool only when this file is executed as a script. With the
# "spawn" start method every worker re-imports the main module, and starting
# new processes as a side effect of that import is exactly what the
# multiprocessing guidelines say must be avoided.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Submit the dataset-building task 12 times; results_temp holds the
        # futures.
        results_temp = [executor.submit(fit_model) for _ in range(12)]
        # Retrieve every result so that an exception raised in a worker (or a
        # broken pool) is re-raised here instead of being silently discarded.
        for future in results_temp:
            future.result()
# ----------------------------------------------------------------------------


I have attached this code as a .py file.
History
Date User Action Args
2020-11-02 13:33:56DanilZsetrecipients: + DanilZ, bquinlan, ronaldoussoren, pitrou, ned.deily
2020-11-02 13:33:55DanilZsetmessageid: <1604324035.95.0.0561998847241.issue42245@roundup.psfhosted.org>
2020-11-02 13:33:55DanilZlinkissue42245 messages
2020-11-02 13:33:55DanilZcreate