"""Check that a process pool still works after a RandomizedSearchCV run.

STEP 1 fits a multi-threaded XGBoost model through a parallel
RandomizedSearchCV.  STEP 2.0 and STEP 2.1 then submit small tasks to a
``ProcessPoolExecutor`` to verify that worker processes still run.

Run this file as a script.  The worker pools use the "spawn" start method,
which re-imports this module in every worker, so everything with side
effects lives under the ``if __name__ == "__main__":`` guard.
"""
import concurrent.futures
import multiprocessing

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor

# Number of tasks submitted to the pool in each multiprocessing test.
N_TASKS = 12


def just_print():
    """Trivial worker task: print a marker line and return None."""
    print('Just printing')


def fit_model():
    """Slightly heavier worker task: build a synthetic regression dataset.

    No model is fitted here on purpose; generating the dataset alone was
    enough to make the workers hang in the original (unguarded) version of
    this script.  A model fit can be added once the pool is confirmed to
    work.
    """
    make_regression(n_samples=500, n_features=100, n_informative=10,
                    n_targets=1, random_state=5)
    print('Fit complete')


def _run_in_pool(task, n_tasks=N_TASKS):
    """Run ``task`` ``n_tasks`` times in a process pool and return the futures.

    The pool is created with an explicit "spawn" context.  NOTE(review): the
    likely cause of the freeze is that the default "fork" start method copies
    a parent process that already holds native worker threads (started by the
    search/refit in STEP 1) -- confirm on the affected machine.  Spawned
    workers start from a fresh interpreter and do not inherit that state.

    Every future's ``result()`` is requested so that an exception raised in
    a worker is re-raised here instead of being silently dropped.
    """
    spawn_context = multiprocessing.get_context("spawn")
    with concurrent.futures.ProcessPoolExecutor(
            mp_context=spawn_context) as executor:
        futures = [executor.submit(task) for _ in range(n_tasks)]
        for future in concurrent.futures.as_completed(futures):
            future.result()
    return futures


if __name__ == "__main__":
    # STEP 1
    # ------------------------------------------------------------------------
    # simulate RandomizedSearchCV
    data = make_regression(n_samples=500, n_features=100, n_informative=10,
                           n_targets=1, random_state=5)
    X = pd.DataFrame(data[0])
    y = pd.Series(data[1])

    kf = KFold(n_splits=3, shuffle=True, random_state=5)
    model = XGBRegressor(n_jobs=-1)

    params = {
        'min_child_weight': [0.1, 1, 5],
        'subsample': [0.5, 0.7, 1.0],
        'colsample_bytree': [0.5, 0.7, 1.0],
        'eta': [0.005, 0.01, 0.1],
        'n_jobs': [-1],
    }

    random_search = RandomizedSearchCV(
        model,
        param_distributions=params,
        n_iter=50,
        n_jobs=-1,
        refit=True,  # necessary for random_search.best_estimator_
        # Pass the splitter itself rather than a single-use generator from
        # kf.split(X, y); with the fixed random_state the folds are the same.
        cv=kf,
        verbose=1,
        random_state=5,
    )
    random_search.fit(X, np.array(y))

    # STEP 2.0
    # ------------------------------------------------------------------------
    # test if multiprocessing is working in the first place
    results_temp = _run_in_pool(just_print)

    # STEP 2.1
    # ------------------------------------------------------------------------
    # test on a slightly more complex function
    results_temp = _run_in_pool(fit_model)