getml.hyperopt
Automatically find the best parameters for a pipeline.
Examples
# getml.hyperopt example: tune the hyperparameters of a pipeline
# built from two feature learners and one predictor.

from getml import (
    data,
    datasets,
    engine,
    feature_learning,
    hyperopt,
    pipeline,
    predictors,
)
from getml.feature_learning import aggregations, loss_functions

# All objects live inside a named project on the getML engine.
engine.set_project("examples")

# Synthetic population/peripheral tables for demonstration purposes.
population_table, peripheral_table = datasets.make_numerical()

# Placeholders describe the abstract data model; the join mirrors the
# relationship between the two tables.
population_placeholder = data.Placeholder("POPULATION")
peripheral_placeholder = data.Placeholder("PERIPHERAL")
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

# First base feature learner. Any hyperparameter that is not listed in
# the search space keeps the value fixed here.
multirel = feature_learning.MultirelModel(
    aggregation=[
        aggregations.Count,
        aggregations.Sum,
    ],
    loss_function=loss_functions.SquareLoss,
    num_features=10,
    share_aggregations=1.0,
    max_length=1,
    num_threads=0,
)

# Second base feature learner - the same fallback rule applies.
relboost = feature_learning.RelboostModel(
    loss_function=loss_functions.SquareLoss,
    num_features=10,
)

# Base predictor - the same fallback rule applies.
linear_regression = predictors.LinearRegression()

# The reference pipeline whose hyperparameters will be optimized.
base_pipeline = pipeline.Pipeline(
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    feature_learners=[multirel, relboost],
    predictors=[linear_regression],
)

# The search space: one dict per feature learner (in order) and one per
# predictor. To optimize only the predictor, leave out the
# "feature_learners" entry.
search_space = {
    "feature_learners": [
        {
            "num_features": [10, 50],
        },
        {
            "max_depth": [1, 10],
            "min_num_samples": [100, 500],
            "num_features": [10, 50],
            "reg_lambda": [0.0, 0.1],
            "shrinkage": [0.01, 0.4],
        },
    ],
    "predictors": [
        {
            "reg_lambda": [0.0, 10.0],
        },
    ],
}

# Bayesian optimization (Gaussian process) over the search space,
# wrapped around the reference pipeline.
gaussian_search = hyperopt.GaussianHyperparameterSearch(
    pipeline=base_pipeline,
    param_space=search_space,
    n_iter=30,
    score=pipeline.scores.rsquared,
)
gaussian_search.fit(
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table],
)

# Resume the search for five more iterations. Setting ratio_iter to 0.0
# skips another burn-in phase. The space is adjusted in place: the
# Relboost num_features range is widened, the predictor's reg_lambda is
# pinned to a single value.
gaussian_search.n_iter = 5
gaussian_search.ratio_iter = 0.0
gaussian_search.param_space["feature_learners"][1]["num_features"] = [10, 100]
gaussian_search.param_space["predictors"][0]["reg_lambda"] = [0.0, 0.0]

# This fit continues the search, reusing the earlier evaluations as
# prior knowledge.
gaussian_search.fit(
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table],
)

# Inspect the results: every search known to the engine, plus the best
# pipeline found by this one.
all_hyp = hyperopt.list_hyperopts()
best_pipeline = gaussian_search.best_pipeline
Functions

- Lists all hyperparameter optimization objects present in the engine.
- Loads a hyperparameter optimization object from the getML engine into Python.
Classes

- Bayesian hyperparameter optimization using a Gaussian process.
- Latin hypercube sampling of the hyperparameters.
- Uniformly distributed sampling of the hyperparameters.