Note
Click here to download the full example code
Experiment Tracking: Coloring a Single Run#
This example demonstrates how to use opt-sugar together with MLflow to track a single-objective optimization experiment.
# sphinx_gallery_thumbnail_path = '_static/coloring.png'
import datetime
from urllib.parse import urlparse
from itertools import product
from collections import defaultdict
import logging
import gurobipy as gp
import mlflow
from mlflow.exceptions import MlflowException
# import sys; sys.path.append('/Users/Juan.ChaconLeon/opt/opt-sugar/src') # when running locally
from opt_sugar import opt_flow
from opt_sugar import low_sugar
# The following function helper is very handy to visualize our colored graphs.
from utils.coloring import get_graph_to_show
# The generate_graph_data generates random graphs given a graph size and an edge probability.
from utils.coloring import generate_graph_data
# TODO: reformat this example similar to
# https://github.com/scikit-learn/scikit-learn/blob/main/examples/calibration/plot_calibration_multiclass.py
The Optimizations Model Builder#
The following class is the builder for the coloring problem.
class ColoringModelBuilder:
    """Builder for the graph-coloring MIP.

    ``data`` is a mapping with two entries read by this class:

    * ``data["nodes"]`` -- iterable of node identifiers;
    * ``data["edges"]`` -- iterable of ``(v1, v2)`` pairs; both endpoints
      must appear in ``data["nodes"]``.

    Colors are 0-based indices.  The objective minimizes the largest color
    index in use, so the number of colors in an optimal solution is the
    objective value plus one.
    """

    def __init__(self, data):
        self.data = data
        # Maximum node degree; set by build_variables.
        self.degree = None
        # Size of the color palette (degree + 1); set by build_variables.
        self.color_count = None
        # {"color": ..., "max_color": ...}; set by build_variables.
        self.variables = None

    def build_variables(self, base_model):
        """Create the binary ``color[node, c]`` variables and ``max_color``.

        The palette holds ``max_degree + 1`` colors: a graph with maximum
        degree D can always be colored with D + 1 colors, while D colors are
        not enough in general (e.g. a single edge, a triangle, an odd cycle).
        """
        degrees = defaultdict(int)
        for v1, v2 in self.data["edges"]:
            degrees[v1] += 1
            degrees[v2] += 1
        # default=0 keeps edgeless graphs valid (one color suffices).
        self.degree = max(degrees.values(), default=0)
        self.color_count = self.degree + 1

        color_keys = list(product(self.data["nodes"], range(self.color_count)))
        color = base_model.addVars(color_keys, vtype="B", name="color")
        # The largest usable color index is color_count - 1 == degree.
        max_color = base_model.addVar(
            lb=0, ub=self.degree, vtype="C", name="max_color"
        )
        self.variables = {"color": color, "max_color": max_color}

    def build_constraints(self, base_model):
        """Add the coloring constraints; call after ``build_variables``."""
        color = self.variables["color"]
        colors = range(self.color_count)

        # Adjacent nodes cannot share a color:
        # color[v1, c] == 1 -> color[v2, c] == 0 for every edge (v1, v2).
        # One constraint per edge and color is enough because the relation
        # is symmetric in v1 and v2.
        for v1, v2 in self.data["edges"]:
            for c in colors:
                base_model.addConstr(
                    color[v2, c] <= 1 - color[v1, c], name=f"color_{c}_{v1}_{v2}"
                )

        # Every node receives exactly one color.
        for v in self.data["nodes"]:
            base_model.addConstr(
                gp.quicksum(color[v, c] for c in colors) == 1,
                name=f"every_node_has_color_{v}",
            )

        # max_color bounds the index of every color that is in use.
        max_color = self.variables["max_color"]
        for v, c in color:
            base_model.addConstr(
                c * color[v, c] <= max_color, name=f"max_color_{v}_{c}"
            )

    def build_objective(self, base_model):
        """Minimize the largest color index in use."""
        max_color = self.variables["max_color"]
        base_model.setObjective(max_color, gp.GRB.MINIMIZE)
Tracking an Optimization Experiment#
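This section creates (or reuses) a date-stamped MLflow experiment, builds and solves a random 16-node coloring instance inside a run, and logs the MIPFocus setting, the solver run time and the resulting color metric together with the model.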
logging.getLogger("mlflow").setLevel(logging.CRITICAL)  # Can be set DEBUG

# One experiment per calendar day. The name is computed outside the ``try``
# so the fallback lookup below can always rely on it.
experiment_name = f"opt_exp_{datetime.datetime.now().strftime('%Y_%m_%d')}"
try:
    experiment_id = mlflow.create_experiment(name=experiment_name)
except MlflowException:
    # Creation failed -- presumably because the experiment already exists
    # (e.g. a second run on the same day) -- so reuse it.
    experiment_id = mlflow.get_experiment_by_name(name=experiment_name).experiment_id

with mlflow.start_run(experiment_id=experiment_id):

    def build(data):
        """Return a Gurobi coloring model for ``data`` with MIPFocus=2."""
        # Create a new model
        m = gp.Model("coloring")
        model_builder = ColoringModelBuilder(data)
        model_builder.build_variables(m)
        model_builder.build_constraints(m)
        model_builder.build_objective(m)
        # setting parameters
        m.setParam("MIPFocus", 2)
        return m

    def callback(m):
        """Collect the values to be logged from the model ``m``."""
        # The objective is the largest 0-based color index in use, so the
        # number of colors is that index plus one.
        max_color_index = m.getObjective().getValue()
        return {
            "color_count": max_color_index + 1,
            # getParamInfo returns a tuple; index 2 is the current value.
            "MIPFocus": m.getParamInfo("MIPFocus")[2],
            "RunTime": m.RunTime,
        }

    # Generating a graph instance
    data = generate_graph_data(node_count=16, edge_probability=0.5)

    # Building
    opt_model = low_sugar.Model(build)
    result = opt_model.optimize(data=data, callback=callback)
    var_values = result["vars"]
    g = get_graph_to_show(data, var_values)
    # g.show(name="vis.html")
    # Note: Above is replacement for opt_model.fit(data, fit_callback) and opt_model.predict(data)

    callback_result = result["callback_result"]
    mlflow.log_param("MIPFocus", callback_result["MIPFocus"])
    mlflow.log_param("RunTime", callback_result["RunTime"])
    mlflow.log_metric("color_count", callback_result["color_count"])

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    print(f"tracking_url_type_store: {tracking_url_type_store}")

    # Register the model
    if tracking_url_type_store != "file":
        # There are other ways to use the Model Registry, which depends on the use case,
        # please refer to the doc for more information:
        # https://mlflow.org/docs/latest/model-registry.html#api-workflow
        model_info = mlflow.sklearn.log_model(
            opt_model, "opt_model", registered_model_name="OptModel"
        )
    else:
        # A file-based store is logged without a registered model name.
        model_info = mlflow.sklearn.log_model(opt_model, "opt_model")
Set parameter MIPFocus to value 2
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (linux64)
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads
Optimize a model with 1816 rows, 193 columns and 3776 nonzeros
Model fingerprint: 0xd84f25f1
Variable types: 1 continuous, 192 integer (192 binary)
Coefficient statistics:
Matrix range [1e+00, 1e+01]
Objective range [1e+00, 1e+00]
Bounds range [1e+00, 1e+01]
RHS range [1e+00, 1e+00]
Found heuristic solution: objective 11.0000000
Presolve removed 1414 rows and 0 columns
Presolve time: 0.02s
Presolved: 402 rows, 193 columns, 1549 nonzeros
Variable types: 0 continuous, 193 integer (192 binary)
Root relaxation presolve removed 31 rows and 16 columns
Root relaxation presolved: 371 rows, 177 columns, 1420 nonzeros
Root relaxation: objective 1.000000e+00, 164 iterations, 0.00 seconds (0.00 work units)
Nodes | Current Node | Objective Bounds | Work
Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time
0 0 1.00000 0 62 11.00000 1.00000 90.9% - 0s
H 0 0 6.0000000 1.00000 83.3% - 0s
H 0 0 5.0000000 1.00000 80.0% - 0s
H 0 0 4.0000000 1.00000 75.0% - 0s
0 0 infeasible 0 4.00000 4.00000 0.00% - 0s
Cutting planes:
Gomory: 7
Implied bound: 4
Projected implied bound: 6
Zero half: 5
RLT: 12
Relax-and-lift: 1
Explored 1 nodes (325 simplex iterations) in 0.03 seconds (0.02 work units)
Thread count was 2 (of 2 available processors)
Solution count 4: 4 5 6 11
Optimal solution found (tolerance 1.00e-04)
Best objective 4.000000000000e+00, best bound 4.000000000000e+00, gap 0.0000%
Local cdn resources have problems on chrome/safari when used in jupyter-notebook.
tracking_url_type_store: file
Load the Registered Model and Optimize with new Data#
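The model logged above is loaded back through ``opt_flow.pyfunc`` and used to color a new, smaller random graph (6 nodes).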
logged_model_uri = model_info.model_uri
print(f"logged_model_uri: {logged_model_uri}")

# Load model as a PyFuncModel.
loaded_model = opt_flow.pyfunc.load_model(logged_model_uri)

# Data generation
data = generate_graph_data(node_count=6, edge_probability=0.5)
solution = loaded_model.optimize(data)
print(f"Optimized Coloring: {solution}")

# Visualize the coloring of the NEW graph: take the variable values from
# ``solution`` (this solve), not from ``result`` (the earlier 16-node run).
var_values = solution["vars"]
g = get_graph_to_show(data, var_values)
# g.show(name="vis.html")
logged_model_uri: runs:/871f60c45b4146e8bf70641193491513/opt_model
Set parameter MIPFocus to value 2
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (linux64)
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads
Optimize a model with 66 rows, 19 columns and 132 nonzeros
Model fingerprint: 0x8e3726c5
Variable types: 1 continuous, 18 integer (18 binary)
Coefficient statistics:
Matrix range [1e+00, 2e+00]
Objective range [1e+00, 1e+00]
Bounds range [1e+00, 3e+00]
RHS range [1e+00, 1e+00]
Found heuristic solution: objective 2.0000000
Presolve removed 42 rows and 0 columns
Presolve time: 0.00s
Presolved: 24 rows, 19 columns, 68 nonzeros
Variable types: 0 continuous, 19 integer (18 binary)
Root relaxation presolve removed 11 rows and 12 columns
Root relaxation: infeasible, 0 iterations, 0.00 seconds (0.00 work units)
Explored 1 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 2 (of 2 available processors)
Solution count 1: 2
Optimal solution found (tolerance 1.00e-04)
Best objective 2.000000000000e+00, best bound 2.000000000000e+00, gap 0.0000%
Optimized Coloring: {'vars': {'color': {(0, 0): 0.0, (0, 1): 1.0, (0, 2): 0.0, (1, 0): 1.0, (1, 1): 0.0, (1, 2): 0.0, (2, 0): 0.0, (2, 1): 0.0, (2, 2): 1.0, (3, 0): 0.0, (3, 1): 1.0, (3, 2): 0.0, (4, 0): 1.0, (4, 1): 0.0, (4, 2): 0.0, (5, 0): 0.0, (5, 1): 0.0, (5, 2): 1.0}, 'max_color': 2.0}, 'objective_value': 2.0}
Local cdn resources have problems on chrome/safari when used in jupyter-notebook.
Total running time of the script: ( 0 minutes 1.454 seconds)