Experiment Tracking: Coloring a Single Run#

This example demonstrates how to use opt-sugar in combination with mlflow for single-objective optimization experiment tracking.

https://mybinder.org/badge_logo.svg
# sphinx_gallery_thumbnail_path = '_static/coloring.png'
import datetime
from urllib.parse import urlparse
from itertools import product
from collections import defaultdict
import logging

import gurobipy as gp
import mlflow
from mlflow.exceptions import MlflowException

# import sys; sys.path.append('/Users/Juan.ChaconLeon/opt/opt-sugar/src')  # when running locally
from opt_sugar import opt_flow
from opt_sugar import low_sugar

# The following function helper is very handy to visualize our colored graphs.
from utils.coloring import get_graph_to_show

# The generate_graph_data generates random graphs given a graph size and an edge probability.
from utils.coloring import generate_graph_data

# TODO: reformat this example similar to
#  https://github.com/scikit-learn/scikit-learn/blob/main/examples/calibration/plot_calibration_multiclass.py

The Optimization Model Builder#

The following class is the builder for the coloring problem.

class ColoringModelBuilder:
    """Builder of a mixed-integer model for the graph coloring problem.

    ``data`` is a mapping with two entries:

    * ``data["nodes"]``: iterable of hashable node labels.
    * ``data["edges"]``: iterable of ``(v1, v2)`` pairs; both endpoints are
      expected to appear in ``data["nodes"]``.

    The three ``build_*`` methods are meant to be called in the order
    variables -> constraints -> objective on the same Gurobi model, because
    the later steps read what ``build_variables`` stored on the builder.

    Attributes set by ``build_variables``:

    * ``degree``: maximum node degree of the graph (0 when there are no edges).
    * ``color_count``: size of the color palette, ``degree + 1``.
    * ``variables``: dict with the ``"color"`` and ``"max_color"`` variables.
    """

    def __init__(self, data):
        self.data = data
        self.degree = None
        self.color_count = None
        self.variables = None

    def build_variables(self, base_model):
        """Add the assignment variables and the max-color variable to ``base_model``.

        ``color[v, c]`` is binary and equals 1 when node ``v`` gets color
        ``c``; ``max_color`` is a continuous variable bounding the largest
        color index in use.
        """
        degrees = defaultdict(int)
        for v1, v2 in self.data["edges"]:
            degrees[v1] += 1
            degrees[v2] += 1
        # default=0 keeps edgeless graphs from raising on an empty max().
        self.degree = max(degrees.values(), default=0)
        # A graph can always be colored with (max degree + 1) colors, but not
        # always with fewer (e.g. complete graphs and odd cycles), so the
        # palette needs one more color than the maximum degree.
        self.color_count = self.degree + 1
        color_keys = list(product(self.data["nodes"], range(self.color_count)))
        color = base_model.addVars(color_keys, vtype="B", name="color")
        # Color indices run from 0 to self.degree, hence the upper bound.
        max_color = base_model.addVar(
            lb=0, ub=self.degree, vtype="C", name="max_color"
        )
        self.variables = {"color": color, "max_color": max_color}

    def build_constraints(self, base_model):
        """Add the coloring constraints to ``base_model``.

        Requires ``build_variables`` to have been called first.
        """
        color = self.variables["color"]
        colors = range(self.color_count)

        # Adjacent nodes cannot share a color. Each undirected edge is handled
        # once, whatever orientation (or repetition) it has in the edge list.
        seen_edges = set()
        for v1, v2 in self.data["edges"]:
            edge = frozenset((v1, v2))
            if edge in seen_edges:
                continue
            seen_edges.add(edge)
            for c in colors:
                base_model.addConstr(
                    color[v1, c] + color[v2, c] <= 1, name=f"color_{c}_{v1}_{v2}"
                )

        # Every node receives exactly one color.
        for v in self.data["nodes"]:
            base_model.addConstr(
                gp.quicksum(color[v, c] for c in colors) == 1,
                name=f"every_node_has_color_{v}",
            )

        # max_color is at least the index of every color that is in use.
        max_color = self.variables["max_color"]
        for v, c in color:
            base_model.addConstr(
                c * color[v, c] <= max_color, name=f"max_color_{v}_{c}"
            )

    def build_objective(self, base_model):
        """Minimize the largest color index in use.

        Requires ``build_variables`` to have been called first.
        """
        max_color = self.variables["max_color"]
        base_model.setObjective(max_color, gp.GRB.MINIMIZE)

Tracking an Optimization Experiment#

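The following snippet creates (or reuses) an MLflow experiment named after the current date, solves the coloring model for a randomly generated 16-node graph inside a single MLflow run, and logs the solver settings, the run time and the resulting objective value. Finally, the optimization model itself is logged so that it can be reloaded later.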

logging.getLogger("mlflow").setLevel(logging.CRITICAL)  # Can be set DEBUG

# Runs are grouped in one experiment per calendar day. The experiment is looked
# up first and only created when missing, so that unrelated MLflow errors are
# not mistaken for "experiment already exists".
experiment_name = f"opt_exp_{datetime.datetime.now().strftime('%Y_%m_%d')}"
experiment = mlflow.get_experiment_by_name(name=experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(name=experiment_name)
else:
    experiment_id = experiment.experiment_id


def build(data):
    """Return a Gurobi model of the coloring problem for ``data``."""
    # Create a new model
    m = gp.Model("coloring")
    model_builder = ColoringModelBuilder(data)
    model_builder.build_variables(m)
    model_builder.build_constraints(m)
    model_builder.build_objective(m)
    # setting parameters
    m.setParam('MIPFocus', 2)
    return m


def callback(m):
    """Collect the values to track from the Gurobi model ``m``.

    NOTE(review): presumably invoked by ``low_sugar.Model.optimize`` once the
    solve has finished -- confirm against opt-sugar.
    """
    # The objective is the largest color index in use and indices start at 0,
    # so the number of colors is one more than the objective value.
    color_count = m.getObjective().getValue() + 1
    callback_result = {
        "color_count": color_count,
        # getParamInfo returns a tuple whose third entry is the current value.
        "MIPFocus": m.getParamInfo('MIPFocus')[2],
        "RunTime": m.RunTime,
    }
    return callback_result


with mlflow.start_run(experiment_id=experiment_id):
    # Generating a graph instance
    data = generate_graph_data(node_count=16, edge_probability=0.5)

    # Building
    opt_model = low_sugar.Model(build)
    result = opt_model.optimize(data=data, callback=callback)
    var_values = result["vars"]
    g = get_graph_to_show(data, var_values)
    # g.show(name="vis.html")

    # Note: Above is replacement for opt_model.fit(data, fit_callback) and opt_model.predict(data)
    callback_result = result["callback_result"]
    mlflow.log_param("MIPFocus", callback_result["MIPFocus"])
    mlflow.log_param("RunTime", callback_result["RunTime"])
    mlflow.log_metric("color_count", callback_result["color_count"])

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    print(f"tracking_url_type_store: {tracking_url_type_store}")

    # Register the model, unless the tracking store is a plain file store.
    # There are other ways to use the Model Registry, which depends on the use case,
    # please refer to the doc for more information:
    # https://mlflow.org/docs/latest/model-registry.html#api-workflow
    registered_model_name = "OptModel" if tracking_url_type_store != "file" else None
    model_info = mlflow.sklearn.log_model(
        opt_model, "opt_model", registered_model_name=registered_model_name
    )
Set parameter MIPFocus to value 2
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (linux64)
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads
Optimize a model with 1816 rows, 193 columns and 3776 nonzeros
Model fingerprint: 0xd84f25f1
Variable types: 1 continuous, 192 integer (192 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+01]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 11.0000000
Presolve removed 1414 rows and 0 columns
Presolve time: 0.02s
Presolved: 402 rows, 193 columns, 1549 nonzeros
Variable types: 0 continuous, 193 integer (192 binary)
Root relaxation presolve removed 31 rows and 16 columns
Root relaxation presolved: 371 rows, 177 columns, 1420 nonzeros


Root relaxation: objective 1.000000e+00, 164 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    1.00000    0   62   11.00000    1.00000  90.9%     -    0s
H    0     0                       6.0000000    1.00000  83.3%     -    0s
H    0     0                       5.0000000    1.00000  80.0%     -    0s
H    0     0                       4.0000000    1.00000  75.0%     -    0s
     0     0 infeasible    0         4.00000    4.00000  0.00%     -    0s

Cutting planes:
  Gomory: 7
  Implied bound: 4
  Projected implied bound: 6
  Zero half: 5
  RLT: 12
  Relax-and-lift: 1

Explored 1 nodes (325 simplex iterations) in 0.03 seconds (0.02 work units)
Thread count was 2 (of 2 available processors)

Solution count 4: 4 5 6 11

Optimal solution found (tolerance 1.00e-04)
Best objective 4.000000000000e+00, best bound 4.000000000000e+00, gap 0.0000%
Local cdn resources have problems on chrome/safari when used in jupyter-notebook.
tracking_url_type_store: file

Load the Registered Model and Optimize with new Data#

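The model logged in the previous run is loaded back from its URI and used to optimize a new, randomly generated 6-node graph.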

logged_model_uri = model_info.model_uri
print(f"logged_model_uri: {logged_model_uri}")

# Load model as a PyFuncModel.
loaded_model = opt_flow.pyfunc.load_model(logged_model_uri)

# Data generation
data = generate_graph_data(node_count=6, edge_probability=0.5)
solution = loaded_model.optimize(data)
print(f"Optimized Coloring: {solution}")

# Draw the solution of the new instance: the variable values must come from
# ``solution`` (solved on the 6-node graph above), not from the ``result`` of
# the earlier 16-node run.
var_values = solution["vars"]
g = get_graph_to_show(data, var_values)
# g.show(name="vis.html")
logged_model_uri: runs:/871f60c45b4146e8bf70641193491513/opt_model
Set parameter MIPFocus to value 2
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (linux64)
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads
Optimize a model with 66 rows, 19 columns and 132 nonzeros
Model fingerprint: 0x8e3726c5
Variable types: 1 continuous, 18 integer (18 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 3e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 2.0000000
Presolve removed 42 rows and 0 columns
Presolve time: 0.00s
Presolved: 24 rows, 19 columns, 68 nonzeros
Variable types: 0 continuous, 19 integer (18 binary)
Root relaxation presolve removed 11 rows and 12 columns

Root relaxation: infeasible, 0 iterations, 0.00 seconds (0.00 work units)

Explored 1 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 2 (of 2 available processors)

Solution count 1: 2

Optimal solution found (tolerance 1.00e-04)
Best objective 2.000000000000e+00, best bound 2.000000000000e+00, gap 0.0000%
Optimized Coloring: {'vars': {'color': {(0, 0): 0.0, (0, 1): 1.0, (0, 2): 0.0, (1, 0): 1.0, (1, 1): 0.0, (1, 2): 0.0, (2, 0): 0.0, (2, 1): 0.0, (2, 2): 1.0, (3, 0): 0.0, (3, 1): 1.0, (3, 2): 0.0, (4, 0): 1.0, (4, 1): 0.0, (4, 2): 0.0, (5, 0): 0.0, (5, 1): 0.0, (5, 2): 1.0}, 'max_color': 2.0}, 'objective_value': 2.0}
Local cdn resources have problems on chrome/safari when used in jupyter-notebook.
https://mybinder.org/badge_logo.svg

Total running time of the script: ( 0 minutes 1.454 seconds)

Gallery generated by Sphinx-Gallery