Fixed issue where HiGHS solved the problem as soon as the objective was set; added support for a solution weights superset in kLeastAbsErrors

alexandrutomescu · alexandrutomescu · commit e545ea846748 · 2025-04-25T15:53:44.000+03:00
diff --git a/flowpaths/kleastabserrors.py b/flowpaths/kleastabserrors.py
@@ -2,12 +2,13 @@
 import flowpaths.stdigraph as stdigraph
 import flowpaths.abstractpathmodeldag as pathmodel
 import flowpaths.utils as utils
+import copy
 
 
 class kLeastAbsErrors(pathmodel.AbstractPathModelDAG):
     """
-    This class implements the k-LeastAbsoluteErrors, namely it looks for a decomposition of a weighted DAG into 
-    k weighted paths (specified by `num_paths`), minimizing the absolute errors on the edges. The error on an edge 
+    This class implements the k-LeastAbsoluteErrors problem, namely it looks for a decomposition of a weighted DAG into 
+    $k$ weighted paths, minimizing the absolute errors on the edges. The error on an edge 
     is defined as the absolute value of the difference between the weight of the edge and the sum of the weights of 
     the paths that go through it.
     """
@@ -28,6 +29,7 @@ def __init__(
         optimization_options: dict = None,
         solver_options: dict = {},
         trusted_edges_for_safety: list = None,
+        solution_weights_superset: list = None,
     ):
         """
         Initialize the Least Absolute Errors model for a given number of paths.
@@ -102,6 +104,12 @@ def __init__(
             If set, the model can apply the safety optimizations for these edges, so it can be significantly faster.
             See [optimizations documentation](solver-options-optimizations.md#2-optimizations)
 
+        - `solution_weights_superset: list`, optional
+
+            List of allowed weights for the paths. Default is `None`. 
+            If set, the model will take the solution path weights only from this list, with the property that **every weight in the superset
+            appears at most once among the solution weights**.
+
         Raises
         ------
         - `ValueError`
@@ -135,12 +143,19 @@ def __init__(
         )
 
         self.k = k
+        self.original_k = k
+        self.solution_weights_superset = solution_weights_superset
+        self.optimization_options = optimization_options or {}        
+
+        if self.solution_weights_superset is not None:
+            self.k = len(self.solution_weights_superset)
+            self.optimization_options["allow_empty_paths"] = True
+
         self.subpath_constraints = subpath_constraints
         self.subpath_constraints_coverage = subpath_constraints_coverage
         self.subpath_constraints_coverage_length = subpath_constraints_coverage_length
         self.edge_length_attr = edge_length_attr
         
-
         self.pi_vars = {}
         self.path_weights_vars = {}
         self.edge_errors_vars = {}
@@ -151,8 +166,6 @@ def __init__(
         self.__lowerbound_k = None
 
         self.solve_statistics = {}
-
-        self.optimization_options = optimization_options or {}        
         
         # If we get subpath constraints, and the coverage fraction is 1
         # then we know their edges must appear in the solution, so we add their edges to the trusted edges for safety
@@ -163,11 +176,10 @@ def __init__(
                 for constraint in self.subpath_constraints:
                     self.optimization_options["trusted_edges_for_safety"].update(constraint)
 
-
         # Call the constructor of the parent class AbstractPathModelDAG
         super().__init__(
             self.G, 
-            k, 
+            self.k,
             subpath_constraints=self.subpath_constraints, 
             subpath_constraints_coverage=self.subpath_constraints_coverage, 
             subpath_constraints_coverage_length=self.subpath_constraints_coverage_length,
@@ -183,6 +195,9 @@ def __init__(
         # This method is called from the current class 
         self.__encode_leastabserrors_decomposition()
 
+        # This method is called from the current class    
+        self.__encode_solution_weights_superset()
+
         # This method is called from the current class to add the objective function
         self.__encode_objective()
 
@@ -210,7 +225,7 @@ def __encode_leastabserrors_decomposition(self):
         
         self.edge_errors_vars = self.solver.add_variables(
             self.edge_indexes_basic,
-            name_prefix="errorofedge",
+            name_prefix="ee",
             lb=0,
             ub=self.w_max,
             var_type="integer" if self.weight_type == int else "continuous",
@@ -248,17 +263,74 @@ def __encode_leastabserrors_decomposition(self):
                 name=f"9aa_u={u}_v={v}_i={i}",
             )
 
+    def __encode_solution_weights_superset(self):
+
+        if self.solution_weights_superset is not None:
+
+            if len(self.solution_weights_superset) != self.k:
+                utils.logger.error(f"{__name__}: solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
+                raise ValueError(f"solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
+            if not self.allow_empty_paths:
+                utils.logger.error(f"{__name__}: solution_weights_superset is not allowed when allow_empty_paths is False")
+                raise ValueError(f"solution_weights_superset is not allowed when allow_empty_paths is False")
+            
+            # We state that the weight of the i-th path equals the i-th entry of the solution_weights_superset
+            for i in range(self.k):
+                self.solver.add_constraint(
+                    self.path_weights_vars[i] == self.solution_weights_superset[i],
+                    name=f"solution_weights_superset_{i}",
+                )
+
+            # We state that at most self.original_k paths can be used
+            self.solver.add_constraint(            
+                self.solver.quicksum(
+                    self.solver.quicksum(
+                            self.edge_vars[(self.G.source, v, i)]
+                            for v in self.G.successors(self.G.source)
+                    ) for i in range(self.k)
+                ) <= self.original_k,
+                name="max_paths_original_k_paths",
+            )
+
     def __encode_objective(self):
 
         self.solver.set_objective(
             self.solver.quicksum(
-                self.edge_errors_vars[(u, v)] 
-                * self.edge_error_scaling.get((u, v), 1)
+                self.edge_errors_vars[(u, v)] * self.edge_error_scaling.get((u, v), 1) if self.edge_error_scaling.get((u, v), 1) != 1 else self.edge_errors_vars[(u, v)]
                 for (u,v) in self.edge_indexes_basic), 
             sense="minimize"
         )
 
-    def get_solution(self):
+    def __remove_empty_paths(self, solution):
+        """
+        Removes empty paths from the solution. Empty paths are those with 0 or 1 nodes.
+
+        Parameters
+        ----------
+        - `solution: dict`
+            
+            The solution dictionary containing paths and weights.
+
+        Returns
+        -------
+        - `solution: dict`
+            
+            The solution dictionary with empty paths removed.
+
+        """
+        solution_copy = copy.deepcopy(solution)
+        non_empty_paths = []
+        non_empty_weights = []
+        for path, weight in zip(solution["paths"], solution["weights"]):
+            if len(path) > 1:
+                non_empty_paths.append(path)
+                non_empty_weights.append(weight)
+
+        solution_copy["paths"] = non_empty_paths
+        solution_copy["weights"] = non_empty_weights
+        return solution_copy
+
+    def get_solution(self, remove_empty_paths=True):
         """
         Retrieves the solution for the flow decomposition problem.
 
@@ -279,7 +351,7 @@ def get_solution(self):
         """
 
         if self.__solution is not None:
-            return self.__solution
+            return self.__remove_empty_paths(self.__solution) if remove_empty_paths else self.__solution
 
         self.check_is_solved()
 
@@ -292,7 +364,7 @@ def get_solution(self):
             )
             for i in range(self.k)
         ]
-        self.edge_errors_sol = self.solver.get_variable_values("errorofedge", [str, str])
+        self.edge_errors_sol = self.solver.get_variable_values("ee", [str, str])
         print("self.edge_errors_sol", self.edge_errors_sol)
         for (u,v) in self.edge_indexes_basic:
             self.edge_errors_sol[(u,v)] = round(self.edge_errors_sol[(u,v)]) if self.weight_type == int else float(self.edge_errors_sol[(u,v)])
@@ -303,7 +375,7 @@ def get_solution(self):
             "edge_errors": self.edge_errors_sol # This is a dictionary with keys (u,v) and values the error on the edge (u,v)
         }
 
-        return self.__solution
+        return self.__remove_empty_paths(self.__solution) if remove_empty_paths else self.__solution
 
     def is_valid_solution(self, tolerance=0.001):
         """
@@ -350,7 +422,7 @@ def is_valid_solution(self, tolerance=0.001):
                 ):
                     return False
 
-        if abs(self.get_objective_value() - self.solver.get_objective_value()) > tolerance * self.k:
+        if abs(self.get_objective_value() - self.solver.get_objective_value()) > tolerance * self.original_k:
             return False
 
         return True
diff --git a/flowpaths/utils/solverwrapper.py b/flowpaths/utils/solverwrapper.py
@@ -6,6 +6,7 @@
 import signal
 import math
 import flowpaths.utils as utils
+import numpy as np
 
 class SolverWrapper:
     """
@@ -72,7 +73,8 @@ def __init__(
         self.did_timeout = False
 
         if self.external_solver == "highs":
-            self.solver = highspy.Highs()
+            self.solver = HighsCustom()
+            self.solver.setMinimize() # minimization by default
             self.solver.setOptionValue("solver", "choose")
             self.solver.setOptionValue("threads", kwargs.get("threads", SolverWrapper.threads))
             self.solver.setOptionValue("time_limit", kwargs.get("time_limit", SolverWrapper.time_limit))
@@ -95,9 +97,11 @@ def __init__(
             self.env.setParam("MIPGap", self.tolerance)
             self.env.setParam("IntFeasTol", self.tolerance)
             self.env.setParam("FeasibilityTol", self.tolerance)
+            self.env.setParam("ModelSense", 1) # minimization by default
             
             self.env.start()
             self.solver = gurobipy.Model(env=self.env)
+            
         else:
             utils.logger.error(f"{__name__}: Unsupported solver type `{self.external_solver}`. Supported solvers are `highs` and `gurobi`.")
             raise ValueError(
@@ -268,10 +272,7 @@ def set_objective(self, expr, sense="minimize"):
         self.optimization_sense = sense
 
         if self.external_solver == "highs":
-            if sense in ["minimize", "min"]:
-                self.solver.minimize(expr)
-            else:
-                self.solver.maximize(expr)
+            self.solver.set_objective_without_solving(expr, sense=sense)
         elif self.external_solver == "gurobi":
             import gurobipy
 
@@ -529,4 +530,48 @@ def __run_with_timeout(self, timeout, func):
             except Exception as e:
                 pass
             finally:
-                signal.alarm(0)  # Disable alarm after execution
+                signal.alarm(0)  # Disable alarm after execution
+
+
+
+class HighsCustom(highspy.Highs):
+    def __init__(self):
+        super().__init__()
+    
+    def set_objective_without_solving(self, obj, sense: str = "minimize") -> None:
+        """
+        This method is implemented in the same way as the [minimize()](https://github.com/ERGO-Code/HiGHS/blob/master/src/highspy/highs.py#L182) or 
+        [maximize()](https://github.com/ERGO-Code/HiGHS/blob/master/src/highspy/highs.py#L218) methods of the [Highs](https://github.com/ERGO-Code/HiGHS/blob/master/src/highspy/highs.py) class,
+        except that it does not call the solve() method. 
+        
+        That is, you can use it to set the objective, without also running the solver.
+
+        **`obj` must be a linear expression, not a single variable.**
+        """
+
+        if obj is not None:
+            # if we have a single variable, wrap it in a linear expression
+            # expr = highspy.highs_linear_expression(obj) if isinstance(obj, highspy.highs_var) else obj
+            expr = obj
+
+            if expr.bounds is not None:
+                raise Exception("Objective cannot be an inequality")
+
+            # reset objective
+            super().changeColsCost(
+                self.numVariables,
+                np.arange(self.numVariables, dtype=np.int32),
+                np.full(self.numVariables, 0, dtype=np.float64),
+            )
+
+            # if we have duplicate variables, add the vals
+            idxs, vals = expr.unique_elements()
+            super().changeColsCost(len(idxs), idxs, vals)
+            super().changeObjectiveOffset(expr.constant or 0.0)
+
+        if sense in ["minimize", "min"]:
+            super().changeObjectiveSense(highspy.ObjSense.kMinimize)
+        elif sense in ["maximize", "max"]:
+            super().changeObjectiveSense(highspy.ObjSense.kMaximize)
+        else:
+            raise ValueError(f"Invalid objective sense: {sense}. Use 'minimize' or 'maximize'.")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "flowpaths"
-version = "0.1.20" 
+version = "0.1.21" 
 description = "A Python package to quickly decompose weighted graphs into weights paths, under various models."
 readme = "README.md"
 authors = [{name="Graph Algorithms and Bioinformatics Group @ University of Helsinki, and external collaborators"}]
@@ -19,6 +19,7 @@ dependencies = [
     "networkx",
     "highspy",
     "graphviz",
+    "numpy",
 ]
 
 [build-system]
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 networkx>=3.4.2
 highspy>=1.9.0
 graphviz>=0.20.3
+numpy>=2.2.5