algbio
diff --git a/‎.github/copilot-instructions.md‎
Lines changed: 41 additions & 0 deletions b/‎.github/copilot-instructions.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎examples/mfd_cycles_mingenset.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/mfd_cycles_mingenset.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flowpaths/kflowdecomp.py‎
Lines changed: 85 additions & 39 deletions b/‎flowpaths/kflowdecomp.py‎
Lines changed: 85 additions & 39 deletions
diff --git a/‎flowpaths/kleastabserrors.py‎
Lines changed: 56 additions & 34 deletions b/‎flowpaths/kleastabserrors.py‎
Lines changed: 56 additions & 34 deletions
@@ -0,0 +1,41 @@
+## AI coding agent guide for this repo (flowpaths)
+
+Purpose: Python package to decompose weighted digraphs into weighted paths/walks via (M)ILP. Default solver is HiGHS (highspy); Gurobi (gurobipy) is optional.
+
+Architecture (what to know first)
+- Public API: `flowpaths/__init__.py` re-exports solvers (`MinFlowDecomp`, `kMinPathError`, `kLeastAbsErrors`, `*Cycles`, path-/set-cover, etc.).
+- Two model bases: `AbstractPathModelDAG` (acyclic, s–t paths; uses `stDAG`) and `AbstractWalkModelDiGraph` (general digraphs, walks).
+- ILP bridge: `utils/solverwrapper.py` unifies HiGHS/Gurobi (vars, constraints, binary×continuous, objective, status, timeouts).
+- Node-weighted graphs: `nodeexpandeddigraph.py` handles `flow_attr_origin="node"`, expands/condenses paths; supports `additional_starts/ends`.
+- Safety/optimizations live under `flowpaths/utils/*` and are toggled via `optimization_options` in concrete solvers.
+
+How solutions are built (DAG models)
+- Create k edge-binary vars x(u,v,i) constrained to be s–t paths; add weights/constraints per objective. Output is `{'paths'|'walks', 'weights'}`; node-origin paths are condensed back.
+- `MinFlowDecomp` minimizes number of paths; uses width lower bound, minimal generating set, and subgraph scanning; may accept a greedy solution if it matches a lower bound.
+
+Project-specific conventions
+- Options are dicts:
+  - `solver_options`: {threads, time_limit, presolve, log_to_console, external_solver: "highs"|"gurobi"}.
+  - `optimization_options`: {optimize_with_safe_paths|safe_sequences|safe_zero_edges, use_min_gen_set_lowerbound, use_subgraph_scanning_lowerbound, ...}.
+- `weight_type` is int or float; choose deliberately (affects feasibility/integrality). `flow_attr_origin` is "edge" (default) or "node"; only node-mode allows `additional_starts/ends`.
+- `elements_to_ignore`: edges (tuples) in edge-mode; node names (strings) in node-mode. `subpath_constraints` support coverage by fraction or length (`length_attr`).
+
+Example (from README/tests)
+```python
+import flowpaths as fp, networkx as nx
+G = nx.DiGraph(); G.add_edge('s','a', flow=2); G.add_edge('a','t', flow=2)
+m = fp.MinFlowDecomp(G, flow_attr='flow'); m.solve(); sol = m.get_solution()
+```
+
+Developer workflows
+- Setup: `pip install -e ".[dev]"`; optional `pip install gurobipy` and set `GRB_LICENSE_FILE`.
+- Tests (pytest.ini pins discovery to `tests/`): `pytest -vv -ra --durations=10`; targeted: `pytest -k "min_flow_decomp"`.
+- Examples/CLI: `python examples/min_flow_decomp.py`; `python -m flowpaths`.
+- Docs: `mkdocs serve` (sources in `docs/`, nav in `mkdocs.yml`).
+
+Pitfalls/checks
+- Path models require a DAG; flows must be non-negative and must be conserved at every node other than s and t. If `is_solved()` is False, check `solver.get_model_status()`; a `kTimeLimit` status means the solver hit its time limit, so increase `time_limit` in `solver_options`.
+
+Pointers
+- Overview/examples: `README.md`, `examples/*.py`.
+- Options/optimizations: `docs/solver-options-optimizations.md`; internals: `abstract-path-model.md`, `stdag.md`.
@@ -24,7 +24,7 @@ def test(filename: str):
             "optimize_with_safe_sequences": True,
             "optimize_with_safety_as_subset_constraints": False,
             "use_min_gen_set_lowerbound": False,
-            "optimize_with_given_weights": False,
+            "optimize_with_guessed_weights": False,
         },
         solver_options={"external_solver": "highs"},
     )
 
@@ -24,6 +24,7 @@ def __init__(
         subpath_constraints_coverage_length: float = None,
         length_attr: str = None,
         elements_to_ignore: list = [],
+        solution_weights_superset: list = None,
         optimization_options: dict = {},
         solver_options: dict = {},
     ):
@@ -87,6 +88,12 @@ def __init__(
 
             List of edges (or nodes, if `flow_attr_origin` is `"node"`) to ignore when adding constraints on flow explanation by the weighted paths. Default is an empty list. See [ignoring edges documentation](ignoring-edges.md)
 
+        - `solution_weights_superset: list`, optional
+
+            List of allowed weights for the paths. Default is `None`.
+            If set, the model will use solution path weights only from this list, with the property that **every entry of this list
+            is used as the weight of at most one path in the solution**. That is, if you want several paths to have the same weight, add that weight to `solution_weights_superset` once for each such path.
+
         - `optimization_options : dict`, optional
             
             Dictionary with the optimization options. Default is an empty dictionary. See [optimization options documentation](solver-options-optimizations.md).
@@ -164,7 +171,11 @@ def __init__(
             )
         )
 
+        if k <= 0 or not isinstance(k, int):
+            utils.logger.error(f"{__name__}: k must be a positive integer, not {k}")
+            raise ValueError(f"k must be a positive integer, not {k}")
         self.k = k
+        self.original_k = self.k
 
         self.subpath_constraints_coverage = subpath_constraints_coverage
         self.subpath_constraints_coverage_length = subpath_constraints_coverage_length
@@ -206,6 +217,16 @@ def __init__(
 
         self.optimization_options["trusted_edges_for_safety"] = self.G.get_non_zero_flow_edges(flow_attr=self.flow_attr, edges_to_ignore=self.edges_to_ignore)
 
+        self.solution_weights_superset = solution_weights_superset
+        
+        if self.solution_weights_superset is not None:
+            self.k = len(self.solution_weights_superset)
+            self.optimization_options["allow_empty_paths"] = True
+            self.optimization_options["optimize_with_safe_paths"] = False
+            self.optimization_options["optimize_with_flow_safe_paths"] = False
+            self.optimization_options["optimize_with_safe_sequences"] = False
+            self.optimization_options["optimize_with_safe_zero_edges"] = False
+
         # Call the constructor of the parent class AbstractPathModelDAG
         super().__init__(
             G=self.G, 
@@ -227,10 +248,10 @@ def __init__(
         self.create_solver_and_paths()
 
         # This method is called from the current class to encode the flow decomposition
-        self._encode_flow_decomposition()
-
-        # The given weights optimization
-        self._encode_given_weights()
+        if self.solution_weights_superset is None:
+            self._encode_flow_decomposition()
+        else:
+            self._encode_flow_decomposition_with_given_weights()
 
         utils.logger.info(f"{__name__}: initialized with graph id = {utils.fpid(G)}, k = {self.k}")
 
@@ -280,15 +301,16 @@ def _encode_flow_decomposition(self):
 
             self.solver.add_constraint(
                 self.solver.quicksum(self.pi_vars[(u, v, i)] for i in range(self.k)) == f_u_v,
-                name=f"10d_u={u}_v={v}_i={i}",
+                name=f"10d_u={u}_v={v}",
             )
 
-    def _encode_given_weights(self):
-
-        weights = self.optimization_options.get("given_weights", None)
-        if weights is None:
-            return
+    def _encode_flow_decomposition_with_given_weights(self):
 
+        # If already solved, no need to encode further
+        if self.is_solved():
+            return
+
+        # Some checks
         if self.optimization_options.get("optimize_with_safe_paths", False):
             utils.logger.error(f"{__name__}: Cannot optimize with both given weights and safe paths")
             raise ValueError("Cannot optimize with both given weights and safe paths")
@@ -302,18 +324,38 @@ def _encode_given_weights(self):
             utils.logger.error(f"{__name__}: Cannot optimize with both given weights and flow safe paths")
             raise ValueError("Cannot optimize with both given weights and flow safe paths")
 
-        if len(weights) > self.k:
-            utils.logger.error(f"Length of given weights ({len(weights)}) is greater than k ({self.k})")
-            raise ValueError(f"Length of given weights ({len(weights)}) is greater than k ({self.k})")
+        if len(self.solution_weights_superset) != self.k:
+            utils.logger.error(f"Length of given weights ({len(self.solution_weights_superset)}) is different from k ({self.k})")
+            raise ValueError(f"Length of given weights ({len(self.solution_weights_superset)}) is different from k ({self.k})")
+
+        # We encode that for each edge (u,v), the sum of the weights of the paths going through the edge is equal to the flow value of the edge.
+        for u, v, data in self.G.edges(data=True):
+            if (u, v) in self.edges_to_ignore:
+                continue
+            f_u_v = data[self.flow_attr]
 
-        for i, weight in enumerate(weights):
             self.solver.add_constraint(
-                self.path_weights_vars[i] == weight,
-                name=f"given_weight_{i}",
+                self.solver.quicksum(self.solution_weights_superset[i] * self.edge_vars[(u, v, i)] for i in range(self.k)) == f_u_v,
+                name=f"10d_u={u}_v={v}",
             )
 
+        # We state that at most self.original_k paths can be used
+        self.solver.add_constraint(            
+            self.solver.quicksum(
+                self.solver.quicksum(
+                        self.edge_vars[(self.G.source, v, i)]
+                        for v in self.G.successors(self.G.source)
+                ) for i in range(self.k)
+            ) <= self.original_k,
+            name="max_paths_original_k_paths",
+        )
+
         self.solver.set_objective(
-            self.solver.quicksum(self.edge_vars[(u, v, i)] for u, v in self.G.edges() for i in range(self.k)),
+            self.solver.quicksum(
+                self.edge_vars[(self.G.source, v, i)]
+                for v in self.G.successors(self.G.source)
+                for i in range(self.k)
+            ),
             sense="minimize",
         )
 
@@ -428,32 +470,36 @@ def get_solution(self, remove_empty_paths=False):
         - `exception` If model is not solved.
         """
 
-        if self._solution is None:            
+        if self._solution is not None:
+            return self._remove_empty_paths(self._solution) if remove_empty_paths else self._solution
 
-            self.check_is_solved()
+        self.check_is_solved()
+    
+        if self.solution_weights_superset is None:
             weights_sol_dict = self.solver.get_values(self.path_weights_vars)
-            self.path_weights_sol = [
-                (
-                    round(weights_sol_dict[i])
-                    if self.weight_type == int
-                    else float(weights_sol_dict[i])
-                )
-                for i in range(self.k)
-            ]
+        else:
+            weights_sol_dict = {i: self.solution_weights_superset[i] for i in range(self.k)}
 
-            if self.flow_attr_origin == "edge":
-                self._solution = {
-                    "paths": self.get_solution_paths(),
-                    "weights": self.path_weights_sol,
-                }
-            elif self.flow_attr_origin == "node":
-                self._solution = {
-                    "_paths_internal": self.get_solution_paths(),
-                    "paths": self.G_internal.get_condensed_paths(self.get_solution_paths()),
-                    "weights": self.path_weights_sol,
-                }
+        self.path_weights_sol = [
+            (
+                round(weights_sol_dict[i])
+                if self.weight_type == int
+                else float(weights_sol_dict[i])
+            )
+            for i in range(self.k)
+        ]
 
-        return self._remove_empty_paths(self._solution) if remove_empty_paths else self._solution
+        if self.flow_attr_origin == "edge":
+            self._solution = {
+                "paths": self.get_solution_paths(),
+                "weights": self.path_weights_sol,
+            }
+        elif self.flow_attr_origin == "node":
+            self._solution = {
+                "_paths_internal": self.get_solution_paths(),
+                "paths": self.G_internal.get_condensed_paths(self.get_solution_paths()),
+                "weights": self.path_weights_sol,
+            }
 
     def is_valid_solution(self, tolerance=0.001):
         """
 
@@ -108,8 +108,8 @@ def __init__(
         - `solution_weights_superset: list`, optional
 
             List of allowed weights for the paths. Default is `None`. 
-            If set, the model will use the solution path weights only from this set, with the property that **every weight in the superset
-            appears at most once in the solution weight**.
+            If set, the model will use solution path weights only from this list, with the property that **every entry of this list
+            is used as the weight of at most one path in the solution**. That is, if you want several paths to have the same weight, add that weight to `solution_weights_superset` once for each such path.
 
         - `optimization_options: dict`, optional
 
@@ -260,10 +260,10 @@ def __init__(
         self.create_solver_and_paths()
 
         # This method is called from the current class 
-        self._encode_leastabserrors_decomposition()
-
-        # This method is called from the current class    
-        self._encode_solution_weights_superset()
+        if self.solution_weights_superset is not None:
+            self._encode_leastabserrors_decomposition_with_given_weights()
+        else:
+            self._encode_leastabserrors_decomposition()
 
         # This method is called from the current class to add the objective function
         self._encode_objective()
@@ -330,38 +330,56 @@ def _encode_leastabserrors_decomposition(self):
                 name=f"9ab_u={u}_v={v}_i={i}",
             )
 
-    def _encode_solution_weights_superset(self):
+    def _encode_leastabserrors_decomposition_with_given_weights(self):
 
-        if self.solution_weights_superset is not None:
+        # Some checks on the solution_weights_superset
+        if len(self.solution_weights_superset) != self.k:
+            utils.logger.error(f"{__name__}: solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
+            raise ValueError(f"solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
+        if not self.allow_empty_paths:
+            utils.logger.error(f"{__name__}: solution_weights_superset is not allowed when allow_empty_paths is False")
+            raise ValueError(f"solution_weights_superset is not allowed when allow_empty_paths is False")
 
-            if len(self.solution_weights_superset) != self.k:
-                utils.logger.error(f"{__name__}: solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
-                raise ValueError(f"solution_weights_superset must have length {self.k}, not {len(self.solution_weights_superset)}")
-            if not self.allow_empty_paths:
-                utils.logger.error(f"{__name__}: solution_weights_superset is not allowed when allow_empty_paths is False")
-                raise ValueError(f"solution_weights_superset is not allowed when allow_empty_paths is False")
-            
-            # We state that the weight of the i-th path equals the i-th entry of the solution_weights_superset
-            for i in range(self.k):
-                if self.solution_weights_superset[i] > self.w_max:
-                    utils.logger.error(f"{__name__}: solution_weights_superset[{i}] must be less than or equal to {self.w_max}, not {self.solution_weights_superset[i]}")
-                    raise ValueError(f"solution_weights_superset[{i}] must be less than or equal to {self.w_max}, not {self.solution_weights_superset[i]}")
-                self.solver.add_constraint(
-                    self.path_weights_vars[i] == self.solution_weights_superset[i],
-                    name=f"solution_weights_superset_{i}",
-                )
+        self.edge_indexes_basic = [(u,v) for (u,v) in self.G.edges() if (u,v) not in self.edges_to_ignore]
+        
+        self.edge_errors_vars = self.solver.add_variables(
+            self.edge_indexes_basic,
+            name_prefix="ee",
+            lb=0,
+            ub=self.w_max,
+            var_type="integer" if self.weight_type == int else "continuous",
+        )
 
-            # We state that at most self.original_k paths can be used
-            self.solver.add_constraint(            
-                self.solver.quicksum(
-                    self.solver.quicksum(
-                            self.edge_vars[(self.G.source, v, i)]
-                            for v in self.G.successors(self.G.source)
-                    ) for i in range(self.k)
-                ) <= self.original_k,
-                name="max_paths_original_k_paths",
+        for u, v, data in self.G.edges(data=True):
+            if (u, v) in self.edges_to_ignore:
+                continue
+
+            f_u_v = data[self.flow_attr]
+
+            # Encoding the error on the edge (u, v) as the absolute difference between
+            # the flow value of the edge and the sum of the given weights of the paths that go through it
+            # (i.e., solution_weights_superset[i] * edge_vars[(u, v, i)]; no pi variables are used in this encoding).
+            # If we minimize the sum of edge_errors_vars, then we are minimizing the sum of the absolute errors.
+            self.solver.add_constraint(
+                f_u_v - self.solver.quicksum(self.solution_weights_superset[i] * self.edge_vars[(u, v, i)] for i in range(self.k)) <= self.edge_errors_vars[(u, v)],
+                name=f"9aa_u={u}_v={v}",
             )
 
+            self.solver.add_constraint(
+                -f_u_v + self.solver.quicksum(self.solution_weights_superset[i] * self.edge_vars[(u, v, i)] for i in range(self.k)) <= self.edge_errors_vars[(u, v)],
+                name=f"9ab_u={u}_v={v}",
+            )
+
+        # We state that at most self.original_k paths can be used
+        self.solver.add_constraint(            
+            self.solver.quicksum(
+                self.solver.quicksum(
+                        self.edge_vars[(self.G.source, v, i)]
+                        for v in self.G.successors(self.G.source)
+                ) for i in range(self.k)
+            ) <= self.original_k,
+            name="max_paths_original_k_paths",
+        )
+
     def _encode_objective(self):
 
         self.solver.set_objective(
@@ -425,7 +443,11 @@ def get_solution(self, remove_empty_paths=True):
 
         self.check_is_solved()
 
-        weights_sol_dict = self.solver.get_values(self.path_weights_vars)
+        if self.solution_weights_superset is None:
+            weights_sol_dict = self.solver.get_values(self.path_weights_vars)
+        else:
+            weights_sol_dict = {i: self.solution_weights_superset[i] for i in range(self.k)}
+
         self.path_weights_sol = [
             (
                 round(weights_sol_dict[i])
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ def test(filename: str):`
`24`	`24`	`"optimize_with_safe_sequences": True,`
`25`	`25`	`"optimize_with_safety_as_subset_constraints": False,`
`26`	`26`	`"use_min_gen_set_lowerbound": False,`
`27`		`- "optimize_with_given_weights": False,`
	`27`	`+ "optimize_with_guessed_weights": False,`
`28`	`28`	`},`
`29`	`29`	`solver_options={"external_solver": "highs"},`
`30`	`30`	`)`