Skip to content

Commit 65a9b85

Browse files
Added new option elements_to_ignore_percentile to kMPE
1 parent 0f80d6c commit 65a9b85

File tree

3 files changed

+65
-15
lines changed

3 files changed

+65
-15
lines changed

docs/abstract-path-model.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Abstract Path Model in DAGs
1+
# Abstract Path Model in DAGs
22

33
A general approach in developing a model to decompose a weighted graph into weighted paths is to:
44

examples/cycles_demo.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@ def test_min_flow_decomp(filename: str):
55
graph = fp.graphutils.read_graphs(filename)[0]
66
print("graph id", graph.graph["id"])
77
# print("subset_constraints", graph.graph["constraints"])
8-
fp.utils.draw(
9-
G=graph,
10-
filename=filename + ".pdf",
11-
flow_attr="flow",
12-
subpath_constraints=graph.graph["constraints"],
13-
draw_options={
14-
"show_graph_edges": True,
15-
"show_edge_weights": True,
16-
"show_path_weights": False,
17-
"show_path_weight_on_first_edge": True,
18-
"pathwidth": 2,
19-
})
8+
# fp.utils.draw(
9+
# G=graph,
10+
# filename=filename + ".pdf",
11+
# flow_attr="flow",
12+
# subpath_constraints=graph.graph["constraints"],
13+
# draw_options={
14+
# "show_graph_edges": True,
15+
# "show_edge_weights": True,
16+
# "show_path_weights": False,
17+
# "show_path_weight_on_first_edge": True,
18+
# "pathwidth": 2,
19+
# })
2020

2121
print(graph.graph["n"], graph.graph["m"], graph.graph["w"])
2222

@@ -28,11 +28,11 @@ def test_min_flow_decomp(filename: str):
2828
optimization_options={
2929
"optimize_with_safe_sequences": True, # set to false to deactivate the safe sequences optimization
3030
"optimize_with_safe_sequences_allow_geq_constraints": True,
31-
"optimize_with_safe_sequences_fix_via_bounds": False,
31+
"optimize_with_safe_sequences_fix_via_bounds": True,
3232
"optimize_with_safe_sequences_fix_zero_edges": True,
3333
},
3434
solver_options={
35-
"external_solver": "highs", # we can try also "highs" at some point
35+
"external_solver": "gurobi", # we can try also "highs" at some point
3636
"time_limit": 300, # 300s = 5min, is it ok?
3737
},
3838
)
@@ -125,6 +125,26 @@ def test_min_path_error(filename):
125125
kmpe_percentile_model.solve()
126126
process_solution(kmpe_percentile_model)
127127

128+
# here, we use percentile to determine which edges we ignore
129+
kmpe_percentile_ignore_model = fp.kMinPathErrorCycles(
130+
G=graph,
131+
flow_attr="flow",
132+
weight_type=int,
133+
elements_to_ignore_percentile=25, # we ignore edges whose weight is < 25 percentile
134+
subset_constraints=graph.graph["constraints"], # try with and without
135+
optimization_options={
136+
"optimize_with_safe_sequences": True, # set to false to deactivate the safe sequences optimization
137+
"optimize_with_safe_sequences_allow_geq_constraints": False,
138+
"optimize_with_safe_sequences_fix_via_bounds": True,
139+
},
140+
solver_options={
141+
"external_solver": "gurobi", # we can try also "highs" at some point
142+
"time_limit": 300, # 300s = 5min, is it ok?
143+
},
144+
)
145+
kmpe_percentile_ignore_model.solve()
146+
process_solution(kmpe_percentile_ignore_model)
147+
128148
def process_solution(model):
129149
if model.is_solved():
130150
solution = model.get_solution()
@@ -154,6 +174,7 @@ def process_solution(model):
154174
def main():
155175
test_min_flow_decomp(filename = "tests/cyclic_graphs/gt3.kmer15.(130000.132000).V23.E32.cyc100.graph")
156176
test_min_flow_decomp(filename = "tests/cyclic_graphs/gt5.kmer27.(1300000.1400000).V809.E1091.mincyc1000.graph")
177+
# test_min_flow_decomp(filename = "tests/cyclic_graphs/gt4.kmer15.(0.10000).V1096.E1622.mincyc100.e1.0.graph")
157178
test_least_abs_errors(filename = "tests/cyclic_graphs/gt5.kmer27.(655000.660000).V18.E27.mincyc4.e0.75.graph")
158179
test_min_path_error(filename = "tests/cyclic_graphs/gt5.kmer27.(655000.660000).V18.E27.mincyc4.e0.75.graph")
159180

flowpaths/kminpatherrorcycles.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def __init__(
1818
subset_constraints: list = [],
1919
subset_constraints_coverage: float = 1.0,
2020
elements_to_ignore: list = [],
21+
elements_to_ignore_percentile: float = None,
2122
error_scaling: dict = {},
2223
additional_starts: list = [],
2324
additional_ends: list = [],
@@ -77,6 +78,14 @@ def __init__(
7778
List of edges (or nodes, if `flow_attr_origin` is `"node"`) to ignore when adding constraints on flow explanation by the weighted walks.
7879
Default is an empty list. See [ignoring edges documentation](ignoring-edges.md)
7980
81+
- `elements_to_ignore_percentile: float`, optional
82+
83+
If provided, ignores elements automatically based on a percentile threshold of their flow values (`flow_attr`).
84+
Elements (edges, or nodes if `flow_attr_origin` is `"node"`) whose flow is below this percentile
85+
are ignored when enforcing the error constraints. Must be in the range `[0, 100]`.
86+
This is mutually exclusive with `elements_to_ignore` (setting both raises a `ValueError`).
87+
See [ignoring edges documentation](ignoring-edges.md).
88+
8089
- `error_scaling: dict`, optional
8190
8291
Dictionary `edge: factor` (or `node: factor`, if `flow_attr_origin` is `"node"`) storing the error scale factor (in [0,1]) of every edge, which scales the allowed difference between edge/node weight and walk weights.
@@ -114,6 +123,8 @@ def __init__(
114123
- If the flow attribute `flow_attr` is not specified in some edge.
115124
- If the graph contains edges with negative flow values.
116125
- ValueError: If `flow_attr_origin` is not "node" or "edge".
126+
- If `elements_to_ignore_percentile` is set and is not in `[0, 100]`.
127+
- If `elements_to_ignore_percentile` is set together with `elements_to_ignore`.
117128
"""
118129

119130
# Handling node-weighted graphs
@@ -155,6 +166,20 @@ def __init__(
155166

156167
self.G = stdigraph.stDiGraph(self.G_internal, additional_starts=additional_starts_internal, additional_ends=additional_ends_internal)
157168
self.subset_constraints = subset_constraints_internal
169+
170+
if elements_to_ignore_percentile is not None:
171+
if elements_to_ignore_percentile < 0 or elements_to_ignore_percentile > 100:
172+
utils.logger.error(f"{__name__}: elements_to_ignore_percentile must be between 0 and 100, not {elements_to_ignore_percentile}")
173+
raise ValueError(f"elements_to_ignore_percentile must be between 0 and 100, not {elements_to_ignore_percentile}")
174+
if len(elements_to_ignore) > 0:
175+
utils.logger.critical(f"{__name__}: you cannot set elements_to_ignore when elements_to_ignore_percentile is set.")
176+
raise ValueError(f"you cannot set elements_to_ignore when elements_to_ignore_percentile is set.")
177+
178+
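# Ignore edges whose flow_attr value is strictly below the elements_to_ignore_percentile-th percentile of all flow values (using self.G). NOTE(review): the filter below iterates over edges_to_ignore_internal, which is presumably empty here because elements_to_ignore must be empty -- it likely should iterate over self.G.edges(); confirm.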
179+
flow_values = [self.G.edges[edge][flow_attr] for edge in self.G.edges() if flow_attr in self.G.edges[edge]]
180+
percentile = np.percentile(flow_values, elements_to_ignore_percentile) if flow_values else 0
181+
edges_to_ignore_internal = [edge for edge in edges_to_ignore_internal if self.G.edges[edge][flow_attr] < percentile]
182+
158183
self.edges_to_ignore = self.G.source_sink_edges.union(edges_to_ignore_internal)
159184
self.edge_error_scaling = error_scaling_internal
160185
# If the error scaling factor is 0, we ignore the edge
@@ -200,6 +225,10 @@ def __init__(
200225
self.solve_time_start = time.perf_counter()
201226

202227
if trusted_edges_for_safety_percentile is not None:
228+
if trusted_edges_for_safety_percentile < 0 or trusted_edges_for_safety_percentile > 100:
229+
utils.logger.error(f"{__name__}: trusted_edges_for_safety_percentile must be between 0 and 100.")
230+
raise ValueError(f"trusted_edges_for_safety_percentile must be between 0 and 100.")
231+
203232
# Select edges where the flow_attr value is >= trusted_edges_for_safety_percentile (using self.G)
204233
flow_values = [self.G.edges[edge][flow_attr] for edge in self.G.edges() if flow_attr in self.G.edges[edge]]
205234
percentile = np.percentile(flow_values, trusted_edges_for_safety_percentile) if flow_values else 0

0 commit comments

Comments
 (0)