def set_memory_limit(mem_limit):
    """
    Limit the virtual memory size (``RLIMIT_AS``) of the current process.

    Arguments:
        mem_limit (int or None): Requested limit in MiB. ``None`` leaves the
            current limits untouched.

    The soft limit is placed ``MEMORY_BUFFER_SOFT`` MiB below the request and the
    hard limit ``MEMORY_BUFFER_HARD`` MiB below it; neither drops below its own
    buffer size. On Windows a warning is emitted and nothing is changed.
    """
    if mem_limit is None:
        return

    if sys.platform == "win32":
        warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.")
        return

    import resource  # POSIX-only module, hence the function-scope import

    requested = _mib_as_bytes(mem_limit)
    soft_buffer = _mib_as_bytes(MEMORY_BUFFER_SOFT)
    hard_buffer = _mib_as_bytes(MEMORY_BUFFER_HARD)

    hard = max(requested - hard_buffer, hard_buffer)
    # setrlimit() raises ValueError when soft > hard. Without the clamp that
    # happens for requests smaller than the soft buffer, because the soft
    # limit is floored at the buffer size while the hard limit is not.
    soft = min(max(requested - soft_buffer, soft_buffer), hard)

    resource.setrlimit(resource.RLIMIT_AS, (soft, hard))  # limits are in bytes
# Decimal (SI) unit sizes expressed in bytes.
_BYTES_PER_MB = 1000 * 1000
_BYTES_PER_GB = 1000 * 1000 * 1000


def _mib_as_bytes(mib: int) -> int:
    """Convert mebibytes (1 MiB = 1024 * 1024 bytes) to bytes."""
    kib = mib * 1024
    return kib * 1024


def _mb_as_bytes(mb: int) -> int:
    """Convert megabytes (1 MB = 1000 * 1000 bytes) to bytes."""
    kb = mb * 1000
    return kb * 1000


def _bytes_as_mb(bytes: int) -> int:
    """Convert bytes to whole megabytes, rounding down."""
    return bytes // _BYTES_PER_MB


def _bytes_as_gb(bytes: int) -> int:
    """Convert bytes to whole gigabytes, rounding down."""
    return bytes // _BYTES_PER_GB


def _bytes_as_mb_float(bytes: int) -> float:
    """Convert bytes to megabytes, keeping the fractional part."""
    return bytes / _BYTES_PER_MB


def _bytes_as_gb_float(bytes: int) -> float:
    """Convert bytes to gigabytes, keeping the fractional part."""
    return bytes / _BYTES_PER_GB
def print_intermediate(self, objective:int):
    """
    Report the objective value of an intermediate solution.

    Callback that subclasses may override; the default routes the message
    through `print_comment`.

    Arguments:
        objective (int): Objective value of the newly found solution.
    """
    # `print_comment` accepts a single string, so format the message up front
    # (passing the objective as a second positional argument raises TypeError).
    self.print_comment(f"Intermediate solution: {objective}")
def handle_exception(self, e):
    """
    Report an exception and its stack trace through `print_comment`.

    Arguments:
        e (BaseException): The exception to report.

    The first comment names the exception type and message, followed by a
    "Stack trace:" header and one comment per non-blank traceback line.
    """
    import traceback

    self.print_comment(f"An {type(e)} got raised: {e}")
    self.print_comment("Stack trace:")
    # Format the traceback attached to `e` itself. `traceback.format_exc()`
    # only looks at the exception currently being handled and yields
    # "NoneType: None" when this method is called outside an `except` block.
    formatted = "".join(traceback.format_exception(type(e), e, e.__traceback__))
    for line in formatted.split('\n'):
        if line.strip():
            self.print_comment(line)
def exact_arguments(self, seed: Optional[int] = None, **kwargs):
    """
    Build the solver options for the Exact back-end.

    Arguments:
        seed (int, optional): Random seed; omitted from the options when None.
        **kwargs: Accepted and ignored.

    Returns:
        tuple: ``(options, None)`` -- the options dictionary and no hook for
        setting internal solver options.
    """
    # Documentation: https://gitlab.com/JoD/exact/-/blob/main/src/Options.hpp?ref_type=heads
    options = {} if seed is None else {"seed": seed}
    return options, None
def gurobi_arguments(
        self,
        model: "cp.Model",
        cores: Optional[int] = None,
        seed: Optional[int] = None,
        mem_limit: Optional[int] = None,
        intermediate: bool = False,
        **kwargs
    ):
    """
    Build the solver options for the Gurobi back-end.

    Arguments:
        model (cp.Model): Model about to be solved; only `has_objective()` is used.
        cores (int, optional): Value for the `Threads` parameter.
        seed (int, optional): Value for the `Seed` parameter.
        mem_limit (int, optional): Memory limit in bytes, passed on as `MemLimit` in GB.
        intermediate (bool): Register a callback reporting every improved
            objective value (only when the model has an objective).
        **kwargs: Accepted and ignored.

    Returns:
        tuple: ``(options, None)`` -- the options dictionary and no hook for
        setting internal solver options.
    """
    # Documentation: https://www.gurobi.com/documentation/9.5/refman/parameters.html#sec:Parameters
    res = dict()
    if cores is not None:
        res |= { "Threads": cores }
    if seed is not None:
        res |= { "Seed": seed }
    if mem_limit is not None:
        # MemLimit is a floating-point amount of GB. Integer division would
        # round every limit below 1 GB down to 0 and drop the fraction above it.
        res |= { "MemLimit": mem_limit / (1000 * 1000 * 1000) }

    if intermediate and model.has_objective():

        _self = self

        class GurobiSolutionCallback:
            """Prints the objective whenever Gurobi's solution count increases."""

            def __init__(self, model: "cp.Model"):
                self.__start_time = time.time()
                self.__solution_count = 0
                self.model = model

            def callback(self, *args, **kwargs):
                current_time = time.time()
                grb_model, where = args

                # Callback codes: https://www.gurobi.com/documentation/current/refman/cb_codes.html#sec:CallbackCodes
                from gurobipy import GRB

                if where != GRB.Callback.MIP:  # only react to callbacks from the MIP solver
                    return
                if grb_model.cbGet(GRB.Callback.MIP_SOLCNT) > self.__solution_count:  # do we have a new solution?
                    obj = int(grb_model.cbGet(GRB.Callback.MIP_OBJBST))
                    _self.print_comment('Solution %i, time = %0.4fs' %
                                        (self.__solution_count, current_time - self.__start_time))
                    _self.print_intermediate(obj)
                    self.__solution_count = grb_model.cbGet(GRB.Callback.MIP_SOLCNT)

        res |= { "solution_callback": GurobiSolutionCallback(model).callback }

    return res, None
{"randomseed": seed} + + return res, None + + def hexaly_arguments( + self, + model: cp.Model, + cores: Optional[int] = None, + seed: Optional[int] = None, + intermediate: bool = False, + **kwargs + ): + res = dict() + #res |= {"nb_threads": cores} + #res |= {"seed": seed} + + + if intermediate and model.has_objective(): + # Define custom Hexaly solution callback, then register it + + _self = self + class HexSolutionCallback: + + def __init__(self): + self.__start_time = time.time() + self.__solution_count = 0 + + + def on_solution_callback(self, optimizer, cb_type): + """Called on each new solution.""" + # check if solution with different objective (or if verbose) + current_time = time.time() + obj = optimizer.model.objectives[0] + _self.print_comment('Solution %i, time = %0.4fs' % + (self.__solution_count, current_time - self.__start_time)) + _self.print_intermediate(obj) + self.__solution_count += 1 + + def solution_count(self): + return self.__solution_count + + # Register the callback + res |= { "solution_callback": HexSolutionCallback().on_solution_callback } + + + # def internal_options(solver: "CPM_hexaly"): + # # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688 + # #solver.native_model.get_param().set_seed(seed) + # #solver.native_model.get_param().set_nr_threads(cores) + + # _self = self + # class CallbackExample: + # def __init__(self): + # self.last_best_value = 0 + # self.last_best_running_time = 0 + # self.__solution_count = 0 + # self.__start_time = time.time() + + # def my_callback(self, optimizer, cb_type): + # stats = optimizer.statistics + # obj = optimizer.model.objectives[0] + # current_time = time.time() + # #obj = int(self.ObjectiveValue()) + # #obj = optimizer.get_objective_bound(0).value + # if obj.value > self.last_best_value: + # self.last_best_running_time = stats.running_time + # self.last_best_value = obj.value + # self.__solution_count += 1 + + # 
def sigterm_handler(self, _signo, _stack_frame):
    """
    Signal handler: run the `handle_sigterm` hook, then terminate the process.

    The hook's return value is used as the process exit code. A hook that
    returns None (such as the base implementation) exits with code 0.
    """
    exit_code = self.handle_sigterm()
    print(flush=True)
    # os._exit() only accepts an int; passing None raises TypeError and the
    # process would keep running instead of terminating.
    os._exit(0 if exit_code is None else exit_code)

def rlimit_cpu_handler(self, _signo, _stack_frame):
    """
    Signal handler: run the `handle_rlimit_cpu` hook, then terminate the process.

    The hook's return value is used as the process exit code. A hook that
    returns None (such as the base implementation) exits with code 0.
    """
    exit_code = self.handle_rlimit_cpu()
    print(flush=True)
    # Same None -> 0 mapping as in `sigterm_handler`.
    os._exit(0 if exit_code is None else exit_code)
def solver_arguments(
        self,
        solver: str,
        model: "cp.Model",
        seed: Optional[int] = None,
        intermediate: bool = False,
        cores: int = 1,
        mem_limit: Optional[int] = None,
        **kwargs
    ):
    """
    Dispatch to the `<solver>_arguments` method matching `solver`.

    Arguments:
        solver (str): Back-end solver name; any name starting with "minizinc"
            is routed to `minizinc_arguments`.
        model (cp.Model): Model about to be solved.
        seed (int, optional): Random seed.
        intermediate (bool): Whether intermediate solutions should be reported.
        cores (int): Number of cores the solver may use.
        mem_limit (int, optional): Memory limit, forwarded to z3 and Gurobi only.
        **kwargs: Forwarded to the solver-specific method (not to choco).

    Returns:
        tuple: whatever the solver-specific method returns; ``({}, None)``
        after a comment is printed when the solver is not recognised.
    """
    opt = model.has_objective()

    if solver == "ortools":
        return self.ortools_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
    elif solver == "exact":
        return self.exact_arguments(seed=seed, **kwargs)
    elif solver == "choco":
        return self.choco_arguments()
    elif solver == "z3":
        return self.z3_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, **kwargs)
    elif solver.startswith("minizinc"): # also can have a subsolver
        return self.minizinc_arguments(solver, cores=cores, seed=seed, **kwargs)
    elif solver == "gurobi":
        return self.gurobi_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, intermediate=intermediate, opt=opt, **kwargs)
    elif solver == "cpo":
        return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
    elif solver == "hexaly":
        return self.hexaly_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
    elif solver == "cplex":
        # `cplex_arguments` takes a seed; forward it like for every other solver.
        return self.cplex_arguments(cores=cores, seed=seed, **kwargs)
    else:
        self.print_comment(f"setting parameters of {solver} is not (yet) supported")
        return dict(), None
backend solver to use + time_buffer: int = 0, + intermediate: bool = False, + verbose: bool = False, + **kwargs, + ): + + if not verbose: + warnings.filterwarnings("ignore") + + try: + + # --------------------------- Global Configuration --------------------------- # + + # Get the current process + p = psutil.Process() + + # pychoco currently does not support setting the mem_limit + if solver == "choco" and mem_limit is not None: + warnings.warn("'mem_limit' is currently not supported with choco, issues with GraalVM") + mem_limit = None + + # Set random seed (if provided) + if seed is not None: + random.seed(seed) + + self.init_signal_handlers() + + # Set memory limit (if provided) + if mem_limit is not None: + self.set_memory_limit(mem_limit) + + # Set time limit (if provided) + if time_limit is not None: + self.set_time_limit(time_limit) # set remaining process time != wall time + + # ------------------------------ Parse instance ------------------------------ # + + time_parse = time.time() + model = self.read_instance(instance, open=open) + time_parse = time.time() - time_parse + if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model") + + if time_limit and time_limit < _wall_time(p): + raise TimeoutError("Time's up after parse") + + # ------------------------ Post CPMpy model to solver ------------------------ # + + solver_args, internal_options = self.solver_arguments(solver, model=model, seed=seed, + intermediate=intermediate, + cores=cores, mem_limit=_mib_as_bytes(mem_limit) if mem_limit is not None else None, + **kwargs) + + # Post model to solver + time_post = time.time() + s = self.post_model(model, solver, solver_args) + time_post = time.time() - time_post + if verbose: self.print_comment(f"took {time_post:.4f} seconds to post model to {solver}") + + if time_limit and time_limit < _wall_time(p): + raise TimeoutError("Time's up after post") + + # ------------------------------- Solve model ------------------------------- # + + if 
time_limit: + # give solver only the remaining time + time_limit = time_limit - _wall_time(p) - time_buffer + # disable signal-based time limit and let the solver handle it (solvers don't play well with difference between cpu and wall time) + self.set_time_limit(None) + + if verbose: self.print_comment(f"{time_limit}s left to solve") + + time_solve = time.time() + try: + if internal_options is not None: + internal_options(s) # Set more internal solver options (need access to native solver object) + is_sat = s.solve(time_limit=time_limit, **solver_args) + except RuntimeError as e: + if "Program interrupted by user." in str(e): # Special handling for Exact + raise TimeoutError("Exact interrupted due to timeout") + else: + raise e + + time_solve = time.time() - time_solve + if verbose: self.print_comment(f"took {time_solve:.4f} seconds to solve") + + # ------------------------------- Print result ------------------------------- # + + self.print_result(s) + + # ------------------------------------- - ------------------------------------ # + + + except MemoryError as e: + disable_memory_limit() + self.handle_memory_error(mem_limit) + raise e + except NotImplementedError as e: + self.handle_not_implemented(e) + raise e + except TimeoutError as e: + self.handle_exception(e) # TODO add callback for timeout? + raise e + except Exception as e: + self.handle_exception(e) + raise e + + + \ No newline at end of file diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py new file mode 100644 index 000000000..343c2dfdd --- /dev/null +++ b/cpmpy/tools/benchmark/jsplib.py @@ -0,0 +1,213 @@ +""" +JSPLib as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on JSPLib +instances. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on JSPLib datasets. + +Usage: + python jsplib.py --solver ortools + +Arguments: + --solver Solver name (e.g., ortools, exact, choco, ...). 
def solution_psplib(model):
    """
    Render the values of a solved model's variables as a string.

    Arguments:
        model (cp.solvers.SolverInterface): Solver object whose `user_vars` are reported.

    Returns:
        str: `str()` of a dictionary mapping variable names to their values.
    """
    # Dirty workaround: variables whose name starts with one of these prefixes
    # are treated as auxiliary variables that ended up in `user_vars` and are
    # skipped. TODO fix with Ignace
    aux_prefixes = ("IV", "BV", "B#")

    assignment = {}
    for var in model.user_vars:
        if var.name.startswith(aux_prefixes):
            continue
        assignment[var.name] = var.value()
    return str(assignment)
def print_comment(self, comment:str):
    """
    Print a comment in the 'c <text>' output format.

    Arguments:
        comment (str): Text to print; trailing newlines are dropped.

    Every line of a multi-line comment gets its own 'c ' prefix, so that no
    continuation line can be mistaken for an 's', 'v' or 'o' line when the
    output is parsed again.
    """
    for text in comment.rstrip('\n').split('\n'):
        print('c' + chr(32) + text, end="\r\n", flush=True)
def parse_output_line(self, line, result):
    """
    Update `result` with the information found in one line of benchmark output.

    Arguments:
        line (str): A single output line ('s', 'v', 'o' or 'c' prefixed).
        result (dict): Result record, updated in place.

    Recognised lines:
        's <status>'                        -> result['status']
        'v <values>'                        -> result['solution'] (successive lines are joined with a space)
        'c Solution <i>, time = <t>s'       -> remembered as the time of the next 'o' line
        'o <objective>'                     -> result['objective_value'] and result['intermediate']
        'c took <t> seconds to <action>'    -> result['time_parse' / 'time_model' / 'time_post' / 'time_solve']
    """
    if line.startswith('s '):
        result['status'] = line[2:].strip()
    elif line.startswith('v '):
        solution = line.split("\n")[0][2:].strip()
        # Test the 'solution' entry itself: checking `solution not in result`
        # looked the value up among the dict keys, so every 'v' line
        # overwrote the previous one instead of being appended.
        if result.get('solution') is None:
            result['solution'] = solution
        else:
            result['solution'] = result['solution'] + ' ' + str(solution)
    elif line.startswith('c Solution'):
        parts = line.split(', time = ')
        # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
        self.sol_time = float(parts[-1].replace('s', '').rstrip())
    elif line.startswith('o '):
        obj = int(line[2:].strip())
        if result.get('intermediate') is None:  # also covers a record without the key
            result['intermediate'] = []
        if self.sol_time is not None:
            result['intermediate'].append((self.sol_time, obj))
        result['objective_value'] = obj
    elif line.startswith('c took '):
        # Parse timing information
        parts = line.split(' seconds to ')
        if len(parts) == 2:
            time_val = float(parts[0].replace('c took ', ''))
            action = parts[1].strip()
            timing_keys = (
                ('parse', 'time_parse'),
                ('convert', 'time_model'),
                ('post', 'time_post'),
                ('solve', 'time_solve'),
            )
            for prefix, key in timing_keys:
                if action.startswith(prefix):
                    result[key] = time_val
                    break
in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + # parser.add_argument('--checker-path', type=str, default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.problem.jsplib import JSPLibDataset + dataset = JSPLibDataset(download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "jsplib" / f"jsplib_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = JSPLibBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py new file mode 100644 index 000000000..a1936346e --- /dev/null +++ b/cpmpy/tools/benchmark/mse.py @@ -0,0 +1,231 @@ +""" +MSE competition as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on MaxSAT Evaluation (MSE) +competition instances encoded in WCNF (Weighted CNF) format. It extends the generic +`Benchmark` base class with MSE-specific logging and result reporting in DIMACS-like format. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on MSE datasets. 
class MSEExitStatus(Enum):
    """
    Exit statuses of an MSE benchmark run; each value is the text printed on the 's' line.
    """
    # Members are deliberately left unannotated: they are `MSEExitStatus`
    # instances, not `str`, and type checkers reject annotated enum members.
    unsupported = "UNSUPPORTED"  # instance contains an unsupported feature (e.g. a unsupported global constraint)
    sat = "SATISFIABLE"  # CSP : found a solution | COP : found a solution but couldn't prove optimality
    optimal = "OPTIMUM" + chr(32) + "FOUND"  # optimal COP solution found (evaluates to "OPTIMUM FOUND")
    unsat = "UNSATISFIABLE"  # instance is unsatisfiable
    unknown = "UNKNOWN"  # any other case
def print_result(self, s):
    """
    Print the final outcome of a solve call.

    Arguments:
        s: Solver object that was just run; its `status().exitstatus` selects what is printed.

    Optimal and feasible outcomes print the objective ('o'), the solution
    ('v') and the status ('s'). An unsatisfiable outcome prints only the
    status. Anything else prints a comment followed by the unknown status.
    """
    outcome = s.status().exitstatus

    # The two "solution found" outcomes differ only in the status they report.
    if outcome == CPMStatus.OPTIMAL:
        solved_as = MSEExitStatus.optimal
    elif outcome == CPMStatus.FEASIBLE:
        solved_as = MSEExitStatus.sat
    else:
        solved_as = None

    if solved_as is not None:
        self.print_objective(s.objective_value())
        self.print_value(solution_mse(s))
        self.print_status(solved_as)
        return

    if outcome == CPMStatus.UNSATISFIABLE:
        self.print_status(MSEExitStatus.unsat)
        return

    self.print_comment("Solver did not find any solution within the time/memory limit")
    self.print_status(MSEExitStatus.unknown)
def parse_output_line(self, line, result):
    """
    Update `result` in place from one chunk of captured benchmark output.

    Recognised prefixes (anything else is ignored):
        's '          exit status, stored in result['status'].
        'v '          solution values; only the first physical line of the chunk is
                      used. The first chunk sets result['solution'], later chunks are
                      appended to it, separated by a single space.
        'c Solution'  comment ending in ', time = <t>s'; <t> is remembered on
                      `self._sol_time` to annotate the following 'o ' lines.
        'o '          objective value, stored in result['objective_value'] and, when a
                      solution time is known, appended to result['intermediate'] as
                      (time, objective).
        'c took '     '<t> seconds to <action>' timing comments, stored in
                      result['time_parse' | 'time_model' | 'time_post' | 'time_solve'].

    Arguments:
        line (str): One chunk of text printed by the benchmark run.
        result (dict): Result record to update.
    """
    if line.startswith('s '):
        result['status'] = line[2:].strip()
    elif line.startswith('v '):
        solution = line.split("\n")[0][2:].strip()
        previous = result.get('solution')
        # Check the recorded solution itself; testing `solution in result` would compare
        # the solution text against the dict's keys and never take the append branch.
        if previous is None:
            result['solution'] = solution
        else:
            result['solution'] = previous + ' ' + solution
    elif line.startswith('c Solution'):
        parts = line.split(', time = ')
        # Without the time marker there is nothing to parse (float() would always fail).
        if len(parts) > 1:
            self._sol_time = float(parts[-1].replace('s', '').rstrip())
    elif line.startswith('o '):
        obj = int(line[2:].strip())
        if result.get('intermediate') is None:
            result['intermediate'] = []
        if self._sol_time is not None:
            result['intermediate'].append((self._sol_time, obj))
        result['objective_value'] = obj
    elif line.startswith('c took '):
        parts = line.split(' seconds to ')
        if len(parts) == 2:
            time_val = float(parts[0].replace('c took ', ''))
            action = parts[1].strip()
            # first matching action prefix decides the field, as in an if/elif chain
            for prefix, field in (('parse', 'time_parse'),
                                  ('convert', 'time_model'),
                                  ('post', 'time_post'),
                                  ('solve', 'time_solve')):
                if action.startswith(prefix):
                    result[field] = time_val
                    break
default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.model.mse import MSEDataset + dataset = MSEDataset(year=args.year, track=args.track, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "mse" / f"mse_{args.year}_{args.track}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = MSEBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py new file mode 100644 index 000000000..0c571a058 --- /dev/null +++ b/cpmpy/tools/benchmark/opb.py @@ -0,0 +1,222 @@ +""" +PB competition as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on PB +competition instances. It extends the generic `Benchmark` base class with +PB Competition-specific logging and result reporting. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on PB competition (OPB) datasets. + +Usage: + python opb.py --year 2024 --track OPT-LIN --solver ortools + +Arguments: + --year Competition year (e.g., 2024). + --track Track type (e.g., OPT-LIN, DEC-LIN). + --solver Solver name (e.g., ortools, exact, choco, ...). + --workers Number of parallel workers to use. + --time-limit Time limit in seconds per instance. + --mem-limit Memory limit in MB per instance. + --cores Number of cores to assign to a single instance. + --output-dir Output directory for CSV files.
def solution_opb(model):
    """
    Formats a solution according to the PB24 specification.

    Every reported variable is written by name with square brackets removed; a variable
    whose value is falsy gets a leading "-". Literals are separated by single spaces.

    Arguments:
        model: CPMpy model for which to format its solution (should be solved first)

    Returns:
        Formatted model solution according to PB24 specification.
    """
    literals = []
    for var in model.user_vars:
        # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
        if var.name[:2] in ("IV", "BV", "B#"):
            continue
        flat_name = var.name.replace("[", "").replace("]", "")
        if var.value():
            literals.append(flat_name)
        else:
            literals.append("-" + flat_name)
    return " ".join(literals)
+ """ + + def __init__(self): + self.sol_time = None + super().__init__(reader=read_opb, exit_status=OPBExitStatus) + + def print_comment(self, comment:str): + print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) + + def print_status(self, status: OPBExitStatus) -> None: + print('s' + chr(32) + status.value, end="\n", flush=True) + + def print_value(self, value: str) -> None: + value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:] + print('v' + chr(32) + value, end="\n", flush=True) + + def print_objective(self, objective: int) -> None: + print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_intermediate(self, objective:int): + self.print_objective(objective) + + def print_result(self, s): + if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) + self.print_value(solution_opb(s)) + self.print_status(OPBExitStatus.optimal) + elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) + self.print_value(solution_opb(s)) + self.print_status(OPBExitStatus.sat) + elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: + self.print_status(OPBExitStatus.unsat) + else: + self.print_comment("Solver did not find any solution within the time/memory limit") + self.print_status(OPBExitStatus.unknown) + + def handle_memory_error(self, mem_limit): + super().handle_memory_error(mem_limit) + self.print_status(OPBExitStatus.unknown) + + def handle_not_implemented(self, e): + super().handle_not_implemented(e) + self.print_status(OPBExitStatus.unsupported) + + def handle_exception(self, e): + super().handle_exception(e) + self.print_status(OPBExitStatus.unknown) + + def handle_sigterm(self): + """ + Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. 
def parse_output_line(self, line, result):
    """
    Update `result` in place from one chunk of captured benchmark output.

    Recognised prefixes (anything else is ignored):
        's '          exit status, stored in result['status'].
        'v '          solution values; only the first physical line of the chunk is
                      used. The first chunk sets result['solution'], later chunks are
                      appended to it, separated by a single space.
        'c Solution'  comment ending in ', time = <t>s'; <t> is remembered on
                      `self.sol_time` to annotate the following 'o ' lines.
        'o '          objective value, stored in result['objective_value'] and, when a
                      solution time is known, appended to result['intermediate'] as
                      (time, objective).
        'c took '     '<t> seconds to <action>' timing comments, stored in
                      result['time_parse' | 'time_model' | 'time_post' | 'time_solve'].

    Arguments:
        line (str): One chunk of text printed by the benchmark run.
        result (dict): Result record to update.
    """
    if line.startswith('s '):
        result['status'] = line[2:].strip()
    elif line.startswith('v '):
        solution = line.split("\n")[0][2:].strip()
        previous = result.get('solution')
        # Check the recorded solution itself; testing `solution in result` would compare
        # the solution text against the dict's keys and never take the append branch.
        if previous is None:
            result['solution'] = solution
        else:
            result['solution'] = previous + ' ' + solution
    elif line.startswith('c Solution'):
        parts = line.split(', time = ')
        # Without the time marker there is nothing to parse (float() would always fail).
        if len(parts) > 1:
            self.sol_time = float(parts[-1].replace('s', '').rstrip())
    elif line.startswith('o '):
        obj = int(line[2:].strip())
        if result.get('intermediate') is None:
            result['intermediate'] = []
        if self.sol_time is not None:
            result['intermediate'].append((self.sol_time, obj))
        result['objective_value'] = obj
    elif line.startswith('c took '):
        parts = line.split(' seconds to ')
        if len(parts) == 2:
            time_val = float(parts[0].replace('c took ', ''))
            action = parts[1].strip()
            # first matching action prefix decides the field, as in an if/elif chain
            for prefix, field in (('parse', 'time_parse'),
                                  ('convert', 'time_model'),
                                  ('post', 'time_post'),
                                  ('solve', 'time_solve')):
                if action.startswith(prefix):
                    result[field] = time_val
                    break
OPT-LIN, DEC-LIN)') + parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)') + parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers') + parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance') + parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.model.opb import OPBDataset + dataset = OPBDataset(year=args.year, track=args.track, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "opb" / f"opb_{args.year}_{args.track}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = OPBBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py new file mode 100644 index 000000000..0f1a1639f --- /dev/null +++ b/cpmpy/tools/benchmark/psplib.py @@ -0,0 +1,216 @@ +""" +PSPLIB as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on PSPLIB 
def solution_psplib(model):
    """
    Render the solution stored in a solved model as a string.

    The result is the `str()` of a dict mapping each reported user variable's name to
    its value.

    Arguments:
        model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution

    Returns:
        str: formatted solution string.
    """
    # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
    hidden_prefixes = ("IV", "BV", "B#")

    assignment = {}
    for var in model.user_vars:
        if var.name[:2] in hidden_prefixes:
            continue
        assignment[var.name] = var.value()
    return str(assignment)
+ """ + + def __init__(self): + self.sol_time = None + super().__init__(reader=read_rcpsp) # TODO: reader should depend on problem variant + + def print_comment(self, comment:str): + print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) + + def print_status(self, status: ExitStatus) -> None: + print('s' + chr(32) + status.value, end="\n", flush=True) + + def print_value(self, value: str) -> None: + print('v' + chr(32) + value, end="\n", flush=True) + + def print_objective(self, objective: int) -> None: + print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_intermediate(self, objective:int): + self.print_objective(objective) + + def print_result(self, s): + if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.optimal) + elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.sat) + elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: + self.print_status(ExitStatus.unsat) + else: + self.print_comment("Solver did not find any solution within the time/memory limit") + self.print_status(ExitStatus.unknown) + + def handle_memory_error(self, mem_limit): + super().handle_memory_error(mem_limit) + self.print_status(ExitStatus.unknown) + + def handle_not_implemented(self, e): + super().handle_not_implemented(e) + self.print_status(ExitStatus.unsupported) + + def handle_exception(self, e): + super().handle_exception(e) + self.print_status(ExitStatus.unknown) + + + def handle_sigterm(self): + """ + Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. + """ + # Report that we haven't found a solution in time + self.print_status(ExitStatus.unknown) + self.print_comment("SIGTERM raised.") + return 0 + + def handle_rlimit_cpu(self): + """ + Handles a SIGXCPU. 
def parse_output_line(self, line, result):
    """
    Update `result` in place from one chunk of captured benchmark output.

    Recognised prefixes (anything else is ignored):
        's '          exit status, stored in result['status'].
        'v '          solution values; only the first physical line of the chunk is
                      used. The first chunk sets result['solution'], later chunks are
                      appended to it, separated by a single space.
        'c Solution'  comment ending in ', time = <t>s'; <t> is remembered on
                      `self.sol_time` to annotate the following 'o ' lines.
        'o '          objective value, stored in result['objective_value'] and, when a
                      solution time is known, appended to result['intermediate'] as
                      (time, objective).
        'c took '     '<t> seconds to <action>' timing comments, stored in
                      result['time_parse' | 'time_model' | 'time_post' | 'time_solve'].

    Arguments:
        line (str): One chunk of text printed by the benchmark run.
        result (dict): Result record to update.
    """
    if line.startswith('s '):
        result['status'] = line[2:].strip()
    elif line.startswith('v '):
        solution = line.split("\n")[0][2:].strip()
        previous = result.get('solution')
        # Check the recorded solution itself; testing `solution in result` would compare
        # the solution text against the dict's keys and never take the append branch.
        if previous is None:
            result['solution'] = solution
        else:
            result['solution'] = previous + ' ' + solution
    elif line.startswith('c Solution'):
        parts = line.split(', time = ')
        # Without the time marker there is nothing to parse (float() would always fail).
        if len(parts) > 1:
            self.sol_time = float(parts[-1].replace('s', '').rstrip())
    elif line.startswith('o '):
        obj = int(line[2:].strip())
        if result.get('intermediate') is None:
            result['intermediate'] = []
        if self.sol_time is not None:
            result['intermediate'].append((self.sol_time, obj))
        result['objective_value'] = obj
    elif line.startswith('c took '):
        parts = line.split(' seconds to ')
        if len(parts) == 2:
            time_val = float(parts[0].replace('c took ', ''))
            action = parts[1].strip()
            # first matching action prefix decides the field, as in an if/elif chain
            for prefix, field in (('parse', 'time_parse'),
                                  ('convert', 'time_model'),
                                  ('post', 'time_post'),
                                  ('solve', 'time_solve')):
                if action.startswith(prefix):
                    result[field] = time_val
                    break
parallel workers') + parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance') + parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + # parser.add_argument('--checker-path', type=str, default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.problem.psplib import PSPLibDataset + dataset = PSPLibDataset(variant=args.variant, family=args.family, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "psplib" / f"psplib_{args.variant}_{args.family}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = PSPLIBBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py new file mode 100644 index 000000000..6bc85e6ae --- /dev/null +++ b/cpmpy/tools/benchmark/runner.py @@ -0,0 +1,296 @@ +""" +Benchmark Runner for CPMpy Instances + +This module provides tools to execute benchmark instances in parallel while +safely capturing solver output, enforcing 
class PipeWriter:
    """
    Stdout wrapper for a multiprocessing pipe.

    Every non-empty string written is sent as one message over the connection.
    Sending is best-effort: a failing pipe must not break the code that is printing.
    """
    def __init__(self, conn):
        """
        Arguments:
            conn: Connection-like object exposing `send(obj)`.
        """
        self.conn = conn

    def write(self, data):
        """
        Send `data` over the pipe; empty writes are skipped.

        Arguments:
            data (str): The string to write.
        """
        if not data:  # avoid empty writes
            return
        try:
            self.conn.send(data)
        except TimeoutError:
            # wrapper() treats a TimeoutError from the run as the time-limit signal;
            # presumably it can surface while printing, so it must not be swallowed here.
            raise
        except Exception:
            # Best-effort: drop output when the pipe is broken or closed. Unlike a bare
            # `except:`, KeyboardInterrupt and SystemExit still propagate.
            pass

    def flush(self):
        """No-op: messages are sent immediately, there is no buffer to flush."""
        pass  # no buffering
+ + Status report can be missing when process has been terminated by a SIGTERM. + """ + + original_stdout = sys.stdout + pipe_writer = PipeWriter(conn) + + if not verbose: + warnings.filterwarnings("ignore") + sys.stdout = pipe_writer # only forward to pipe + else: + sys.stdout = Tee(original_stdout, pipe_writer) # forward to pipe and console + + try: + kwargs["verbose"] = verbose + instance_runner.run(**kwargs) + conn.send({"status": "ok"}) + except TimeoutError: + try: + conn.send({"status": "timeout"}) + except (BrokenPipeError, EOFError): + pass + except Exception as e: # capture exceptions and report in state + tb_str = traceback.format_exc() + try: + conn.send({"status": "error", "exception": e, "traceback": tb_str}) + except (BrokenPipeError, EOFError): + pass + #conn.send({"status": "error", "exception": e, "traceback": tb_str}) + finally: + #sys.stdout = original_stdout + conn.close() + +# exec_args = (instance_runner, filename, metadata, open, solver, time_limit, mem_limit, output_file, verbose) +_std_open = open +def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, int, str, bool, bool, str]) -> None: + """ + Solve a single benchmark instance and write results to file immediately. 
+ + Args is a list of: + filename: Path to the instance file + metadata: Dictionary containing instance metadata (year, track, name) + solver: Name of the solver to use + time_limit: Time limit in seconds + mem_limit: Memory limit in MB + output_file: Path to the output CSV file + verbose: Whether to show solver output + """ + + instance_runner, filename, metadata, open, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path = args + + # Fieldnames for the CSV file + fieldnames = list(metadata.keys()) + \ + ['solver', + 'time_total', 'time_parse', 'time_model', 'time_post', 'time_solve', + 'status', 'objective_value', 'solution', 'intermediate', 'checker_result'] + result = dict.fromkeys(fieldnames) # init all fields to None + for k in metadata.keys(): + result[k] = metadata[k] + result['solver'] = solver + + # Decompress before timers start + with open(filename) as f: # <- dataset-specific 'open' callable + filename = f.read() # read to memory-mapped file + + # Start total timing + total_start = time.time() + + # Call xcsp3 in separate process + ctx = multiprocessing.get_context("spawn") + parent_conn, child_conn = multiprocessing.Pipe() # communication pipe between processes + process = ctx.Process(target=wrapper, args=( + instance_runner, + child_conn, + { + "instance": filename, + "solver": solver, + "time_limit": time_limit, + "mem_limit": mem_limit, + "intermediate": intermediate, + "force_mem_limit": True, + "time_buffer": 1, + "cores": cores, + }, + verbose)) + process.start() + process.join(timeout=time_limit) + + # Replicate competition convention on how jobs get terminated + if process.is_alive(): + # Send sigterm to let process know it reached its time limit + os.kill(process.pid, signal.SIGTERM) + # 1 second grace period + process.join(timeout=1) + # Kill if still alive + if process.is_alive(): + os.kill(process.pid, signal.SIGKILL) + process.join() + + result['time_total'] = time.time() - total_start + + # Default 
status if nothing returned by subprocess + # -> process exited prematurely due to sigterm + status = {"status": "error", "exception": "sigterm"} + + # Parse the output to get status, solution and timings + while parent_conn.poll(timeout=1): + line = parent_conn.recv() + + # Received a print statement from the subprocess + if isinstance(line, str): + instance_runner.parse_output_line(line, result) + + # Received a new status from the subprocess + elif isinstance(line, dict): + status = line + + else: + raise() + + # Parse the exit status + if status["status"] == "timeout": + # Ignore timeouts + pass + elif status["status"] == "error": + # All exceptions, put in solution field + if result['solution'] is None: + result['status'] = instance_runner.exit_status.unknown.value + result["solution"] = status["exception"] + + # if checker_path is not None and complete_solution is not None: TODO: generalise 'checkers' for benchmarks + # checker_output, checker_time = run_solution_checker( + # JAR=checker_path, + # instance_location=file_path, + # out_file="'" + complete_solution.replace("\n\r", " ").replace("\n", " ").replace("v ", "").replace("v ", "")+ "'", + # verbose=verbose, + # cpm_time=result.get('time_solve', 0) # or total solve time you have + # ) + + # if checker_output is not None: + # result['checker_result'] = checker_output + # else: + # result['checker_result'] = None + + # Use a lock file to prevent concurrent writes + lock_file = f"{output_file}.lock" + lock = FileLock(lock_file) + try: + with lock: + # Pre-check if file exists to determine if we need to write header + write_header = not os.path.exists(output_file) + + with _std_open(output_file, 'a', newline='') as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + if write_header: + writer.writeheader() + writer.writerow(result) + finally: + # Optional: cleanup if the lock file somehow persists + if os.path.exists(lock_file): + try: + os.remove(lock_file) + except Exception: + pass # avoid crashing on 
def benchmark_runner(
        dataset, instance_runner,
        output_file: str,
        solver: str, workers: int = 1,
        time_limit: int = 300, mem_limit: Optional[int] = 4096, cores: int=1,
        verbose: bool = False, intermediate: bool = False,
        checker_path: Optional[str] = None,
        **kwargs
    ) -> str:
    """
    Run a benchmark over all instances in a dataset using multiple threads.

    Each instance is handed to `execute_instance` on a worker thread; that function
    appends one row per instance to `output_file`.

    Arguments:
        dataset (_Dataset): Dataset object containing instances to benchmark. Iterating it
                            yields (filename, metadata) pairs; its `open` callable is passed on.
        instance_runner (Benchmark): Benchmark runner that implements the run() method.
        output_file (str): Path to the CSV file where results will be stored. Its parent
                           directory is created when missing.
        solver (str): Name of the solver to use.
        workers (int): Number of worker threads running instances in parallel (default=1).
        time_limit (int): Time limit in seconds for each instance (default=300).
        mem_limit (int, optional): Memory limit in MB per instance (default=4096).
        cores (int): Number of CPU cores assigned per instance (default=1).
        verbose (bool): Whether to show solver output in stdout (default=False).
        intermediate (bool): Whether to report intermediate solutions if supported (default=False).
        checker_path (str, optional): Path to a solution checker for validating instance solutions.
        **kwargs: Accepted and ignored, so callers can pass a full argument namespace
                  (e.g. `**vars(args)`).

    Returns:
        str: Path to the CSV file where benchmark results were written.
    """
    # Before Python 3.11 `Future.result` raises concurrent.futures.TimeoutError, which is
    # not the builtin TimeoutError; catch both so the timeout branch works on every version.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    # The CSV is opened in append mode by the workers, which does not create directories.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Process instances in parallel
    with ThreadPoolExecutor(max_workers=workers) as executor:
        # Submit all tasks and track their futures (and a label for error reporting)
        futures = []
        labels = []
        for filename, metadata in dataset:
            labels.append(metadata.get('name', filename))
            futures.append(executor.submit(
                execute_instance,  # below: args
                (instance_runner, filename, metadata, dataset.open, solver, time_limit,
                 mem_limit, cores, output_file, verbose, intermediate, checker_path)))

        # Process results as they complete
        for i, future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")):
            try:
                future.result(timeout=time_limit + 60)  # result is empty; only wait and surface errors
            except (TimeoutError, FuturesTimeoutError):
                pass
            except Exception as e:
                print(f"Job {i}: {labels[i]}, ThreadPoolExecutor caught: {e}")
                if verbose:
                    traceback.print_exc()

    return output_file
def solution_xcsp3(model, useless_style="*", boolean_style="int"):
    """
    Formats a solution according to the XCSP3 specification.

    Arguments:
        model: CPMpy model for which to format its solution (should be solved first)
        useless_style: How to process unused decision variables (with value `None`).
                       If "*", variable is included in reporting with value "*".
                       If "drop", variable is excluded from reporting.
        boolean_style: Print style for boolean constants.
                       "int" results in 0/1, "bool" results in False/True.

    Returns:
        XML-formatted model solution according to XCSP3 specification.

    Raises:
        ValueError: If `useless_style` or `boolean_style` is not one of the documented options.
    """
    # `ET.indent` needs Python 3.9+, where `xml.etree.cElementTree` no longer exists,
    # so bind the supported module locally instead of relying on the legacy alias.
    from xml.etree import ElementTree as ET

    if useless_style not in ("*", "drop"):
        raise ValueError(f"Unknown useless_style: {useless_style!r} (expected '*' or 'drop')")
    if boolean_style not in ("int", "bool"):
        raise ValueError(f"Unknown boolean_style: {boolean_style!r} (expected 'int' or 'bool')")

    # CSP
    if not model.has_objective():
        root = ET.Element("instantiation", type="solution")
    # COP
    else:
        root = ET.Element("instantiation", type="optimum", cost=str(int(model.objective_value())))

    # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
    reported = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]}

    # How useless variables should be handled
    # (variables which have value `None` in the solution)
    values = {}
    for name, var in reported.items():
        val = var.value()  # read once per variable
        if val is None:
            if useless_style == "drop":
                continue
            val = "*"
        values[name] = val

    # Convert booleans
    if boolean_style == "int":
        values = {name: ((1 if val else 0) if isinstance(val, bool) else val) for name, val in values.items()}

    # Build XCSP3 XML tree
    ET.SubElement(root, "list").text = " " + " ".join([str(name) for name in values.keys()]) + " "
    ET.SubElement(root, "values").text = " " + " ".join([str(val) for val in values.values()]) + " "
    tree = ET.ElementTree(root)
    ET.indent(tree, space=" ", level=0)
    res = ET.tostring(root).decode("utf-8")

    return str(res)
def parse_output_line(self, line, result):
    """
    Parse one line of solver output (XCSP3 competition format) into `result`.

    Recognised line prefixes:
        's '         : exit status, stored under 'status'
        'v '         : solution; only the first line (the <instantiation ...> tag) of the first
                       reported solution is stored under 'solution', the value of its 'cost'
                       attribute (as a string) under 'objective_value'
        'c Solution' : comment carrying the time of an intermediate solution, remembered in
                       `self._sol_time` to annotate the following 'o ' line
        'o '         : intermediate objective value, stored under 'objective_value' and, when a
                       solution time is known, appended to 'intermediate' as (time, objective)
        'c took '    : timing comment, stored under 'time_parse' / 'time_model' /
                       'time_post' / 'time_solve'

    Malformed comment lines are ignored instead of aborting the parsing of a whole log.

    Arguments:
        line (str): A single line of output.
        result (dict): Dictionary collecting the parsed fields; updated in place.
    """
    if line.startswith('s '):
        result['status'] = line[2:].strip()

    elif line.startswith('v ') and result['solution'] is None:
        # only record first line, contains 'type' and 'cost'
        solution = line.split("\n")[0][2:].strip()
        result['solution'] = solution
        # Read the attribute value between its quotes rather than slicing off a
        # fixed number of trailing characters.
        _, found, rest = solution.partition('cost="')
        if found:
            result['objective_value'] = rest.partition('"')[0]

    elif line.startswith('c Solution'):
        # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
        _, found, tail = line.rpartition(', time = ')
        if found:
            try:
                self._sol_time = float(tail.replace('s', '').rstrip())
            except ValueError:
                pass  # not a timing comment after all; keep the previous solution time

    elif line.startswith('o '):
        obj = int(line[2:].strip())
        if result['intermediate'] is None:
            result['intermediate'] = []
        if self._sol_time is not None:
            result['intermediate'] += [(self._sol_time, obj)]
        result['objective_value'] = obj

    elif line.startswith('c took '):
        # Parse timing information
        parts = line.split(' seconds to ')
        if len(parts) != 2:
            return
        try:
            time_val = float(parts[0].replace('c took ', ''))
        except ValueError:
            return  # malformed timing comment
        action = parts[1].strip()
        # first matching action prefix wins (same order as before)
        timing_keys = (
            ('parse', 'time_parse'),
            ('convert', 'time_model'),
            ('post', 'time_post'),
            ('solve', 'time_solve'),
        )
        for prefix, key in timing_keys:
            if action.startswith(prefix):
                result[key] = time_val
                break
default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + parser.add_argument('--checker-path', type=str, default=None, + help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.model.xcsp3 import XCSP3Dataset + dataset = XCSP3Dataset(year=args.year, track=args.track, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "xcsp3" / f"xcsp3_{args.year}_{args.track}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = XCSP3Benchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") + + diff --git a/cpmpy/tools/dataset/__init__.py b/cpmpy/tools/dataset/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py new file mode 100644 index 000000000..496780b2d --- /dev/null +++ b/cpmpy/tools/dataset/_base.py @@ -0,0 +1,109 @@ +""" +Dataset Base Class + +This module defines the abstract `_Dataset` class, which serves as the foundation +for loading and managing benchmark instance collections in CPMpy-based experiments. 
class _Dataset(ABC):
    """
    Abstract base class for PyTorch-style datasets of benchmarking instances.

    The `_Dataset` class provides a standardized interface for downloading and
    accessing benchmark instances. This class should not be used on its own.

    Instances are the files inside `dataset_dir` whose name ends with `extension`;
    they are indexed in sorted order.
    """

    def __init__(
            self,
            dataset_dir: str = ".",
            transform=None, target_transform=None,
            download: bool = False,
            extension: str = ".txt",
            **kwargs
    ):
        """
        Arguments:
            dataset_dir (str): Directory holding the instance files.
            transform (callable, optional): Applied to the instance file path in `__getitem__`.
            target_transform (callable, optional): Applied to the metadata dictionary in `__getitem__`.
            download (bool): If True, call `download()` when `dataset_dir` does not exist.
            extension (str): File name suffix identifying instance files.

        Raises:
            ValueError: If `dataset_dir` does not exist and `download=False`,
                        or if no instance file is found in it.
        """
        self.dataset_dir = pathlib.Path(dataset_dir)
        self.transform = transform
        self.target_transform = target_transform
        self.extension = extension

        if not self.dataset_dir.exists():
            if not download:
                raise ValueError(f"Dataset not found. Please set download=True to download the dataset.")
            self.download()
            print(f"Finished downloading {len(self._instance_files())} instances")

        if not self._instance_files():
            raise ValueError("Cannot find any instances inside dataset. Is it a valid dataset? If so, please report on GitHub.")

    def _instance_files(self) -> list:
        """Sorted (deterministic) list of the instance files inside the dataset directory."""
        return sorted(self.dataset_dir.glob(f"*{self.extension}"))

    @abstractmethod
    def category(self) -> dict:
        """
        Labels to distinguish instances into categories matching to those of the dataset.
        E.g.
            - year
            - track
        """
        pass

    @abstractmethod
    def download(self, *args, **kwargs):
        """
        How the dataset should be downloaded.
        """
        pass

    @abstractmethod
    def open(self, instance) -> callable:
        """
        How an instance file from the dataset should be opened.
        Especially useful when files come compressed and won't work with
        python standard library's 'open', e.g. '.xz', '.lzma'.
        """
        pass

    def metadata(self, file) -> dict:
        """
        Basic metadata of an instance: the dataset categories plus 'name' and 'path'.

        The name is the file name without the dataset extension. The extension is removed
        as a suffix of the full file name, so multi-part extensions such as '.xml.lzma'
        are stripped completely.
        """
        name = pathlib.Path(file).name
        if self.extension and name.endswith(self.extension):
            name = name[:-len(self.extension)]
        else:
            name = pathlib.Path(file).stem
        metadata = self.category() | {
            'name': name,
            'path': file,
        }
        return metadata

    def __len__(self) -> int:
        """Return the total number of instances."""
        return len(self._instance_files())

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Get the instance at position `index` (in sorted file order).

        Returns:
            Tuple of the instance (its file path, or whatever `transform` made of it) and
            its metadata dictionary (after `target_transform`, if any).

        Raises:
            IndexError: If `index` is negative or not smaller than the number of instances.
        """
        files = self._instance_files()
        if index < 0 or index >= len(files):
            raise IndexError("Index out of range")

        file_path = str(files[index])

        filename = file_path
        if self.transform:
            # does not need to remain a filename...
            filename = self.transform(filename)

        # Basic metadata about the instance; always derived from the file path, as the
        # transformed instance is not necessarily a path anymore
        metadata = self.metadata(file=file_path)
        if self.target_transform:
            metadata = self.target_transform(metadata)

        return filename, metadata
+ + More information on the competition can be found here: https://maxsat-evaluations.github.io/ + """ + + def __init__( + self, + root: str = ".", + year: int = 2024, track: str = "exact-unweighted", + transform=None, target_transform=None, + download: bool = False + ): + """ + Constructor for a dataset object of the MSE competition. + + Arguments: + root (str): Root directory where datasets are stored or will be downloaded to (default="."). + year (int): Competition year of the dataset to use (default=2024). + track (str): Track name specifying which subset of the competition instances to load (default="exact-unweighted"). + transform (callable, optional): Optional transform applied to the instance file path. + target_transform (callable, optional): Optional transform applied to the metadata dictionary. + download (bool): If True, downloads the dataset if it does not exist locally (default=False). + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested year/track combination is not available. + """ + + self.root = pathlib.Path(root) + self.year = year + self.track = track + + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise ValueError("Track must be specified, e.g. 
def download(self):
    """
    Download the MaxSAT Evaluation archive of `self.year` / `self.track` and
    extract its files (flattened) into `self.dataset_dir`.

    The downloaded zip file is removed afterwards, also when extraction fails.

    Raises:
        ValueError: If the archive could not be downloaded for the requested year and track.
    """
    import shutil  # local import: only needed while extracting

    print(f"Downloading MaxSAT Eval {self.year} {self.track} instances...")

    zip_name = f"mse{str(self.year)[2:]}-{self.track}.zip"
    url = f"https://www.cs.helsinki.fi/group/coreo/MSE{self.year}-instances/"

    url_path = url + zip_name
    zip_path = self.root / zip_name

    # the archive is stored in the root directory, which may not exist yet
    self.root.mkdir(parents=True, exist_ok=True)

    try:
        try:
            urlretrieve(url_path, str(zip_path))
        except (HTTPError, URLError) as e:
            raise ValueError(f"No dataset available for year {self.year} and track {self.track}. Error: {str(e)}") from e

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Create track folder in root directory, parents=True ensures recursive creation
            self.dataset_dir.mkdir(parents=True, exist_ok=True)

            for file_info in zip_ref.infolist():
                if file_info.is_dir():
                    # folder entries would otherwise be written as empty files
                    continue
                # Extract file to the dataset directory, dropping any folder prefix
                filename = pathlib.Path(file_info.filename).name
                with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target:
                    shutil.copyfileobj(source, target)  # stream instead of loading the member in memory
    finally:
        # Clean up the zip file (also on failure, when present)
        zip_path.unlink(missing_ok=True)
+https://www.cril.univ-artois.fr/PB25/ +""" + +import lzma +import os +import pathlib +from urllib.request import urlretrieve +from urllib.error import HTTPError, URLError +import tarfile + +from .._base import _Dataset + + +class OPBDataset(_Dataset): + """ + Pseudo Boolean Competition (PB) benchmark dataset. + + Provides access to benchmark instances from the Pseudo Boolean + competitions. Instances are grouped by `year` and `track` (e.g., + `"OPT-LIN"`, `"DEC-LIN"`) and stored as `.opb.xz` files. + If the dataset is not available locally, it can be automatically + downloaded and extracted. + + More information on the competition can be found here: https://www.cril.univ-artois.fr/PB25/ + """ + + def __init__( + self, + root: str = ".", + year: int = 2024, track: str = "OPT-LIN", + transform=None, target_transform=None, + download: bool = False + ): + """ + Constructor for a dataset object of the PB competition. + + Arguments: + root (str): Root directory where datasets are stored or will be downloaded to (default="."). + year (int): Competition year of the dataset to use (default=2024). + track (str): Track name specifying which subset of the competition instances to load (default="OPT-LIN"). + transform (callable, optional): Optional transform applied to the instance file path. + target_transform (callable, optional): Optional transform applied to the metadata dictionary. + download (bool): If True, downloads the dataset if it does not exist locally (default=False). + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested year/track combination is not available. + """ + + self.root = pathlib.Path(root) + self.year = year + self.track = track + + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise ValueError("Track must be specified, e.g. 
def download(self):
    """
    Download the PB competition archive and extract the instances of
    `self.track` into `self.dataset_dir`.

    Sub-folders below the track folder are flattened by replacing "/" with "_"
    in the instance file name. The downloaded tar file is removed afterwards,
    also when extraction fails.

    Raises:
        ValueError: If the archive could not be downloaded, has no main folder,
                    or does not contain the requested track.
    """
    import shutil  # local import: only needed while extracting

    # TODO: add option to filter on competition instances
    print(f"Downloading OPB {self.year} {self.track} instances...")

    # NOTE(review): the competition folder in the URL is fixed to 'PB24' while the archive
    # name follows `self.year` -- confirm where the archives of other years are hosted.
    url = f"https://www.cril.univ-artois.fr/PB24/benchs/"
    year_suffix = str(self.year)[2:]  # Drop the starting '20'
    url_path = url + f"normalized-PB{year_suffix}.tar"
    tar_path = self.root / f"normalized-extraPB{year_suffix}.tar"

    # the archive is stored in the root directory, which may not exist yet
    self.root.mkdir(parents=True, exist_ok=True)

    try:
        try:
            urlretrieve(url_path, str(tar_path))
        except (HTTPError, URLError) as e:
            raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}") from e

        # Extract only the specific track folder from the tar
        with tarfile.open(tar_path, "r:*") as tar_ref:  # r:* handles .tar, .tar.gz, .tar.bz2, etc.
            members = tar_ref.getmembers()  # read the archive index only once

            # Get the main folder name: first path component of the first nested entry
            main_folder = next((m.name.split("/")[0] for m in members if "/" in m.name), None)
            if main_folder is None:
                raise ValueError(f"Could not find main folder in tar file")

            # Get all unique track names from tar (second path component of nested entries)
            tracks = set()
            for member in members:
                parts = member.name.split("/")
                if len(parts) > 2 and parts[0] == main_folder:
                    tracks.add(parts[1])

            # Check if requested track exists
            if self.track not in tracks:
                raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}")

            # Create track folder in root directory
            self.dataset_dir.mkdir(parents=True, exist_ok=True)

            # Extract files for the specified track
            prefix = f"{main_folder}/{self.track}/"
            for member in members:
                if not (member.name.startswith(prefix) and member.isfile()):
                    continue
                # Path relative to main_folder/track
                relative_path = member.name[len(prefix):]

                # Flatten: replace "/" with "_" to encode subfolders (some instances have clashing names)
                flat_name = relative_path.replace("/", "_")
                target_path = self.dataset_dir / flat_name

                with tar_ref.extractfile(member) as source, open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)  # stream instead of loading the member in memory
    finally:
        # Clean up the tar file (also on failure, when present)
        tar_path.unlink(missing_ok=True)
+ + More information on the competition can be found here: https://xcsp.org/competitions/ + """ + + def __init__( + self, + root: str = ".", + year: int = 2023, track: str = "CSP", + transform=None, target_transform=None, + download: bool = False + ): + """ + Constructor for a dataset object of the XCP3 competition. + + Arguments: + root (str): Root directory where datasets are stored or will be downloaded to (default="."). + year (int): Competition year of the dataset to use (default=2024). + track (str): Track name specifying which subset of the competition instances to load (default="CSP"). + transform (callable, optional): Optional transform applied to the instance file path. + target_transform (callable, optional): Optional transform applied to the metadata dictionary. + download (bool): If True, downloads the dataset if it does not exist locally (default=False). + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested year/track combination is not available. + """ + + self.root = pathlib.Path(root) + self.year = year + self.track = track + + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise ValueError("Track must be specified, e.g. 
def download(self):
    """
    Download the XCSP3 competition archive of `self.year` and extract the
    instances of `self.track` (flattened) into `self.dataset_dir`.

    The downloaded zip file is removed afterwards, also when extraction fails.

    Raises:
        ValueError: If the archive could not be downloaded, has no main folder,
                    or does not contain the requested track.
    """
    import shutil  # local import: only needed while extracting

    print(f"Downloading XCSP3 {self.year} instances...")

    url = f"https://www.cril.univ-artois.fr/~lecoutre/compets/"
    year_suffix = str(self.year)[2:]  # Drop the starting '20'
    url_path = url + f"instancesXCSP{year_suffix}.zip"
    zip_path = self.root / f"instancesXCSP{year_suffix}.zip"

    # the archive is stored in the root directory, which may not exist yet
    self.root.mkdir(parents=True, exist_ok=True)

    try:
        try:
            urlretrieve(url_path, str(zip_path))
        except (HTTPError, URLError) as e:
            raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}") from e

        # Extract only the specific track folder from the zip
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            entries = zip_ref.infolist()

            # Get the main folder name (e.g., "024_V3"): first path component of the first nested entry
            main_folder = next((e.filename.split('/')[0] for e in entries if '/' in e.filename), None)
            if main_folder is None:
                raise ValueError(f"Could not find main folder in zip file")

            # Get all unique track names from zip (second path component of nested entries)
            tracks = set()
            for file_info in entries:
                parts = file_info.filename.split('/')
                if len(parts) > 2 and parts[0] == main_folder:
                    tracks.add(parts[1])

            # Check if requested track exists
            if self.track not in tracks:
                raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}")

            # Create track folder in root directory, parents=True ensures recursive creation
            self.dataset_dir.mkdir(parents=True, exist_ok=True)

            # Extract files for the specified track
            prefix = f"{main_folder}/{self.track}/"
            for file_info in entries:
                if not file_info.filename.startswith(prefix):
                    continue
                if file_info.is_dir():
                    # the track folder itself (and sub-folders) would otherwise be written as empty files
                    continue
                # Extract file to track_dir, removing main_folder/track prefix
                filename = pathlib.Path(file_info.filename).name
                with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target:
                    shutil.copyfileobj(source, target)  # stream instead of loading the member in memory
    finally:
        # Clean up the zip file (also on failure, when present)
        zip_path.unlink(missing_ok=True)
+ + More information on JSPLib can be found here: https://github.com/tamy0612/JSPLIB + """ + + def __init__(self, root: str = ".", transform=None, target_transform=None, download: bool = False): + """ + Initialize the PSPLib Dataset. + + Arguments: + root (str): Root directory containing the jsp instances (if 'download', instances will be downloaded to this location) + transform (callable, optional): Optional transform to be applied on the instance data + target_transform (callable, optional): Optional transform to be applied on the file path + download (bool): If True, downloads the dataset from the internet and puts it in `root` directory + """ + + self.root = pathlib.Path(root) + self.instance_dir = pathlib.Path(join(self.root, "jsplib")) + self.metadata_file = "instances.json" + self.transform = transform + self.target_transform = target_transform + + # Create root directory if it doesn't exist + self.root.mkdir(parents=True, exist_ok=True) + + print(self.instance_dir, self.instance_dir.exists(), self.instance_dir.is_dir()) + if not self.instance_dir.exists(): + if not download: + raise ValueError(f"Dataset not found in local file system. Please set download=True to download the dataset.") + else: + url = f"https://github.com/tamy0612/JSPLIB/archive/refs/heads/master.zip" # download full repo... + url_path = url + zip_path = pathlib.Path(join(root,"jsplib-master.zip")) + + print(f"Downloading JSPLib instances..") + + try: + urlretrieve(url_path, str(zip_path)) + except (HTTPError, URLError) as e: + raise ValueError(f"No dataset available on {url}. 
def __len__(self) -> int:
    """
    Return the total number of instances.

    Uses the same file pattern as `__getitem__`, so the metadata file
    ('instances.json') is not counted and every index below `len(self)` is valid.
    """
    # NOTE(review): '[!.json]' is a character class, it skips every file whose name
    # ends in one of '.', 'j', 's', 'o', 'n' -- kept identical to `__getitem__`.
    return len(list(self.instance_dir.glob("*[!.json]")))  # exclude metadata file
def parse_jsp(filename: str):
    """
    Parse a JSPLib instance file.

    The file may start with comment lines (starting with '#'), followed by a header line
    holding the number of jobs and the number of tasks per job, followed by one row per
    job of alternating (machine, duration) values. Values may be separated by any
    whitespace (spaces or tabs).

    Returns two matrices of shape (n_jobs, n_tasks):
        - task to machines: indicating on which machine to run which task
        - task durations: indicating the duration of each task
    """
    with open(filename, "r") as f:
        line = f.readline()
        while line.startswith("#"):
            line = f.readline()
        # split on any whitespace: the header may use tabs or several spaces
        n_jobs, n_tasks = map(int, line.split())
        values = np.array(f.read().split(), dtype=int)

    matrix = values.reshape((n_jobs, n_tasks * 2))

    # even columns hold the machines, odd columns the durations
    task_to_machines = matrix[:, 0::2].copy()
    task_durations = matrix[:, 1::2].copy()

    return task_to_machines, task_durations
TODO + + model = cp.Model() + model += start + dur == end + model += end[:,:-1] <= start[:,1:] # precedences + + for machine in set(task_to_machines.flat): + model += cp.NoOverlap(start[task_to_machines == machine], + dur[task_to_machines == machine], + end[task_to_machines == machine]) + + model += end <= makespan + model.minimize(makespan) + + return model, (start, makespan) + + +if __name__ == "__main__": + + dataset = JSPLibDataset(root=".", download=True, transform=parse_jsp) + print("Dataset size:", len(dataset)) + print("Instance 0:") + (machines, dur), metadata = dataset[0] + print("Machines:", machines) + print("Durations:", dur) + print("Metadata:", metadata) + + print("Solving", metadata['name']) + model, (start, makespan) = jobshop_model(task_to_machines=machines, task_durations=dur) + assert model.solve(time_limit=10) + + import pandas as pd + import plotly.express as px + import plotly.io as pio + pio.renderers.default = "browser" # ensure plotly opens figure in browser + + df = pd.DataFrame({"Start": start.value().flat, "Duration": dur.flat, "Machine": machines.flat}) + df["Job"] = [j for j in range(metadata['jobs']) for _ in range(metadata['machines']) ] + df["Task"] = [j for _ in range(metadata['machines']) for j in range(metadata['jobs'])] + df["Name"] = "T" + df["Job"].astype(str) + "-" + df["Task"].astype(str) + print(df) + ghant_fig = px.bar(df, orientation='h', + base="Start", x="Duration", y="Machine", color="Job", text="Name", + title=f"Jobshop instance {metadata['name']}, makespan: {makespan.value()}, status: {model.status()}" + ) + ghant_fig.show() \ No newline at end of file diff --git a/cpmpy/tools/dataset/problem/psplib.py b/cpmpy/tools/dataset/problem/psplib.py new file mode 100644 index 000000000..89f0e93c7 --- /dev/null +++ b/cpmpy/tools/dataset/problem/psplib.py @@ -0,0 +1,136 @@ +""" +PSPlib Dataset + +https://www.om-db.wi.tum.de/psplib/getdata_sm.html +""" +import os +import pathlib +from typing import Tuple, Any +from 
class PSPLibDataset(object):  # torch.utils.data.Dataset compatible

    """
    PSPlib Dataset in a PyTorch compatible format.

    Instances are stored as individual files under ``<root>/<variant>/<family>/``.

    More information on PSPlib can be found here: https://www.om-db.wi.tum.de/psplib/main.html
    """

    def __init__(self, root: str = ".", variant: str = "rcpsp", family: str = "j30", transform=None, target_transform=None, download: bool = False):
        """
        Constructor for a dataset object for PSPlib.

        Arguments:
            root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location)
            variant (str): scheduling variant (only 'rcpsp' is supported for now)
            family (str): family name (e.g. j30, j60, etc...)
            transform (callable, optional): Optional transform to be applied on the instance data
            target_transform (callable, optional): Optional transform to be applied on the metadata
            download (bool): If True, downloads the dataset from the internet and puts it in `root` directory

        Raises:
            ValueError: If the dataset directory does not exist and `download=False`,
                or if the requested variant/family combination is not available.
        """

        self.root = pathlib.Path(root)
        self.variant = variant
        self.family = family
        self.transform = transform
        self.target_transform = target_transform
        self.family_dir = pathlib.Path(os.path.join(self.root, variant, family))

        self.families = dict(
            rcpsp = ["j30", "j60", "j90", "j120"]
        )
        # file extension (and zip-name infix) per variant
        self.family_codes = dict(rcpsp="sm", mrcpsp="mm")

        if variant != "rcpsp":
            raise ValueError("Only 'rcpsp' variant is supported for now")
        if family not in self.families[variant]:
            raise ValueError(f"Unknown problem family. Must be any of {','.join(self.families[variant])}")

        # Create root directory if it doesn't exist
        self.root.mkdir(parents=True, exist_ok=True)

        if not self.family_dir.exists():
            if not download:
                raise ValueError(f"Dataset for variant {variant} and family {family} not found. Please set download=True to download the dataset.")
            self._download()

    def _download(self) -> None:
        """Download the zip archive of the configured family and unpack it flat into `family_dir`."""
        print(f"Downloading PSPLib {self.variant} {self.family} instances...")

        zip_name = f"{self.family}.{self.family_codes[self.variant]}.zip"
        url_path = "https://www.om-db.wi.tum.de/psplib/files/" + zip_name
        zip_path = self.root / zip_name

        try:
            try:
                urlretrieve(url_path, str(zip_path))
            except (HTTPError, URLError) as e:
                raise ValueError(f"No dataset available for variant {self.variant} and family {self.family}. Error: {str(e)}") from e

            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # parents=True ensures recursive creation of <root>/<variant>/<family>
                self.family_dir.mkdir(parents=True, exist_ok=True)

                for file_info in zip_ref.infolist():
                    # Directory entries have no file name of their own; opening
                    # `family_dir / ""` for writing would fail.
                    if file_info.is_dir():
                        continue
                    # Flatten: drop any folder prefix stored in the archive
                    filename = pathlib.Path(file_info.filename).name
                    with zip_ref.open(file_info) as source, open(self.family_dir / filename, 'wb') as target:
                        target.write(source.read())
        finally:
            # Clean up the zip file, also when download/extraction failed half-way
            if zip_path.exists():
                zip_path.unlink()

    def _instance_files(self) -> list:
        """Return all instance paths of this family, sorted for deterministic indexing."""
        # TODO: use natsort instead?
        return sorted(self.family_dir.glob(f"*.{self.family_codes[self.variant]}"))

    def open(self, instance: os.PathLike):
        """Open `instance` for reading in text mode and return the file object."""
        return open(instance, "r")

    def __len__(self) -> int:
        """Return the total number of instances."""
        return len(self._instance_files())

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Get a single RCPSP instance filename and metadata.

        Args:
            index (int): Index of the instance to retrieve (negative indices are not supported)

        Returns:
            Tuple[Any, Any]: A tuple containing:
                - The filename of the instance (or the result of `transform` applied to it)
                - Metadata dictionary with keys `variant`, `family` and `name`
                  (or the result of `target_transform` applied to it)

        Raises:
            IndexError: If `index` is negative or not smaller than `len(self)`.
        """
        files = self._instance_files()
        if index < 0 or index >= len(files):
            raise IndexError("Index out of range")
        file_path = files[index]

        filename = str(file_path)
        if self.transform:
            # does not need to remain a filename...
            filename = self.transform(filename)

        # Basic metadata about the instance
        metadata = dict(
            variant = self.variant,
            family = self.family,
            name = file_path.stem
        )

        if self.target_transform:
            metadata = self.target_transform(metadata)

        return filename, metadata
Reads in an instance and returns its matching CPMpy model. + + Arguments: + jsp (str or os.PathLike): + - A file path to a JSPlib file + - OR a string containing the JSPLib content directly + open: (callable): + If jsp is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + cp.Model: The CPMpy model of the JSPLib instance. + """ + # If rcpsp is a path to a file -> open file + if isinstance(jsp, (str, os.PathLike)) and os.path.exists(jsp): + if open is not None: + f = open(jsp) + else: + f = _std_open(jsp, "rt") + # If rcpsp is a string containing a model -> create a memory-mapped file + else: + f = StringIO(jsp) + + + task_to_machines, task_durations = _parse_jsplib(f) + model, (start, makespan) = _model_jsplib(task_to_machines=task_to_machines, task_durations=task_durations) + return model + + +def _parse_jsplib(f): + """ + Parse a JSPLib instance file + Returns two matrices: + - task to machines indicating on which machine to run which task + - task durations: indicating the duration of each task + """ + + line = f.readline() + while line.startswith("#"): + line = f.readline() + n_jobs, n_tasks = map(int, line.strip().split(" ")) + matrix = np.fromstring(f.read(), sep=" ", dtype=int).reshape((n_jobs, n_tasks*2)) + + task_to_machines = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + task_durations = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + + for t in range(n_tasks): + task_to_machines[:, t] = matrix[:, t*2] + task_durations[:, t] = matrix[:, t*2+1] + + return task_to_machines, task_durations + + + +def _model_jsplib(task_to_machines, task_durations): + + task_to_machines = np.array(task_to_machines) + dur = np.array(task_durations) + + assert task_to_machines.shape == task_durations.shape + + n_jobs, n_tasks = task_to_machines.shape + + start = cp.intvar(0, task_durations.sum(), name="start", shape=(n_jobs,n_tasks)) # extremely bad upperbound... 
def main():
    """
    Command-line entry point: read a JSPLib instance, solve it and print the outcome.

    Exits with status 1 (message on stderr) when reading or solving raises.
    """
    parser = argparse.ArgumentParser(description="Parse and solve a JSPLib model using CPMpy")
    parser.add_argument("model", help="Path to a JSPLib file (or raw JSPLib string if --string is given)")
    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw JSPLib string instead of a file path")
    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
    args = parser.parse_args()

    # Build the CPMpy model
    try:
        source = args.model if args.string else os.path.expanduser(args.model)
        model = read_jsplib(source)
    except Exception as e:
        sys.stderr.write(f"Error reading model: {e}\n")
        sys.exit(1)

    # Solve the model (only pass `solver` when the user asked for one)
    solve_kwargs = dict(time_limit=args.time_limit)
    if args.solver:
        solve_kwargs["solver"] = args.solver
    try:
        result = model.solve(**solve_kwargs)
    except Exception as e:
        sys.stderr.write(f"Error solving model: {e}\n")
        sys.exit(1)

    # Print results
    print("Status:", model.status())
    # `solve` reports success as a truthy value; testing `is not None` would
    # treat an unsuccessful (False) solve as solved.
    if result:
        if model.has_objective():
            print("Objective:", model.objective_value())
    else:
        print("No solution found.")
# Regular expressions
# Header comment line: group 2 = number of variables, group 3 = number of constraints
HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*')
# One weighted term: group 1 = signed integer weight, group 2 = one or more (possibly negated) variables
TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)")
# Objective lines start with "min:"
OBJ_TERM_RE = re.compile(r'^min:')
# Relational operator followed by the integer right-hand side
# (a single definition; an earlier character-class variant was overwritten and never used)
IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)')
# OPB relational operator -> name of the CPMpy comparison
_OPB_COMPARATORS = {"=": "==", ">=": ">=", "<=": "<="}


def _parse_constraint(line, vars):
    """
    Parse a single OPB constraint line into a CPMpy comparison expression.

    Arguments:
        line (str):                 A string representing a single OPB constraint.
        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables. Will be indexed to get the variables for the constraint.

    Returns:
        cp.expressions.core.Comparison: A CPMpy comparison expression representing
                                        the constraint.

    Raises:
        ValueError: if the line contains no relational operator followed by an integer.

    Example:
        >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars)
        sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1
    """

    match = IND_TERM_RE.search(line)
    if match is None:
        raise ValueError(f"No comparison operator found in constraint: {line!r}")
    op, ind_term = match.groups()
    lhs = _parse_term(line, vars)

    # IND_TERM_RE only captures a signed integer, so the right-hand side is always a constant.
    # Every operator the regex accepts is mapped explicitly ('<=' used to be turned into '>=').
    return cp.expressions.core.Comparison(
        name=_OPB_COMPARATORS[op],
        left=lhs,
        right=int(ind_term)
    )

_std_open = open
def read_opb(opb: Union[str, os.PathLike], open=open) -> cp.Model:
    """
    Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model.

    Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file

    Supports:
        - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2)
        - Negated variables using '~' (e.g., ~x5)
        - Minimisation objective
        - Comparison operators in constraints: '=', '>=', '<='

    Arguments:
        opb (str or os.PathLike):
            - A file path to an OPB file
            - OR a string containing the OPB content directly
        open: (callable):
            If opb is the path to a file, a callable to "open" that file (default=python standard library's 'open').
            It must return a text-mode file object; pass a suitable opener to read compressed files.

    Returns:
        cp.Model: The CPMpy model of the OPB instance.

    Raises:
        ValueError: if no header is found on the first two lines.

    Example:
        >>> opb_text = '''
        ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13
        ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ;
        ... 2 x2 x3 -1 x1 ~x3 = 5 ;
        ... '''
        >>> model = read_opb(opb_text)
        >>> print(model)
        Model(...)

    Notes:
        - Comment lines starting with '*' are ignored.
        - Only "min:" objectives are supported; "max:" is not recognized.
    """

    # If opb is a path to a file -> open file
    if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb):
        if open is not None:
            f = open(opb)
        else:
            f = _std_open(opb, "rt")
    # If opb is a string containing a model -> wrap it in an in-memory text stream
    else:
        f = StringIO(opb)

    try:
        # Look for header on first line
        line = f.readline()
        header = HEADER_RE.match(line)
        if not header:  # If not found on first line, look on second (happens when passing multi line string)
            _line = f.readline()
            header = HEADER_RE.match(_line)
            if not header:
                raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...")
        nr_vars = int(header.group(2))

        # Generator without empty and comment lines
        reader = (l for l in map(str.strip, f) if l and l[0] != '*')

        # CPMpy objects
        vars = cp.boolvar(shape=nr_vars, name="x")
        if nr_vars == 1:
            vars = cp.cpm_array([vars])  # ensure vars is indexable even for single variable case
        model = cp.Model()

        # Special case for first line -> might contain objective function.
        # A default avoids leaking StopIteration when the instance has a header only.
        first_line = next(reader, None)
        if first_line is not None:
            if OBJ_TERM_RE.match(first_line):
                model.minimize(_parse_term(first_line, vars))
            else:  # no objective found, parse as a constraint instead
                model.add(_parse_constraint(first_line, vars))

            # Remaining lines are all constraints
            for line in reader:
                model.add(_parse_constraint(line, vars))
    finally:
        f.close()  # the stream was opened here, so release it here

    return model
parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_opb(args.model) + else: + model = read_opb(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() diff --git a/cpmpy/tools/rcpsp/__init__.py b/cpmpy/tools/rcpsp/__init__.py new file mode 100644 index 000000000..b24d99980 --- /dev/null +++ b/cpmpy/tools/rcpsp/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with psplib-formatted rcpsp CP models. + + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser +""" + +from .parser import read_rcpsp diff --git a/cpmpy/tools/rcpsp/parser.py b/cpmpy/tools/rcpsp/parser.py new file mode 100644 index 000000000..cadc32482 --- /dev/null +++ b/cpmpy/tools/rcpsp/parser.py @@ -0,0 +1,171 @@ +""" +Parser for the PSPLIB RCPSP format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_rcpsp +""" + + +import os +import sys +import lzma +import argparse +import cpmpy as cp +from io import StringIO +from typing import Union + + +_std_open = open +def read_rcpsp(rcpsp: Union[str, os.PathLike], open=open) -> cp.Model: + """ + Parser for PSPLIB RCPSP format. Reads in an instance and returns its matching CPMpy model. 
+ + Arguments: + rcpsp (str or os.PathLike): + - A file path to a PSPLIB RCPSP file + - OR a string containing the RCPSP content directly + open: (callable): + If rcpsp is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + cp.Model: The CPMpy model of the PSPLIB RCPSP instance. + """ + # If rcpsp is a path to a file -> open file + if isinstance(rcpsp, (str, os.PathLike)) and os.path.exists(rcpsp): + if open is not None: + f = open(rcpsp) + else: + f = _std_open(rcpsp, "rt") + # If rcpsp is a string containing a model -> create a memory-mapped file + else: + f = StringIO(rcpsp) + + + table, capacities = _parse_rcpsp(f) + model, (start, end, makespan) = _model_rcpsp(job_data=table, capacities=capacities) + return model + +def _parse_rcpsp(f): + + data = dict() + + line = f.readline() + while not line.startswith("PRECEDENCE RELATIONS:"): + line = f.readline() + + f.readline() # skip keyword line + line = f.readline() # first line of table, skip + while not line.startswith("*****"): + jobnr, n_modes, n_succ, *succ = [int(x) for x in line.split(" ") if len(x.strip())] + assert len(succ) == n_succ, "Expected %d successors for job %d, got %d" % (n_succ, jobnr, len(succ)) + data[jobnr] = dict(num_modes=n_modes, successors=succ) + line = f.readline() + + # skip to job info + while not line.startswith("REQUESTS/DURATIONS:"): + line = f.readline() + + line = f.readline() + _j, _m, _d, *_r = [x.strip() for x in line.split(" ") if len(x.strip())] # first line of table + resource_names = [f"{_r[i]}{_r[i+1]}" for i in range(0,len(_r),2)] + line = f.readline() # first line of table + if line.startswith("----") or line.startswith("*****"): # intermediate line in table... 
+ line = f.readline() # skip + + while not line.startswith("*****"): + jobnr, mode, duration, *resources = [int(x) for x in line.split(" ") if len(x.strip())] + assert len(resources) == len(resource_names), "Expected %d resources for job %d, got %d" % (len(resource_names), jobnr, len(resources)) + data[jobnr].update(dict(mode=mode, duration=duration)) + data[jobnr].update({name : req for name, req in zip(resource_names, resources)}) + line = f.readline() + + # read resource availabilities + while not line.startswith("RESOURCEAVAILABILITIES:"): + line = f.readline() + + f.readline() # skip header + capacities = [int(x) for x in f.readline().split(" ") if len(x)] + + import pandas as pd + df =pd.DataFrame([dict(jobnr=k ,**info) for k, info in data.items()], + columns=["jobnr", "mode", "duration", "successors", *resource_names]) + df.set_index("jobnr", inplace=True) + + return df, dict(zip(resource_names, capacities)) + +def _model_rcpsp(job_data, capacities): + + model = cp.Model() + + horizon = job_data.duration.sum() # worst case, all jobs sequential on a machine + makespan = cp.intvar(0, horizon, name="makespan") + + start = cp.intvar(0, horizon, name="start", shape=len(job_data)) + end = cp.intvar(0, horizon, name="end", shape=len(job_data)) + + # ensure capacity is not exceeded + for rescource, capa in capacities.items(): + model += cp.Cumulative( + start = start, + duration = job_data['duration'].tolist(), + end = end, + demand = job_data[rescource].tolist(), + capacity = capa + ) + + # enforce precedences + for idx, (jobnr, info) in enumerate(job_data.iterrows()): + for succ in info['successors']: + model += end[idx] <= start[succ-1] # job ids start at idx 1 + + model += end <= makespan + model.minimize(makespan) + + return model, (start, end, makespan) + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a PSPLIB RCPSP model using CPMpy") + parser.add_argument("model", help="Path to a PSPLIB RCPSP file (or raw RCPSP string if 
--string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw RCPSP string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_rcpsp(args.model) + else: + model = read_rcpsp(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py new file mode 100644 index 000000000..e2db10412 --- /dev/null +++ b/cpmpy/tools/wcnf/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with WCNF-formatted CP models. + + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser +""" + +from .parser import read_wcnf diff --git a/cpmpy/tools/wcnf/parser.py b/cpmpy/tools/wcnf/parser.py new file mode 100644 index 000000000..84b484979 --- /dev/null +++ b/cpmpy/tools/wcnf/parser.py @@ -0,0 +1,132 @@ +""" +Parser for the WCNF format. + + +================= +List of functions +================= + +.. 
def _get_var(i, vars_dict):
    """
    Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable.

    Arguments:
        i: index
        vars_dict (dict): dictionary to keep track of previously generated decision variables
    """
    if i not in vars_dict:
        vars_dict[i] = cp.boolvar(name=f"x{i}")  # <- be careful that name doesn't clash with generated variables during transformations / user variables
    return vars_dict[i]


def _parse_clause(tokens, vars_dict):
    """Turn integer literal tokens into a list of (possibly negated) variables, dropping the 0 terminator."""
    return [_get_var(lit, vars_dict) if lit > 0 else ~_get_var(-lit, vars_dict)
            for lit in map(int, tokens) if lit != 0]


_std_open = open
def read_wcnf(wcnf: Union[str, os.PathLike], open=open) -> cp.Model:
    """
    Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.

    Lines starting with 'c' are comments. Lines starting with 'h' are hard clauses
    and become constraints. Every other non-empty line is read as a soft clause
    `<weight> <literals...>`; the model maximizes the total weight of satisfied
    soft clauses.

    Arguments:
        wcnf (str or os.PathLike):
            - A file path to a WCNF file
            - OR a string containing the WCNF content directly
        open: (callable):
            If wcnf is the path to a file, a callable to "open" that file (default=python standard library's 'open').
            It must return a text-mode file object; pass a suitable opener to read compressed files.

    Returns:
        cp.Model: The CPMpy model of the WCNF instance.
    """
    # If wcnf is a path to a file -> open file
    if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf):
        if open is not None:
            f = open(wcnf)
        else:
            f = _std_open(wcnf, "rt")
    # If wcnf is a string containing a model -> wrap it in an in-memory text stream
    else:
        f = StringIO(wcnf)

    model = cp.Model()
    vars = {}
    soft_terms = []

    try:
        for raw in f:
            line = raw.strip()

            # Empty line or a comment -> skip
            if not line or line.startswith("c"):
                continue

            # Hard clause
            if line[0] == "h":
                model.add(cp.any(_parse_clause(line[1:].split(), vars)))

            # Soft clause (weight first)
            else:
                weight, *literals = line.split()
                soft_terms.append(int(weight) * cp.any(_parse_clause(literals, vars)))
    finally:
        f.close()  # the stream was opened here, so release it here

    # Objective = sum of soft clause terms
    if soft_terms:
        model.maximize(sum(soft_terms))

    return model
time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cpmpy/tools/xcsp3/__init__.py b/cpmpy/tools/xcsp3/__init__.py index d5abf2766..9572943d8 100644 --- a/cpmpy/tools/xcsp3/__init__.py +++ b/cpmpy/tools/xcsp3/__init__.py @@ -4,127 +4,24 @@ ## __init__.py ## """ - Set of utilities for working with XCSP3-formatted CP models. - - - ================= - List of functions - ================= - - .. autosummary:: - :nosignatures: - - read_xcsp3 - - ======================== - List of helper functions - ======================== - - .. autosummary:: - :nosignatures: - - _parse_xcsp3 - _load_xcsp3 - - ================== - List of submodules - ================== - - .. autosummary:: - :nosignatures: - - parser_callbacks - analyze - benchmark - xcsp3_cpmpy - dataset - globals +Set of utilities for working with XCSP3-formatted CP models. + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser + parser_callbacks + analyze + benchmark + xcsp3_cpmpy + dataset + globals """ -from io import StringIO -import lzma -import os -import cpmpy as cp - -# Special case for optional cpmpy dependencies -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from pycsp3.parser.xparser import CallbackerXCSP3, ParserXCSP3 from .dataset import XCSP3Dataset # for easier importing - -def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3": - """ - Parses an XCSP3 instance file (.xml) and returns a `ParserXCSP3` instance. - - Arguments: - path: location of the XCSP3 instance to read (expects a .xml file). - - Returns: - A parser object. 
- """ - try: - from pycsp3.parser.xparser import ParserXCSP3 - except ImportError as e: - raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. " - "Please install it with `pip install pycsp3`.") from e - - parser = ParserXCSP3(path) - return parser - -def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model: - """ - Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model. - - Arguments: - parser (ParserXCSP3): A parser object to load from. - - Returns: - The XCSP3 instance loaded as a CPMpy model. - """ - from .parser_callbacks import CallbacksCPMPy - from pycsp3.parser.xparser import CallbackerXCSP3 - callbacks = CallbacksCPMPy() - callbacks.force_exit = True - callbacker = CallbackerXCSP3(parser, callbacks) - callbacker.load_instance() - model = callbacks.cpm_model - - return model - - -def read_xcsp3(path: os.PathLike) -> cp.Model: - """ - Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model. - - Arguments: - path: location of the XCSP3 instance to read (expects a .xml or .xml.lzma file). - - Returns: - The XCSP3 instance loaded as a CPMpy model. - """ - # Decompress on the fly if still in .lzma format - if str(path).endswith(".lzma"): - path = decompress_lzma(path) - - # Parse and create CPMpy model - parser = _parse_xcsp3(path) - model = _load_xcsp3(parser) - return model - -def decompress_lzma(path: os.PathLike) -> StringIO: - """ - Decompresses a .lzma file. 
- - Arguments: - path: Location of .lzma file - - Returns: - Memory-mapped decompressed file - """ - # Decompress the XZ file - with lzma.open(path, 'rt', encoding='utf-8') as f: - return StringIO(f.read()) # read to memory-mapped file - - - \ No newline at end of file +from .parser import read_xcsp3 \ No newline at end of file diff --git a/cpmpy/tools/xcsp3/parser.py b/cpmpy/tools/xcsp3/parser.py new file mode 100644 index 000000000..761ef7caa --- /dev/null +++ b/cpmpy/tools/xcsp3/parser.py @@ -0,0 +1,146 @@ +""" +Parser for the XCSP3 format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_xcsp3 + +======================== +List of helper functions +======================== + +.. autosummary:: + :nosignatures: + + _parse_xcsp3 + _load_xcsp3 +""" + +import os +import sys +import argparse +from io import StringIO + +import cpmpy as cp + +# Special case for optional cpmpy dependencies +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pycsp3.parser.xparser import ParserXCSP3 + +def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3": + """ + Parses an XCSP3 instance file (.xml) and returns a `ParserXCSP3` instance. + + Arguments: + path: location of the XCSP3 instance to read (expects a .xml file). + + Returns: + A parser object. + """ + try: + from pycsp3.parser.xparser import ParserXCSP3 + except ImportError as e: + raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. " + "Please install it with `pip install pycsp3`.") from e + + parser = ParserXCSP3(path) + return parser + +def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model: + """ + Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model. + + Arguments: + parser (ParserXCSP3): A parser object to load from. + + Returns: + The XCSP3 instance loaded as a CPMpy model. 
+ """ + from .parser_callbacks import CallbacksCPMPy + from pycsp3.parser.xparser import CallbackerXCSP3 + callbacks = CallbacksCPMPy() + callbacks.force_exit = True + callbacker = CallbackerXCSP3(parser, callbacks) + callbacker.load_instance() + model = callbacks.cpm_model + + return model + +_std_open = open +def read_xcsp3(xcsp3: os.PathLike, open=open) -> cp.Model: + """ + Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model. + + Arguments: + xcsp3 (str or os.PathLike): + - A file path to an WCNF file (optionally LZMA-compressed with `.lzma`) + - OR a string containing the WCNF content directly + open: (callable): + If wcnf is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + The XCSP3 instance loaded as a CPMpy model. + """ + # If wcnf is a path to a file -> open file + if isinstance(xcsp3, (str, os.PathLike)) and os.path.exists(xcsp3): + if open is not None: + f = open(xcsp3) + else: + f = _std_open(xcsp3, "rt") + # If wcnf is a string containing a model -> create a memory-mapped file + else: + f = StringIO(xcsp3) + + # Parse and create CPMpy model + parser = _parse_xcsp3(f) + model = _load_xcsp3(parser) + return model + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a WCNF model using CPMpy") + parser.add_argument("model", help="Path to a WCNF file (or raw WCNF string if --string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw WCNF string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_xcsp3(args.model) + else: + model = 
read_xcsp3(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() + \ No newline at end of file