microsoft · iFurySt · Aug 24, 2025
diff --git a/aiopslab/orchestrator/orchestrator.py b/aiopslab/orchestrator/orchestrator.py
@@ -19,7 +19,7 @@
 
 
 class Orchestrator:
-    def __init__(self):
+    def __init__(self, results_dir=None):
         self.agent = None
         self.session = None
         self.parser = ResponseParser()
@@ -29,6 +29,7 @@ def __init__(self):
         self.execution_end_time = None
         self.kubectl = KubeCtl()
         self.use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
+        self.results_dir = results_dir
 
     def init_problem(self, problem_id: str):
         """Initialize a problem instance for the agent to solve.
@@ -42,7 +43,7 @@ def init_problem(self, problem_id: str):
         # Start timer
         self.execution_start_time = time.time()
 
-        self.session = Session()
+        self.session = Session(results_dir=self.results_dir)
         print(f"Session ID: {self.session.session_id}")
         prob = self.probs.get_problem_instance(problem_id)
         deployment = self.probs.get_problem_deployment(problem_id)

diff --git a/aiopslab/session.py b/aiopslab/session.py
@@ -18,7 +18,7 @@ class SessionItem(BaseModel):
 
 
 class Session:
-    def __init__(self) -> None:
+    def __init__(self, results_dir=None) -> None:
         self.session_id = uuid.uuid4()
         self.pid = None
         self.problem = None
@@ -28,6 +28,7 @@ def __init__(self) -> None:
         self.start_time = None
         self.end_time = None
         self.agent_name = None
+        self.results_dir = results_dir
 
     def set_problem(self, problem, pid=None):
         """Set the problem instance for the session.
@@ -115,19 +116,21 @@ def to_dict(self):
 
     def to_json(self):
         """Save the session to a JSON file."""
-        RESULTS_DIR.mkdir(parents=True, exist_ok=True)
+        results_dir = self.results_dir if self.results_dir else RESULTS_DIR
+        results_dir.mkdir(parents=True, exist_ok=True)
 
-        with open(RESULTS_DIR / f"{self.session_id}_{self.start_time}.json", "w") as f:
+        with open(results_dir / f"{self.session_id}_{self.start_time}.json", "w") as f:
             json.dump(self.to_dict(), f, indent=4)
-    
+
     def to_wandb(self):
         """Log the session to Weights & Biases."""
         wandb.log(self.to_dict())
 
     def from_json(self, filename: str):
         """Load a session from a JSON file."""
+        results_dir = self.results_dir if self.results_dir else RESULTS_DIR
 
-        with open(RESULTS_DIR / filename, "r") as f:
+        with open(results_dir / filename, "r") as f:
             data = json.load(f)
 
         self.session_id = data.get("session_id")

diff --git a/clients/openrouter.py b/clients/openrouter.py
@@ -8,6 +8,9 @@
 import asyncio
 import tiktoken
 import wandb
+import argparse
+import json
+from pathlib import Path
 from aiopslab.orchestrator import Orchestrator
 from aiopslab.orchestrator.problems.registry import ProblemRegistry
 from clients.utils.llm import OpenRouterClient
@@ -107,33 +110,112 @@ def _filter_dict(self, dictionary, filter_func):
         return {k: v for k, v in dictionary.items() if filter_func(k, v)}
 
 
+def get_completed_problems(results_dir: Path, agent_name: str, model: str) -> set:
+    """Get set of completed problem IDs from existing result files."""
+    completed = set()
+
+    # Look in organized directory structure first
+    organized_dir = results_dir / agent_name / model.replace("/", "_")
+    if organized_dir.exists():
+        for result_file in organized_dir.glob("*.json"):
+            try:
+                with open(result_file, 'r') as f:
+                    data = json.load(f)
+                    if 'problem_id' in data:
+                        completed.add(data['problem_id'])
+            except (json.JSONDecodeError, IOError):
+                continue
+
+    # Also check legacy flat structure
+    for result_file in results_dir.glob("*.json"):
+        try:
+            with open(result_file, 'r') as f:
+                data = json.load(f)
+                if ('problem_id' in data and
+                    data.get('agent') == agent_name and
+                    model.split('/')[-1] in str(result_file)):
+                    completed.add(data['problem_id'])
+        except (json.JSONDecodeError, IOError):
+            continue
+
+    return completed
+
+def setup_results_directory(model: str, agent_name: str = "openrouter") -> Path:
+    """Setup organized results directory structure."""
+    results_base = Path("aiopslab/data/results")
+
+    # Create organized structure: results/{agent}/{model_safe}/
+    model_safe = model.replace("/", "_")
+    results_dir = results_base / agent_name / model_safe
+    results_dir.mkdir(parents=True, exist_ok=True)
+
+    return results_dir
+
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run OpenRouter agent on AIOpsLab problems')
+    parser.add_argument('--skip-completed', action='store_true',
+                       help='Skip problems that have already been completed')
+    parser.add_argument('--problem-ids', nargs='+',
+                       help='Run only specific problem IDs')
+    parser.add_argument('--max-steps', type=int, default=30,
+                       help='Maximum steps per problem (default: 30)')
+    parser.add_argument('--model', type=str,
+                       default=os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini"),
+                       help='OpenRouter model to use')
+
+    args = parser.parse_args()
+
     # Load use_wandb from environment variable with a default of False
     use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
 
     if use_wandb:
         # Initialize wandb running
         wandb.init(project="AIOpsLab", entity="AIOpsLab")
 
-    # You can specify different models supported by OpenRouter
-    # Popular models:
-    # - "anthropic/claude-3.5-sonnet"
-    # - "openai/gpt-4-turbo"
-    # - "meta-llama/llama-3.1-8b-instruct"
-    # - "google/gemini-pro"
-    # - "mistralai/mixtral-8x7b-instruct"
-    model = os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini")
+    model = args.model
+    agent_name = "openrouter"
 
+    # Setup organized results directory
+    results_dir = setup_results_directory(model, agent_name)
+    print(f"Results will be saved to: {results_dir}")
+
+    # Get all problems
     problems = ProblemRegistry().PROBLEM_REGISTRY
+
+    # Filter problems if specific IDs requested
+    if args.problem_ids:
+        problems = {pid: problems[pid] for pid in args.problem_ids if pid in problems}
+        if not problems:
+            print("No valid problem IDs found")
+            exit(1)
+
+    # Skip completed problems if requested
+    if args.skip_completed:
+        completed_problems = get_completed_problems(
+            Path("aiopslab/data/results"), agent_name, model
+        )
+        problems = {pid: prob for pid, prob in problems.items()
+                   if pid not in completed_problems}
+
+        print(f"Found {len(completed_problems)} completed problems")
+        print(f"Running {len(problems)} remaining problems")
+
+        if not problems:
+            print("All problems have been completed!")
+            exit(0)
+
+    print(f"Running {len(problems)} problems with model: {model}")
+
     for pid in problems:
+        print(f"\n=== Starting problem: {pid} ===")
         agent = OpenRouterAgent(model=model)
 
-        orchestrator = Orchestrator()
-        orchestrator.register_agent(agent, name="openrouter")
+        orchestrator = Orchestrator(results_dir=results_dir)
+        orchestrator.register_agent(agent, name=agent_name)
 
         problem_desc, instructs, apis = orchestrator.init_problem(pid)
         agent.init_context(problem_desc, instructs, apis)
-        asyncio.run(orchestrator.start_problem(max_steps=30))
+        asyncio.run(orchestrator.start_problem(max_steps=args.max_steps))
 
     if use_wandb:
         # Finish the wandb run