Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions aiopslab/orchestrator/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


class Orchestrator:
def __init__(self):
def __init__(self, results_dir=None):
self.agent = None
self.session = None
self.parser = ResponseParser()
Expand All @@ -29,6 +29,7 @@ def __init__(self):
self.execution_end_time = None
self.kubectl = KubeCtl()
self.use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
self.results_dir = results_dir

def init_problem(self, problem_id: str):
"""Initialize a problem instance for the agent to solve.
Expand All @@ -42,7 +43,7 @@ def init_problem(self, problem_id: str):
# Start timer
self.execution_start_time = time.time()

self.session = Session()
self.session = Session(results_dir=self.results_dir)
print(f"Session ID: {self.session.session_id}")
prob = self.probs.get_problem_instance(problem_id)
deployment = self.probs.get_problem_deployment(problem_id)
Expand Down
13 changes: 8 additions & 5 deletions aiopslab/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class SessionItem(BaseModel):


class Session:
def __init__(self) -> None:
def __init__(self, results_dir=None) -> None:
self.session_id = uuid.uuid4()
self.pid = None
self.problem = None
Expand All @@ -28,6 +28,7 @@ def __init__(self) -> None:
self.start_time = None
self.end_time = None
self.agent_name = None
self.results_dir = results_dir

def set_problem(self, problem, pid=None):
"""Set the problem instance for the session.
Expand Down Expand Up @@ -115,19 +116,21 @@ def to_dict(self):

def to_json(self):
    """Save the session to a JSON file.

    The file is named ``{session_id}_{start_time}.json`` and is written to
    ``self.results_dir`` when that attribute is set, otherwise to the
    module-level ``RESULTS_DIR``. The target directory is created if it
    does not exist yet.
    """
    # Local import: only this method needs Path, and the file's import
    # block is not guaranteed to provide it.
    from pathlib import Path

    # Coerce so that callers may supply results_dir as either str or Path;
    # a bare str has neither .mkdir() nor the "/" operator.
    results_dir = Path(self.results_dir) if self.results_dir else RESULTS_DIR
    results_dir.mkdir(parents=True, exist_ok=True)

    with open(results_dir / f"{self.session_id}_{self.start_time}.json", "w") as f:
        json.dump(self.to_dict(), f, indent=4)

def to_wandb(self):
    """Send this session's dictionary form to Weights & Biases.

    The payload is whatever ``self.to_dict()`` returns; it is handed to
    ``wandb.log`` unchanged.
    """
    payload = self.to_dict()
    wandb.log(payload)

def from_json(self, filename: str):
"""Load a session from a JSON file."""
results_dir = self.results_dir if self.results_dir else RESULTS_DIR

with open(RESULTS_DIR / filename, "r") as f:
with open(results_dir / filename, "r") as f:
data = json.load(f)

self.session_id = data.get("session_id")
Expand Down
104 changes: 93 additions & 11 deletions clients/openrouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import asyncio
import tiktoken
import wandb
import argparse
import json
from pathlib import Path
from aiopslab.orchestrator import Orchestrator
from aiopslab.orchestrator.problems.registry import ProblemRegistry
from clients.utils.llm import OpenRouterClient
Expand Down Expand Up @@ -107,33 +110,112 @@ def _filter_dict(self, dictionary, filter_func):
return {k: v for k, v in dictionary.items() if filter_func(k, v)}


def _load_result(result_file):
    """Return the JSON object stored in *result_file*, or None if unusable.

    A file is unusable when it cannot be read, cannot be decoded as UTF-8,
    is not valid JSON, or its top-level value is not a JSON object.
    """
    try:
        with open(result_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Only mappings can carry the "problem_id" / "agent" keys we look up.
    return data if isinstance(data, dict) else None


def get_completed_problems(results_dir: Path, agent_name: str, model: str) -> set:
    """Get set of completed problem IDs from existing result files.

    Two locations are scanned:

    * the organized layout ``results_dir/<agent_name>/<model with "/" -> "_">``,
      where every readable result containing ``problem_id`` counts;
    * the legacy flat layout directly under ``results_dir``, where a result
      counts only if its ``agent`` field equals *agent_name* and the last
      ``/``-separated component of *model* occurs in the file's path.

    Unreadable, malformed, or non-object JSON files are skipped silently
    (best effort), so one corrupt file never aborts the scan.

    Args:
        results_dir: Base results directory (a ``str`` is accepted as well).
        agent_name: Agent name used for the sub-directory and ``agent`` match.
        model: Model identifier, e.g. ``"openai/gpt-4o-mini"``.

    Returns:
        The set of ``problem_id`` values found.
    """
    results_dir = Path(results_dir)
    completed = set()

    # Look in organized directory structure first
    organized_dir = results_dir / agent_name / model.replace("/", "_")
    if organized_dir.is_dir():
        for result_file in organized_dir.glob("*.json"):
            data = _load_result(result_file)
            if data is not None and "problem_id" in data:
                completed.add(data["problem_id"])

    # Also check legacy flat structure
    # NOTE(review): the match below is against the whole path string, not
    # file content; confirm legacy result filenames actually embed the
    # model name, otherwise this branch will rarely match.
    model_short = model.split("/")[-1]
    for result_file in results_dir.glob("*.json"):
        data = _load_result(result_file)
        if data is None or "problem_id" not in data:
            continue
        if data.get("agent") == agent_name and model_short in str(result_file):
            completed.add(data["problem_id"])

    return completed

def setup_results_directory(
    model: str,
    agent_name: str = "openrouter",
    results_base: "Path | str | None" = None,
) -> Path:
    """Setup organized results directory structure.

    Creates (if necessary) and returns ``<results_base>/<agent_name>/<model_safe>``,
    where ``model_safe`` is *model* with every ``/`` replaced by ``_`` so the
    model identifier maps to a single directory level.

    Args:
        model: Model identifier, e.g. ``"openai/gpt-4o-mini"``.
        agent_name: Name of the agent sub-directory.
        results_base: Root under which results are organized. Defaults to
            the relative path ``aiopslab/data/results`` (resolved against
            the current working directory), matching the previous
            hard-coded behavior.

    Returns:
        The path of the created (or already existing) results directory.
    """
    if results_base is None:
        results_base = "aiopslab/data/results"

    # Create organized structure: results/{agent}/{model_safe}/
    model_safe = model.replace("/", "_")
    results_dir = Path(results_base) / agent_name / model_safe
    results_dir.mkdir(parents=True, exist_ok=True)

    return results_dir

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run OpenRouter agent on AIOpsLab problems')
parser.add_argument('--skip-completed', action='store_true',
help='Skip problems that have already been completed')
parser.add_argument('--problem-ids', nargs='+',
help='Run only specific problem IDs')
parser.add_argument('--max-steps', type=int, default=30,
help='Maximum steps per problem (default: 30)')
parser.add_argument('--model', type=str,
default=os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini"),
help='OpenRouter model to use')

args = parser.parse_args()

# Load use_wandb from environment variable with a default of False
use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"

if use_wandb:
# Initialize wandb running
wandb.init(project="AIOpsLab", entity="AIOpsLab")

# You can specify different models supported by OpenRouter
# Popular models:
# - "anthropic/claude-3.5-sonnet"
# - "openai/gpt-4-turbo"
# - "meta-llama/llama-3.1-8b-instruct"
# - "google/gemini-pro"
# - "mistralai/mixtral-8x7b-instruct"
model = os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini")
model = args.model
agent_name = "openrouter"

# Setup organized results directory
results_dir = setup_results_directory(model, agent_name)
print(f"Results will be saved to: {results_dir}")

# Get all problems
problems = ProblemRegistry().PROBLEM_REGISTRY

# Filter problems if specific IDs requested
if args.problem_ids:
problems = {pid: problems[pid] for pid in args.problem_ids if pid in problems}
if not problems:
print("No valid problem IDs found")
exit(1)

# Skip completed problems if requested
if args.skip_completed:
completed_problems = get_completed_problems(
Path("aiopslab/data/results"), agent_name, model
)
problems = {pid: prob for pid, prob in problems.items()
if pid not in completed_problems}

print(f"Found {len(completed_problems)} completed problems")
print(f"Running {len(problems)} remaining problems")

if not problems:
print("All problems have been completed!")
exit(0)

print(f"Running {len(problems)} problems with model: {model}")

for pid in problems:
print(f"\n=== Starting problem: {pid} ===")
agent = OpenRouterAgent(model=model)

orchestrator = Orchestrator()
orchestrator.register_agent(agent, name="openrouter")
orchestrator = Orchestrator(results_dir=results_dir)
orchestrator.register_agent(agent, name=agent_name)

problem_desc, instructs, apis = orchestrator.init_problem(pid)
agent.init_context(problem_desc, instructs, apis)
asyncio.run(orchestrator.start_problem(max_steps=30))
asyncio.run(orchestrator.start_problem(max_steps=args.max_steps))

if use_wandb:
# Finish the wandb run
Expand Down