Skip to content

Commit 3eaa5bb

Browse files
committed
feat: add new endpoint
1 parent 35d1897 commit 3eaa5bb

File tree

11 files changed

+1861
-6
lines changed

11 files changed

+1861
-6
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ install:
1515

1616
# Linting and Formatting Checks
1717
lint:
18-
poetry run ruff check $(PACKAGE_NAME) $(TEST_DIR)
19-
poetry run black --check $(PACKAGE_NAME) $(TEST_DIR)
20-
poetry run isort --check-only $(PACKAGE_NAME) $(TEST_DIR)
18+
poetry run ruff check $(PACKAGE_NAME) $(TEST_DIR) examples
19+
poetry run black --check $(PACKAGE_NAME) $(TEST_DIR) examples
20+
poetry run isort --check-only $(PACKAGE_NAME) $(TEST_DIR) examples
21+
22+
# Auto-format code
23+
format:
24+
poetry run ruff check --fix $(PACKAGE_NAME) $(TEST_DIR) examples
25+
poetry run black $(PACKAGE_NAME) $(TEST_DIR) examples
26+
poetry run isort $(PACKAGE_NAME) $(TEST_DIR) examples
2127

2228
# Type Checking with MyPy
2329
type-check:

examples/scheduled_jobs_client.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#!/usr/bin/env python3
2+
"""Scheduled Jobs Example - Direct Client Usage"""
3+
4+
import os
5+
6+
from scrapegraph_py import Client
7+
8+
9+
def main():
    """Walk through the scheduled-jobs API using the client directly.

    Creates a SmartScraper job and a SearchScraper job, then lists jobs,
    fetches, updates, pauses, resumes and triggers the first job, prints its
    execution history, filters jobs by service type and deletes both jobs.

    Any job created by this run that has not been deleted yet is removed in
    the ``finally`` clause, so a failure midway does not leave active cron
    jobs behind on the service.

    Raises:
        Exception: whatever the client raises is printed and re-raised.
    """
    client = Client.from_env()

    # IDs of jobs created by this run that still need to be deleted.
    created_job_ids = []

    print("🚀 ScrapeGraph AI Scheduled Jobs Example")
    print("=" * 50)

    try:
        print("\n📅 Creating a scheduled SmartScraper job...")

        smartscraper_config = {
            "website_url": "https://example.com",
            "user_prompt": "Extract the main heading and description from the page",
        }

        job = client.create_scheduled_job(
            job_name="Daily Example Scraping",
            service_type="smartscraper",
            cron_expression="0 9 * * *",
            job_config=smartscraper_config,
            is_active=True,
        )

        job_id = job["id"]
        created_job_ids.append(job_id)
        print(f"✅ Created job: {job['job_name']} (ID: {job_id})")
        print(f" Next run: {job.get('next_run_at', 'Not scheduled')}")

        print("\n📅 Creating a scheduled SearchScraper job...")

        searchscraper_config = {
            "user_prompt": "Find the latest news about artificial intelligence",
            "num_results": 5,
        }

        search_job = client.create_scheduled_job(
            job_name="Weekly AI News Search",
            service_type="searchscraper",
            cron_expression="0 10 * * 1",
            job_config=searchscraper_config,
            is_active=True,
        )

        search_job_id = search_job["id"]
        created_job_ids.append(search_job_id)
        print(f"✅ Created job: {search_job['job_name']} (ID: {search_job_id})")

        print("\n📋 Listing all scheduled jobs...")

        jobs_response = client.get_scheduled_jobs(page=1, page_size=10)
        jobs = jobs_response["jobs"]

        print(f"Found {jobs_response['total']} total jobs:")
        # Distinct loop name so the created `job` above is not shadowed.
        for listed_job in jobs:
            status = "🟢 Active" if listed_job["is_active"] else "🔴 Inactive"
            print(
                f" - {listed_job['job_name']} ({listed_job['service_type']}) - {status}"
            )
            print(f" Schedule: {listed_job['cron_expression']}")
            if listed_job.get("next_run_at"):
                print(f" Next run: {listed_job['next_run_at']}")

        print(f"\n🔍 Getting details for job {job_id}...")

        job_details = client.get_scheduled_job(job_id)
        print(f"Job Name: {job_details['job_name']}")
        print(f"Service Type: {job_details['service_type']}")
        print(f"Created: {job_details['created_at']}")
        print(f"Active: {job_details['is_active']}")

        print("\n📝 Updating job schedule...")

        updated_job = client.update_scheduled_job(
            job_id=job_id,
            cron_expression="0 8 * * *",
            job_name="Daily Example Scraping (Updated)",
        )

        print(f"✅ Updated job: {updated_job['job_name']}")
        print(f" New schedule: {updated_job['cron_expression']}")

        print(f"\n⏸️ Pausing job {job_id}...")

        pause_result = client.pause_scheduled_job(job_id)
        print(f"✅ {pause_result['message']}")
        print(f" Job is now: {'Active' if pause_result['is_active'] else 'Paused'}")

        print(f"\n▶️ Resuming job {job_id}...")

        resume_result = client.resume_scheduled_job(job_id)
        print(f"✅ {resume_result['message']}")
        print(f" Job is now: {'Active' if resume_result['is_active'] else 'Paused'}")
        if resume_result.get("next_run_at"):
            print(f" Next run: {resume_result['next_run_at']}")

        print(f"\n🚀 Manually triggering job {job_id}...")

        trigger_result = client.trigger_scheduled_job(job_id)
        print(f"✅ {trigger_result['message']}")
        print(f" Execution ID: {trigger_result['execution_id']}")
        print(f" Triggered at: {trigger_result['triggered_at']}")

        print(f"\n📊 Getting execution history for job {job_id}...")

        executions_response = client.get_job_executions(
            job_id=job_id, page=1, page_size=5
        )

        executions = executions_response["executions"]
        print(f"Found {executions_response['total']} total executions:")

        for execution in executions:
            status_emoji = {
                "completed": "✅",
                "failed": "❌",
                "running": "🔄",
                "pending": "⏳",
            }.get(execution["status"], "❓")

            print(f" {status_emoji} {execution['status'].upper()}")
            print(f" Started: {execution['started_at']}")
            if execution.get("completed_at"):
                print(f" Completed: {execution['completed_at']}")
            if execution.get("credits_used"):
                print(f" Credits used: {execution['credits_used']}")

        print("\n🔧 Filtering jobs by service type (smartscraper)...")

        filtered_jobs = client.get_scheduled_jobs(
            service_type="smartscraper", is_active=True
        )

        print(f"Found {filtered_jobs['total']} active SmartScraper jobs:")
        for active_job in filtered_jobs["jobs"]:
            print(
                f" - {active_job['job_name']} "
                f"(Schedule: {active_job['cron_expression']})"
            )

        print("\n🗑️ Cleaning up - deleting created jobs...")

        delete_result1 = client.delete_scheduled_job(job_id)
        created_job_ids.remove(job_id)
        print(f"✅ {delete_result1['message']} (Job 1)")

        delete_result2 = client.delete_scheduled_job(search_job_id)
        created_job_ids.remove(search_job_id)
        print(f"✅ {delete_result2['message']} (Job 2)")

        print("\n🎉 Scheduled jobs example completed successfully!")

    except Exception as e:
        print(f"\n❌ Error: {str(e)}")
        raise

    finally:
        try:
            # Best-effort removal of jobs that an earlier failure left behind.
            # The list is empty on the success path, so nothing runs here then.
            for leftover_id in created_job_ids:
                try:
                    client.delete_scheduled_job(leftover_id)
                    print(f"🧹 Deleted leftover job {leftover_id}")
                except Exception as cleanup_error:
                    # Report and continue so the original error still
                    # propagates and the client is still closed.
                    print(f"⚠️ Could not delete job {leftover_id}: {cleanup_error}")
        finally:
            client.close()
156+
157+
158+
if __name__ == "__main__":
    # Announce mock mode when SGAI_MOCK is set to a truthy-looking value.
    mock_setting = os.getenv("SGAI_MOCK", "0").lower()
    if mock_setting in ("1", "true", "yes"):
        print("🧪 Running in MOCK mode - no real API calls will be made")

    # NOTE(review): original indentation was lost; main() is assumed to run
    # regardless of the mock flag rather than only inside the branch above.
    main()

examples/scheduled_jobs_tool.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
#!/usr/bin/env python3
2+
"""Scheduled Jobs Example - LangChain Tools"""
3+
4+
import time
5+
6+
from scrapegraph_py.logger import sgai_logger
7+
8+
from langchain_scrapegraph.tools import (
9+
CreateScheduledJobTool,
10+
DeleteScheduledJobTool,
11+
GetJobExecutionsTool,
12+
GetScheduledJobsTool,
13+
GetScheduledJobTool,
14+
PauseScheduledJobTool,
15+
ResumeScheduledJobTool,
16+
TriggerScheduledJobTool,
17+
UpdateScheduledJobTool,
18+
)
19+
20+
sgai_logger.set_logging(level="INFO")
21+
22+
23+
def main():
    """Walk through the scheduled-jobs API using the LangChain tools.

    Creates a SmartScraper job and a SearchScraper job, then lists jobs,
    fetches, updates, pauses, resumes and triggers the first job, prints its
    execution history, filters jobs by service type and deletes both jobs.

    Any job created by this run that has not been deleted yet is removed in
    the ``finally`` clause, so a failure midway does not leave active cron
    jobs behind on the service.

    Raises:
        Exception: whatever a tool raises is printed and re-raised.
    """
    print("🚀 ScrapeGraph AI Scheduled Jobs Example - LangChain Tools")
    print("=" * 60)

    # Initialize tools
    create_job_tool = CreateScheduledJobTool()
    get_jobs_tool = GetScheduledJobsTool()
    get_job_tool = GetScheduledJobTool()
    update_job_tool = UpdateScheduledJobTool()
    pause_job_tool = PauseScheduledJobTool()
    resume_job_tool = ResumeScheduledJobTool()
    trigger_job_tool = TriggerScheduledJobTool()
    get_executions_tool = GetJobExecutionsTool()
    delete_job_tool = DeleteScheduledJobTool()

    # IDs of jobs created by this run that still need to be deleted.
    created_job_ids = []

    try:
        print("\n📅 Creating a scheduled SmartScraper job...")

        smartscraper_config = {
            "website_url": "https://example.com",
            "user_prompt": "Extract the main heading and description from the page",
        }

        job = create_job_tool.invoke(
            {
                "job_name": "Daily Example Scraping",
                "service_type": "smartscraper",
                "cron_expression": "0 9 * * *",
                "job_config": smartscraper_config,
                "is_active": True,
            }
        )

        job_id = job["id"]
        created_job_ids.append(job_id)
        print(f"✅ Created job: {job['job_name']} (ID: {job_id})")
        print(f" Next run: {job.get('next_run_at', 'Not scheduled')}")

        print("\n📅 Creating a scheduled SearchScraper job...")

        searchscraper_config = {
            "user_prompt": "Find the latest news about artificial intelligence",
            "num_results": 5,
        }

        search_job = create_job_tool.invoke(
            {
                "job_name": "Weekly AI News Search",
                "service_type": "searchscraper",
                "cron_expression": "0 10 * * 1",
                "job_config": searchscraper_config,
                "is_active": True,
            }
        )

        search_job_id = search_job["id"]
        created_job_ids.append(search_job_id)
        print(f"✅ Created job: {search_job['job_name']} (ID: {search_job_id})")

        print("\n📋 Listing all scheduled jobs...")

        jobs_response = get_jobs_tool.invoke({"page": 1, "page_size": 10})
        jobs = jobs_response["jobs"]

        print(f"Found {jobs_response['total']} total jobs:")
        for job_item in jobs:
            status = "🟢 Active" if job_item["is_active"] else "🔴 Inactive"
            print(f" - {job_item['job_name']} ({job_item['service_type']}) - {status}")
            print(f" Schedule: {job_item['cron_expression']}")
            if job_item.get("next_run_at"):
                print(f" Next run: {job_item['next_run_at']}")

        print(f"\n🔍 Getting details for job {job_id}...")

        job_details = get_job_tool.invoke({"job_id": job_id})
        print(f"Job Name: {job_details['job_name']}")
        print(f"Service Type: {job_details['service_type']}")
        print(f"Created: {job_details['created_at']}")
        print(f"Active: {job_details['is_active']}")

        print("\n📝 Updating job schedule...")

        updated_job = update_job_tool.invoke(
            {
                "job_id": job_id,
                "cron_expression": "0 8 * * *",
                "job_name": "Daily Example Scraping (Updated)",
            }
        )

        print(f"✅ Updated job: {updated_job['job_name']}")
        print(f" New schedule: {updated_job['cron_expression']}")

        print(f"\n⏸️ Pausing job {job_id}...")

        pause_result = pause_job_tool.invoke({"job_id": job_id})
        print(f"✅ {pause_result['message']}")
        print(f" Job is now: {'Active' if pause_result['is_active'] else 'Paused'}")

        print(f"\n▶️ Resuming job {job_id}...")

        resume_result = resume_job_tool.invoke({"job_id": job_id})
        print(f"✅ {resume_result['message']}")
        print(f" Job is now: {'Active' if resume_result['is_active'] else 'Paused'}")
        if resume_result.get("next_run_at"):
            print(f" Next run: {resume_result['next_run_at']}")

        print(f"\n🚀 Manually triggering job {job_id}...")

        trigger_result = trigger_job_tool.invoke({"job_id": job_id})
        print(f"✅ {trigger_result['message']}")
        print(f" Execution ID: {trigger_result['execution_id']}")
        print(f" Triggered at: {trigger_result['triggered_at']}")

        # Wait a moment for the execution to potentially start
        time.sleep(2)

        print(f"\n📊 Getting execution history for job {job_id}...")

        executions_response = get_executions_tool.invoke(
            {"job_id": job_id, "page": 1, "page_size": 5}
        )

        executions = executions_response["executions"]
        print(f"Found {executions_response['total']} total executions:")

        for execution in executions:
            status_emoji = {
                "completed": "✅",
                "failed": "❌",
                "running": "🔄",
                "pending": "⏳",
            }.get(execution["status"], "❓")

            print(f" {status_emoji} {execution['status'].upper()}")
            print(f" Started: {execution['started_at']}")
            if execution.get("completed_at"):
                print(f" Completed: {execution['completed_at']}")
            if execution.get("credits_used"):
                print(f" Credits used: {execution['credits_used']}")

        print("\n🔧 Filtering jobs by service type (smartscraper)...")

        filtered_jobs = get_jobs_tool.invoke(
            {"service_type": "smartscraper", "is_active": True}
        )

        print(f"Found {filtered_jobs['total']} active SmartScraper jobs:")
        for job_item in filtered_jobs["jobs"]:
            print(
                f" - {job_item['job_name']} (Schedule: {job_item['cron_expression']})"
            )

        print("\n🗑️ Cleaning up - deleting created jobs...")

        delete_result1 = delete_job_tool.invoke({"job_id": job_id})
        created_job_ids.remove(job_id)
        print(f"✅ {delete_result1['message']} (Job 1)")

        delete_result2 = delete_job_tool.invoke({"job_id": search_job_id})
        created_job_ids.remove(search_job_id)
        print(f"✅ {delete_result2['message']} (Job 2)")

        print("\n🎉 Scheduled jobs example completed successfully!")

    except Exception as e:
        print(f"\n❌ Error: {str(e)}")
        raise

    finally:
        # Best-effort removal of jobs that an earlier failure left behind.
        # The list is empty on the success path, so nothing runs here then.
        for leftover_id in created_job_ids:
            try:
                delete_job_tool.invoke({"job_id": leftover_id})
                print(f"🧹 Deleted leftover job {leftover_id}")
            except Exception as cleanup_error:
                # Report and continue so the original error still propagates.
                print(f"⚠️ Could not delete job {leftover_id}: {cleanup_error}")
187+
188+
189+
# Run the walkthrough only when this file is executed as a script, so that
# importing the module does not call main().
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)