|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Scheduled Jobs Example - LangChain Tools""" |
| 3 | + |
| 4 | +import time |
| 5 | + |
| 6 | +from scrapegraph_py.logger import sgai_logger |
| 7 | + |
| 8 | +from langchain_scrapegraph.tools import ( |
| 9 | + CreateScheduledJobTool, |
| 10 | + DeleteScheduledJobTool, |
| 11 | + GetJobExecutionsTool, |
| 12 | + GetScheduledJobsTool, |
| 13 | + GetScheduledJobTool, |
| 14 | + PauseScheduledJobTool, |
| 15 | + ResumeScheduledJobTool, |
| 16 | + TriggerScheduledJobTool, |
| 17 | + UpdateScheduledJobTool, |
| 18 | +) |
| 19 | + |
# Emit INFO-level logs from the scrapegraph_py client so each tool call below is visible.
sgai_logger.set_logging(level="INFO")
| 21 | + |
| 22 | + |
def _create_demo_jobs(create_job_tool):
    """Create a daily SmartScraper job and a weekly SearchScraper job.

    Returns:
        Tuple of (smartscraper job id, searchscraper job id).
    """
    print("\n📅 Creating a scheduled SmartScraper job...")

    smartscraper_config = {
        "website_url": "https://example.com",
        "user_prompt": "Extract the main heading and description from the page",
    }

    job = create_job_tool.invoke(
        {
            "job_name": "Daily Example Scraping",
            "service_type": "smartscraper",
            # Every day at 09:00.
            "cron_expression": "0 9 * * *",
            "job_config": smartscraper_config,
            "is_active": True,
        }
    )

    job_id = job["id"]
    print(f"✅ Created job: {job['job_name']} (ID: {job_id})")
    print(f"   Next run: {job.get('next_run_at', 'Not scheduled')}")

    print("\n📅 Creating a scheduled SearchScraper job...")

    searchscraper_config = {
        "user_prompt": "Find the latest news about artificial intelligence",
        "num_results": 5,
    }

    search_job = create_job_tool.invoke(
        {
            "job_name": "Weekly AI News Search",
            "service_type": "searchscraper",
            # Every Monday at 10:00.
            "cron_expression": "0 10 * * 1",
            "job_config": searchscraper_config,
            "is_active": True,
        }
    )

    search_job_id = search_job["id"]
    print(f"✅ Created job: {search_job['job_name']} (ID: {search_job_id})")
    return job_id, search_job_id


def _list_all_jobs(get_jobs_tool):
    """Fetch the first page of scheduled jobs and print a summary of each."""
    print("\n📋 Listing all scheduled jobs...")

    jobs_response = get_jobs_tool.invoke({"page": 1, "page_size": 10})
    jobs = jobs_response["jobs"]

    print(f"Found {jobs_response['total']} total jobs:")
    for job_item in jobs:
        status = "🟢 Active" if job_item["is_active"] else "🔴 Inactive"
        print(f"  - {job_item['job_name']} ({job_item['service_type']}) - {status}")
        print(f"    Schedule: {job_item['cron_expression']}")
        if job_item.get("next_run_at"):
            print(f"    Next run: {job_item['next_run_at']}")


def _show_job_details(get_job_tool, job_id):
    """Fetch and print the stored metadata for a single job."""
    print(f"\n🔍 Getting details for job {job_id}...")

    job_details = get_job_tool.invoke({"job_id": job_id})
    print(f"Job Name: {job_details['job_name']}")
    print(f"Service Type: {job_details['service_type']}")
    print(f"Created: {job_details['created_at']}")
    print(f"Active: {job_details['is_active']}")


def _update_job_schedule(update_job_tool, job_id):
    """Rename the job and move its schedule from 09:00 to 08:00 daily."""
    print("\n📝 Updating job schedule...")

    updated_job = update_job_tool.invoke(
        {
            "job_id": job_id,
            "cron_expression": "0 8 * * *",
            "job_name": "Daily Example Scraping (Updated)",
        }
    )

    print(f"✅ Updated job: {updated_job['job_name']}")
    print(f"   New schedule: {updated_job['cron_expression']}")


def _pause_and_resume_job(pause_job_tool, resume_job_tool, job_id):
    """Demonstrate pausing a job and then reactivating it."""
    print(f"\n⏸️ Pausing job {job_id}...")

    pause_result = pause_job_tool.invoke({"job_id": job_id})
    print(f"✅ {pause_result['message']}")
    print(f"   Job is now: {'Active' if pause_result['is_active'] else 'Paused'}")

    print(f"\n▶️ Resuming job {job_id}...")

    resume_result = resume_job_tool.invoke({"job_id": job_id})
    print(f"✅ {resume_result['message']}")
    print(f"   Job is now: {'Active' if resume_result['is_active'] else 'Paused'}")
    if resume_result.get("next_run_at"):
        print(f"   Next run: {resume_result['next_run_at']}")


def _trigger_and_inspect_executions(trigger_job_tool, get_executions_tool, job_id):
    """Trigger the job manually, then print its recent execution history."""
    print(f"\n🚀 Manually triggering job {job_id}...")

    trigger_result = trigger_job_tool.invoke({"job_id": job_id})
    print(f"✅ {trigger_result['message']}")
    print(f"   Execution ID: {trigger_result['execution_id']}")
    print(f"   Triggered at: {trigger_result['triggered_at']}")

    # Wait a moment for the execution to potentially start
    time.sleep(2)

    print(f"\n📊 Getting execution history for job {job_id}...")

    executions_response = get_executions_tool.invoke(
        {"job_id": job_id, "page": 1, "page_size": 5}
    )

    executions = executions_response["executions"]
    print(f"Found {executions_response['total']} total executions:")

    for execution in executions:
        # Map known statuses to an emoji; anything unrecognized falls back to "❓".
        status_emoji = {
            "completed": "✅",
            "failed": "❌",
            "running": "🔄",
            "pending": "⏳",
        }.get(execution["status"], "❓")

        print(f"  {status_emoji} {execution['status'].upper()}")
        print(f"    Started: {execution['started_at']}")
        if execution.get("completed_at"):
            print(f"    Completed: {execution['completed_at']}")
        if execution.get("credits_used"):
            print(f"    Credits used: {execution['credits_used']}")


def _filter_jobs(get_jobs_tool):
    """Show server-side filtering: list only active SmartScraper jobs."""
    print("\n🔧 Filtering jobs by service type (smartscraper)...")

    filtered_jobs = get_jobs_tool.invoke(
        {"service_type": "smartscraper", "is_active": True}
    )

    print(f"Found {filtered_jobs['total']} active SmartScraper jobs:")
    for job_item in filtered_jobs["jobs"]:
        print(
            f"  - {job_item['job_name']} (Schedule: {job_item['cron_expression']})"
        )


def _delete_jobs(delete_job_tool, job_id, search_job_id):
    """Clean up both demo jobs so repeated runs do not accumulate schedules."""
    print("\n🗑️ Cleaning up - deleting created jobs...")

    delete_result1 = delete_job_tool.invoke({"job_id": job_id})
    print(f"✅ {delete_result1['message']} (Job 1)")

    delete_result2 = delete_job_tool.invoke({"job_id": search_job_id})
    print(f"✅ {delete_result2['message']} (Job 2)")


def main():
    """Walk through the full scheduled-job lifecycle using the LangChain tools.

    Creates two jobs, lists and inspects them, updates one, pauses/resumes
    and manually triggers it, prints its execution history, demonstrates
    filtered listing, and finally deletes both jobs. Credentials and
    connection details come from the tools' own initialization.
    """
    print("🚀 ScrapeGraph AI Scheduled Jobs Example - LangChain Tools")
    print("=" * 60)

    # Initialize tools
    create_job_tool = CreateScheduledJobTool()
    get_jobs_tool = GetScheduledJobsTool()
    get_job_tool = GetScheduledJobTool()
    update_job_tool = UpdateScheduledJobTool()
    pause_job_tool = PauseScheduledJobTool()
    resume_job_tool = ResumeScheduledJobTool()
    trigger_job_tool = TriggerScheduledJobTool()
    get_executions_tool = GetJobExecutionsTool()
    delete_job_tool = DeleteScheduledJobTool()

    try:
        job_id, search_job_id = _create_demo_jobs(create_job_tool)
        _list_all_jobs(get_jobs_tool)
        _show_job_details(get_job_tool, job_id)
        _update_job_schedule(update_job_tool, job_id)
        _pause_and_resume_job(pause_job_tool, resume_job_tool, job_id)
        _trigger_and_inspect_executions(trigger_job_tool, get_executions_tool, job_id)
        _filter_jobs(get_jobs_tool)
        _delete_jobs(delete_job_tool, job_id, search_job_id)

        print("\n🎉 Scheduled jobs example completed successfully!")

    except Exception as e:
        # Surface the failure to the user, then re-raise so the process exits
        # non-zero instead of silently ending a partial run.
        print(f"\n❌ Error: {e}")
        raise


if __name__ == "__main__":
    main()
0 commit comments