Commit 9637524

feat: add pre commit
1 parent 8aac37e

File tree

4 files changed (+46, -32 lines)

examples/smartcrawler_tool.py

Lines changed: 16 additions & 11 deletions
@@ -1,6 +1,7 @@
-from scrapegraph_py.logger import sgai_logger
 import json
 
+from scrapegraph_py.logger import sgai_logger
+
 from langchain_scrapegraph.tools import SmartCrawlerTool
 
 sgai_logger.set_logging(level="INFO")
@@ -10,16 +11,20 @@
 
 # Example based on the provided code snippet
 url = "https://scrapegraphai.com/"
-prompt = "What does the company do? and I need text content from their privacy and terms"
+prompt = (
+    "What does the company do? and I need text content from their privacy and terms"
+)
 
 # Use the tool with crawling parameters
-result = tool.invoke({
-    "url": url,
-    "prompt": prompt,
-    "cache_website": True,
-    "depth": 2,
-    "max_pages": 2,
-    "same_domain_only": True
-})
+result = tool.invoke(
+    {
+        "url": url,
+        "prompt": prompt,
+        "cache_website": True,
+        "depth": 2,
+        "max_pages": 2,
+        "same_domain_only": True,
+    }
+)
 
-print(json.dumps(result, indent=2))
\ No newline at end of file
+print(json.dumps(result, indent=2))
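The prompt rewrap above is purely cosmetic, likely produced by a line-length-aware formatter such as black run via the new pre-commit hooks: a parenthesized string literal is the same expression as the bare literal. A quick self-contained check, using no assumptions beyond the strings shown:

single = "What does the company do? and I need text content from their privacy and terms"
wrapped = (
    "What does the company do? and I need text content from their privacy and terms"
)
# The parentheses only group the expression; both names hold the identical string.
assert single == wrapped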
Lines changed: 18 additions & 11 deletions
@@ -1,33 +1,40 @@
+import json
+
 from pydantic import BaseModel, Field
 from scrapegraph_py.logger import sgai_logger
-import json
 
 from langchain_scrapegraph.tools import SmartCrawlerTool
 
 sgai_logger.set_logging(level="INFO")
 
+
 # Define the output schema
 class CompanyInfo(BaseModel):
     company_description: str = Field(description="What the company does")
     privacy_policy: str = Field(description="Privacy policy content")
     terms_of_service: str = Field(description="Terms of service content")
 
+
 # Initialize the tool with the schema
 tool = SmartCrawlerTool(llm_output_schema=CompanyInfo)
 
 # Example crawling with structured output
 url = "https://scrapegraphai.com/"
-prompt = "What does the company do? and I need text content from their privacy and terms"
+prompt = (
+    "What does the company do? and I need text content from their privacy and terms"
+)
 
 # Use the tool with crawling parameters and structured output
-result = tool.invoke({
-    "url": url,
-    "prompt": prompt,
-    "cache_website": True,
-    "depth": 2,
-    "max_pages": 2,
-    "same_domain_only": True
-})
+result = tool.invoke(
+    {
+        "url": url,
+        "prompt": prompt,
+        "cache_website": True,
+        "depth": 2,
+        "max_pages": 2,
+        "same_domain_only": True,
+    }
+)
 
 print(json.dumps(result, indent=2))
 
@@ -36,4 +43,4 @@ class CompanyInfo(BaseModel):
 #     "company_description": "...",
 #     "privacy_policy": "...",
 #     "terms_of_service": "..."
-# }
\ No newline at end of file
+# }
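Because the tool is constructed with llm_output_schema=CompanyInfo, the printed result should mirror that model. A minimal consumption sketch, assuming tool.invoke returns a plain dict shaped like the commented output; the dict contents here are placeholders, not real API output:

from pydantic import BaseModel, Field

class CompanyInfo(BaseModel):
    company_description: str = Field(description="What the company does")
    privacy_policy: str = Field(description="Privacy policy content")
    terms_of_service: str = Field(description="Terms of service content")

# Placeholder standing in for tool.invoke(...)'s return value.
result = {
    "company_description": "...",
    "privacy_policy": "...",
    "terms_of_service": "...",
}

# Re-validating gives typed attribute access and fails fast on missing keys.
info = CompanyInfo(**result)
print(info.company_description)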

langchain_scrapegraph/tools/__init__.py

Lines changed: 7 additions & 1 deletion
@@ -4,4 +4,10 @@
 from .smartcrawler import SmartCrawlerTool
 from .smartscraper import SmartScraperTool
 
-__all__ = ["SmartScraperTool", "SmartCrawlerTool", "GetCreditsTool", "MarkdownifyTool", "SearchScraperTool"]
+__all__ = [
+    "SmartScraperTool",
+    "SmartCrawlerTool",
+    "GetCreditsTool",
+    "MarkdownifyTool",
+    "SearchScraperTool",
+]
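The __all__ change is formatting-only (one exported name per line with a trailing comma), but the list itself determines exactly which names a star import re-exports. A small illustration:

# Only the five names listed in __all__ are pulled in by a star import.
from langchain_scrapegraph.tools import *  # noqa: F403

print(SmartCrawlerTool.__name__)   # available: listed in __all__
print(SearchScraperTool.__name__)  # available: listed in __all__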

langchain_scrapegraph/tools/smartcrawler.py

Lines changed: 5 additions & 9 deletions
@@ -17,19 +17,15 @@ class SmartCrawlerInput(BaseModel):
     url: str = Field(description="URL of the website to start crawling from")
     cache_website: bool = Field(
         default=True,
-        description="Whether to cache the website content for faster subsequent requests"
+        description="Whether to cache the website content for faster subsequent requests",
     )
     depth: int = Field(
-        default=2,
-        description="Maximum depth to crawl from the starting URL"
-    )
-    max_pages: int = Field(
-        default=2,
-        description="Maximum number of pages to crawl"
+        default=2, description="Maximum depth to crawl from the starting URL"
     )
+    max_pages: int = Field(default=2, description="Maximum number of pages to crawl")
     same_domain_only: bool = Field(
         default=True,
-        description="Whether to only crawl pages from the same domain as the starting URL"
+        description="Whether to only crawl pages from the same domain as the starting URL",
     )
 
 
@@ -189,4 +185,4 @@ async def _arun(
             max_pages=max_pages,
             same_domain_only=same_domain_only,
             run_manager=run_manager.get_sync() if run_manager else None,
-        )
\ No newline at end of file
+        )
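The SmartCrawlerInput edits are likewise behavior-preserving: black-style re-wrapping with trailing commas, keeping the same default on every Field. A hedged sketch of checking those defaults, assuming prompt is the only other required field (the invoke examples above suggest one exists outside the shown hunk):

from langchain_scrapegraph.tools.smartcrawler import SmartCrawlerInput

# url (and, per the assumption above, prompt) must be supplied; the rest default.
args = SmartCrawlerInput(
    url="https://scrapegraphai.com/",
    prompt="What does the company do?",
)
print(args.cache_website)     # True
print(args.depth)             # 2
print(args.max_pages)         # 2
print(args.same_domain_only)  # True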
