Commit 9637524

feat: add pre commit
1 parent 8aac37e

File tree

4 files changed (+46, -32 lines)

examples/smartcrawler_tool.py

Lines changed: 16 additions & 11 deletions
@@ -1,6 +1,7 @@
-from scrapegraph_py.logger import sgai_logger
 import json
 
+from scrapegraph_py.logger import sgai_logger
+
 from langchain_scrapegraph.tools import SmartCrawlerTool
 
 sgai_logger.set_logging(level="INFO")
@@ -10,16 +11,20 @@
 
 # Example based on the provided code snippet
 url = "https://scrapegraphai.com/"
-prompt = "What does the company do? and I need text content from their privacy and terms"
+prompt = (
+    "What does the company do? and I need text content from their privacy and terms"
+)
 
 # Use the tool with crawling parameters
-result = tool.invoke({
-    "url": url,
-    "prompt": prompt,
-    "cache_website": True,
-    "depth": 2,
-    "max_pages": 2,
-    "same_domain_only": True
-})
+result = tool.invoke(
+    {
+        "url": url,
+        "prompt": prompt,
+        "cache_website": True,
+        "depth": 2,
+        "max_pages": 2,
+        "same_domain_only": True,
+    }
+)
 
-print(json.dumps(result, indent=2))
\ No newline at end of file
+print(json.dumps(result, indent=2))
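The prompt rewrap above is purely cosmetic, likely produced by a line-length-aware formatter such as black run via the new pre-commit hooks: a parenthesized string literal is the same expression as the bare literal. A quick self-contained check, using no assumptions beyond the strings shown:

single = "What does the company do? and I need text content from their privacy and terms"
wrapped = (
    "What does the company do? and I need text content from their privacy and terms"
)
# The parentheses only group the expression; both names hold the identical string.
assert single == wrapped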
Lines changed: 18 additions & 11 deletions
@@ -1,33 +1,40 @@
+import json
+
 from pydantic import BaseModel, Field
 from scrapegraph_py.logger import sgai_logger
-import json
 
 from langchain_scrapegraph.tools import SmartCrawlerTool
 
 sgai_logger.set_logging(level="INFO")
 
+
 # Define the output schema
 class CompanyInfo(BaseModel):
     company_description: str = Field(description="What the company does")
     privacy_policy: str = Field(description="Privacy policy content")
     terms_of_service: str = Field(description="Terms of service content")
 
+
 # Initialize the tool with the schema
 tool = SmartCrawlerTool(llm_output_schema=CompanyInfo)
 
 # Example crawling with structured output
 url = "https://scrapegraphai.com/"
-prompt = "What does the company do? and I need text content from their privacy and terms"
+prompt = (
+    "What does the company do? and I need text content from their privacy and terms"
+)
 
 # Use the tool with crawling parameters and structured output
-result = tool.invoke({
-    "url": url,
-    "prompt": prompt,
-    "cache_website": True,
-    "depth": 2,
-    "max_pages": 2,
-    "same_domain_only": True
-})
+result = tool.invoke(
+    {
+        "url": url,
+        "prompt": prompt,
+        "cache_website": True,
+        "depth": 2,
+        "max_pages": 2,
+        "same_domain_only": True,
+    }
+)
 
 print(json.dumps(result, indent=2))
 
@@ -36,4 +43,4 @@ class CompanyInfo(BaseModel):
 #     "company_description": "...",
 #     "privacy_policy": "...",
 #     "terms_of_service": "..."
-# }
\ No newline at end of file
+# }
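Because the tool is constructed with llm_output_schema=CompanyInfo, the printed result should mirror that model. A minimal consumption sketch, assuming tool.invoke returns a plain dict shaped like the commented output; the dict contents here are placeholders, not real API output:

from pydantic import BaseModel, Field

class CompanyInfo(BaseModel):
    company_description: str = Field(description="What the company does")
    privacy_policy: str = Field(description="Privacy policy content")
    terms_of_service: str = Field(description="Terms of service content")

# Placeholder standing in for tool.invoke(...)'s return value.
result = {
    "company_description": "...",
    "privacy_policy": "...",
    "terms_of_service": "...",
}

# Re-validating gives typed attribute access and fails fast on missing keys.
info = CompanyInfo(**result)
print(info.company_description)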

langchain_scrapegraph/tools/__init__.py

Lines changed: 7 additions & 1 deletion
@@ -4,4 +4,10 @@
 from .smartcrawler import SmartCrawlerTool
 from .smartscraper import SmartScraperTool
 
-__all__ = ["SmartScraperTool", "SmartCrawlerTool", "GetCreditsTool", "MarkdownifyTool", "SearchScraperTool"]
+__all__ = [
+    "SmartScraperTool",
+    "SmartCrawlerTool",
+    "GetCreditsTool",
+    "MarkdownifyTool",
+    "SearchScraperTool",
+]
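The __all__ change is formatting-only (one exported name per line with a trailing comma), but the list itself determines exactly which names a star import re-exports. A small illustration:

# Only the five names listed in __all__ are pulled in by a star import.
from langchain_scrapegraph.tools import *  # noqa: F403

print(SmartCrawlerTool.__name__)   # available: listed in __all__
print(SearchScraperTool.__name__)  # available: listed in __all__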

langchain_scrapegraph/tools/smartcrawler.py

Lines changed: 5 additions & 9 deletions
@@ -17,19 +17,15 @@ class SmartCrawlerInput(BaseModel):
     url: str = Field(description="URL of the website to start crawling from")
     cache_website: bool = Field(
         default=True,
-        description="Whether to cache the website content for faster subsequent requests"
+        description="Whether to cache the website content for faster subsequent requests",
     )
     depth: int = Field(
-        default=2,
-        description="Maximum depth to crawl from the starting URL"
-    )
-    max_pages: int = Field(
-        default=2,
-        description="Maximum number of pages to crawl"
+        default=2, description="Maximum depth to crawl from the starting URL"
     )
+    max_pages: int = Field(default=2, description="Maximum number of pages to crawl")
     same_domain_only: bool = Field(
         default=True,
-        description="Whether to only crawl pages from the same domain as the starting URL"
+        description="Whether to only crawl pages from the same domain as the starting URL",
     )
 
 
@@ -189,4 +185,4 @@ async def _arun(
             max_pages=max_pages,
             same_domain_only=same_domain_only,
             run_manager=run_manager.get_sync() if run_manager else None,
-        )
\ No newline at end of file
+        )
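The SmartCrawlerInput edits are likewise behavior-preserving: black-style re-wrapping with trailing commas, keeping the same default on every Field. A hedged sketch of checking those defaults, assuming prompt is the only other required field (the invoke examples above suggest one exists outside the shown hunk):

from langchain_scrapegraph.tools.smartcrawler import SmartCrawlerInput

# url (and, per the assumption above, prompt) must be supplied; the rest default.
args = SmartCrawlerInput(
    url="https://scrapegraphai.com/",
    prompt="What does the company do?",
)
print(args.cache_website)     # True
print(args.depth)             # 2
print(args.max_pages)         # 2
print(args.same_domain_only)  # True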
