Skip to content

Commit e9e60d0

Browse files
committed
added -b/break-when-found flag
1 parent 17fb993 commit e9e60d0

File tree

6 files changed

+34
-3
lines changed

6 files changed

+34
-3
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
[package]
33
name = "webgrep"
4-
version = "0.3.3"
4+
version = "0.3.4"
55
edition = "2021"
66
license = "MIT"
77
description = "grep the web: a full-browser-spec search-focused ultra-simple way to read the web without having to leave the terminal"

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ to use headless-chrome with webgrep, either use the `-c/--chrome` flag, or set t
1313
- `-o/--samehost` OPTIONAL, only explore webpages on the same host (ie same domain)
1414
- `-c/--chrome` OPTIONAL, use chrome instead of just basic http request (needed for sites with js)
1515
- `-i/--insensitive` OPTIONAL, search case-insensitively
16+
- `-b/--break-when-found` OPTIONAL, stop recursing as soon as the search yields a result
1617

1718
### Examples:
1819
recursively search through a college course catalog for a keyword (case insensitive):

src/structs/Args.rs

+3
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,7 @@ pub struct ArgParser {
2424

2525
#[arg(short = 'c', long = "chrome", action=ArgAction::SetTrue)]
2626
pub use_chrome: Option<bool>,
27+
28+
#[arg(short, long, action=ArgAction::SetTrue)]
29+
pub break_when_found: Option<bool>,
2730
}

src/utils/recurse.rs

+22-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::utils::request_handler;
77
use async_recursion::async_recursion;
88
use clap::Parser;
99
use lazy_static::lazy_static;
10+
use regex::Regex;
1011
use reqwest::Client;
1112
use std::collections::HashMap;
1213
use std::sync::Mutex;
@@ -47,7 +48,7 @@ pub async fn recurse(
4748
let link_selector = Selector::parse("a").unwrap();
4849
let mut phtml = Html::new_document();
4950
if use_chrome.is_some() && use_chrome.unwrap() {
50-
phtml = request_handler::browse_for_html_from_url(specific_url).await;
51+
phtml = request_handler::browse_for_html_from_url(specific_url.clone()).await;
5152
} else {
5253
phtml = request_handler::get_html_from_url(&specific_url).await;
5354
}
@@ -65,6 +66,26 @@ pub async fn recurse(
6566
if Url::parse(&href) == Err(url::ParseError::RelativeUrlWithoutBase) {
6667
href = parsed_url.join(&href).unwrap().as_str().to_string();
6768
}
69+
if args.break_when_found.is_some() && args.break_when_found.unwrap() {
70+
let mut re = Regex::new("").unwrap();
71+
if args.search.clone().is_some() {
72+
if args.insensitive.is_some() && args.insensitive.unwrap() == true {
73+
re = Regex::new(
74+
&("(?i)".to_owned() + &args.search.clone().unwrap()),
75+
)
76+
.unwrap();
77+
} else {
78+
re = Regex::new(&args.search.clone().unwrap()).unwrap();
79+
}
80+
}
81+
for t in CACHE.lock().unwrap().values_mut() {
82+
for a in t {
83+
if re.is_match(a) {
84+
return get_links(links, depth - 1, use_chrome).await;
85+
}
86+
}
87+
}
88+
}
6889
if !CACHE.lock().unwrap().contains_key(&href) && !Url::parse(&href).is_err() {
6990
let mut file_type = "html";
7091
if crate::utils::determine_file_type::determine_file_type(&href) == "pdf" {

tests/test_cases.toml

+6
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,9 @@ name = "Search/Insensitive"
6363
run_type = "lines_count"
6464
cmd = "../target/release/webgrep https://google.com dark -i"
6565
expect = "3"
66+
67+
[[test_cases]]
68+
name = "Recursive/BreakWhenFound"
69+
run_type = "lines_count"
70+
cmd = "../target/release/webgrep https://quinnpatwardhan.com/ frisb -r 1 -b -c"
71+
expect = "5"

0 commit comments

Comments
 (0)