forked from danmax/r34-scraper
Made the scraper stop once there are no more post URLs
This commit is contained in:
parent
7b73c942b8
commit
36c0bfb0fe
16
src/main.rs
16
src/main.rs
@ -9,7 +9,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
r34pid += 42;
|
r34pid += 42;
|
||||||
|
|
||||||
let r34_url = format!(
|
let r34_url = format!(
|
||||||
"https://rule34.xxx/index.php?page=post&s=list&tags=1girls&pid={}",
|
"https://rule34.xxx/index.php?page=post&s=list&tags=kaguya_jinguu&pid={}",
|
||||||
r34pid
|
r34pid
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -17,11 +17,19 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
.get(r34_url)
|
.get(r34_url)
|
||||||
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36").send().await?.text().await?;
|
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36").send().await?.text().await?;
|
||||||
|
|
||||||
|
let urls = extract_urls(&body);
|
||||||
|
|
||||||
|
if !urls.is_empty() {
|
||||||
|
} else {
|
||||||
|
println!("no urls found, exiting...");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
println!("-------------------------------");
|
println!("-------------------------------");
|
||||||
println!(" now scraping page {}", r34pid / 42 + 1);
|
println!(" now scraping page {}", r34pid / 42 + 1);
|
||||||
println!("-------------------------------");
|
println!("-------------------------------");
|
||||||
|
|
||||||
let urls = extract_urls(&body);
|
|
||||||
for url in urls {
|
for url in urls {
|
||||||
println!("found post: {}", url);
|
println!("found post: {}", url);
|
||||||
|
|
||||||
@ -50,9 +58,7 @@ fn extract_urls(html: &str) -> Vec<String> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn extract_img_url(html: &str) -> String {
|
fn extract_img_url(html: &str) -> String {
|
||||||
let re =
|
let re = Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z]+\?[0-9]+").unwrap();
|
||||||
Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z]+\?[0-9]+")
|
|
||||||
.unwrap();
|
|
||||||
match re.find(html) {
|
match re.find(html) {
|
||||||
Some(img_url) => img_url.as_str().to_string(),
|
Some(img_url) => img_url.as_str().to_string(),
|
||||||
None => String::new(),
|
None => String::new(),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user