forked from danmax/r34-scraper
added exponential backoff
parent: c954cdeaea, commit: 7e732c4a75
src/main.rs (40 lines changed)
@@ -1,7 +1,8 @@
-use std::process::ExitCode;
-
+#![feature(async_closure)]
 use regex::Regex;
 use reqwest::Client;
+use std::process::ExitCode;
+use tokio::time::{sleep, Duration};
 
 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
 
@@ -17,12 +18,14 @@ async fn main() -> ExitCode {
         .to_string();
 
     let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
-    let mut page = 0;
+    let mut page = 1;
 
     loop {
+
         println!("now scraping page {page}");
 
-        let urls = extract_urls(
+        let post_html = async || {
+            extract_urls(
                 &client
                     .get(format!(
                         "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
@@ -34,14 +37,36 @@ async fn main() -> ExitCode {
                     .text()
                     .await
                     .unwrap(),
-        );
+            )
+        };
 
+        let mut urls = post_html().await;
+
+        let mut wait_time = 5000;
+
         if urls.is_empty() {
-            println!("no urls found, exiting...");
+            for reconnection_attempts in 0..4 {
+                println!("no urls found, retrying in {} seconds...", wait_time / 1000);
+                sleep(Duration::from_millis(wait_time)).await;
+
+                urls = post_html().await;
+
+                if !urls.is_empty() {
+                    println!("urls found! continuing...");
+                    break;
+                }
+
+                if reconnection_attempts == 3 {
+                    println!("no urls found in 4 attempts, exiting...");
+                    return ExitCode::FAILURE;
+                }
+
+                wait_time += 5000;
+            }
         }
 
         for url in urls {
             println!("found post: {url}");
 
             let img_url =
                 extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
@@ -75,4 +100,3 @@ fn extract_img_url(html: &str) -> String {
         String::new()
     }
 }
-
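
For context, the retry logic added in this commit wraps the page request in an async closure (post_html) so the same request can be re-issued, and it lengthens the wait by 5 seconds after every failed attempt, so the delays grow 5, 10, 15, 20 seconds across the four retries (additive growth; doubling the wait instead would make the backoff exponential). The following is a minimal, dependency-free sketch of that retry pattern, not the scraper's actual code: fetch_urls is a hypothetical stand-in for the post_html closure, and the blocking std sleep stands in for tokio's async sleep.

use std::thread::sleep;
use std::time::Duration;

// Hypothetical stand-in for the scraper's `post_html` closure: returns the
// URLs found on a page, or an empty Vec when the request yielded nothing.
fn fetch_urls() -> Vec<String> {
    Vec::new() // always empty here, so the retry path below is exercised
}

fn main() {
    // Mirrors the commit: start at 5000 ms and add 5000 ms per failed attempt.
    let mut wait_time: u64 = 5000;
    let mut urls = fetch_urls();

    if urls.is_empty() {
        for attempt in 0..4 {
            println!("no urls found, retrying in {} seconds...", wait_time / 1000);
            sleep(Duration::from_millis(wait_time));

            urls = fetch_urls();
            if !urls.is_empty() {
                println!("urls found! continuing...");
                break;
            }

            if attempt == 3 {
                println!("no urls found in 4 attempts, exiting...");
                return;
            }

            wait_time += 5000;
        }
    }

    // In the real scraper the URLs would be processed here.
    let _ = urls;
}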