added retry with linear backoff (waits 5 s, 10 s, 15 s, 20 s; gives up after 4 attempts)

This commit is contained in:
danmax 2024-10-16 23:20:24 -04:00
parent c954cdeaea
commit 7e732c4a75

View File

@ -1,7 +1,8 @@
use std::process::ExitCode; #![feature(async_closure)]
use regex::Regex; use regex::Regex;
use reqwest::Client; use reqwest::Client;
use std::process::ExitCode;
use tokio::time::{sleep, Duration};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
@ -17,12 +18,14 @@ async fn main() -> ExitCode {
.to_string(); .to_string();
let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
let mut page = 0; let mut page = 1;
loop { loop {
println!("now scraping page {page}"); println!("now scraping page {page}");
let urls = extract_urls( let post_html = async || {
extract_urls(
&client &client
.get(format!( .get(format!(
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}", "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
@ -34,14 +37,36 @@ async fn main() -> ExitCode {
.text() .text()
.await .await
.unwrap(), .unwrap(),
); )
};
let mut urls = post_html().await;
let mut wait_time = 5000;
if urls.is_empty() { if urls.is_empty() {
println!("no urls found, exiting...");
for reconnection_attempts in 0..4 {
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
sleep(Duration::from_millis(wait_time)).await;
urls = post_html().await;
if !urls.is_empty() {
println!("urls found! continuing...");
break;
}
if reconnection_attempts == 3 {
println!("no urls found in 4 attempts, exiting...");
return ExitCode::FAILURE; return ExitCode::FAILURE;
} }
wait_time += 5000;
}
}
for url in urls { for url in urls {
println!("found post: {url}");
let img_url = let img_url =
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap()); extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
@ -75,4 +100,3 @@ fn extract_img_url(html: &str) -> String {
String::new() String::new()
} }
} }