added retry with linear backoff (delay grows by 5 seconds per attempt)

This commit is contained in:
danmax 2024-10-16 23:20:24 -04:00
parent c954cdeaea
commit 7e732c4a75

View File

@ -1,7 +1,8 @@
use std::process::ExitCode;
#![feature(async_closure)]
use regex::Regex;
use reqwest::Client;
use std::process::ExitCode;
use tokio::time::{sleep, Duration};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
@ -17,12 +18,14 @@ async fn main() -> ExitCode {
.to_string();
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
let mut page = 0;
let mut page = 1;
loop {
println!("now scraping page {page}");
let urls = extract_urls(
let post_html = async || {
extract_urls(
&client
.get(format!(
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
@ -34,14 +37,36 @@ async fn main() -> ExitCode {
.text()
.await
.unwrap(),
);
)
};
let mut urls = post_html().await;
let mut wait_time = 5000;
if urls.is_empty() {
println!("no urls found, exiting...");
for reconnection_attempts in 0..4 {
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
sleep(Duration::from_millis(wait_time)).await;
urls = post_html().await;
if !urls.is_empty() {
println!("urls found! continuing...");
break;
}
if reconnection_attempts == 3 {
println!("no urls found in 4 attempts, exiting...");
return ExitCode::FAILURE;
}
wait_time += 5000;
}
}
for url in urls {
println!("found post: {url}");
let img_url =
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
@ -75,4 +100,3 @@ fn extract_img_url(html: &str) -> String {
String::new()
}
}