diff --git a/src/main.rs b/src/main.rs
index edd2e6d..c350b32 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,6 +17,7 @@ async fn main() -> ExitCode {
         .trim()
         .to_string();
 
+    let mut thread_counter = 0;
     let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
     let mut page = 0;
 
@@ -65,12 +66,47 @@ async fn main() -> ExitCode {
     }
 
     for url in urls {
-        let img_url =
-            extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
-        if img_url.is_empty() {
-            println!("image url not found");
-        } else {
-            println!("found image url: {img_url}");
+        // Each task clones the shared client: reqwest's `Client` is a handle onto one
+        // connection pool, so rebuilding it for every attempt throws that reuse away.
+        let client = client.clone();
+        // NOTE(review): the task is detached; confirm `main` waits for outstanding
+        // tasks before returning, otherwise in-flight requests are dropped.
+        tokio::spawn(async move {
+            let thread_id = format!("[{thread_counter: >4}]");
+            loop {
+                let body = match client.get(url.clone()).send().await {
+                    Ok(response) => response.text().await,
+                    Err(err) => Err(err),
+                };
+                let html = match body {
+                    Ok(html) => html,
+                    Err(err) => {
+                        // A transport failure ends this task instead of panicking it.
+                        println!("{thread_id} request failed: {err}");
+                        break;
+                    }
+                };
+                match extract_img_url(&html) {
+                    Ok(img_url) if img_url.is_empty() => {
+                        println!("{thread_id} image url not found");
+                    }
+                    Ok(img_url) => println!("{thread_id} found image url: {img_url}"),
+                    Err(_) => {
+                        println!("{thread_id} ratelimited, retrying after 1 second");
+                        // Async sleep: `std::thread::sleep` would stall the runtime worker.
+                        tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
+                        continue;
+                    }
+                }
+                break;
+            }
+        });
+        // Wrap the label counter so it always fits the 4-column field above.
+        thread_counter = (thread_counter + 1) % 10_000;
+        // Throttle: wait asynchronously while more than four tasks are alive, rather
+        // than blocking this thread with `std::thread::sleep`.
+        while tokio::runtime::Handle::current().metrics().num_alive_tasks() > 4 {
+            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
         }
     }
 
@@ -86,14 +122,27 @@ fn extract_urls(html: &str) -> Vec {
         .collect()
 }
 
-fn extract_img_url(html: &str) -> String {
-    if let Some(img_url) =
-        Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
-            .unwrap()
-            .find(html)
-    {
-        img_url.as_str().to_string()
-    } else {
-        String::new()
+/// Finds the first image URL in `html`.
+///
+/// Returns the matched URL, or an empty string when `html` contains none.
+///
+/// # Errors
+///
+/// Returns `Err("ratelimited")` when no URL matches and `html` contains the
+/// `503 Rate limiting` marker, so the caller can retry.
+fn extract_img_url(html: &str) -> Result<String, &'static str> {
+    // Compiled once and shared by all tasks rather than rebuilt on every call.
+    static IMG_URL_RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
+    let img_url_re = IMG_URL_RE.get_or_init(|| {
+        Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
+            .expect("hard-coded image URL pattern must compile")
+    });
+
+    if let Some(img_url) = img_url_re.find(html) {
+        Ok(img_url.as_str().to_string())
+    } else if html.contains("503 Rate limiting") {
+        Err("ratelimited")
+    } else {
+        Ok(String::new())
     }
 }