From 7e732c4a75d508314e4bb20f890d43073b86fb36 Mon Sep 17 00:00:00 2001
From: danmax
Date: Wed, 16 Oct 2024 23:20:24 -0400
Subject: [PATCH] added exponential backoff

---
 src/main.rs | 64 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 65d01d5..7f6d83e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,8 @@
-use std::process::ExitCode;
-
+#![feature(async_closure)]
 use regex::Regex;
 use reqwest::Client;
+use std::process::ExitCode;
+use tokio::time::{sleep, Duration};
 
 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
 
@@ -17,31 +18,55 @@ async fn main() -> ExitCode {
         .to_string();
 
     let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
-    let mut page = 0;
+    let mut page = 1;
 
     loop {
+        println!("now scraping page {page}");
 
-        let urls = extract_urls(
-            &client
-                .get(format!(
-                    "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
-                    page * 42
-                ))
-                .send()
-                .await
-                .unwrap()
-                .text()
-                .await
-                .unwrap(),
-        );
+        let post_html = async || {
+            extract_urls(
+                &client
+                    .get(format!(
+                        "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
+                        page * 42
+                    ))
+                    .send()
+                    .await
+                    .unwrap()
+                    .text()
+                    .await
+                    .unwrap(),
+            )
+        };
+
+        let mut urls = post_html().await;
+
+        let mut wait_time = 5000;
+
 
         if urls.is_empty() {
-            println!("no urls found, exiting...");
-            return ExitCode::FAILURE;
+
+            for reconnection_attempts in 0..4 {
+                println!("no urls found, retrying in {} seconds...", wait_time / 1000);
+                sleep(Duration::from_millis(wait_time)).await;
+
+                urls = post_html().await;
+
+                if !urls.is_empty() {
+                    println!("urls found! continuing...");
+                    break;
+                }
+
+                if reconnection_attempts == 3 {
+                    println!("no urls found in 4 attempts, exiting...");
+                    return ExitCode::FAILURE;
+                }
+
+                wait_time += 5000;
+            }
         }
 
         for url in urls {
-
             println!("found post: {url}");
 
             let img_url = extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
@@ -75,4 +100,3 @@ fn extract_img_url(html: &str) -> String {
         String::new()
     }
 }
-
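
For reference, below is a minimal standalone sketch of the retry pattern this patch introduces. It is not the patch's code: fetch_page is a hypothetical stand-in for the scraper's request-plus-extract_urls step, it uses a plain async fn instead of the nightly async_closure feature, and it assumes tokio is available as a dependency. Note that the patch grows the delay by a fixed 5 seconds per attempt (linear backoff); doubling the delay instead, as done here, is what makes the backoff exponential (5 s, 10 s, 20 s, 40 s).

// assumes Cargo.toml has: tokio = { version = "1", features = ["full"] }
use tokio::time::{sleep, Duration};

// Hypothetical stand-in for fetching one listing page and running extract_urls();
// not part of the patch. Always returns an empty list in this sketch.
async fn fetch_page(page: u64) -> Vec<String> {
    let _ = page; // network call elided
    Vec::new()
}

#[tokio::main]
async fn main() {
    let page = 1;
    let mut urls = fetch_page(page).await;

    // Same shape as the patch's loop: up to 4 retries, starting at a 5 s delay.
    // Unlike the patch (wait_time += 5000), the delay doubles on each attempt.
    let mut wait_time = Duration::from_millis(5000);

    for attempt in 0..4 {
        if !urls.is_empty() {
            break;
        }
        println!("no urls found, retrying in {} seconds...", wait_time.as_secs());
        sleep(wait_time).await;

        urls = fetch_page(page).await;
        wait_time *= 2; // exponential growth: 5 s, 10 s, 20 s, 40 s

        if urls.is_empty() && attempt == 3 {
            eprintln!("no urls found in 4 attempts, giving up");
            return;
        }
    }

    println!("urls found! continuing with {} urls", urls.len());
}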