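added exponential backoff (note: as implemented in this diff, the retry delay grows linearly — 5 s, 10 s, 15 s, 20 s over up to four retries — rather than exponentially)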
This commit is contained in:
parent
c954cdeaea
commit
7e732c4a75
40
src/main.rs
40
src/main.rs
@ -1,7 +1,8 @@
|
|||||||
use std::process::ExitCode;
|
#![feature(async_closure)]
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
|
use std::process::ExitCode;
|
||||||
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
|
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
|
||||||
|
|
||||||
@ -17,12 +18,14 @@ async fn main() -> ExitCode {
|
|||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
||||||
let mut page = 0;
|
let mut page = 1;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
|
||||||
println!("now scraping page {page}");
|
println!("now scraping page {page}");
|
||||||
|
|
||||||
let urls = extract_urls(
|
let post_html = async || {
|
||||||
|
extract_urls(
|
||||||
&client
|
&client
|
||||||
.get(format!(
|
.get(format!(
|
||||||
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
|
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
|
||||||
@ -34,14 +37,36 @@ async fn main() -> ExitCode {
|
|||||||
.text()
|
.text()
|
||||||
.await
|
.await
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut urls = post_html().await;
|
||||||
|
|
||||||
|
let mut wait_time = 5000;
|
||||||
|
|
||||||
if urls.is_empty() {
|
if urls.is_empty() {
|
||||||
println!("no urls found, exiting...");
|
|
||||||
|
for reconnection_attempts in 0..4 {
|
||||||
|
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
|
||||||
|
sleep(Duration::from_millis(wait_time)).await;
|
||||||
|
|
||||||
|
urls = post_html().await;
|
||||||
|
|
||||||
|
if !urls.is_empty() {
|
||||||
|
println!("urls found! continuing...");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if reconnection_attempts == 3 {
|
||||||
|
println!("no urls found in 4 attempts, exiting...");
|
||||||
return ExitCode::FAILURE;
|
return ExitCode::FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wait_time += 5000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for url in urls {
|
for url in urls {
|
||||||
println!("found post: {url}");
|
|
||||||
|
|
||||||
let img_url =
|
let img_url =
|
||||||
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
|
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
|
||||||
@ -75,4 +100,3 @@ fn extract_img_url(html: &str) -> String {
|
|||||||
String::new()
|
String::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user