forked from danmax/r34-scraper
added multithreading
Co-authored-by: ErrorNoInternet <errornointernet@envs.net>
This commit is contained in:
parent
abd91a6e95
commit
eeee4f50b3
44
src/main.rs
44
src/main.rs
@ -17,6 +17,7 @@ async fn main() -> ExitCode {
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let mut thread_counter = 0;
|
||||
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
||||
let mut page = 0;
|
||||
|
||||
@ -65,12 +66,33 @@ async fn main() -> ExitCode {
|
||||
}
|
||||
|
||||
for url in urls {
|
||||
let img_url =
|
||||
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
|
||||
if img_url.is_empty() {
|
||||
println!("image url not found");
|
||||
} else {
|
||||
println!("found image url: {img_url}");
|
||||
tokio::spawn(async move {
|
||||
let thread_id = format!("[{thread_counter: >4}]");
|
||||
loop {
|
||||
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
||||
match extract_img_url(&client.get(url.clone()).send().await.unwrap().text().await.unwrap()) {
|
||||
Ok(img_url) => {
|
||||
if img_url.is_empty() {
|
||||
println!("{thread_id} image url not found");
|
||||
} else {
|
||||
println!("{thread_id} found image url: {img_url}");
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
println!("{thread_id} ratelimited, retrying after 1 second");
|
||||
std::thread::sleep(std::time::Duration::from_millis(1000));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
});
|
||||
thread_counter += 1;
|
||||
if thread_counter > 9999 {
|
||||
thread_counter = 0;
|
||||
}
|
||||
while tokio::runtime::Handle::current().metrics().num_alive_tasks() > 4 {
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,14 +108,18 @@ fn extract_urls(html: &str) -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_img_url(html: &str) -> String {
|
||||
fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
||||
if let Some(img_url) =
|
||||
Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
||||
.unwrap()
|
||||
.find(html)
|
||||
{
|
||||
img_url.as_str().to_string()
|
||||
Ok(img_url.as_str().to_string())
|
||||
} else {
|
||||
String::new()
|
||||
if html.contains("503 Rate limiting") {
|
||||
Err("ratelimited")
|
||||
} else {
|
||||
Ok(String::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user