added multithreading
Co-authored-by: ErrorNoInternet <errornointernet@envs.net>
This commit is contained in:
parent
abd91a6e95
commit
eeee4f50b3
40
src/main.rs
40
src/main.rs
@@ -17,6 +17,7 @@ async fn main() -> ExitCode {
|
|||||||
.trim()
|
.trim()
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
|
let mut thread_counter = 0;
|
||||||
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
||||||
let mut page = 0;
|
let mut page = 0;
|
||||||
|
|
||||||
@@ -65,12 +66,33 @@ async fn main() -> ExitCode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for url in urls {
|
for url in urls {
|
||||||
let img_url =
|
tokio::spawn(async move {
|
||||||
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
|
let thread_id = format!("[{thread_counter: >4}]");
|
||||||
|
loop {
|
||||||
|
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
||||||
|
match extract_img_url(&client.get(url.clone()).send().await.unwrap().text().await.unwrap()) {
|
||||||
|
Ok(img_url) => {
|
||||||
if img_url.is_empty() {
|
if img_url.is_empty() {
|
||||||
println!("image url not found");
|
println!("{thread_id} image url not found");
|
||||||
} else {
|
} else {
|
||||||
println!("found image url: {img_url}");
|
println!("{thread_id} found image url: {img_url}");
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
println!("{thread_id} ratelimited, retrying after 1 second");
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(1000));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
thread_counter += 1;
|
||||||
|
if thread_counter > 9999 {
|
||||||
|
thread_counter = 0;
|
||||||
|
}
|
||||||
|
while tokio::runtime::Handle::current().metrics().num_alive_tasks() > 4 {
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,14 +108,18 @@ fn extract_urls(html: &str) -> Vec<String> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_img_url(html: &str) -> String {
|
fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
||||||
if let Some(img_url) =
|
if let Some(img_url) =
|
||||||
Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.find(html)
|
.find(html)
|
||||||
{
|
{
|
||||||
img_url.as_str().to_string()
|
Ok(img_url.as_str().to_string())
|
||||||
} else {
|
} else {
|
||||||
String::new()
|
if html.contains("503 Rate limiting") {
|
||||||
|
Err("ratelimited")
|
||||||
|
} else {
|
||||||
|
Ok(String::new())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user