forked from danmax/r34-scraper
fix: conflicts & feat: multiline logger
This commit is contained in:
@@ -16,5 +16,5 @@ pub struct Args {
|
||||
|
||||
/// Async jobs to use for fetching
|
||||
#[arg(short, long, default_value = "4")]
|
||||
pub jobs: usize
|
||||
pub jobs: usize,
|
||||
}
|
||||
|
66
src/main.rs
66
src/main.rs
@@ -6,9 +6,8 @@ use futures::{stream, StreamExt};
|
||||
use regex::Regex;
|
||||
use reqwest::Client;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use async_std::sync::Mutex;
|
||||
|
||||
use std::{process::ExitCode, sync::Arc};
|
||||
use std::process::ExitCode;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> ExitCode {
|
||||
@@ -78,37 +77,44 @@ async fn main() -> ExitCode {
|
||||
}
|
||||
}
|
||||
|
||||
let ratelimit_lock = &Arc::new(Mutex::new(()));
|
||||
let responses = stream::iter(urls.into_iter().enumerate()).map(|(i, url)| {
|
||||
let client = &client;
|
||||
async move {
|
||||
// "thread"
|
||||
let thread_id = format!("[{: >4}]", i % 9999);
|
||||
println!("{thread_id} scraping {url:?}");
|
||||
loop {
|
||||
let lock = ratelimit_lock.lock().await;
|
||||
drop(lock);
|
||||
let resp = client.get(&url).send().await.unwrap();
|
||||
match extract_img_url(&resp.text().await.unwrap()) {
|
||||
Ok(img_url) => {
|
||||
if img_url.is_empty() {
|
||||
println!("{thread_id} image url not found");
|
||||
} else {
|
||||
println!("{thread_id} found image url: {img_url}");
|
||||
let multi_prog = indicatif::MultiProgress::new();
|
||||
let urls_ammount = urls.len();
|
||||
let responses = stream::iter(urls.into_iter().enumerate())
|
||||
.map(|(i, url)| {
|
||||
let i = i + 1;
|
||||
let client = &client;
|
||||
let this_bar = indicatif::ProgressBar::new_spinner();
|
||||
this_bar.enable_steady_tick(Duration::from_millis(50));
|
||||
let this_prog = multi_prog.insert(i, this_bar);
|
||||
async move {
|
||||
// One future per URL (a "thread" of work); `.buffered(args.jobs)` caps how many run at once.
|
||||
loop {
|
||||
this_prog.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m"));
|
||||
let resp = client.get(&url).send().await.unwrap();
|
||||
match extract_img_url(&resp.text().await.unwrap()) {
|
||||
Ok(img_url) => {
|
||||
if img_url.is_empty() {
|
||||
this_prog.abandon_with_message(format!(
|
||||
"\x1b[30m[{i}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||
));
|
||||
} else {
|
||||
this_prog.finish_with_message(format!(
|
||||
"\x1b[30m[{i}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m"
|
||||
));
|
||||
}
|
||||
break img_url;
|
||||
}
|
||||
Err(_) => {
|
||||
this_prog
|
||||
.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[31mratelimited, retrying after 1 second\x1b[0m"));
|
||||
tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
|
||||
continue;
|
||||
}
|
||||
break img_url;
|
||||
}
|
||||
Err(_) => {
|
||||
let lock = ratelimit_lock.lock().await;
|
||||
println!("{thread_id} ratelimited, retrying after 1 second");
|
||||
tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
|
||||
drop(lock);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}).buffered(args.jobs);
|
||||
}
|
||||
})
|
||||
.buffered(args.jobs);
|
||||
let _ = responses.for_each(|_| async {}).await;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user