diff --git a/Cargo.lock b/Cargo.lock index 6d130ef..aaa5301 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,6 +322,19 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.52.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -344,6 +357,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -738,6 +757,28 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "ipnet" version = "2.10.1" @@ -774,6 +815,12 @@ dependencies = [ "log", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.161" @@ -855,6 +902,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.36.5" @@ -993,6 +1046,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + [[package]] name = "proc-macro2" version = "1.0.88" @@ -1018,6 +1077,7 @@ dependencies = [ "async-std", "clap", "futures", + "indicatif", "regex", "reqwest", "tokio", @@ -1508,6 +1568,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 80fd8a3..7fb2766 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" async-std = "1.13.0" clap = { version = "4.5.20", features = ["derive"] } futures = "0.3.31" +indicatif = "0.17.8" regex = "1.11.0" reqwest = { version = "0.12.8", features = ["blocking"] } tokio = { version = "1", features = ["full"] } diff --git a/src/args/mod.rs b/src/args/mod.rs index 22c3479..5e78d06 100644 --- a/src/args/mod.rs +++ b/src/args/mod.rs @@ -16,5 +16,5 @@ pub struct Args { /// Async jobs to use for fetching #[arg(short, long, default_value = "4")] - pub jobs: usize + pub jobs: usize, } diff --git a/src/main.rs b/src/main.rs index 9ea9b16..b14ae51 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,9 +6,8 @@ use futures::{stream, StreamExt}; use regex::Regex; use reqwest::Client; use tokio::time::{sleep, Duration}; -use async_std::sync::Mutex; -use std::{process::ExitCode, sync::Arc}; +use std::process::ExitCode; #[tokio::main] async fn main() -> ExitCode { @@ -78,37 +77,44 @@ async fn main() -> ExitCode { } } - let ratelimit_lock = &Arc::new(Mutex::new(())); - let responses = stream::iter(urls.into_iter().enumerate()).map(|(i, url)| { - let client = &client; - async move { - // "thread" - let thread_id = format!("[{: >4}]", i % 9999); - println!("{thread_id} scraping {url:?}"); - loop { - let lock = ratelimit_lock.lock().await; - drop(lock); - let resp = client.get(&url).send().await.unwrap(); - match extract_img_url(&resp.text().await.unwrap()) { - Ok(img_url) => { - if img_url.is_empty() { - println!("{thread_id} image url not found"); - } else { - println!("{thread_id} found image url: {img_url}"); + let multi_prog = indicatif::MultiProgress::new(); + let urls_ammount = urls.len(); + let responses = stream::iter(urls.into_iter().enumerate()) + .map(|(i, url)| { + let i = i + 1; + let client = &client; + let this_bar = indicatif::ProgressBar::new_spinner(); + this_bar.enable_steady_tick(Duration::from_millis(50)); + let this_prog = multi_prog.insert(i, this_bar); + async move { + // "thread" + loop { + this_prog.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m")); + let resp = client.get(&url).send().await.unwrap(); + match extract_img_url(&resp.text().await.unwrap()) { + Ok(img_url) => { + if img_url.is_empty() { + this_prog.abandon_with_message(format!( + "\x1b[30m[{i}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m" + )); + } else { + this_prog.finish_with_message(format!( + "\x1b[30m[{i}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m" + )); + } + break img_url; + } + Err(_) => { + this_prog + .set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[31mratelimited, retrying after 1 second\x1b[0m")); + tokio::time::sleep(std::time::Duration::from_millis(1000)).await; + continue; } - break img_url; - } - Err(_) => { - let lock = ratelimit_lock.lock().await; - println!("{thread_id} ratelimited, retrying after 1 second"); - tokio::time::sleep(std::time::Duration::from_millis(1000)).await; - drop(lock); - continue; } } - }; - } - }).buffered(args.jobs); + } + }) + .buffered(args.jobs); let _ = responses.for_each(|_| async {}).await; }