diff --git a/.gitignore b/.gitignore index ea8c4bf..d921c6b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +/downloads diff --git a/Cargo.lock b/Cargo.lock index aaa5301..9dcb1a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -833,16 +833,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.22" @@ -973,29 +963,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1083,15 +1050,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "redox_syscall" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" version = "1.11.0" @@ -1253,12 +1211,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "security-framework" version = "2.11.1" @@ -1332,15 +1284,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" -[[package]] -name = "signal-hook-registry" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" -dependencies = [ - "libc", -] - [[package]] name = "slab" version = "0.4.9" @@ -1463,9 +1406,7 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", - "signal-hook-registry", "socket2", "tokio-macros", "windows-sys 0.52.0", diff --git a/Cargo.toml b/Cargo.toml index 7fb2766..4d508fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,10 @@ version = "0.1.0" edition = "2021" [dependencies] -async-std = "1.13.0" -clap = { version = "4.5.20", features = ["derive"] } -futures = "0.3.31" -indicatif = "0.17.8" -regex = "1.11.0" -reqwest = { version = "0.12.8", features = ["blocking"] } -tokio = { version = "1", features = ["full"] } +async-std = "1" +clap = { version = "4", features = ["derive"] } +futures = "0" +indicatif = "0" +regex = "1" +reqwest = { version = "0", features = ["blocking"] } +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } diff --git a/src/main.rs b/src/main.rs index 40b7c39..c00734c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,8 +8,10 @@ use regex::Regex; use reqwest::Client; use tokio::time::{sleep, Duration}; -use std::process::ExitCode; use std::io::Write; +use std::process::ExitCode; + +const BAR_LENGTH: u64 = 8; #[tokio::main] async fn main() -> ExitCode { @@ -21,10 +23,16 @@ async fn main() -> ExitCode { tags_binding .split(' ') .filter(|item| !item.is_empty()) - .map(|item| item.to_owned()) + .map(std::borrow::ToOwned::to_owned) .collect() }); - let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect(); + let tags_folder = &tags.join("+"); + let uri_tags = tags + .into_iter() + .intersperse(String::from("+")) + .collect::(); + + let _ = std::fs::create_dir(tags_folder); let client = Client::builder() .user_agent(&args.user_agent) @@ -55,74 +63,65 @@ async fn main() -> ExitCode { }; let mut urls = post_html(&client).await; - - let mut wait_time = 5000; - if urls.is_empty() { - for reconnection_attempts in 0..4 { - println!("no urls found, retrying in {} seconds...", wait_time / 1000); - sleep(Duration::from_millis(wait_time)).await; + let mut reconnection_attempts = 0; + loop { + println!("no urls found, retrying in 5 seconds..."); + sleep(Duration::from_millis(5000)).await; urls = post_html(&client).await; - if !urls.is_empty() { println!("urls found! continuing..."); break; } - if reconnection_attempts == 3 { - println!("no urls found in 4 attempts, exiting..."); + reconnection_attempts += 1; + if reconnection_attempts == 12 { + println!("no urls found in 1 minute, exiting..."); return ExitCode::FAILURE; } - - wait_time += 5000; } } let multi_prog = indicatif::MultiProgress::new(); - let urls_ammount = urls.len(); + let urls_amount = urls.len(); let responses = stream::iter(urls.into_iter().enumerate()) .map(|(i, url)| { let i = i + 1; let client = &client; - let this_bar = indicatif::ProgressBar::new_spinner(); - this_bar.enable_steady_tick(Duration::from_millis(50)); - let this_prog = multi_prog.insert(i, this_bar); + let this_bar = indicatif::ProgressBar::new(BAR_LENGTH); + this_bar.set_style(indicatif::ProgressStyle::with_template("[{bar}] {msg}").unwrap().progress_chars("=> ")); + let this_bar = multi_prog.insert(i, this_bar); async move { // "thread" loop { - this_prog.set_message(format!("\x1b[30m[{i: >4}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m")); + this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url:?}\x1b[0m")); let resp = client.get(&url).send().await.unwrap(); - match extract_img_url(&resp.text().await.unwrap()) { - Ok(img_url) => { - if img_url.is_empty() { - this_prog.abandon_with_message(format!( - "\x1b[30m[{i: >4}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m" - )); - } else { - download_file(&img_url, this_prog, i, urls_ammount).await; - - } - break; - } - Err(_) => { - this_prog - .set_message(format!( - "\x1b[30m[{i: >4}/{urls_ammount}] \x1b[31mratelimited, retrying after {}ms\x1b[0m", - args.delay.as_millis()) - ); - tokio::time::sleep(args.delay).await; - continue; + if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) { + if img_url.is_empty() { + this_bar.abandon_with_message(format!( + "\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m" + )); + } else { + download_file(&img_url, this_bar, i, urls_amount, tags_folder).await; } + break; } + + this_bar + .set_message(format!( + "\x1b[37m[{i: >4}/{urls_amount}] \x1b[31mratelimited, retrying after {}ms\x1b[0m", + args.delay.as_millis()) + ); + tokio::time::sleep(args.delay).await; } } }) .buffered(args.jobs); - let _ = responses.for_each(|_| async {}).await; + let _ = responses.for_each(|()| async {}).await; } - return ExitCode::SUCCESS; + ExitCode::SUCCESS } fn extract_urls(html: &str) -> Vec { @@ -140,47 +139,53 @@ fn extract_img_url(html: &str) -> Result { .find(html) { Ok(img_url.as_str().to_string()) + } else if html.contains("503 Rate limiting") { + Err("ratelimited") } else { - if html.contains("503 Rate limiting") { - Err("ratelimited") - } else { - Ok(String::new()) - } + Ok(String::new()) } } -async fn download_file(img_url: &str, this_prog: ProgressBar, i: usize, urls_ammount: usize) { +async fn download_file( + img_url: &str, + this_bar: ProgressBar, + i: usize, + urls_amount: usize, + tags_folder: &str, +) { let args = args::Args::parse(); let file_name = Regex::new(r"[^/]+$") .unwrap() - .find(img_url) + .find(img_url) .map(|m| m.as_str()) .unwrap(); - let downl_img = Client::new() + let file_path = tags_folder.to_owned() + "/" + file_name; + + let mut file = if std::fs::File::open(&file_path).is_ok() { + this_bar.finish_with_message(format!( + "\x1b[37m[{i: >4}/{urls_amount}] \x1b[33m{file_name} exists, skipping...\x1b[0m" + )); + return; + } else { + std::fs::File::create(file_path).unwrap() + }; + + let mut res = Client::new() .get(img_url) .header("User-Agent", &args.user_agent) .send() .await - .unwrap() - .bytes() - .await .unwrap(); - - match std::fs::File::open(file_name) { - Ok(_) => { - this_prog.finish_with_message(format!( - "\x1b[30m[{i: >4}/{urls_ammount}] \x1b[32mfile exists, skipping...\x1b[0m" - )); - } - Err(_) => { - - this_prog.finish_with_message(format!( - "\x1b[30m[{i: >4}/{urls_ammount}] \x1b[32mdownloaded image: {img_url}\x1b[0m" - )); - let mut file = std::fs::File::create(file_name).unwrap(); - file.write_all(&downl_img).unwrap(); - } + let file_length = res.content_length().unwrap(); + let mut written = 0; + while let Some(img_chunk) = res.chunk().await.unwrap() { + file.write_all(&img_chunk).unwrap(); + written += img_chunk.len(); + this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64); } + this_bar.finish_with_message(format!( + "\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m" + )); }