javalsai-changes #6

Merged
danmax merged 9 commits from javalsai-changes into main 2024-10-19 21:39:41 +02:00
4 changed files with 104 additions and 31 deletions
Showing only changes of commit ace4ac2811 - Show all commits

66
Cargo.lock generated
View File

@@ -322,6 +322,19 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "core-foundation" name = "core-foundation"
version = "0.9.4" version = "0.9.4"
@@ -344,6 +357,12 @@ version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.34" version = "0.8.34"
@@ -738,6 +757,28 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "ipnet" name = "ipnet"
version = "2.10.1" version = "2.10.1"
@@ -774,6 +815,12 @@ dependencies = [
"log", "log",
] ]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.161" version = "0.2.161"
@@ -855,6 +902,12 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]] [[package]]
name = "object" name = "object"
version = "0.36.5" version = "0.36.5"
@@ -993,6 +1046,12 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "portable-atomic"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.88" version = "1.0.88"
@@ -1018,6 +1077,7 @@ dependencies = [
"async-std", "async-std",
"clap", "clap",
"futures", "futures",
"indicatif",
"regex", "regex",
"reqwest", "reqwest",
"tokio", "tokio",
@@ -1508,6 +1568,12 @@ dependencies = [
"tinyvec", "tinyvec",
] ]
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]] [[package]]
name = "untrusted" name = "untrusted"
version = "0.9.0" version = "0.9.0"

View File

@@ -7,6 +7,7 @@ edition = "2021"
async-std = "1.13.0" async-std = "1.13.0"
clap = { version = "4.5.20", features = ["derive"] } clap = { version = "4.5.20", features = ["derive"] }
futures = "0.3.31" futures = "0.3.31"
indicatif = "0.17.8"
regex = "1.11.0" regex = "1.11.0"
reqwest = { version = "0.12.8", features = ["blocking"] } reqwest = { version = "0.12.8", features = ["blocking"] }
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }

View File

@@ -16,5 +16,5 @@ pub struct Args {
/// Async jobs to use for fetching /// Async jobs to use for fetching
#[arg(short, long, default_value = "4")] #[arg(short, long, default_value = "4")]
pub jobs: usize pub jobs: usize,
} }

View File

@@ -6,9 +6,8 @@ use futures::{stream, StreamExt};
use regex::Regex; use regex::Regex;
use reqwest::Client; use reqwest::Client;
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
use async_std::sync::Mutex;
use std::{process::ExitCode, sync::Arc}; use std::process::ExitCode;
#[tokio::main] #[tokio::main]
async fn main() -> ExitCode { async fn main() -> ExitCode {
@@ -78,37 +77,44 @@ async fn main() -> ExitCode {
} }
} }
let ratelimit_lock = &Arc::new(Mutex::new(())); let multi_prog = indicatif::MultiProgress::new();
let responses = stream::iter(urls.into_iter().enumerate()).map(|(i, url)| { let urls_ammount = urls.len();
let responses = stream::iter(urls.into_iter().enumerate())
.map(|(i, url)| {
let i = i + 1;
let client = &client; let client = &client;
let this_bar = indicatif::ProgressBar::new_spinner();
this_bar.enable_steady_tick(Duration::from_millis(50));
let this_prog = multi_prog.insert(i, this_bar);
async move { async move {
// "thread" // "thread"
let thread_id = format!("[{: >4}]", i % 9999);
println!("{thread_id} scraping {url:?}");
loop { loop {
let lock = ratelimit_lock.lock().await; this_prog.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m"));
drop(lock);
let resp = client.get(&url).send().await.unwrap(); let resp = client.get(&url).send().await.unwrap();
match extract_img_url(&resp.text().await.unwrap()) { match extract_img_url(&resp.text().await.unwrap()) {
Ok(img_url) => { Ok(img_url) => {
if img_url.is_empty() { if img_url.is_empty() {
println!("{thread_id} image url not found"); this_prog.abandon_with_message(format!(
"\x1b[30m[{i}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m"
));
} else { } else {
println!("{thread_id} found image url: {img_url}"); this_prog.finish_with_message(format!(
"\x1b[30m[{i}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m"
));
} }
break img_url; break img_url;
} }
Err(_) => { Err(_) => {
let lock = ratelimit_lock.lock().await; this_prog
println!("{thread_id} ratelimited, retrying after 1 second"); .set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[31mratelimited, retrying after 1 second\x1b[0m"));
tokio::time::sleep(std::time::Duration::from_millis(1000)).await; tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
drop(lock);
continue; continue;
} }
} }
};
} }
}).buffered(args.jobs); }
})
.buffered(args.jobs);
let _ = responses.for_each(|_| async {}).await; let _ = responses.for_each(|_| async {}).await;
} }