fix: conflicts & feat: multiline logger

This commit is contained in:
javalsai 2024-10-19 21:13:02 +02:00
parent af16bdbdfe
commit ace4ac2811
Signed by: javalsai
SSH Key Fingerprint: SHA256:3G83yKhBUWVABVX/vPWH88xnK4+ptMtHkZGCRXD4Mk8
4 changed files with 104 additions and 31 deletions

66
Cargo.lock generated
View File

@@ -322,6 +322,19 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.52.0",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
@@ -344,6 +357,12 @@ version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.34"
@@ -738,6 +757,28 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]]
name = "ipnet"
version = "2.10.1"
@@ -774,6 +815,12 @@ dependencies = [
"log",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.161"
@@ -855,6 +902,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "object"
version = "0.36.5"
@@ -993,6 +1046,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "portable-atomic"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
[[package]]
name = "proc-macro2"
version = "1.0.88"
@@ -1018,6 +1077,7 @@ dependencies = [
"async-std",
"clap",
"futures",
"indicatif",
"regex",
"reqwest",
"tokio",
@@ -1508,6 +1568,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "untrusted"
version = "0.9.0"

View File

@@ -7,6 +7,7 @@ edition = "2021"
async-std = "1.13.0"
clap = { version = "4.5.20", features = ["derive"] }
futures = "0.3.31"
indicatif = "0.17.8"
regex = "1.11.0"
reqwest = { version = "0.12.8", features = ["blocking"] }
tokio = { version = "1", features = ["full"] }

View File

@@ -16,5 +16,5 @@ pub struct Args {
/// Async jobs to use for fetching
#[arg(short, long, default_value = "4")]
pub jobs: usize
pub jobs: usize,
}

View File

@@ -6,9 +6,8 @@ use futures::{stream, StreamExt};
use regex::Regex;
use reqwest::Client;
use tokio::time::{sleep, Duration};
use async_std::sync::Mutex;
use std::{process::ExitCode, sync::Arc};
use std::process::ExitCode;
#[tokio::main]
async fn main() -> ExitCode {
@@ -78,37 +77,44 @@ async fn main() -> ExitCode {
}
}
let ratelimit_lock = &Arc::new(Mutex::new(()));
let responses = stream::iter(urls.into_iter().enumerate()).map(|(i, url)| {
let multi_prog = indicatif::MultiProgress::new();
let urls_ammount = urls.len();
let responses = stream::iter(urls.into_iter().enumerate())
.map(|(i, url)| {
let i = i + 1;
let client = &client;
let this_bar = indicatif::ProgressBar::new_spinner();
this_bar.enable_steady_tick(Duration::from_millis(50));
let this_prog = multi_prog.insert(i, this_bar);
async move {
// "thread"
let thread_id = format!("[{: >4}]", i % 9999);
println!("{thread_id} scraping {url:?}");
loop {
let lock = ratelimit_lock.lock().await;
drop(lock);
this_prog.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m"));
let resp = client.get(&url).send().await.unwrap();
match extract_img_url(&resp.text().await.unwrap()) {
Ok(img_url) => {
if img_url.is_empty() {
println!("{thread_id} image url not found");
this_prog.abandon_with_message(format!(
"\x1b[30m[{i}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m"
));
} else {
println!("{thread_id} found image url: {img_url}");
this_prog.finish_with_message(format!(
"\x1b[30m[{i}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m"
));
}
break img_url;
}
Err(_) => {
let lock = ratelimit_lock.lock().await;
println!("{thread_id} ratelimited, retrying after 1 second");
this_prog
.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[31mratelimited, retrying after 1 second\x1b[0m"));
tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
drop(lock);
continue;
}
}
};
}
}).buffered(args.jobs);
}
})
.buffered(args.jobs);
let _ = responses.for_each(|_| async {}).await;
}