forked from danmax/r34-scraper
fix: conflicts & feat: multiline logger
This commit is contained in:
parent
af16bdbdfe
commit
ace4ac2811
66
Cargo.lock
generated
66
Cargo.lock
generated
@@ -322,6 +322,19 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
|
||||
dependencies = [
|
||||
"encode_unicode",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"unicode-width",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
@@ -344,6 +357,12 @@ version = "0.8.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.34"
|
||||
@@ -738,6 +757,28 @@ dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indicatif"
|
||||
version = "0.17.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
|
||||
dependencies = [
|
||||
"console",
|
||||
"instant",
|
||||
"number_prefix",
|
||||
"portable-atomic",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.10.1"
|
||||
@@ -774,6 +815,12 @@ dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.161"
|
||||
@@ -855,6 +902,12 @@ dependencies = [
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "number_prefix"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.36.5"
|
||||
@@ -993,6 +1046,12 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.88"
|
||||
@@ -1018,6 +1077,7 @@ dependencies = [
|
||||
"async-std",
|
||||
"clap",
|
||||
"futures",
|
||||
"indicatif",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"tokio",
|
||||
@@ -1508,6 +1568,12 @@ dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
|
@@ -7,6 +7,7 @@ edition = "2021"
|
||||
async-std = "1.13.0"
|
||||
clap = { version = "4.5.20", features = ["derive"] }
|
||||
futures = "0.3.31"
|
||||
indicatif = "0.17.8"
|
||||
regex = "1.11.0"
|
||||
reqwest = { version = "0.12.8", features = ["blocking"] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
@@ -16,5 +16,5 @@ pub struct Args {
|
||||
|
||||
/// Async jobs to use for fetching
|
||||
#[arg(short, long, default_value = "4")]
|
||||
pub jobs: usize
|
||||
pub jobs: usize,
|
||||
}
|
||||
|
36
src/main.rs
36
src/main.rs
@@ -6,9 +6,8 @@ use futures::{stream, StreamExt};
|
||||
use regex::Regex;
|
||||
use reqwest::Client;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use async_std::sync::Mutex;
|
||||
|
||||
use std::{process::ExitCode, sync::Arc};
|
||||
use std::process::ExitCode;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> ExitCode {
|
||||
@@ -78,37 +77,44 @@ async fn main() -> ExitCode {
|
||||
}
|
||||
}
|
||||
|
||||
let ratelimit_lock = &Arc::new(Mutex::new(()));
|
||||
let responses = stream::iter(urls.into_iter().enumerate()).map(|(i, url)| {
|
||||
let multi_prog = indicatif::MultiProgress::new();
|
||||
let urls_ammount = urls.len();
|
||||
let responses = stream::iter(urls.into_iter().enumerate())
|
||||
.map(|(i, url)| {
|
||||
let i = i + 1;
|
||||
let client = &client;
|
||||
let this_bar = indicatif::ProgressBar::new_spinner();
|
||||
this_bar.enable_steady_tick(Duration::from_millis(50));
|
||||
let this_prog = multi_prog.insert(i, this_bar);
|
||||
async move {
|
||||
// "thread"
|
||||
let thread_id = format!("[{: >4}]", i % 9999);
|
||||
println!("{thread_id} scraping {url:?}");
|
||||
loop {
|
||||
let lock = ratelimit_lock.lock().await;
|
||||
drop(lock);
|
||||
this_prog.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m"));
|
||||
let resp = client.get(&url).send().await.unwrap();
|
||||
match extract_img_url(&resp.text().await.unwrap()) {
|
||||
Ok(img_url) => {
|
||||
if img_url.is_empty() {
|
||||
println!("{thread_id} image url not found");
|
||||
this_prog.abandon_with_message(format!(
|
||||
"\x1b[30m[{i}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||
));
|
||||
} else {
|
||||
println!("{thread_id} found image url: {img_url}");
|
||||
this_prog.finish_with_message(format!(
|
||||
"\x1b[30m[{i}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m"
|
||||
));
|
||||
}
|
||||
break img_url;
|
||||
}
|
||||
Err(_) => {
|
||||
let lock = ratelimit_lock.lock().await;
|
||||
println!("{thread_id} ratelimited, retrying after 1 second");
|
||||
this_prog
|
||||
.set_message(format!("\x1b[30m[{i}/{urls_ammount}] \x1b[31mratelimited, retrying after 1 second\x1b[0m"));
|
||||
tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
|
||||
drop(lock);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}).buffered(args.jobs);
|
||||
}
|
||||
})
|
||||
.buffered(args.jobs);
|
||||
let _ = responses.for_each(|_| async {}).await;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user