forked from danmax/r34-scraper
Compare commits
18 Commits
javalsai-c
...
ctrlc
Author | SHA1 | Date | |
---|---|---|---|
094b0b7412
|
|||
351439034e | |||
fe3c399c1d
|
|||
|
89830d6e1e | ||
1b5608b014 | |||
a852c8bcc5
|
|||
235e13230b
|
|||
|
bd517ed0b5 | ||
5f848be434 | |||
8723769429
|
|||
08ed5e51f2 | |||
3573f6ff5a
|
|||
ee0e938782 | |||
ca6df90460 | |||
325730bd37
|
|||
|
188b714741 | ||
|
b5a70e3426 | ||
137378beb3 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
/target
|
/target
|
||||||
|
/downloads
|
||||||
|
94
Cargo.lock
generated
94
Cargo.lock
generated
@@ -267,6 +267,12 @@ version = "1.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg_aliases"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.20"
|
version = "4.5.20"
|
||||||
@@ -357,6 +363,16 @@ version = "0.8.20"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctrlc"
|
||||||
|
version = "3.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3"
|
||||||
|
dependencies = [
|
||||||
|
"nix",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encode_unicode"
|
name = "encode_unicode"
|
||||||
version = "0.3.6"
|
version = "0.3.6"
|
||||||
@@ -833,16 +849,6 @@ version = "0.4.14"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lock_api"
|
|
||||||
version = "0.4.12"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
"scopeguard",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.22"
|
version = "0.4.22"
|
||||||
@@ -902,6 +908,18 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nix"
|
||||||
|
version = "0.29.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cfg-if",
|
||||||
|
"cfg_aliases",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "number_prefix"
|
name = "number_prefix"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@@ -973,29 +991,6 @@ version = "2.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
|
checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot"
|
|
||||||
version = "0.12.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
|
||||||
dependencies = [
|
|
||||||
"lock_api",
|
|
||||||
"parking_lot_core",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot_core"
|
|
||||||
version = "0.9.10"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"libc",
|
|
||||||
"redox_syscall",
|
|
||||||
"smallvec",
|
|
||||||
"windows-targets",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.3.1"
|
version = "2.3.1"
|
||||||
@@ -1072,10 +1067,11 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "r34-scraper"
|
name = "r34-scraper"
|
||||||
version = "0.1.0"
|
version = "1.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"clap",
|
"clap",
|
||||||
|
"ctrlc",
|
||||||
"futures",
|
"futures",
|
||||||
"indicatif",
|
"indicatif",
|
||||||
"regex",
|
"regex",
|
||||||
@@ -1083,15 +1079,6 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "redox_syscall"
|
|
||||||
version = "0.5.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.11.0"
|
version = "1.11.0"
|
||||||
@@ -1253,12 +1240,6 @@ dependencies = [
|
|||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "scopeguard"
|
|
||||||
version = "1.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "security-framework"
|
name = "security-framework"
|
||||||
version = "2.11.1"
|
version = "2.11.1"
|
||||||
@@ -1332,15 +1313,6 @@ version = "1.3.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "signal-hook-registry"
|
|
||||||
version = "1.4.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "slab"
|
name = "slab"
|
||||||
version = "0.4.9"
|
version = "0.4.9"
|
||||||
@@ -1386,9 +1358,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.79"
|
version = "2.0.82"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
|
checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -1463,9 +1435,7 @@ dependencies = [
|
|||||||
"bytes",
|
"bytes",
|
||||||
"libc",
|
"libc",
|
||||||
"mio",
|
"mio",
|
||||||
"parking_lot",
|
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"signal-hook-registry",
|
|
||||||
"socket2",
|
"socket2",
|
||||||
"tokio-macros",
|
"tokio-macros",
|
||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
|
17
Cargo.toml
17
Cargo.toml
@@ -1,13 +1,14 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "r34-scraper"
|
name = "r34-scraper"
|
||||||
version = "0.1.0"
|
version = "1.0.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-std = "1.13.0"
|
async-std = "1"
|
||||||
clap = { version = "4.5.20", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
futures = "0.3.31"
|
ctrlc = "3"
|
||||||
indicatif = "0.17.8"
|
futures = "0"
|
||||||
regex = "1.11.0"
|
indicatif = "0"
|
||||||
reqwest = { version = "0.12.8", features = ["blocking"] }
|
regex = "1"
|
||||||
tokio = { version = "1", features = ["full"] }
|
reqwest = { version = "0", features = ["blocking"] }
|
||||||
|
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
|
||||||
|
@@ -4,7 +4,7 @@ a scraper that well scrapes r34
|
|||||||
|
|
||||||
## note
|
## note
|
||||||
|
|
||||||
this thing is still not completed, it only gathers links, it doesnt download things yet
|
this program is pretty much complete, although i am planning to add a few extra features.
|
||||||
|
|
||||||
## example usage image
|
## example usage image
|
||||||

|

|
||||||
|
BIN
image.png
BIN
image.png
Binary file not shown.
Before Width: | Height: | Size: 79 KiB After Width: | Height: | Size: 122 KiB |
2
rust-toolchain.toml
Normal file
2
rust-toolchain.toml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[toolchain]
|
||||||
|
channel = "nightly"
|
@@ -11,11 +11,15 @@ pub struct Args {
|
|||||||
pub user_agent: String,
|
pub user_agent: String,
|
||||||
|
|
||||||
/// Tags to search for
|
/// Tags to search for
|
||||||
#[arg(short, long)]
|
#[arg(short, long, required = true)]
|
||||||
pub tags: Option<Vec<String>>,
|
pub tags: Vec<String>,
|
||||||
|
|
||||||
|
/// Page to start scraping from
|
||||||
|
#[arg(short, long, default_value_t = 1)]
|
||||||
|
pub page: usize,
|
||||||
|
|
||||||
/// Async jobs to use for fetching
|
/// Async jobs to use for fetching
|
||||||
#[arg(short, long, default_value = "4")]
|
#[arg(short, long, default_value_t = 4)]
|
||||||
pub jobs: usize,
|
pub jobs: usize,
|
||||||
|
|
||||||
/// Delay for rate-limits (ms)
|
/// Delay for rate-limits (ms)
|
173
src/main.rs
173
src/main.rs
@@ -1,40 +1,45 @@
|
|||||||
#![feature(async_closure, iter_intersperse)]
|
#![feature(async_closure)]
|
||||||
pub mod args;
|
pub mod args;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use futures::{stream, StreamExt};
|
use futures::{stream, StreamExt};
|
||||||
|
use indicatif::ProgressBar;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
|
use std::io::Write;
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
const BAR_LENGTH: u64 = 8;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> ExitCode {
|
async fn main() -> ExitCode {
|
||||||
let args = args::Args::parse();
|
let args = args::Args::parse();
|
||||||
|
|
||||||
let tags = args.tags.unwrap_or_else(|| {
|
let uri_tags = &args.tags.join("+");
|
||||||
println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
|
let _ = std::fs::create_dir(uri_tags);
|
||||||
let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
|
|
||||||
tags_binding
|
let running = Arc::new(AtomicBool::new(true));
|
||||||
.split(' ')
|
let running_t = running.clone();
|
||||||
.filter(|item| !item.is_empty())
|
ctrlc::set_handler(move || {
|
||||||
.map(|item| item.to_owned())
|
running_t.store(false, Ordering::SeqCst);
|
||||||
.collect()
|
})
|
||||||
});
|
.unwrap();
|
||||||
let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
|
|
||||||
|
|
||||||
let client = Client::builder()
|
let client = Client::builder()
|
||||||
.user_agent(&args.user_agent)
|
.user_agent(&args.user_agent)
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
for page in 0.. {
|
for page in args.page - 1.. {
|
||||||
println!("now scraping page {}", page + 1);
|
if !running.load(Ordering::SeqCst) {
|
||||||
println!(
|
return ExitCode::FAILURE;
|
||||||
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
}
|
||||||
page * 42
|
|
||||||
);
|
println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);
|
||||||
|
|
||||||
let post_html = async |client: &Client| {
|
let post_html = async |client: &Client| {
|
||||||
extract_urls(
|
extract_urls(
|
||||||
@@ -53,75 +58,74 @@ async fn main() -> ExitCode {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut urls = post_html(&client).await;
|
let mut urls = post_html(&client).await;
|
||||||
|
|
||||||
let mut wait_time = 5000;
|
|
||||||
|
|
||||||
if urls.is_empty() {
|
if urls.is_empty() {
|
||||||
for reconnection_attempts in 0..4 {
|
let mut reconnection_attempts = 0;
|
||||||
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
|
loop {
|
||||||
sleep(Duration::from_millis(wait_time)).await;
|
println!("no urls found, retrying in 5 seconds...");
|
||||||
|
sleep(Duration::from_millis(5000)).await;
|
||||||
|
|
||||||
|
if !running.load(Ordering::SeqCst) {
|
||||||
|
return ExitCode::FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
urls = post_html(&client).await;
|
urls = post_html(&client).await;
|
||||||
|
|
||||||
if !urls.is_empty() {
|
if !urls.is_empty() {
|
||||||
println!("urls found! continuing...");
|
println!("urls found! continuing...");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if reconnection_attempts == 3 {
|
reconnection_attempts += 1;
|
||||||
println!("no urls found in 4 attempts, exiting...");
|
if reconnection_attempts == 12 {
|
||||||
|
println!("no urls found in 1 minute, exiting...");
|
||||||
return ExitCode::FAILURE;
|
return ExitCode::FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
wait_time += 5000;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let multi_prog = indicatif::MultiProgress::new();
|
let multi_prog = indicatif::MultiProgress::new();
|
||||||
let urls_ammount = urls.len();
|
let urls_amount = urls.len();
|
||||||
let responses = stream::iter(urls.into_iter().enumerate())
|
let responses = stream::iter(urls.into_iter().enumerate())
|
||||||
.map(|(i, url)| {
|
.map(|(i, url)| {
|
||||||
let i = i + 1;
|
let i = i + 1;
|
||||||
let client = &client;
|
let client = &client;
|
||||||
let this_bar = indicatif::ProgressBar::new_spinner();
|
let running_t = running.clone();
|
||||||
this_bar.enable_steady_tick(Duration::from_millis(50));
|
let this_bar = indicatif::ProgressBar::new(BAR_LENGTH);
|
||||||
let this_prog = multi_prog.insert(i, this_bar);
|
this_bar.set_style(indicatif::ProgressStyle::with_template("[{bar}] {msg}").unwrap().progress_chars("=> "));
|
||||||
|
let this_bar = multi_prog.insert(i, this_bar);
|
||||||
async move {
|
async move {
|
||||||
// "thread"
|
// "thread"
|
||||||
loop {
|
loop {
|
||||||
this_prog.set_message(format!("\x1b[30m[{i: >4}/{urls_ammount}] \x1b[36mscraping {url:?}\x1b[0m"));
|
if !running_t.load(Ordering::SeqCst) {
|
||||||
let resp = client.get(&url).send().await.unwrap();
|
return;
|
||||||
match extract_img_url(&resp.text().await.unwrap()) {
|
|
||||||
Ok(img_url) => {
|
|
||||||
if img_url.is_empty() {
|
|
||||||
this_prog.abandon_with_message(format!(
|
|
||||||
"\x1b[30m[{i: >4}/{urls_ammount}] \x1b[1;31mimage url not found\x1b[0m"
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
this_prog.finish_with_message(format!(
|
|
||||||
"\x1b[30m[{i: >4}/{urls_ammount}] \x1b[32mfound image url: {img_url}\x1b[0m"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
break img_url;
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
this_prog
|
|
||||||
.set_message(format!(
|
|
||||||
"\x1b[30m[{i: >4}/{urls_ammount}] \x1b[31mratelimited, retrying after {}ms\x1b[0m",
|
|
||||||
args.delay.as_millis())
|
|
||||||
);
|
|
||||||
tokio::time::sleep(args.delay).await;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
||||||
|
let resp = client.get(&url).send().await.unwrap();
|
||||||
|
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
|
||||||
|
if img_url.is_empty() {
|
||||||
|
this_bar.abandon_with_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
download_file(running_t, &img_url, this_bar, i, urls_amount, uri_tags).await;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
this_bar
|
||||||
|
.set_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[31mratelimited, retrying after {}ms\x1b[0m",
|
||||||
|
args.delay.as_millis())
|
||||||
|
);
|
||||||
|
tokio::time::sleep(args.delay).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.buffered(args.jobs);
|
.buffered(args.jobs);
|
||||||
let _ = responses.for_each(|_| async {}).await;
|
let _ = responses.for_each(|()| async {}).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ExitCode::SUCCESS;
|
ExitCode::SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_urls(html: &str) -> Vec<String> {
|
fn extract_urls(html: &str) -> Vec<String> {
|
||||||
@@ -145,3 +149,56 @@ fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
|||||||
Ok(String::new())
|
Ok(String::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn download_file(
|
||||||
|
running: Arc<AtomicBool>,
|
||||||
|
img_url: &str,
|
||||||
|
this_bar: ProgressBar,
|
||||||
|
i: usize,
|
||||||
|
urls_amount: usize,
|
||||||
|
uri_tags: &str,
|
||||||
|
) {
|
||||||
|
let args = args::Args::parse();
|
||||||
|
|
||||||
|
let file_name = Regex::new(r"[^/]+$")
|
||||||
|
.unwrap()
|
||||||
|
.find(img_url)
|
||||||
|
.map(|m| m.as_str())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let file_path = uri_tags.to_owned() + "/" + file_name;
|
||||||
|
|
||||||
|
let mut file = if std::fs::File::open(&file_path).is_ok() {
|
||||||
|
this_bar.finish_with_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[33m{file_name} exists, skipping...\x1b[0m"
|
||||||
|
));
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
std::fs::File::create(&file_path).unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut res = Client::new()
|
||||||
|
.get(img_url)
|
||||||
|
.header("User-Agent", &args.user_agent)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let file_length = res.content_length().unwrap();
|
||||||
|
let mut written = 0;
|
||||||
|
while let Some(img_chunk) = res.chunk().await.unwrap() {
|
||||||
|
if !running.load(Ordering::SeqCst) {
|
||||||
|
this_bar.abandon_with_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[33mcancelling {img_url}\x1b[0m"
|
||||||
|
));
|
||||||
|
drop(file);
|
||||||
|
std::fs::remove_file(&file_path).unwrap();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
file.write_all(&img_chunk).unwrap();
|
||||||
|
written += img_chunk.len();
|
||||||
|
this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
|
||||||
|
}
|
||||||
|
this_bar.finish_with_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user