feat: capture SIGINT to avoid corrupting downloads #12
29
Cargo.lock
generated
29
Cargo.lock
generated
@ -267,6 +267,12 @@ version = "1.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg_aliases"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.20"
|
version = "4.5.20"
|
||||||
@ -357,6 +363,16 @@ version = "0.8.20"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctrlc"
|
||||||
|
version = "3.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3"
|
||||||
|
dependencies = [
|
||||||
|
"nix",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encode_unicode"
|
name = "encode_unicode"
|
||||||
version = "0.3.6"
|
version = "0.3.6"
|
||||||
@ -892,6 +908,18 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nix"
|
||||||
|
version = "0.29.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cfg-if",
|
||||||
|
"cfg_aliases",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "number_prefix"
|
name = "number_prefix"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@ -1043,6 +1071,7 @@ version = "1.0.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"clap",
|
"clap",
|
||||||
|
"ctrlc",
|
||||||
"futures",
|
"futures",
|
||||||
"indicatif",
|
"indicatif",
|
||||||
"regex",
|
"regex",
|
||||||
|
@ -6,6 +6,7 @@ edition = "2021"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
async-std = "1"
|
async-std = "1"
|
||||||
clap = { version = "4", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
|
ctrlc = "3"
|
||||||
futures = "0"
|
futures = "0"
|
||||||
indicatif = "0"
|
indicatif = "0"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
|
35
src/main.rs
35
src/main.rs
@ -10,6 +10,8 @@ use tokio::time::{sleep, Duration};
|
|||||||
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
const BAR_LENGTH: u64 = 8;
|
const BAR_LENGTH: u64 = 8;
|
||||||
|
|
||||||
@ -20,12 +22,23 @@ async fn main() -> ExitCode {
|
|||||||
let uri_tags = &args.tags.join("+");
|
let uri_tags = &args.tags.join("+");
|
||||||
let _ = std::fs::create_dir(uri_tags);
|
let _ = std::fs::create_dir(uri_tags);
|
||||||
|
|
||||||
|
let running = Arc::new(AtomicBool::new(true));
|
||||||
|
let running_t = running.clone();
|
||||||
|
ctrlc::set_handler(move || {
|
||||||
|
running_t.store(false, Ordering::SeqCst);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let client = Client::builder()
|
let client = Client::builder()
|
||||||
.user_agent(&args.user_agent)
|
.user_agent(&args.user_agent)
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
for page in args.page - 1.. {
|
for page in args.page - 1.. {
|
||||||
|
if !running.load(Ordering::SeqCst) {
|
||||||
|
return ExitCode::FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);
|
println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);
|
||||||
|
|
||||||
let post_html = async |client: &Client| {
|
let post_html = async |client: &Client| {
|
||||||
@ -51,6 +64,10 @@ async fn main() -> ExitCode {
|
|||||||
println!("no urls found, retrying in 5 seconds...");
|
println!("no urls found, retrying in 5 seconds...");
|
||||||
sleep(Duration::from_millis(5000)).await;
|
sleep(Duration::from_millis(5000)).await;
|
||||||
|
|
||||||
|
if !running.load(Ordering::SeqCst) {
|
||||||
|
return ExitCode::FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
urls = post_html(&client).await;
|
urls = post_html(&client).await;
|
||||||
if !urls.is_empty() {
|
if !urls.is_empty() {
|
||||||
println!("urls found! continuing...");
|
println!("urls found! continuing...");
|
||||||
@ -71,12 +88,17 @@ async fn main() -> ExitCode {
|
|||||||
.map(|(i, url)| {
|
.map(|(i, url)| {
|
||||||
let i = i + 1;
|
let i = i + 1;
|
||||||
let client = &client;
|
let client = &client;
|
||||||
|
let running_t = running.clone();
|
||||||
let this_bar = indicatif::ProgressBar::new(BAR_LENGTH);
|
let this_bar = indicatif::ProgressBar::new(BAR_LENGTH);
|
||||||
this_bar.set_style(indicatif::ProgressStyle::with_template("[{bar}] {msg}").unwrap().progress_chars("=> "));
|
this_bar.set_style(indicatif::ProgressStyle::with_template("[{bar}] {msg}").unwrap().progress_chars("=> "));
|
||||||
let this_bar = multi_prog.insert(i, this_bar);
|
let this_bar = multi_prog.insert(i, this_bar);
|
||||||
async move {
|
async move {
|
||||||
// "thread"
|
// "thread"
|
||||||
loop {
|
loop {
|
||||||
|
if !running_t.load(Ordering::SeqCst) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
||||||
let resp = client.get(&url).send().await.unwrap();
|
let resp = client.get(&url).send().await.unwrap();
|
||||||
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
|
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
|
||||||
@ -85,7 +107,7 @@ async fn main() -> ExitCode {
|
|||||||
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
download_file(&img_url, this_bar, i, urls_amount, uri_tags).await;
|
download_file(running_t, &img_url, this_bar, i, urls_amount, uri_tags).await;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -129,6 +151,7 @@ fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn download_file(
|
async fn download_file(
|
||||||
|
running: Arc<AtomicBool>,
|
||||||
img_url: &str,
|
img_url: &str,
|
||||||
this_bar: ProgressBar,
|
this_bar: ProgressBar,
|
||||||
i: usize,
|
i: usize,
|
||||||
@ -151,7 +174,7 @@ async fn download_file(
|
|||||||
));
|
));
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
std::fs::File::create(file_path).unwrap()
|
std::fs::File::create(&file_path).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut res = Client::new()
|
let mut res = Client::new()
|
||||||
@ -163,6 +186,14 @@ async fn download_file(
|
|||||||
let file_length = res.content_length().unwrap();
|
let file_length = res.content_length().unwrap();
|
||||||
let mut written = 0;
|
let mut written = 0;
|
||||||
while let Some(img_chunk) = res.chunk().await.unwrap() {
|
while let Some(img_chunk) = res.chunk().await.unwrap() {
|
||||||
|
if !running.load(Ordering::SeqCst) {
|
||||||
|
this_bar.abandon_with_message(format!(
|
||||||
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[33mcancelling {img_url}\x1b[0m"
|
||||||
|
));
|
||||||
|
drop(file);
|
||||||
|
std::fs::remove_file(&file_path).unwrap();
|
||||||
|
return;
|
||||||
|
}
|
||||||
file.write_all(&img_chunk).unwrap();
|
file.write_all(&img_chunk).unwrap();
|
||||||
written += img_chunk.len();
|
written += img_chunk.len();
|
||||||
this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
|
this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user