From 54ccc84719588c8126137766dbf000e148332d67 Mon Sep 17 00:00:00 2001 From: meme_man Date: Sat, 1 Feb 2025 21:37:41 -0500 Subject: [PATCH] add database --- src/args.rs | 3 +++ src/main.rs | 35 +++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/args.rs b/src/args.rs index 43f8e9f..2504f67 100644 --- a/src/args.rs +++ b/src/args.rs @@ -22,6 +22,9 @@ pub struct Args { #[arg(short, long, default_value_t = 4)] pub jobs: usize, + #[arg(long, default_value = "downloads")] + pub dir: String, + /// Delay for rate-limits (ms) #[arg(short, long, default_value = "1000", value_parser = parse_duration)] pub delay: std::time::Duration, diff --git a/src/main.rs b/src/main.rs index eae0924..ff60b29 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ -#![feature(async_closure)] pub mod args; use clap::Parser; @@ -20,7 +19,8 @@ async fn main() -> ExitCode { let args = args::Args::parse(); let uri_tags = &args.tags.join("+"); - let _ = std::fs::create_dir(uri_tags); + let dir = &args.dir; + let _ = std::fs::create_dir(dir); let running = Arc::new(AtomicBool::new(true)); let running_t = running.clone(); @@ -100,14 +100,14 @@ async fn main() -> ExitCode { } this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m")); - let resp = client.get(&url).send().await.unwrap(); - if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) { + let resp = client.get(&url).send().await.unwrap().text().await.unwrap(); + if let Ok(img_url) = extract_img_url(&resp) { if img_url.is_empty() { this_bar.abandon_with_message(format!( "\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m" )); } else { - download_file(running_t, &img_url, this_bar, i, urls_amount, uri_tags).await; + download_file(running_t, &img_url, this_bar, i, urls_amount, &resp, dir.as_str()).await; } break; } @@ -122,7 +122,7 @@ async fn main() -> ExitCode { } }) .buffered(args.jobs); - let _ = responses.for_each(|()| async {}).await; + let () = responses.for_each(|()| async {}).await; } ExitCode::SUCCESS @@ -136,6 +136,23 @@ fn extract_urls(html: &str) -> Vec { .collect() } +fn write_to_database(html: &str, file_path: &str) { + let strings = html.split('\n'); + for line in strings { + if line.contains("") { + let line = line.trim_start(); + let mut file = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open("database") + .unwrap(); + + writeln!(file, "{file_path}: {line}").unwrap(); + break; + } + } +} + fn extract_img_url(html: &str) -> Result<String, &'static str> { if let Some(img_url) = Regex::new(r"https://us\.rule34\.xxx//images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") @@ -156,7 +173,8 @@ async fn download_file( this_bar: ProgressBar, i: usize, urls_amount: usize, - uri_tags: &str, + html: &str, + dir: &str, ) { let args = args::Args::parse(); @@ -166,7 +184,7 @@ async fn download_file( .map(|m| m.as_str()) .unwrap(); - let file_path = uri_tags.to_owned() + "/" + file_name; + let file_path = dir.to_owned() + "/" + file_name; let mut file = if std::fs::File::open(&file_path).is_ok() { this_bar.finish_with_message(format!( @@ -198,6 +216,7 @@ async fn download_file( written += img_chunk.len(); this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64); } + write_to_database(html, &file_path); this_bar.finish_with_message(format!( "\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m" ));