forked from danmax/r34-scraper
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
54ccc84719 |
@ -22,6 +22,9 @@ pub struct Args {
|
|||||||
#[arg(short, long, default_value_t = 4)]
|
#[arg(short, long, default_value_t = 4)]
|
||||||
pub jobs: usize,
|
pub jobs: usize,
|
||||||
|
|
||||||
|
#[arg(long, default_value = "downloads")]
|
||||||
|
pub dir: String,
|
||||||
|
|
||||||
/// Delay for rate-limits (ms)
|
/// Delay for rate-limits (ms)
|
||||||
#[arg(short, long, default_value = "1000", value_parser = parse_duration)]
|
#[arg(short, long, default_value = "1000", value_parser = parse_duration)]
|
||||||
pub delay: std::time::Duration,
|
pub delay: std::time::Duration,
|
||||||
|
35
src/main.rs
35
src/main.rs
@ -1,4 +1,3 @@
|
|||||||
#![feature(async_closure)]
|
|
||||||
pub mod args;
|
pub mod args;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
@ -20,7 +19,8 @@ async fn main() -> ExitCode {
|
|||||||
let args = args::Args::parse();
|
let args = args::Args::parse();
|
||||||
|
|
||||||
let uri_tags = &args.tags.join("+");
|
let uri_tags = &args.tags.join("+");
|
||||||
let _ = std::fs::create_dir(uri_tags);
|
let dir = &args.dir;
|
||||||
|
let _ = std::fs::create_dir(dir);
|
||||||
|
|
||||||
let running = Arc::new(AtomicBool::new(true));
|
let running = Arc::new(AtomicBool::new(true));
|
||||||
let running_t = running.clone();
|
let running_t = running.clone();
|
||||||
@ -100,14 +100,14 @@ async fn main() -> ExitCode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
||||||
let resp = client.get(&url).send().await.unwrap();
|
let resp = client.get(&url).send().await.unwrap().text().await.unwrap();
|
||||||
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
|
if let Ok(img_url) = extract_img_url(&resp) {
|
||||||
if img_url.is_empty() {
|
if img_url.is_empty() {
|
||||||
this_bar.abandon_with_message(format!(
|
this_bar.abandon_with_message(format!(
|
||||||
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
download_file(running_t, &img_url, this_bar, i, urls_amount, uri_tags).await;
|
download_file(running_t, &img_url, this_bar, i, urls_amount, &resp, dir.as_str()).await;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -122,7 +122,7 @@ async fn main() -> ExitCode {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
.buffered(args.jobs);
|
.buffered(args.jobs);
|
||||||
let _ = responses.for_each(|()| async {}).await;
|
let () = responses.for_each(|()| async {}).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
ExitCode::SUCCESS
|
ExitCode::SUCCESS
|
||||||
@ -136,6 +136,23 @@ fn extract_urls(html: &str) -> Vec<String> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn write_to_database(html: &str, file_path: &str) {
|
||||||
|
let strings = html.split('\n');
|
||||||
|
for line in strings {
|
||||||
|
if line.contains("<title>") {
|
||||||
|
let line = line.trim_start();
|
||||||
|
let mut file = std::fs::OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open("database")
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "{file_path}: {line}").unwrap();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
fn extract_img_url(html: &str) -> Result<String, &'static str> {
|
||||||
if let Some(img_url) =
|
if let Some(img_url) =
|
||||||
Regex::new(r"https://us\.rule34\.xxx//images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
Regex::new(r"https://us\.rule34\.xxx//images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
||||||
@ -156,7 +173,8 @@ async fn download_file(
|
|||||||
this_bar: ProgressBar,
|
this_bar: ProgressBar,
|
||||||
i: usize,
|
i: usize,
|
||||||
urls_amount: usize,
|
urls_amount: usize,
|
||||||
uri_tags: &str,
|
html: &str,
|
||||||
|
dir: &str,
|
||||||
) {
|
) {
|
||||||
let args = args::Args::parse();
|
let args = args::Args::parse();
|
||||||
|
|
||||||
@ -166,7 +184,7 @@ async fn download_file(
|
|||||||
.map(|m| m.as_str())
|
.map(|m| m.as_str())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let file_path = uri_tags.to_owned() + "/" + file_name;
|
let file_path = dir.to_owned() + "/" + file_name;
|
||||||
|
|
||||||
let mut file = if std::fs::File::open(&file_path).is_ok() {
|
let mut file = if std::fs::File::open(&file_path).is_ok() {
|
||||||
this_bar.finish_with_message(format!(
|
this_bar.finish_with_message(format!(
|
||||||
@ -198,6 +216,7 @@ async fn download_file(
|
|||||||
written += img_chunk.len();
|
written += img_chunk.len();
|
||||||
this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
|
this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
|
||||||
}
|
}
|
||||||
|
write_to_database(html, &file_path);
|
||||||
this_bar.finish_with_message(format!(
|
this_bar.finish_with_message(format!(
|
||||||
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m"
|
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m"
|
||||||
));
|
));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user