From 5ce292d1c262057cc9ebc43d7126b16ab660e64b Mon Sep 17 00:00:00 2001
From: javalsai
Date: Fri, 18 Oct 2024 19:26:11 +0200
Subject: [PATCH] chore: modularity & good code

---
 src/args/mod.rs | 16 ++++++++++++++++
 src/main.rs     | 53 ++++++++++++++++++++++++++++---------------------------
 2 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 src/args/mod.rs

diff --git a/src/args/mod.rs b/src/args/mod.rs
new file mode 100644
index 0000000..f982d82
--- /dev/null
+++ b/src/args/mod.rs
@@ -0,0 +1,16 @@
+use clap::Parser;
+
+#[derive(Parser)]
+#[command(version)]
+pub struct Args {
+    /// User Agent to use for requests
+    #[arg(
+        short,
+        default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
+    )]
+    pub user_agent: String,
+
+    /// Tags to search for
+    #[arg(short, long)]
+    pub tags: Option<Vec<String>>,
+}
diff --git a/src/main.rs b/src/main.rs
index 769b9ee..037909e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,45 +1,44 @@
-#![feature(async_closure)]
+#![feature(async_closure, iter_intersperse)]
+
+pub mod args;
+
 use clap::Parser;
 use regex::Regex;
 use reqwest::Client;
 use std::process::ExitCode;
 use tokio::time::{sleep, Duration};
 
-#[derive(Parser)]
-#[command(version)]
-struct Args {
-    /// User Agent to use for requests
-    #[arg(
-        short,
-        default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
-    )]
-    user_agent: String,
-}
-
 #[tokio::main]
 async fn main() -> ExitCode {
-    let args = Args::parse();
+    let args = args::Args::parse();
 
-    println!("which tags do you want to scrape? ex: 1girls+1boys+yomama");
-    let tags = std::io::stdin()
-        .lines()
-        .next()
-        .unwrap()
-        .unwrap()
-        .trim()
-        .to_string();
+    let tags = args.tags.unwrap_or_else(|| {
+        println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
+        let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
+        tags_binding
+            .split(' ')
+            .filter(|item| !item.is_empty())
+            .map(|item| item.to_owned())
+            .collect()
+    });
+    let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
 
-    let client = Client::builder().user_agent(args.user_agent).build().unwrap();
-    let mut page = 0;
+    let client = Client::builder()
+        .user_agent(args.user_agent)
+        .build()
+        .unwrap();
 
-    loop {
+    for page in 0.. {
         println!("now scraping page {}", page + 1);
+        println!(
+            "https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
+            page * 42
+        );
 
         let post_html = async || {
             extract_urls(
                 &client
                     .get(format!(
-                        "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
+                        "https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
                         page * 42
                     ))
                     .send()
                     .await
@@ -85,9 +84,9 @@ async fn main() -> ExitCode {
                 println!("found image url: {img_url}");
             }
         }
-
-        page += 1;
     }
+
+    return ExitCode::SUCCESS;
 }
 
 fn extract_urls(html: &str) -> Vec<String> {