chore: modularity & good code

This commit is contained in:
javalsai 2024-10-18 19:26:11 +02:00
parent e62d2cc186
commit 5ce292d1c2
Signed by: javalsai
SSH Key Fingerprint: SHA256:3G83yKhBUWVABVX/vPWH88xnK4+ptMtHkZGCRXD4Mk8
2 changed files with 42 additions and 27 deletions

16
src/args/mod.rs Normal file
View File

@ -0,0 +1,16 @@
use clap::Parser;
// Command-line options, parsed through clap's derive API. clap turns the
// `///` doc comment on each field into that option's `--help` description.
#[derive(Parser)]
#[command(version)]
pub struct Args {
    /// User Agent to use for requests
    #[arg(
        short,
        default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    )]
    pub user_agent: String,
    // This must be a doc comment (`///`): a plain `//` comment is invisible to
    // the derive macro and would leave `-t/--tags` undescribed in `--help`.
    /// Tags to search for
    #[arg(short, long)]
    pub tags: Option<Vec<String>>,
}

View File

@ -1,45 +1,44 @@
#![feature(async_closure)] #![feature(async_closure, iter_intersperse)]
pub mod args;
use clap::Parser; use clap::Parser;
use regex::Regex; use regex::Regex;
use reqwest::Client; use reqwest::Client;
use std::process::ExitCode; use std::process::ExitCode;
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
#[derive(Parser)]
#[command(version)]
struct Args {
/// User Agent to use for requests
#[arg(
short,
default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
)]
user_agent: String,
}
#[tokio::main] #[tokio::main]
async fn main() -> ExitCode { async fn main() -> ExitCode {
let args = Args::parse(); let args = args::Args::parse();
println!("which tags do you want to scrape? ex: 1girls+1boys+yomama"); let tags = args.tags.unwrap_or_else(|| {
let tags = std::io::stdin() println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
.lines() let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
.next() tags_binding
.unwrap() .split(' ')
.unwrap() .filter(|item| !item.is_empty())
.trim() .map(|item| item.to_owned())
.to_string(); .collect()
});
let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
let client = Client::builder().user_agent(args.user_agent).build().unwrap(); let client = Client::builder()
let mut page = 0; .user_agent(args.user_agent)
.build()
.unwrap();
loop { for page in 0.. {
println!("now scraping page {}", page + 1); println!("now scraping page {}", page + 1);
println!(
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
page * 42
);
let post_html = async || { let post_html = async || {
extract_urls( extract_urls(
&client &client
.get(format!( .get(format!(
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}", "https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
page * 42 page * 42
)) ))
.send() .send()
@ -85,9 +84,9 @@ async fn main() -> ExitCode {
println!("found image url: {img_url}"); println!("found image url: {img_url}");
} }
} }
page += 1;
} }
return ExitCode::SUCCESS;
} }
fn extract_urls(html: &str) -> Vec<String> { fn extract_urls(html: &str) -> Vec<String> {