chore: modularity & good code

2024-10-18 19:26:11 +02:00
parent e62d2cc186
commit 5ce292d1c2
2 changed files with 42 additions and 27 deletions
--- a/src/args/mod.rs
+++ b/src/args/mod.rs
@@ -0,0 +1,16 @@
 use clap::Parser;
 // Command-line options for the program, parsed through clap's derive API.
 // Plain `//` is used for this header on purpose: a `///` doc comment on the
 // struct would be picked up by clap and shown as the `--help` description.
 #[derive(Parser)]
 #[command(version)]
 pub struct Args {
    /// User Agent to use for requests
    // Exposed as a short flag only (no `long`); when the flag is omitted the
    // browser-like default below is sent instead.
    #[arg(
        short,
        default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    )]
    pub user_agent: String,
    /// Tags to search for
    // Optional: when no tags are given on the command line, `main` falls back
    // to asking for them on stdin.
    #[arg(short, long)]
    pub tags: Option<Vec<String>>,
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,45 +1,44 @@
-#![feature(async_closure)]
+#![feature(async_closure, iter_intersperse)]
 pub mod args;
 use clap::Parser;
 use regex::Regex;
 use reqwest::Client;
 use std::process::ExitCode;
 use tokio::time::{sleep, Duration};
 // Command-line arguments declared inline in main.rs, parsed via clap's derive API.
 // NOTE(review): `src/args/mod.rs` declares a public `Args` with this same
 // `user_agent` field, and `main` calls `args::Args::parse()` — this private
 // copy looks like a leftover of the move; confirm it is removed by this commit.
 #[derive(Parser)]
 #[command(version)]
 struct Args {
    /// User Agent to use for requests
    // Short flag only; the default below is used when the flag is not passed.
    #[arg(
        short,
        default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    )]
    user_agent: String,
 }
 #[tokio::main]
 async fn main() -> ExitCode {
-    let args = Args::parse();
+    let args = args::Args::parse();
-    println!("which tags do you want to scrape? ex: 1girls+1boys+yomama");
+    let tags = args.tags.unwrap_or_else(|| {
-    let tags = std::io::stdin()
+        println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
-        .lines()
+        let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
-        .next()
+        tags_binding
-        .unwrap()
+            .split(' ')
-        .unwrap()
+            .filter(|item| !item.is_empty())
-        .trim()
+            .map(|item| item.to_owned())
-        .to_string();
+            .collect()
    });
    let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
-    let client = Client::builder().user_agent(args.user_agent).build().unwrap();
+    let client = Client::builder()
-    let mut page = 0;
+        .user_agent(args.user_agent)
        .build()
        .unwrap();
-    loop {
+    for page in 0.. {
        println!("now scraping page {}", page + 1);
        println!(
            "https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
            page * 42
        );
        let post_html = async || {
            extract_urls(
                &client
                    .get(format!(
-                        "https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
+                        "https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
                        page * 42
                    ))
                    .send()
@@ -85,9 +84,9 @@ async fn main() -> ExitCode {
                println!("found image url: {img_url}");
            }
        }
        page += 1;
    }
    return ExitCode::SUCCESS;
 }
 fn extract_urls(html: &str) -> Vec<String> {