chore: modularity & good code
This commit is contained in:
parent
e62d2cc186
commit
5ce292d1c2
16
src/args/mod.rs
Normal file
16
src/args/mod.rs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
use clap::Parser;
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[command(version)]
|
||||||
|
pub struct Args {
|
||||||
|
/// User Agent to use for requests
|
||||||
|
#[arg(
|
||||||
|
short,
|
||||||
|
default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
|
||||||
|
)]
|
||||||
|
pub user_agent: String,
|
||||||
|
|
||||||
|
// Tags to search for
|
||||||
|
#[arg(short, long)]
|
||||||
|
pub tags: Option<Vec<String>>,
|
||||||
|
}
|
53
src/main.rs
53
src/main.rs
@ -1,45 +1,44 @@
|
|||||||
#![feature(async_closure)]
|
#![feature(async_closure, iter_intersperse)]
|
||||||
|
pub mod args;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
#[derive(Parser)]
|
|
||||||
#[command(version)]
|
|
||||||
struct Args {
|
|
||||||
/// User Agent to use for requests
|
|
||||||
#[arg(
|
|
||||||
short,
|
|
||||||
default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
|
|
||||||
)]
|
|
||||||
user_agent: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> ExitCode {
|
async fn main() -> ExitCode {
|
||||||
let args = Args::parse();
|
let args = args::Args::parse();
|
||||||
|
|
||||||
println!("which tags do you want to scrape? ex: 1girls+1boys+yomama");
|
let tags = args.tags.unwrap_or_else(|| {
|
||||||
let tags = std::io::stdin()
|
println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
|
||||||
.lines()
|
let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
|
||||||
.next()
|
tags_binding
|
||||||
.unwrap()
|
.split(' ')
|
||||||
.unwrap()
|
.filter(|item| !item.is_empty())
|
||||||
.trim()
|
.map(|item| item.to_owned())
|
||||||
.to_string();
|
.collect()
|
||||||
|
});
|
||||||
|
let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
|
||||||
|
|
||||||
let client = Client::builder().user_agent(args.user_agent).build().unwrap();
|
let client = Client::builder()
|
||||||
let mut page = 0;
|
.user_agent(args.user_agent)
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
loop {
|
for page in 0.. {
|
||||||
println!("now scraping page {}", page + 1);
|
println!("now scraping page {}", page + 1);
|
||||||
|
println!(
|
||||||
|
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
||||||
|
page * 42
|
||||||
|
);
|
||||||
|
|
||||||
let post_html = async || {
|
let post_html = async || {
|
||||||
extract_urls(
|
extract_urls(
|
||||||
&client
|
&client
|
||||||
.get(format!(
|
.get(format!(
|
||||||
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
|
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
||||||
page * 42
|
page * 42
|
||||||
))
|
))
|
||||||
.send()
|
.send()
|
||||||
@ -85,9 +84,9 @@ async fn main() -> ExitCode {
|
|||||||
println!("found image url: {img_url}");
|
println!("found image url: {img_url}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
page += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return ExitCode::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_urls(html: &str) -> Vec<String> {
|
fn extract_urls(html: &str) -> Vec<String> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user