forked from danmax/r34-scraper
Compare commits
4 Commits
main
...
javalsai-c
Author | SHA1 | Date | |
---|---|---|---|
e5e586ca2a | |||
5ce292d1c2 | |||
e62d2cc186 | |||
4acaf0308c |
120
Cargo.lock
generated
120
Cargo.lock
generated
@ -26,6 +26,55 @@ dependencies = [
|
|||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.6.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atomic-waker"
|
name = "atomic-waker"
|
||||||
version = "1.1.2"
|
version = "1.1.2"
|
||||||
@ -92,6 +141,52 @@ version = "1.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.5.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.5.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "core-foundation"
|
name = "core-foundation"
|
||||||
version = "0.9.4"
|
version = "0.9.4"
|
||||||
@ -261,6 +356,12 @@ version = "0.15.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
|
checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
@ -405,6 +506,12 @@ version = "2.10.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
|
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "1.0.11"
|
version = "1.0.11"
|
||||||
@ -626,6 +733,7 @@ dependencies = [
|
|||||||
name = "r34-scraper"
|
name = "r34-scraper"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"clap",
|
||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"tokio",
|
"tokio",
|
||||||
@ -920,6 +1028,12 @@ version = "0.9.8"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "subtle"
|
name = "subtle"
|
||||||
version = "2.6.1"
|
version = "2.6.1"
|
||||||
@ -1127,6 +1241,12 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vcpkg"
|
name = "vcpkg"
|
||||||
version = "0.2.15"
|
version = "0.2.15"
|
||||||
|
@ -4,6 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
clap = { version = "4.5.20", features = ["derive"] }
|
||||||
regex = "1.11.0"
|
regex = "1.11.0"
|
||||||
reqwest = { version = "0.12.8", features = ["blocking"] }
|
reqwest = { version = "0.12.8", features = ["blocking"] }
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
||||||
|
2
rust-toolchain.toml
Normal file
2
rust-toolchain.toml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
[toolchain]
|
||||||
|
channel = "nightly"
|
16
src/args/mod.rs
Normal file
16
src/args/mod.rs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
use clap::Parser;
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[command(version)]
|
||||||
|
pub struct Args {
|
||||||
|
/// User Agent to use for requests
|
||||||
|
#[arg(
|
||||||
|
short,
|
||||||
|
default_value = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
|
||||||
|
)]
|
||||||
|
pub user_agent: String,
|
||||||
|
|
||||||
|
// Tags to search for
|
||||||
|
#[arg(short, long)]
|
||||||
|
pub tags: Option<Vec<String>>,
|
||||||
|
}
|
45
src/main.rs
45
src/main.rs
@ -1,33 +1,44 @@
|
|||||||
#![feature(async_closure)]
|
#![feature(async_closure, iter_intersperse)]
|
||||||
|
pub mod args;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> ExitCode {
|
async fn main() -> ExitCode {
|
||||||
println!("which tags do you want to scrape? ex: 1girls+1boys+yomama");
|
let args = args::Args::parse();
|
||||||
let tags = std::io::stdin()
|
|
||||||
.lines()
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.unwrap()
|
|
||||||
.trim()
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
|
let tags = args.tags.unwrap_or_else(|| {
|
||||||
let mut page = 0;
|
println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
|
||||||
|
let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
|
||||||
|
tags_binding
|
||||||
|
.split(' ')
|
||||||
|
.filter(|item| !item.is_empty())
|
||||||
|
.map(|item| item.to_owned())
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
let uri_tags: String = tags.into_iter().intersperse(String::from("+")).collect();
|
||||||
|
|
||||||
loop {
|
let client = Client::builder()
|
||||||
|
.user_agent(args.user_agent)
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
for page in 0.. {
|
||||||
println!("now scraping page {}", page + 1);
|
println!("now scraping page {}", page + 1);
|
||||||
|
println!(
|
||||||
|
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
||||||
|
page * 42
|
||||||
|
);
|
||||||
|
|
||||||
let post_html = async || {
|
let post_html = async || {
|
||||||
extract_urls(
|
extract_urls(
|
||||||
&client
|
&client
|
||||||
.get(format!(
|
.get(format!(
|
||||||
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}",
|
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
||||||
page * 42
|
page * 42
|
||||||
))
|
))
|
||||||
.send()
|
.send()
|
||||||
@ -73,9 +84,9 @@ async fn main() -> ExitCode {
|
|||||||
println!("found image url: {img_url}");
|
println!("found image url: {img_url}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
page += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return ExitCode::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_urls(html: &str) -> Vec<String> {
|
fn extract_urls(html: &str) -> Vec<String> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user