Update gitignore, add taap, luv gaylord

This commit is contained in:
GayLord 2024-10-19 14:05:30 +02:00
parent eeee4f50b3
commit 89fca9d0a3
5 changed files with 48 additions and 1358 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
/target /target
/Cargo.lock

1344
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -6,4 +6,5 @@ edition = "2021"
[dependencies] [dependencies]
regex = "1.11.0" regex = "1.11.0"
reqwest = { version = "0.12.8", features = ["blocking"] } reqwest = { version = "0.12.8", features = ["blocking"] }
taap = "0.1.4"
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }

2
rust-toolchain.toml Normal file
View File

@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"

View File

@ -1,21 +1,33 @@
#![feature(async_closure)] #![feature(async_closure)]
use regex::Regex; use regex::Regex;
use reqwest::Client; use reqwest::Client;
use std::process::ExitCode; use std::process::{exit, ExitCode};
use taap::Argument;
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
#[tokio::main] #[tokio::main]
async fn main() -> ExitCode { async fn main() -> ExitCode {
println!("which tags do you want to scrape? ex: 1girls+1boys+yomama"); // Taap setup
let tags = std::io::stdin() let mut arguments = Argument::new(
.lines() "r34-scrape",
.next() "A scraper for r34.xxx",
.unwrap() "Created by someone with no coding knowledge",
.unwrap() "Danmax and authors 2024",
.trim() );
.to_string();
arguments.add_arg("TAGS", "+", Some("the tags you want to search for"));
let parsed_arguments = arguments.parse_args(None);
let tags = parsed_arguments.get("TAGS").unwrap();
// End of taap setup
// Check if empty and warn
// Can't use tags.0 because taap is not buggy at all :3
if tags.1.is_empty() {
println!("[warning] No tags were used, use --help for help")
}
let mut thread_counter = 0; let mut thread_counter = 0;
let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
@ -28,7 +40,12 @@ async fn main() -> ExitCode {
extract_urls( extract_urls(
&client &client
.get(format!( .get(format!(
"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={}", "https://rule34.xxx/index.php?page=post&s=list{}&pid={}",
if tags.0 {
format!("&tags={}", tags.1.join("+"))
} else {
"".to_owned()
},
page * 42 page * 42
)) ))
.send() .send()
@ -70,14 +87,23 @@ async fn main() -> ExitCode {
let thread_id = format!("[{thread_counter: >4}]"); let thread_id = format!("[{thread_counter: >4}]");
loop { loop {
let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
match extract_img_url(&client.get(url.clone()).send().await.unwrap().text().await.unwrap()) { match extract_img_url(
&client
.get(url.clone())
.send()
.await
.unwrap()
.text()
.await
.unwrap(),
) {
Ok(img_url) => { Ok(img_url) => {
if img_url.is_empty() { if img_url.is_empty() {
println!("{thread_id} image url not found"); println!("{thread_id} image url not found");
} else { } else {
println!("{thread_id} found image url: {img_url}"); println!("{thread_id} found image url: {img_url}");
} }
}, }
Err(_) => { Err(_) => {
println!("{thread_id} ratelimited, retrying after 1 second"); println!("{thread_id} ratelimited, retrying after 1 second");
std::thread::sleep(std::time::Duration::from_millis(1000)); std::thread::sleep(std::time::Duration::from_millis(1000));
@ -91,7 +117,11 @@ async fn main() -> ExitCode {
if thread_counter > 9999 { if thread_counter > 9999 {
thread_counter = 0; thread_counter = 0;
} }
while tokio::runtime::Handle::current().metrics().num_alive_tasks() > 4 { while tokio::runtime::Handle::current()
.metrics()
.num_alive_tasks()
> 4
{
std::thread::sleep(std::time::Duration::from_millis(100)); std::thread::sleep(std::time::Duration::from_millis(100));
} }
} }