forked from danmax/r34-scraper
Compare commits
11 Commits
add-page-a
...
351439034e
Author | SHA1 | Date | |
---|---|---|---|
351439034e | |||
fe3c399c1d
|
|||
|
89830d6e1e | ||
1b5608b014 | |||
a852c8bcc5
|
|||
235e13230b
|
|||
|
bd517ed0b5 | ||
5f848be434 | |||
8723769429
|
|||
08ed5e51f2 | |||
bcd349e36f
|
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -1039,7 +1039,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "r34-scraper"
|
||||
version = "0.1.0"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"clap",
|
||||
@@ -1329,9 +1329,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.79"
|
||||
version = "2.0.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
|
||||
checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "r34-scraper"
|
||||
version = "0.1.0"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
2
rust-toolchain.toml
Normal file
2
rust-toolchain.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly"
|
@@ -11,8 +11,8 @@ pub struct Args {
|
||||
pub user_agent: String,
|
||||
|
||||
/// Tags to search for
|
||||
#[arg(short, long)]
|
||||
pub tags: Option<Vec<String>>,
|
||||
#[arg(short, long, required = true)]
|
||||
pub tags: Vec<String>,
|
||||
|
||||
/// Page to start scraping from
|
||||
#[arg(short, long, default_value_t = 1)]
|
34
src/main.rs
34
src/main.rs
@@ -1,4 +1,4 @@
|
||||
#![feature(async_closure, iter_intersperse)]
|
||||
#![feature(async_closure)]
|
||||
pub mod args;
|
||||
|
||||
use clap::Parser;
|
||||
@@ -17,22 +17,8 @@ const BAR_LENGTH: u64 = 8;
|
||||
async fn main() -> ExitCode {
|
||||
let args = args::Args::parse();
|
||||
|
||||
let tags = args.tags.unwrap_or_else(|| {
|
||||
println!("which tags do you want to scrape? ex: 1girls 1boys yomama");
|
||||
let tags_binding = std::io::stdin().lines().next().unwrap().unwrap();
|
||||
tags_binding
|
||||
.split(' ')
|
||||
.filter(|item| !item.is_empty())
|
||||
.map(std::borrow::ToOwned::to_owned)
|
||||
.collect()
|
||||
});
|
||||
let tags_folder = &tags.join("+");
|
||||
let uri_tags = tags
|
||||
.into_iter()
|
||||
.intersperse(String::from("+"))
|
||||
.collect::<String>();
|
||||
|
||||
let _ = std::fs::create_dir(tags_folder);
|
||||
let uri_tags = &args.tags.join("+");
|
||||
let _ = std::fs::create_dir(uri_tags);
|
||||
|
||||
let client = Client::builder()
|
||||
.user_agent(&args.user_agent)
|
||||
@@ -40,11 +26,7 @@ async fn main() -> ExitCode {
|
||||
.unwrap();
|
||||
|
||||
for page in args.page - 1.. {
|
||||
println!("now scraping page {}", page + 1);
|
||||
println!(
|
||||
"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
|
||||
page * 42
|
||||
);
|
||||
println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);
|
||||
|
||||
let post_html = async |client: &Client| {
|
||||
extract_urls(
|
||||
@@ -95,7 +77,7 @@ async fn main() -> ExitCode {
|
||||
async move {
|
||||
// "thread"
|
||||
loop {
|
||||
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url:?}\x1b[0m"));
|
||||
this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
|
||||
let resp = client.get(&url).send().await.unwrap();
|
||||
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
|
||||
if img_url.is_empty() {
|
||||
@@ -103,7 +85,7 @@ async fn main() -> ExitCode {
|
||||
"\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
|
||||
));
|
||||
} else {
|
||||
download_file(&img_url, this_bar, i, urls_amount, tags_folder).await;
|
||||
download_file(&img_url, this_bar, i, urls_amount, uri_tags).await;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -151,7 +133,7 @@ async fn download_file(
|
||||
this_bar: ProgressBar,
|
||||
i: usize,
|
||||
urls_amount: usize,
|
||||
tags_folder: &str,
|
||||
uri_tags: &str,
|
||||
) {
|
||||
let args = args::Args::parse();
|
||||
|
||||
@@ -161,7 +143,7 @@ async fn download_file(
|
||||
.map(|m| m.as_str())
|
||||
.unwrap();
|
||||
|
||||
let file_path = tags_folder.to_owned() + "/" + file_name;
|
||||
let file_path = uri_tags.to_owned() + "/" + file_name;
|
||||
|
||||
let mut file = if std::fs::File::open(&file_path).is_ok() {
|
||||
this_bar.finish_with_message(format!(
|
||||
|
Reference in New Issue
Block a user