11 Commits

Author SHA1 Message Date
351439034e Merge pull request 'refactor: minor nitpicks' (#11) from ErrorNoInternet/r34-scraper:main into main
Reviewed-on: https://git.javalsai.dynv6.net/danmax/r34-scraper/pulls/11
2024-10-21 00:00:43 +02:00
fe3c399c1d refactor: minor nitpicks 2024-10-20 17:59:10 -04:00
danmax
89830d6e1e update lock file 2024-10-20 17:15:59 -04:00
1b5608b014 Merge pull request 'bring back toolchain' (#10) from javalsai-changes into main
Reviewed-on: https://git.javalsai.dynv6.net/danmax/r34-scraper/pulls/10
2024-10-20 21:32:37 +02:00
a852c8bcc5 chore: bring back rust-toolchain.toml 2024-10-20 21:31:27 +02:00
235e13230b Merge branch 'main' into javalsai-changes 2024-10-20 21:31:03 +02:00
danmax
bd517ed0b5 chore: bump version to 1.0.0 2024-10-20 01:48:21 -04:00
5f848be434 Merge pull request 'refactor: accept cli args only' (#9) from ErrorNoInternet/r34-scraper:cli-args-only into main
Reviewed-on: https://git.javalsai.dynv6.net/danmax/r34-scraper/pulls/9
2024-10-20 06:25:37 +02:00
08ed5e51f2 Merge pull request 'feat(cli): add page argument to start scraping from a specific page' (#8) from ErrorNoInternet/r34-scraper:add-page-arg into main
Reviewed-on: https://git.javalsai.dynv6.net/danmax/r34-scraper/pulls/8
2024-10-20 06:17:35 +02:00
3573f6ff5a feat(cli): add page argument to start scraping from a specific page 2024-10-20 00:16:22 -04:00
bcd349e36f cargo clippy 👍 2024-10-19 21:50:54 +02:00
5 changed files with 14 additions and 12 deletions

6
Cargo.lock generated
View File

@@ -1039,7 +1039,7 @@ dependencies = [
[[package]]
name = "r34-scraper"
-version = "0.1.0"
+version = "1.0.0"
dependencies = [
"async-std",
"clap",
@@ -1329,9 +1329,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
-version = "2.0.79"
+version = "2.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
+checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -1,6 +1,6 @@
[package]
name = "r34-scraper"
-version = "0.1.0"
+version = "1.0.0"
edition = "2021"
[dependencies]

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"

View File

@@ -14,8 +14,12 @@ pub struct Args {
#[arg(short, long, required = true)]
pub tags: Vec<String>,
+/// Page to start scraping from
+#[arg(short, long, default_value_t = 1)]
+pub page: usize,
/// Async jobs to use for fetching
-#[arg(short, long, default_value = "4")]
+#[arg(short, long, default_value_t = 4)]
pub jobs: usize,
/// Delay for rate-limits (ms)

View File

@@ -25,12 +25,8 @@ async fn main() -> ExitCode {
.build()
.unwrap();
-for page in 0.. {
-println!("now scraping page {}", page + 1);
-println!(
-"https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={}",
-page * 42
-);
+for page in args.page - 1.. {
+println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);
let post_html = async |client: &Client| {
extract_urls(
@@ -81,7 +77,7 @@ async fn main() -> ExitCode {
async move {
// "thread"
loop {
-this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url:?}\x1b[0m"));
+this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
let resp = client.get(&url).send().await.unwrap();
if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
if img_url.is_empty() {