2 Commits

Author SHA1 Message Date
grialion
1a5fc75162 chore(README): add example usage image 2024-10-18 23:00:26 +02:00
grialion
91eff584cb fix: img regex 2024-10-18 22:53:39 +02:00
3 changed files with 5 additions and 5 deletions

View File

@@ -5,3 +5,6 @@ a scraper that well scrapes r34
## note ## note
this thing is still not complete: it only gathers links, it doesn't download things yet this thing is still not complete: it only gathers links, it doesn't download things yet
## example usage image
![example image](./image.png)

BIN
image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

View File

@@ -21,7 +21,6 @@ async fn main() -> ExitCode {
let mut page = 0; let mut page = 0;
loop { loop {
println!("now scraping page {}", page + 1); println!("now scraping page {}", page + 1);
let post_html = async || { let post_html = async || {
@@ -45,7 +44,6 @@ async fn main() -> ExitCode {
let mut wait_time = 5000; let mut wait_time = 5000;
if urls.is_empty() { if urls.is_empty() {
for reconnection_attempts in 0..4 { for reconnection_attempts in 0..4 {
println!("no urls found, retrying in {} seconds...", wait_time / 1000); println!("no urls found, retrying in {} seconds...", wait_time / 1000);
sleep(Duration::from_millis(wait_time)).await; sleep(Duration::from_millis(wait_time)).await;
@@ -67,7 +65,6 @@ async fn main() -> ExitCode {
} }
for url in urls { for url in urls {
let img_url = let img_url =
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap()); extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
if img_url.is_empty() { if img_url.is_empty() {
@@ -91,7 +88,7 @@ fn extract_urls(html: &str) -> Vec<String> {
fn extract_img_url(html: &str) -> String { fn extract_img_url(html: &str) -> String {
if let Some(img_url) = if let Some(img_url) =
Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
.unwrap() .unwrap()
.find(html) .find(html)
{ {