2 Commits

Author SHA1 Message Date
grialion 1a5fc75162 chore(README): add example usage image 2024-10-18 23:00:26 +02:00
grialion 91eff584cb fix: img regex 2024-10-18 22:53:39 +02:00
3 changed files with 5 additions and 5 deletions
+3
View File
@@ -5,3 +5,6 @@ a scraper that well scrapes r34
## note ## note
this thing is still not completed, it only gathers links, it doesnt download things yet this thing is still not completed, it only gathers links, it doesnt download things yet
## example usage image
![example image](./image.png)
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

+1 -4
View File
@@ -21,7 +21,6 @@ async fn main() -> ExitCode {
let mut page = 0; let mut page = 0;
loop { loop {
println!("now scraping page {}", page + 1); println!("now scraping page {}", page + 1);
let post_html = async || { let post_html = async || {
@@ -45,7 +44,6 @@ async fn main() -> ExitCode {
let mut wait_time = 5000; let mut wait_time = 5000;
if urls.is_empty() { if urls.is_empty() {
for reconnection_attempts in 0..4 { for reconnection_attempts in 0..4 {
println!("no urls found, retrying in {} seconds...", wait_time / 1000); println!("no urls found, retrying in {} seconds...", wait_time / 1000);
sleep(Duration::from_millis(wait_time)).await; sleep(Duration::from_millis(wait_time)).await;
@@ -67,7 +65,6 @@ async fn main() -> ExitCode {
} }
for url in urls { for url in urls {
let img_url = let img_url =
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap()); extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
if img_url.is_empty() { if img_url.is_empty() {
@@ -91,7 +88,7 @@ fn extract_urls(html: &str) -> Vec<String> {
fn extract_img_url(html: &str) -> String { fn extract_img_url(html: &str) -> String {
if let Some(img_url) = if let Some(img_url) =
Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
.unwrap() .unwrap()
.find(html) .find(html)
{ {