Compare commits

...

2 Commits

Author SHA1 Message Date
grialion
1a5fc75162 chore(README): add example usage image 2024-10-18 23:00:26 +02:00
grialion
91eff584cb fix: img regex 2024-10-18 22:53:39 +02:00
3 changed files with 5 additions and 5 deletions

View File

@ -5,3 +5,6 @@ a scraper that well scrapes r34
## note
this thing is not finished yet: it only gathers links, it doesn't download anything yet
## example usage image
![example image](./image.png)

BIN
image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

View File

@ -21,7 +21,6 @@ async fn main() -> ExitCode {
let mut page = 0;
loop {
println!("now scraping page {}", page + 1);
let post_html = async || {
@ -45,11 +44,10 @@ async fn main() -> ExitCode {
let mut wait_time = 5000;
if urls.is_empty() {
for reconnection_attempts in 0..4 {
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
sleep(Duration::from_millis(wait_time)).await;
urls = post_html().await;
if !urls.is_empty() {
@ -67,7 +65,6 @@ async fn main() -> ExitCode {
}
for url in urls {
let img_url =
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
if img_url.is_empty() {
@ -91,7 +88,7 @@ fn extract_urls(html: &str) -> Vec<String> {
fn extract_img_url(html: &str) -> String {
if let Some(img_url) =
Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
.unwrap()
.find(html)
{