forked from danmax/r34-scraper
Merge pull request 'fix: img regex' (#3) from grialion/r34-scraper:main into main
Reviewed-on: https://git.javalsai.dynv6.net/danmax/r34-scraper/pulls/3
This commit is contained in:
commit
722df4305d
@ -21,7 +21,6 @@ async fn main() -> ExitCode {
|
||||
let mut page = 0;
|
||||
|
||||
loop {
|
||||
|
||||
println!("now scraping page {}", page + 1);
|
||||
|
||||
let post_html = async || {
|
||||
@ -45,7 +44,6 @@ async fn main() -> ExitCode {
|
||||
let mut wait_time = 5000;
|
||||
|
||||
if urls.is_empty() {
|
||||
|
||||
for reconnection_attempts in 0..4 {
|
||||
println!("no urls found, retrying in {} seconds...", wait_time / 1000);
|
||||
sleep(Duration::from_millis(wait_time)).await;
|
||||
@ -67,7 +65,6 @@ async fn main() -> ExitCode {
|
||||
}
|
||||
|
||||
for url in urls {
|
||||
|
||||
let img_url =
|
||||
extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap());
|
||||
if img_url.is_empty() {
|
||||
@ -91,7 +88,7 @@ fn extract_urls(html: &str) -> Vec<String> {
|
||||
|
||||
fn extract_img_url(html: &str) -> String {
|
||||
if let Some(img_url) =
|
||||
Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
||||
Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
|
||||
.unwrap()
|
||||
.find(html)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user