forked from danmax/r34-scraper
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			javalsai-c
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 1a5fc75162 | ||
|  | 91eff584cb | 
| @@ -5,3 +5,6 @@ a scraper that well scrapes r34 | ||||
| ## note  | ||||
|  | ||||
| this thing is still not completed: it only gathers links, it doesn't download things yet | ||||
|  | ||||
| ## example usage image | ||||
|  | ||||
|   | ||||
| @@ -21,7 +21,6 @@ async fn main() -> ExitCode { | ||||
|     let mut page = 0; | ||||
|  | ||||
|     loop { | ||||
|  | ||||
|         println!("now scraping page {}", page + 1); | ||||
|  | ||||
|         let post_html = async || { | ||||
| @@ -45,11 +44,10 @@ async fn main() -> ExitCode { | ||||
|         let mut wait_time = 5000; | ||||
|  | ||||
|         if urls.is_empty() { | ||||
|  | ||||
|             for reconnection_attempts in 0..4 { | ||||
|                 println!("no urls found, retrying in {} seconds...", wait_time / 1000); | ||||
|                 sleep(Duration::from_millis(wait_time)).await; | ||||
|                  | ||||
|  | ||||
|                 urls = post_html().await; | ||||
|  | ||||
|                 if !urls.is_empty() { | ||||
| @@ -67,7 +65,6 @@ async fn main() -> ExitCode { | ||||
|         } | ||||
|  | ||||
|         for url in urls { | ||||
|  | ||||
|             let img_url = | ||||
|                 extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap()); | ||||
|             if img_url.is_empty() { | ||||
| @@ -91,7 +88,7 @@ fn extract_urls(html: &str) -> Vec<String> { | ||||
|  | ||||
| fn extract_img_url(html: &str) -> String { | ||||
|     if let Some(img_url) = | ||||
|         Regex::new(r"https://us\.rule34\.xxx//([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") | ||||
|         Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") | ||||
|             .unwrap() | ||||
|             .find(html) | ||||
|     { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user