forked from danmax/r34-scraper
		
	added multithreading
Co-authored-by: ErrorNoInternet <errornointernet@envs.net>
This commit is contained in:
		
							
								
								
									
										44
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								src/main.rs
									
									
									
									
									
								
							| @@ -17,6 +17,7 @@ async fn main() -> ExitCode { | |||||||
|         .trim() |         .trim() | ||||||
|         .to_string(); |         .to_string(); | ||||||
|  |  | ||||||
|  |     let mut thread_counter = 0; | ||||||
|     let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); |     let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); | ||||||
|     let mut page = 0; |     let mut page = 0; | ||||||
|  |  | ||||||
| @@ -65,12 +66,33 @@ async fn main() -> ExitCode { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         for url in urls { |         for url in urls { | ||||||
|             let img_url = |             tokio::spawn(async move { | ||||||
|                 extract_img_url(&client.get(url).send().await.unwrap().text().await.unwrap()); |                 let thread_id = format!("[{thread_counter: >4}]"); | ||||||
|             if img_url.is_empty() { |                 loop { | ||||||
|                 println!("image url not found"); |                     let client = Client::builder().user_agent(USER_AGENT).build().unwrap(); | ||||||
|             } else { |                     match extract_img_url(&client.get(url.clone()).send().await.unwrap().text().await.unwrap()) { | ||||||
|                 println!("found image url: {img_url}"); |                         Ok(img_url) => { | ||||||
|  |                             if img_url.is_empty() { | ||||||
|  |                                 println!("{thread_id} image url not found"); | ||||||
|  |                             } else { | ||||||
|  |                                 println!("{thread_id} found image url: {img_url}"); | ||||||
|  |                             } | ||||||
|  |                         }, | ||||||
|  |                         Err(_) => { | ||||||
|  |                             println!("{thread_id} ratelimited, retrying after 1 second"); | ||||||
|  |                             std::thread::sleep(std::time::Duration::from_millis(1000)); | ||||||
|  |                             continue; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |             }); | ||||||
|  |             thread_counter += 1; | ||||||
|  |             if thread_counter > 9999 { | ||||||
|  |                 thread_counter = 0; | ||||||
|  |             } | ||||||
|  |             while tokio::runtime::Handle::current().metrics().num_alive_tasks() > 4 { | ||||||
|  |                 std::thread::sleep(std::time::Duration::from_millis(100)); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -86,14 +108,18 @@ fn extract_urls(html: &str) -> Vec<String> { | |||||||
|         .collect() |         .collect() | ||||||
| } | } | ||||||
|  |  | ||||||
| fn extract_img_url(html: &str) -> String { | fn extract_img_url(html: &str) -> Result<String, &'static str> { | ||||||
|     if let Some(img_url) = |     if let Some(img_url) = | ||||||
|         Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") |         Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+") | ||||||
|             .unwrap() |             .unwrap() | ||||||
|             .find(html) |             .find(html) | ||||||
|     { |     { | ||||||
|         img_url.as_str().to_string() |         Ok(img_url.as_str().to_string()) | ||||||
|     } else { |     } else { | ||||||
|         String::new() |         if html.contains("503 Rate limiting") { | ||||||
|  |             Err("ratelimited") | ||||||
|  |         } else { | ||||||
|  |             Ok(String::new()) | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user