#![feature(async_closure)]
pub mod args;

use clap::Parser;
use futures::{stream, StreamExt};
use indicatif::ProgressBar;
use regex::Regex;
use reqwest::Client;
use tokio::time::{sleep, Duration};

use std::io::Write;
use std::process::ExitCode;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

/// Number of steps in each per-download progress bar; download progress is scaled to this range.
const BAR_LENGTH: u64 = 8;

/// Entry point: walks the listing pages for the requested tags and downloads every post's image.
///
/// The tags are joined with `+` and that string is used both as the `tags` query parameter and
/// as the name of the output directory. Pages are visited starting at `args.page` (1-based on the
/// command line, 0-based internally) with no upper bound, so the loop only ends through one of
/// the early returns below.
///
/// Returns `ExitCode::FAILURE` when Ctrl-C was pressed or when a page kept yielding no post
/// links for twelve consecutive retries.
///
/// # Panics
///
/// Panics if the Ctrl-C handler or HTTP client cannot be set up, or if any listing/post request
/// fails (all of those results are unwrapped).
#[tokio::main]
async fn main() -> ExitCode {
    let args = args::Args::parse();

    // The joined tag string doubles as the download directory name. The result is ignored, so an
    // already existing directory (or any other creation error) does not stop the run.
    let uri_tags = &args.tags.join("+");
    let _ = std::fs::create_dir(uri_tags);

    // Shared "keep going" flag: the Ctrl-C handler clears it and every loop below polls it.
    let running = Arc::new(AtomicBool::new(true));
    let running_t = running.clone();
    ctrlc::set_handler(move || {
        running_t.store(false, Ordering::SeqCst);
    })
    .unwrap();

    // One client, configured with the user-supplied user agent, is reused for all page requests.
    let client = Client::builder()
        .user_agent(&args.user_agent)
        .build()
        .unwrap();

    // Open-ended range: pages are scraped until an early return fires.
    for page in args.page - 1.. {
        if !running.load(Ordering::SeqCst) {
            return ExitCode::FAILURE;
        }

        // `pid` advances by 42 per page — presumably the number of posts the site lists per page.
        println!("now scraping page {} (https://rule34.xxx/index.php?page=post&s=list&tags={uri_tags}&pid={})", page + 1, page * 42);

        // Fetches the current listing page and returns the post URLs found in it.
        let post_html = async |client: &Client| {
            extract_urls(
                &client
                    .get(format!(
                        "https://rule34.xxx/index.php?page=post&s=list&pid={}&tags={uri_tags}",
                        page * 42
                    ))
                    .send()
                    .await
                    .unwrap()
                    .text()
                    .await
                    .unwrap(),
            )
        };

        let mut urls = post_html(&client).await;
        if urls.is_empty() {
            // An empty page is retried every 5 seconds, up to 12 times, before giving up.
            let mut reconnection_attempts = 0;
            loop {
                println!("no urls found, retrying in 5 seconds...");
                sleep(Duration::from_millis(5000)).await;

                if !running.load(Ordering::SeqCst) {
                    return ExitCode::FAILURE;
                }

                urls = post_html(&client).await;
                if !urls.is_empty() {
                    println!("urls found! continuing...");
                    break;
                }

                reconnection_attempts += 1;
                if reconnection_attempts == 12 {
                    println!("no urls found in 1 minute, exiting...");
                    return ExitCode::FAILURE;
                }
            }
        }

        // One progress bar per post, all rendered together through a shared `MultiProgress`.
        let multi_prog = indicatif::MultiProgress::new();
        let urls_amount = urls.len();
        let responses = stream::iter(urls.into_iter().enumerate())
            .map(|(i, url)| {
                // Shift to a 1-based index for display; it is also used as the bar's insert position.
                let i = i + 1;
                let client = &client;
                let running_t = running.clone();
                let this_bar = indicatif::ProgressBar::new(BAR_LENGTH);
                this_bar.set_style(indicatif::ProgressStyle::with_template("[{bar}] {msg}").unwrap().progress_chars("=> "));
                let this_bar = multi_prog.insert(i, this_bar);
                async move {
                    // Per-post worker: keeps re-fetching the post page until it is not rate-limited.
                    loop {
                        if !running_t.load(Ordering::SeqCst) {
                            return;
                        }

                        this_bar.set_message(format!("\x1b[37m[{i: >4}/{urls_amount}] \x1b[36mscraping {url}\x1b[0m"));
                        let resp = client.get(&url).send().await.unwrap();
                        // `Ok` means the page was served: either with an image URL or (empty string)
                        // without one. `Err` means the rate-limit page was returned instead.
                        if let Ok(img_url) = extract_img_url(&resp.text().await.unwrap()) {
                            if img_url.is_empty() {
                                this_bar.abandon_with_message(format!(
                                    "\x1b[37m[{i: >4}/{urls_amount}] \x1b[1;31mimage url not found\x1b[0m"
                                ));
                            } else {
                                download_file(running_t, &img_url, this_bar, i, urls_amount, uri_tags).await;
                            }
                            break;
                        }

                        // Rate-limited: wait for the configured delay, then retry the same post.
                        this_bar
                            .set_message(format!(
                                    "\x1b[37m[{i: >4}/{urls_amount}] \x1b[31mratelimited, retrying after {}ms\x1b[0m",
                                    args.delay.as_millis())
                                );
                        tokio::time::sleep(args.delay).await;
                    }
                }
            })
            // Run at most `args.jobs` of the per-post futures at the same time.
            .buffered(args.jobs);
        // Drive the stream to completion; the workers produce no values.
        let _ = responses.for_each(|()| async {}).await;
    }

    ExitCode::SUCCESS
}

/// Collects every post-view link found in a listing page.
///
/// Each relative `/index.php?page=post&s=view&id=<digits>` occurrence in `html` is turned into
/// an absolute URL on the site. Matches are returned in document order and are not
/// de-duplicated; an empty vector means the page contained no such link.
fn extract_urls(html: &str) -> Vec<String> {
    let post_link = Regex::new(r"/index\.php\?page=post&s=view&id=\d+").unwrap();

    let mut urls = Vec::new();
    for link in post_link.find_iter(html) {
        urls.push(format!("https://rule34.xxx{}", link.as_str()));
    }
    urls
}

/// Finds the full-size image URL inside a post page.
///
/// # Returns
///
/// * `Ok(url)` — the first image URL on the `us.` image host found in `html`.
/// * `Ok("")` — an empty string when the page holds neither an image URL nor the
///   rate-limit marker.
///
/// # Errors
///
/// Returns `Err("ratelimited")` when no image URL is present and the page contains the text
/// `503 Rate limiting`.
fn extract_img_url(html: &str) -> Result<String, &'static str> {
    let image_pattern =
        Regex::new(r"https://us\.rule34\.xxx/images/([A-Za-z0-9]+(/[A-Za-z0-9]+)+)\.[A-Za-z0-9]+")
            .unwrap();

    match image_pattern.find(html) {
        Some(found) => Ok(found.as_str().to_string()),
        // Only consult the rate-limit marker when no image URL was present.
        None if html.contains("503 Rate limiting") => Err("ratelimited"),
        None => Ok(String::new()),
    }
}

async fn download_file(
    running: Arc<AtomicBool>,
    img_url: &str,
    this_bar: ProgressBar,
    i: usize,
    urls_amount: usize,
    uri_tags: &str,
) {
    let args = args::Args::parse();

    let file_name = Regex::new(r"[^/]+$")
        .unwrap()
        .find(img_url)
        .map(|m| m.as_str())
        .unwrap();

    let file_path = uri_tags.to_owned() + "/" + file_name;

    let mut file = if std::fs::File::open(&file_path).is_ok() {
        this_bar.finish_with_message(format!(
            "\x1b[37m[{i: >4}/{urls_amount}] \x1b[33m{file_name} exists, skipping...\x1b[0m"
        ));
        return;
    } else {
        std::fs::File::create(&file_path).unwrap()
    };

    let mut res = Client::new()
        .get(img_url)
        .header("User-Agent", &args.user_agent)
        .send()
        .await
        .unwrap();
    let file_length = res.content_length().unwrap();
    let mut written = 0;
    while let Some(img_chunk) = res.chunk().await.unwrap() {
        if !running.load(Ordering::SeqCst) {
            this_bar.abandon_with_message(format!(
                "\x1b[37m[{i: >4}/{urls_amount}] \x1b[33mcancelling {img_url}\x1b[0m"
            ));
            drop(file);
            std::fs::remove_file(&file_path).unwrap();
            return;
        }
        file.write_all(&img_chunk).unwrap();
        written += img_chunk.len();
        this_bar.set_position((written as f64 / file_length as f64 * BAR_LENGTH as f64) as u64);
    }
    this_bar.finish_with_message(format!(
        "\x1b[37m[{i: >4}/{urls_amount}] \x1b[32mdownloaded {img_url}\x1b[0m"
    ));
}