readme

works ish
Gemini version
2025-04-03 23:19:21 +01:00 · 2025-04-03 22:15:38 +01:00 · 2025-04-03 21:42:37 +01:00 · 2025-01-26 11:15:34 +00:00 · 2025-01-26 02:59:06 +00:00 · 2025-01-26 02:39:43 +00:00
4 changed files with 252 additions and 263 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,6 @@ edition = "2021"
 [dependencies]
 colored = "2.2.0"
 regex = "1.11.1"
-url = "2.5.4"
 termion = "4.0.3"
 open = "5.3.2"
+url = "2.5.4"
--- a/README.md
+++ b/README.md
@@ -1,11 +1,36 @@
 # Markdown web browser
-A web browser that let's you browse 'mttp' websites that use markdown as a superior standard to html
-Fully static!
+A web browser that lets you browse gemini capsules that use gemtext as a superior standard to html.
+
+# Requirements
+- gemget
+
+# Installing
+## Build from source:
+- Clone the repo
+```cargo run -r```
+
+# Help
+Type h in the program to see this text:
+```
+Source code: https://git.javalsai.dynv6.net/deadvey/markdown-webbrowser
+q: quit
+d: debug info
+h: help
+r: reload
+s: view source code of page
+i: visit root index of this host eg: root index of gemini://deadvey.com/blog/4.md is just gemini://deadvey.com
+b: go back in history
+f: go forward in history
+ox: print the hyprlink of reference x eg: o5 or o24
+[url]: follow the inputed url
+```

 # Example:
 ![screenshot](/images/screenshot.png)

 # TO DO
+- Make pages scrollable
+- Bookmarks
 - Properly comment it because I can't lie I can't even follow it.
 - Make it memory safe, it crashes if the input is unexpected.
 - Use treesitter instead of Regex, because, reasons.
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,192 +1,72 @@
 use std::process::{Command};
 use std::io::{stdin,stdout,Write};
 use colored::Colorize;
-use regex::Regex;
+use url::{Url, ParseError};
+use std::fs;

-struct Url {
-	protocol: String,
-	hostname: String,
-	port: u16,
-	path: String,
-}
+const DEBUG_MODE: bool = false;
+
+// Import other files
+mod parse_gemtext;

 fn clear_screen() {
-    Command::new("clear")
-        .status()
-        .expect("Failed to clear screen");
-    //println!("clearing");
-}
-
-fn parse_markdown(page_content: String) -> (String, Vec<String>) {
-	let mut parsed_page_content: String = "".to_string();
-	let mut hyperlink_number_counter: u64 = 0;
-	let mut links: Vec<String> = Vec::new();
-	let (screen_width, _screen_height) = termion::terminal_size().unwrap(); // So the horizontal line (<hr/>) spans the whole console
-	
-    for line in page_content.lines() {
-        let mut parsed_line: String = line.to_string();
-        // Bold
-        let bold_regex = Regex::new(r"((\*\*)|(__))(.*?)((\*\*)|(__))").unwrap();
-        parsed_line = bold_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            caps[4].bold().to_string()
-        }).to_string();
-
-        // Strikethrough
-        let strikethrough_regex = Regex::new(r"~~(.*?)~~").unwrap();
-        parsed_line = strikethrough_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            caps[1].strikethrough().to_string()
-        }).to_string();
-
-        // Horizontal lines
-        let hr_regex = Regex::new(r"^(\*\*\*)|(---)|(___)$").unwrap();
-        parsed_line = hr_regex.replace_all(&parsed_line, |_caps: &regex::Captures| {
-            let mut result: String = "\n".to_string();
-            for _x in 0..screen_width/2 {
-                result += "- ";
-            }
-            result += "\n";
-            result
-        }).to_string();
-
-        // html br tag support
-        let br_regex = Regex::new(r"(.*?)<br/>(.*?)").unwrap();
-        parsed_line = br_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            format!("{}{}{}", &caps[1], "\n", &caps[2])
-        }).to_string();
-
-        // Italics
-        let italic_regex = Regex::new(r"\*(.*?)\*").unwrap();
-        parsed_line = italic_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            caps[1].italic().to_string()
-        }).to_string();
-        let italic_regex = Regex::new(r"_(.*?)_").unwrap();
-        parsed_line = italic_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            caps[1].italic().to_string()
-        }).to_string();
-
-        // Block quotes
-        let block_quotes_regex = Regex::new(r"^>(.*)").unwrap();
-        parsed_line = block_quotes_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            format!("   | {}", &caps[1])
-        }).to_string();
-
-        // Ordered list
-        let ordered_list_regex = Regex::new(r"^([ \t]+|^)([0-9]+)\. (.*)").unwrap();
-        parsed_line = ordered_list_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            format!("{} {}. {}", &caps[1], &caps[2], &caps[3])
-        }).to_string();
-
-        // Unordered list ([    ]+|^)- (.*)
-        let unordered_list_regex = Regex::new(r"^([ \t]+|^)(-|\+|\*).(.*)").unwrap();
-        parsed_line = unordered_list_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            format!("{} • {}", &caps[1], &caps[3])
-        }).to_string();
-
-        // Inline code
-        let inline_code_regex = Regex::new(r"`(.*?)`").unwrap();
-        parsed_line = inline_code_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            format!("{}", &caps[1].magenta())
-        }).to_string();
-
-        // HyperLink
-        let hyperlink_regex = Regex::new(r"(.*?)\[(.*?)\]\((.*?)\)").unwrap();
-        parsed_line = hyperlink_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            // Check if the character before the link is not '!'
-            if !caps[1].ends_with('!') {  // caps[1] is everything before the link
-                let result = format!("{}{}[{}]", &caps[1], &caps[2].blue().underline(), hyperlink_number_counter);
-                let url = caps[3].to_string();
-                links.push(url);
-                hyperlink_number_counter += 1;
-                result
-            } else {
-                // If it's an image (starts with !), return the link as is
-                let url = caps[3].to_string();
-                links.push(url);
-                hyperlink_number_counter += 1;
-                format!("({})[{}]", &caps[2].green(), hyperlink_number_counter)
-            }
-        }).to_string();
-        
-        let quick_hyperlink_regex = Regex::new(r"<(.*?)>").unwrap();
-        parsed_line = quick_hyperlink_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
-            hyperlink_number_counter += 1;
-            let url = caps[1].to_string();
-            links.push(url);
-            format!("{}[{}]", &caps[1].blue().underline(), hyperlink_number_counter)
-        }).to_string();
-
-
-        parsed_page_content+=&(parsed_line + "\n");
+    if DEBUG_MODE == false
+    {
+        println!("clearing");
+        Command::new("clear")
+            .status()
+            .expect("Failed to clear screen");
    }
-
-    // multiline code
-    let multiline_code_regex = Regex::new(r"(?ms)%%%((.*?\n)+?)%%%").unwrap();
-    parsed_page_content = multiline_code_regex.replace_all(&parsed_page_content, |caps: &regex::Captures| {
-        // Capture the code inside the %% blocks
-        let code_block = &caps[1];
-        
-        // Add a tab to each line in the block
-        let indented_code = code_block
-            .lines()
-            .map(|line| format!("\t{}", line)) // Insert tab at the start of each line
-            .collect::<Vec<String>>()
-            .join("\n");
-
-        // Return the formatted block with magenta color
-        format!("{}", indented_code.magenta())
-    }).to_string();
-
-    return (parsed_page_content, links);
 }

-fn fetch_page(host: &String, port: u16, path: &String) -> String {
-    let full_url_formatted = format!("{}:{}{}", host, port, path);
+fn fetch_page(url: &Url) {
+    let full_url_formatted = format!("{}", url);

-    // Call curl using Com, mand
-    let output = Command::new("curl")
-        .arg(full_url_formatted)
+    let output = Command::new("gemget")
+        .args([full_url_formatted, "-o".to_string(), "/tmp/page".to_string()])
        .output()
-        .expect("Failed to execute curl command");
+        .expect("Failed to execute gemget command");

    // Check if the command was successful
-    if output.status.success() {
-        let page: String = String::from_utf8_lossy(&output.stdout).to_string();
-        return page
-    } else {
-        eprintln!("Error:\n{}", String::from_utf8_lossy(&output.stderr));
-        let result: String = "error".to_string();
-        return result
-    }
+    if ! output.status.success() {
+        println!("{}\n{:?}\n", "Failed to fetch page:".red(), output);
+    } 
 }

-fn render_page(host: String, port: u16, path: String) -> Vec<String> {
+fn render_page(url: Url, source: bool) -> Vec<String> {
 	clear_screen();
-	let mut content = fetch_page(&host, port, &path);
+	fetch_page(&url);
 	let mut links = Vec::new();
-	let (screen_width, _screen_height) = termion::terminal_size().unwrap();
+        if let Ok(mut content) = fs::read_to_string::<String>("/tmp/page".to_string()) {
+            Command::new("rm")
+                .arg("/tmp/page")
+                .output()
+                .expect("Failed to delete tmp page");
+	    let (screen_width, _screen_height) = termion::terminal_size().unwrap();

-	if &content[..13] == "<!DOCTYPE md>" {
-		(content, links) = parse_markdown((&content[13..]).to_string());
-	}
-	else {
-		content += &format!("{}", &"Warning: This page is invalid markdown, it should contain <!DOCTYPE md> at the very start of the file, showing raw text".yellow());
-	}
+            if source == true {
+                    content += &format!("{}", &"Viewing source code".yellow());
+            }
+            else {
+                (content, links) = parse_gemtext::parse_gemtext(content);
+            }

-	for _i in 0..screen_width {
-		print!("—"); 
-	}
-	print!("{}:{}{}\n", host, port, path);
-	for _i in 0..screen_width {
-		print!("—"); 
-	}
-	println!("\n\n{}", content);
-	for _i in 0..screen_width {
-		print!("—"); 
-	}
-	println!();
+            for _i in 0..screen_width {
+                    print!("-"); 
+            }
+            print!("{}\n", url);
+            for _i in 0..screen_width {
+                    print!("-"); 
+            }
+            println!("\n\n{}", content);
+            for _i in 0..screen_width {
+                    print!("-"); 
+            }
+            println!();

-	// Return links (you can add link parsing logic)
-	return links;   
+            // Return links (you can add link parsing logic)
+        }
+        return links;
 }

 fn input() -> String{
@@ -202,59 +82,31 @@ fn input() -> String{
    return s;
 }

-fn parse_url(user_input: String, previous_host: &String) -> Result<Url, u8> {
-    let mut url = Url {
-	protocol: "internal".to_string(),
-	hostname: "home".to_string(),
-	port: 0,
-	path: "/".to_string(),
-    };
+fn parse_url(user_input: String, previous_url: &Url) -> Result<Url, ParseError> {
+	println!("user input: {}",user_input);
+	println!("previous url: {:?}",previous_url);
+	let to_parse =  if user_input.contains("://")  {
+		println!("Contains different scheme or is a path");
+		user_input
+	}
+	else {
+		format!("gemini://{}/{}",Url::host_str(previous_url).expect("ivalid").to_string(), user_input)
+	};

-    let mttp_regex = Regex::new(r"^mttp:\/\/(.*?)\/(.*?)$").unwrap(); //  mttp://example.com/index.md
-    let mttp_regex_no_path = Regex::new(r"^mttp:\/\/(.*?)$").unwrap(); // mttp://example.com
-    //let accept_this_as_mttp = Regex::new(r"^(.*?)\.(.*?)$").unwrap(); //         example.com
-    //let no_protocol_but_path = Regex::new(r"^(.*?)/(.*?)$").unwrap(); //  example.com/index.md
-    let path_change = Regex::new(r"^/(.*?)$").unwrap(); //                /index.md
+	println!("Parsing: {}", to_parse);
+	if let Ok(mut url) = Url::parse(&to_parse) {
+		if url.port() == None {
+			let _ = url.set_port(Some(1965));
+		}
+		println!("{:?}",url);
+		println!("{}",url.as_str());
+		println!("parsed successfully");
+		return Ok(url)
+	}
+	else {
+		return Err(ParseError::InvalidDomainCharacter)
+	}

-    if let Some(caps) = mttp_regex.captures(&user_input) {
-        url.hostname = caps[1].to_string(); 
-        url.port = 3477;   
-        url.path = caps[2].to_string();
-        url.protocol = "mttp".to_string();
-    	Ok(url)
-    }
-    else if let Some(caps) = mttp_regex_no_path.captures(&user_input) {
-        url.hostname = caps[1].to_string(); 
-        url.port = 3477;   
-        url.path = "/".to_string();
-        url.protocol = "mttp".to_string();
-    	Ok(url)
-    }
-    else if let Some(caps) = path_change.captures(&user_input) {
-        url.hostname = previous_host.to_string(); 
-        url.port = 3477;   
-        url.path = format!("/{}", caps[1].to_string());
-        url.protocol = "mttp".to_string();
-    	Ok(url)
-    }
-    //else if let Some(caps) = no_protocol_but_path.captures(&user_input) {
-      //  url.hostname = caps[1].to_string(); 
-        //url.port = 3477;
-        //url.path = caps[2].to_string();
-        //url.protocol = "mttp".to_string();
-    	//Ok(url)
-    //}
-    //else if let Some(caps) = accept_this_as_mttp.captures(&user_input) {
-      //  url.hostname = format!("{}{}{}",caps[1].to_string(),".",caps[2].to_string());
-        //url.port = 3477;   
-     //   url.path = "/".to_string();
-      //  url.protocol = "mttp".to_string();
-    	//Ok(url)
-    //}
-    else {
-	open::that(user_input); // Fallback to open it in the web browser
-	Err(1)
-    }
 }

 fn main() {
@@ -262,64 +114,85 @@ fn main() {
 	println!("Enter a url: ");
 	let user_input = input();

-	if user_input == "q" {
+	if user_input == "q" || user_input == "quit" || user_input == "exit" {
 		std::process::exit(0);
 	}
 	let mut load_page: bool = true;
 	let mut history: Vec<Url> = Vec::new();
 	let mut historical_position: usize = 0;
 	let mut links: Vec<String> = Vec::new();
-	let mut url = Url {
-		protocol: "internal".to_string(),
-		hostname: "home".to_string(),
-		port: 0,
-		path: "/".to_string(),
-	};
-	if let Ok(parsed_value) = parse_url(user_input, &"example.com".to_string()) {
-		url = parsed_value;
-		history.push(Url {
-			protocol: url.protocol.clone(),
-			hostname: url.hostname.clone(),
-			port: url.port.clone(),
-			path: url.path.clone(),
-		}); 
+	let mut source: bool = false; // Whether to show the page's raw source instead of the rendered version
+	if let Ok(mut url) = parse_url(user_input, &Url::parse(&"gemini://geminiprotocol.net").unwrap()) { // Change this and make internal pages ;)
+		history.push(url.clone());
 		'mainloop: loop {
 			if load_page {
-				links = Vec::new();
-				links = render_page(history[historical_position].hostname.clone(), history[historical_position].port.clone(), history[historical_position].path.clone());   
+				links = render_page(history[historical_position].clone(), source);
 				println!("Enter reference number to follow, h for help, or q to quit");
-				//for i in 0..history.len() {
-				//	println!("{}://{}:{}/{}",history[i].protocol,history[i].hostname, history[i].port, history[i].path);
-				//}
 			}
+                        url = history[historical_position].clone();
 			load_page = false;

 			let user_input = input(); 
 			if user_input == "q" {
 				break 'mainloop;
 			}
+                        else if user_input == "d" {
+                            println!(
+                                "load_page: {}\nhistory: {:?}\nhistorical_postition: {}\nlinks: {:?}\nsource: {}",
+                                load_page,
+                                history,
+                                historical_position,
+                                links,
+                                source
+                            );
+                        }
 			else if user_input == "r" {
 				load_page = true;
 				continue;
 			}
+			else if user_input == "s" {
+				source = ! source; // Flip the boolean to toggle source mode
+				load_page = true;
+			}
 			else if user_input == "i" {
-				url.path = "/".to_string();
+				let _ = url.set_path("/"); 				
+				for _i in historical_position+1..history.len() {
+					history.remove(historical_position+1);
+				}
+				history.push(url.clone());
+				historical_position += 1;
 				load_page = true;
 			}
 			else if user_input == "b" {
-				if historical_position >= 1 {
+				if historical_position > 0 {
 					historical_position -= 1;
-					if let Ok(parsed_value) = parse_url(format!("{}://{}/{}",history[historical_position].protocol.clone(), history[historical_position].hostname.clone(),history[historical_position].path.clone()),&url.hostname) {
-						url = parsed_value;
-						load_page = true;
-					}
-					else {
-						println!("Invalid url");
-					}
+					load_page = true;
+				}
+				else {
+					println!("At start of history");
+				}
+			}
+			else if user_input == "f" {
+				if historical_position < history.len()-1 {
+					historical_position += 1;
+					load_page = true;
+				}
+				else {
+					println!("At end of history");
 				}
 			}
 			else if user_input == "h" {
-				println!("Source code: https://git.javalsai.dynv6.net/deadvey/markdown-webbrowser\nq: quit\nh: help\nr: reload\ni: visit root index of this host eg: root index of mttp://deadvey.com/blog/4.md is just deadvey.com\nb: go back in history\nox: print the hyprlink of reference x eg: o5 or o24");
+				println!("Source code: https://git.javalsai.dynv6.net/deadvey/markdown-webbrowser
+q: quit
+d: debug info
+h: help
+r: reload
+s: view source code of page
+i: visit root index of this host eg: root index of gemini://deadvey.com/blog/4.md is just gemini://deadvey.com
+b: go back in history
+f: go forward in history
+ox: print the hyprlink of reference x eg: o5 or o24
+[url]: follow the inputed url");
 			}
 			else if user_input.chars().nth(0).unwrap() == 'o' {
 			    let number_str = &user_input[1..];
@@ -331,17 +204,12 @@ fn main() {
 			}
 			else if let Ok(number) = user_input.parse::<usize>() {
 				if number < links.len() {
-					if let Ok(parsed_value) = parse_url(links[number].clone(), &url.hostname) {
+					if let Ok(parsed_value) = parse_url(links[number].clone(), &url.clone()) {
 						url = parsed_value;
-						history.insert(historical_position+1, Url {
-							protocol: url.protocol.clone(),
-							hostname: url.hostname.clone(),
-							port: url.port.clone(),
-							path: url.path.clone(),
-						}); 
-						for i in historical_position+1..history.len()-1 {
-							history.remove(i);
+						for _i in historical_position+1..history.len() {
+							history.remove(historical_position+1);
 						}
+						history.push(url.clone());
 						historical_position += 1;
 						load_page = true;
 					}
@@ -352,8 +220,13 @@ fn main() {
 					println!("Invalid reference id");
 				}
 			}
-			else if let Ok(parsed_value) = parse_url(user_input, &url.hostname) {
+			else if let Ok(parsed_value) = parse_url(user_input, &url.clone()) {
 				url = parsed_value;
+				for _i in historical_position+1..history.len() {
+					history.remove(historical_position+1);
+				}
+				history.push(url.clone());
+				historical_position += 1;
 				load_page = true;
 			}
 			else {
--- a/src/parse_gemtext.rs
+++ b/src/parse_gemtext.rs
@@ -0,0 +1,91 @@
+use colored::Colorize;
+use regex::Regex;
+
+/// Renders a gemtext document for the terminal and collects its link targets.
+///
+/// Returns the styled text together with the link URLs in order of appearance.
+/// The number printed as `[n]` beside a link is that link's index in the
+/// returned vector, so it can be used directly to look the URL up.
+pub fn parse_gemtext(page_content: String) -> (String, Vec<String>) {
+    let mut parsed_page_content = String::new();
+    let mut links: Vec<String> = Vec::new();
+    let mut preformatted_code_toggle = false;
+
+    // Regex patterns, compiled once up front rather than per line.
+    let preformatted_text_regex = Regex::new(r"^```(.*)").unwrap();
+    let header1_regex = Regex::new(r"^# (.*)").unwrap();
+    let header2_regex = Regex::new(r"^## (.*)").unwrap();
+    let header3_regex = Regex::new(r"^### (.*)").unwrap();
+    let block_quotes_regex = Regex::new(r"^>(.*)").unwrap();
+    let unordered_list_regex = Regex::new(r"^([ \t]+|^)(\*).(.*)").unwrap();
+    let hyperlink_regex = Regex::new(r"=>\s(\S*)\s(.*)").unwrap();
+    let quick_hyperlink_regex = Regex::new(r"=>\s(.*)").unwrap();
+
+    for line in page_content.lines() {
+        let mut parsed_line: String = line.to_string();
+        let mut remove_line = false;
+
+        // A line starting with ``` opens or closes a preformatted block.
+        parsed_line = preformatted_text_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+            preformatted_code_toggle = !preformatted_code_toggle;
+
+            // A fence without trailing text produces no output line at all.
+            if caps[1].is_empty() {
+                remove_line = true;
+            }
+
+            // Drop the ``` marker and keep whatever followed it.
+            format!("{}", &caps[1].magenta())
+        }).to_string();
+
+        if !preformatted_code_toggle {
+            // Headers
+            parsed_line = header1_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                format!("{}", &caps[1].blue().bold().underline())
+            }).to_string();
+            parsed_line = header2_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                format!("{}", &caps[1].blue().bold())
+            }).to_string();
+            parsed_line = header3_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                format!("{}", &caps[1].bold())
+            }).to_string();
+
+            // Block quotes
+            parsed_line = block_quotes_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                format!("   | {}", &caps[1].red())
+            }).to_string();
+
+            // Unordered list items ("* item")
+            parsed_line = unordered_list_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                format!("{} • {}", &caps[1], &caps[3])
+            }).to_string();
+
+            // Link line with a label: "=> URL LABEL"
+            parsed_line = hyperlink_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                // The reference number is the index the URL is about to get in
+                // `links`, so the printed number and the stored position agree.
+                let reference = links.len();
+                links.push(caps[1].to_string());
+                format!("[{}] {}", reference, &caps[2].blue().underline())
+            }).to_string();
+
+            // Link line without a label: "=> URL" (the URL doubles as the label)
+            parsed_line = quick_hyperlink_regex.replace_all(&parsed_line, |caps: &regex::Captures| {
+                // Same zero-based numbering as labelled links.
+                let reference = links.len();
+                links.push(caps[1].to_string());
+                format!("[{}] {}", reference, &caps[1].blue().underline())
+            }).to_string();
+        } else {
+            // Inside a preformatted block: no markup handling, just colour it.
+            parsed_line = parsed_line.magenta().to_string();
+        }
+
+        if !remove_line {
+            parsed_page_content.push_str(&parsed_line);
+            parsed_page_content.push('\n');
+        }
+    }
+
+    (parsed_page_content, links)
+}
Author	SHA1	Message	Date
deadvey	ca9d4cfaf9	readme	2025-04-03 23:19:21 +01:00
deadvey	1321e0dcaf	works ish	2025-04-03 22:15:38 +01:00
deadvey	a7468d3f40	Gemini version	2025-04-03 21:42:37 +01:00
deadvey	845866ef9d	readme	2025-01-26 11:15:34 +00:00
deadvey	5f856f35fe	source code viewing	2025-01-26 02:59:06 +00:00
deadvey	7b9d2d6fd3	some stuff	2025-01-26 02:39:43 +00:00
deadvey	f46d3bac42	fixed history functionality, TO DO: arguments, documentation	2025-01-26 02:12:56 +00:00
deadvey	ced5648c01	url parsing works (FINALLY)	2025-01-26 01:44:24 +00:00
deadvey	7b07b6f051	done for the day, got a proper url parser	2025-01-22 01:18:30 +00:00
deadvey	e4b08b45bc	```	2025-01-21 23:59:18 +00:00