feat: add image fetching

This commit is contained in:
2026-03-20 19:48:18 +01:00
parent fb62111c7f
commit 12388a9908
17 changed files with 2122 additions and 92 deletions

176
src/server/caches/mod.rs Normal file
View File

@@ -0,0 +1,176 @@
use std::{ffi::OsStr, io, os::unix::fs::MetadataExt, path::Path, sync::Arc};
use ::users::os::unix::UserExt;
use moka::future::{Cache, CacheBuilder};
use tokio::{fs::File, io::AsyncReadExt};
use crate::{
consts,
server::caches::{mime::Mime, users::UsersCache},
};
pub mod mime;
pub mod users;
/// Prefix a detected MIME type must start with for a file to be accepted as a profile picture.
const MIME_IMAGE_PREFIX: &str = "image/";
/// The raw contents of a file together with the MIME type detected for them.
pub struct MimeWithBytes {
/// MIME type string detected for `bytes`.
pub mime: Box<str>,
/// The file contents, exactly as read from disk.
pub bytes: Box<[u8]>,
}
/// Shared, reference-counted handle to an image's bytes and MIME type; cloning it does not copy
/// the bytes.
pub type Image = Arc<MimeWithBytes>;
// TODO: most of the cache methods here block the executor; if we want to commit to async we
// have to take that into account.
/// Bundles the caches used by the server: the user lookup cache, the MIME detector built from
/// the magic databases, and the profile-picture cache keyed by username.
pub struct AppCache<'a> {
/// Accepted groups: a user counts as a member when the gid of any of their groups equals the
/// id stored in one of these entries.
only_groups: &'a [crate::serdes::Group],
// FIXME: blocks
user_cache: UsersCache,
// FIXME: blocks
magic_mime_cookie: Mime,
/// MUST only contain users from an accepted group; we do not want to cache arbitrary usernames
/// and blow memory up.
///
/// The value is `Option<Image>` because users may not have a pfp.
pfp_cache: Cache<String, Option<Image>>,
}
impl<'a> AppCache<'a> {
/// Builds the cache bundle.
///
/// `magic_dbs` is handed to [`Mime::new`]. `only_groups` is the list of accepted groups that
/// [`Self::get_member_user_by_name`] (and, through it, [`Self::get_pfp`]) checks users against.
///
/// The profile-picture cache is created with a capacity of `consts::MAX_PFP_CACHE_CAPACITY`, a
/// time-to-live of `consts::USER_CACHES_TTL`, and a weigher that charges every entry its image
/// size in bytes (`1` for a cached "no picture" entry).
///
/// This constructor returns `Self`, so it cannot report an error to the caller.
///
/// NOTE(review): whether [`Mime::new`] can panic on unusable databases is not visible from
/// here; confirm in that module and document it if so.
#[must_use]
pub fn new(
    magic_dbs: magic::cookie::DatabasePaths,
    only_groups: &'a [crate::serdes::Group],
) -> Self {
    Self {
        only_groups,
        user_cache: UsersCache::new(),
        magic_mime_cookie: Mime::new(magic_dbs),
        pfp_cache: CacheBuilder::new(consts::MAX_PFP_CACHE_CAPACITY)
            .time_to_live(consts::USER_CACHES_TTL)
            .weigher(|_, v: &Option<Image>| {
                let weight = v.as_ref().map_or(1, |img| img.bytes.len());
                // The weigher must return a `u32`. Saturate instead of panicking: a panic
                // here would fire on cache insertion, far away from this constructor, and an
                // over-weighted entry is merely evicted sooner.
                u32::try_from(weight).unwrap_or(u32::MAX)
            })
            .build(),
    }
}
/// Tells whether any of `groups` is one of the accepted groups.
///
/// A missing group list (`None`) is treated as "not a member". Otherwise the gid of each group
/// is compared against the id held by every entry of `only_groups`; one match is enough.
fn is_member_groups(&self, groups: Option<impl AsRef<[::users::Group]>>) -> bool {
    let Some(memberships) = groups else {
        return false;
    };
    for membership in memberships.as_ref() {
        let gid = membership.gid();
        for accepted in self.only_groups {
            if accepted.0 == gid {
                return true;
            }
        }
    }
    false
}
/// Looks `username` up through the user cache and returns the user only when they belong to
/// at least one accepted group.
///
/// Returns `None` both when the lookup yields no user and when the user exists but is not a
/// member of any accepted group.
pub async fn get_member_user_by_name<S: AsRef<OsStr> + ?Sized>(
    &self,
    username: &S,
) -> Option<Arc<::users::User>> {
    let candidate = self.user_cache.get_user_by_name(username).await?;
    // FIXME: `groups()` is not cached and could be a DoS point. Caching "is this user a member
    // of any accepted group" does not look like the right answer to that, though.
    let accepted = self.is_member_groups(candidate.groups());
    accepted.then_some(candidate)
}
/// Looks for a profile picture under `home` and returns the first acceptable candidate.
///
/// Every entry of `consts::USER_PFP_PATHS` is joined onto `home` and tried in order. A
/// candidate is accepted when it is a regular file of at most `consts::MAX_PFP_SIZE` bytes, it
/// can be read, and the MIME type detected for its contents starts with
/// [`MIME_IMAGE_PREFIX`]. Any failure just moves on to the next candidate; `None` means that no
/// candidate qualified.
async fn read_logo_from_home(&self, home: &Path) -> Option<Image> {
    /// Reads `path` fully into memory.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened, inspected or read, when it is not a regular
    /// file, or when it is larger than `MAXSIZE` bytes.
    async fn read_limited_path<const MAXSIZE: u64>(path: &Path) -> io::Result<Vec<u8>> {
        let f = File::open(path).await?;
        let meta = f.metadata().await?;
        // The path lives in a user-controlled home directory, so it may lead (directly or
        // through a symlink) to a directory or a device node. Those report a meaningless size
        // and are never a picture, so reject them before reading anything.
        //
        // NOTE(review): opening a FIFO may already block inside `File::open`, before this
        // check runs; closing that hole would need a non-blocking open.
        if !meta.is_file() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "path is not a regular file",
            ));
        }
        let size = meta.size();
        if size > MAXSIZE {
            return Err(io::Error::new(
                io::ErrorKind::FileTooLarge,
                "filesize is bigger than MAXSIZE",
            ));
        }
        // `size <= MAXSIZE` at this point, so this only fails on targets whose `usize` is
        // narrower than the limit; report that as an error instead of panicking.
        let capacity = usize::try_from(size).map_err(|_| {
            io::Error::new(
                io::ErrorKind::FileTooLarge,
                "filesize does not fit in usize",
            )
        })?;
        let mut buf = Vec::with_capacity(capacity);
        // The size above is only a snapshot: the file may still grow while it is being read,
        // so cap the read itself at `MAXSIZE` as well.
        f.take(MAXSIZE).read_to_end(&mut buf).await?;
        Ok(buf)
    }
    for subpath in consts::USER_PFP_PATHS {
        let path = home.join(subpath);
        // TODO: the first (outer) error layer of `buffer` is actually relevant and should not
        // be swallowed together with the inner one.
        if let Ok(img_buf) = read_limited_path::<{ consts::MAX_PFP_SIZE }>(&path).await
            && let Ok(Ok(mime)) = self.magic_mime_cookie.buffer(&img_buf)
            && mime.starts_with(MIME_IMAGE_PREFIX)
        {
            return Some(Arc::new(MimeWithBytes {
                mime: mime.into_boxed_str(),
                bytes: img_buf.into_boxed_slice(),
            }));
        }
    }
    None
}
/// Returns the profile picture of `username`, or `None` when there is nothing to serve.
///
/// `None` does not differentiate between a user without a pfp, a user that is not a member of
/// an accepted group, and a nonexistent user. That is deliberate: probing usernames reveals
/// nothing, the caller simply falls back to the default pfp.
///
/// # Caching
///
/// The result for a *member* user is cached under their username, including the "has no pfp"
/// result, so repeated requests for a member without a picture do not hit the filesystem
/// every time. Usernames that do not resolve to a member user are never inserted, which keeps
/// arbitrary request input from filling the cache.
///
/// # Performance
///
/// The cache lookup borrows `username` through [`AsRef<str>`]; an owned key is only created
/// (via [`ToOwned`]) on a cache miss for a member user.
///
/// NOTE(review): with these bounds `T` is in practice [`String`]. `&str` is rejected because
/// `<&str as ToOwned>::Owned` is `&str`, and `Cow<str>` because its `Owned` is itself. If
/// borrowed arguments are meant to work, `Into<String>` would be the bound to use, and it
/// would also avoid cloning an owned `String`.
///
/// # Security
///
/// Images ultimately come from users' home directories, so they could be anything, not only
/// images (there is a MIME check, but it is not designed to be relied upon). Make sure to send
/// the MIME type and `X-Content-Type-Options: nosniff` when serving them over HTTP(S).
pub async fn get_pfp<T>(&self, username: T) -> Option<Image>
where
    T: AsRef<str> + ToOwned<Owned = String>,
{
    // Cache hit: the stored value may itself be `None` (member user without a pfp).
    if let Some(cached_pfp) = self.pfp_cache.get(username.as_ref()).await {
        return cached_pfp;
    }
    // This blocks for now, so the more requests the cache answers, the better. Requests for
    // non-member usernames return here without touching the pfp cache: legitimate entries are
    // not pushed out, at the price of such requests always missing the cache.
    let user = self.get_member_user_by_name(username.as_ref()).await?;
    let img = self.read_logo_from_home(user.home_dir()).await;
    // Store the outcome even when it is `None`; otherwise every request for a member without
    // a pfp would walk the candidate paths on disk again.
    self.pfp_cache
        .insert(username.to_owned(), img.clone())
        .await;
    img
}
}