From 146446d3e2679fc79a46603f0008e3deab1ac6b5 Mon Sep 17 00:00:00 2001 From: Valentin Popov Date: Tue, 30 Jun 2026 01:40:47 +0400 Subject: [PATCH] fix: harden stage1 path and archive identity --- crates/fparkan-path/src/lib.rs | 73 ++++++- crates/fparkan-resource/src/lib.rs | 308 +++++++++++++++++++++++------ crates/fparkan-vfs/src/lib.rs | 194 +++++++++++------- 3 files changed, 439 insertions(+), 136 deletions(-) diff --git a/crates/fparkan-path/src/lib.rs b/crates/fparkan-path/src/lib.rs index 2047f93..8051b0f 100644 --- a/crates/fparkan-path/src/lib.rs +++ b/crates/fparkan-path/src/lib.rs @@ -20,7 +20,9 @@ )] //! Legacy path normalization and ASCII lookup semantics. +use std::cmp::Ordering; use std::fmt; +use std::hash::{Hash, Hasher}; use std::path::{Path, PathBuf}; /// Original bytes. @@ -42,23 +44,41 @@ impl OriginalPathBytes { } /// Normalized relative path. -#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Debug)] pub struct NormalizedPath { raw: Vec, display: String, } impl NormalizedPath { - /// Returns string view. + /// Returns normalized byte view used for identity, ordering, and hashing. #[must_use] - pub fn as_str(&self) -> &str { - &self.display + pub fn identity_bytes(&self) -> &[u8] { + &self.raw + } + + /// Returns an ASCII-only lookup key for case-insensitive archive matching. + #[must_use] + pub fn lookup_key(&self) -> LookupKey { + ascii_lookup_key(&self.raw) } /// Returns normalized byte view. #[must_use] pub fn as_bytes(&self) -> &[u8] { - &self.raw + self.identity_bytes() + } + + /// Returns a lossy display representation. + #[must_use] + pub fn display_lossy(&self) -> &str { + &self.display + } + + /// Returns a lossy string view for UI and diagnostics only. + #[must_use] + pub fn as_str(&self) -> &str { + self.display_lossy() } /// Returns an OS path owned path buffer. @@ -68,6 +88,32 @@ impl NormalizedPath { } } +impl PartialEq for NormalizedPath { + fn eq(&self, other: &Self) -> bool { + self.raw == other.raw + } +} + +impl Eq for NormalizedPath {} + +impl PartialOrd for NormalizedPath { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for NormalizedPath { + fn cmp(&self, other: &Self) -> Ordering { + self.raw.cmp(&other.raw) + } +} + +impl Hash for NormalizedPath { + fn hash(&self, state: &mut H) { + self.raw.hash(state); + } +} + /// Normalized path paired with its original byte image. #[derive(Clone, Debug, Eq, PartialEq)] pub struct NormalizedPathWithOriginal { @@ -353,7 +399,8 @@ mod tests { let path = normalize_relative(b"DATA/\xFF.bin", PathPolicy::HostCompatible) .expect("raw legacy bytes"); - assert_eq!(path.as_str(), "DATA/\u{FFFD}.bin"); + assert_eq!(path.display_lossy(), "DATA/\u{FFFD}.bin"); + assert_eq!(path.identity_bytes(), b"DATA/\xFF.bin"); } #[test] @@ -364,4 +411,18 @@ mod tests { assert_eq!(path.normalized().as_str(), "DATA/Maps/Intro/Land.msh"); assert_eq!(path.original().as_bytes(), raw); } + + #[test] + fn lossy_display_does_not_affect_identity_or_ordering() { + let first = normalize_relative(b"DATA/\xFF.bin", PathPolicy::HostCompatible) + .expect("first raw path"); + let second = normalize_relative(b"DATA/\xFE.bin", PathPolicy::HostCompatible) + .expect("second raw path"); + + assert_eq!(first.display_lossy(), second.display_lossy()); + assert_ne!(first, second); + assert_ne!(first.identity_bytes(), second.identity_bytes()); + assert_ne!(first.cmp(&second), Ordering::Equal); + assert_ne!(first.lookup_key(), second.lookup_key()); + } } diff --git a/crates/fparkan-resource/src/lib.rs b/crates/fparkan-resource/src/lib.rs index 953d591..fbfdabf 100644 --- a/crates/fparkan-resource/src/lib.rs +++ b/crates/fparkan-resource/src/lib.rs @@ -20,7 +20,7 @@ )] //! Resource identity and repository ports. -use fparkan_binary::Sha256Digest; +use fparkan_binary::{sha256, Sha256Digest}; use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName}; use fparkan_vfs::{Vfs, VfsError}; use std::collections::BTreeMap; @@ -222,6 +222,30 @@ pub struct CachedResourceRepository { state: Mutex, } +/// Repository-wide archive and payload cache limits. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RepositoryLimits { + /// Maximum number of decoded archives retained in memory. + pub max_open_archives: usize, + /// Maximum total retained source archive bytes. + pub max_archive_bytes: usize, + /// Maximum cached decoded payload entries. + pub max_decoded_payload_entries: usize, + /// Maximum cached decoded payload bytes. + pub max_decoded_payload_bytes: usize, +} + +impl Default for RepositoryLimits { + fn default() -> Self { + Self { + max_open_archives: 32, + max_archive_bytes: 256 * 1024 * 1024, + max_decoded_payload_entries: 64, + max_decoded_payload_bytes: 64 * 1024 * 1024, + } + } +} + /// Decoded payload cache limits. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct PayloadCacheLimits { @@ -233,17 +257,23 @@ pub struct PayloadCacheLimits { impl Default for PayloadCacheLimits { fn default() -> Self { + let limits = RepositoryLimits::default(); Self { - max_entries: 64, - max_bytes: 64 * 1024 * 1024, + max_entries: limits.max_decoded_payload_entries, + max_bytes: limits.max_decoded_payload_bytes, } } } #[derive(Default)] struct RepositoryState { - paths: BTreeMap, + paths: BTreeMap, ArchiveId>, archives: Vec, + max_open_archives: usize, + max_archive_bytes: usize, + current_open_archives: usize, + current_archive_bytes: usize, + archive_access_generation: u64, payload_cache: DecodedPayloadCache, } @@ -252,7 +282,9 @@ struct ArchiveSlot { fingerprint: Sha256Digest, generation: u64, kind: ArchiveKind, - document: Arc, + document: Option>, + archive_bytes: usize, + last_access: u64, } enum ArchiveDocument { @@ -284,31 +316,41 @@ impl CachedResourceRepository { /// Creates a cached repository. #[must_use] pub fn new(vfs: Arc) -> Self { - Self::with_payload_cache_limits(vfs, PayloadCacheLimits::default()) + Self::with_limits(vfs, RepositoryLimits::default()) + } + + /// Creates a cached repository with explicit archive and payload budgets. + #[must_use] + pub fn with_limits(vfs: Arc, limits: RepositoryLimits) -> Self { + Self { + vfs, + state: Mutex::new(RepositoryState { + max_open_archives: limits.max_open_archives, + max_archive_bytes: limits.max_archive_bytes, + payload_cache: DecodedPayloadCache::new(PayloadCacheLimits { + max_entries: limits.max_decoded_payload_entries, + max_bytes: limits.max_decoded_payload_bytes, + }), + ..RepositoryState::default() + }), + } } /// Creates a cached repository with a decoded payload entry budget. #[must_use] pub fn with_payload_cache_budget(vfs: Arc, max_payload_entries: usize) -> Self { - Self::with_payload_cache_limits( - vfs, - PayloadCacheLimits { - max_entries: max_payload_entries, - ..PayloadCacheLimits::default() - }, - ) + let mut limits = RepositoryLimits::default(); + limits.max_decoded_payload_entries = max_payload_entries; + Self::with_limits(vfs, limits) } /// Creates a cached repository with decoded payload entry and byte budgets. #[must_use] pub fn with_payload_cache_limits(vfs: Arc, limits: PayloadCacheLimits) -> Self { - Self { - vfs, - state: Mutex::new(RepositoryState { - payload_cache: DecodedPayloadCache::new(limits), - ..RepositoryState::default() - }), - } + let mut repository_limits = RepositoryLimits::default(); + repository_limits.max_decoded_payload_entries = limits.max_entries; + repository_limits.max_decoded_payload_bytes = limits.max_bytes; + Self::with_limits(vfs, repository_limits) } /// Returns the archive kind for an opened archive. @@ -334,29 +376,38 @@ impl CachedResourceRepository { impl ResourceRepository for CachedResourceRepository { fn open_archive(&self, path: &NormalizedPath) -> Result { - let metadata = self.vfs.metadata(path).map_err(resource_error_from_vfs)?; - let fingerprint = metadata.fingerprint; - if let Some(id) = self.cached_id(path, fingerprint)? { - return Ok(id); - } - let bytes = self.vfs.read(path).map_err(resource_error_from_vfs)?; + let fingerprint = sha256(&bytes); let mut slot = decode_archive(path.clone(), bytes, fingerprint)?; let mut state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; - if let Some(id) = state.paths.get(path.as_str()).copied() { - if state.archive(id)?.fingerprint == fingerprint { + let key = path.identity_bytes().to_vec(); + if let Some(id) = state.paths.get(&key).copied() { + let current = state.archive(id)?; + if current.fingerprint == fingerprint && current.document.is_some() { + state.touch_archive(id)?; return Ok(id); } - slot.generation = state.archive(id)?.generation.saturating_add(1); + let current_generation = current.generation; + let current_fingerprint = current.fingerprint; + if current_fingerprint != fingerprint { + slot.generation = current_generation.saturating_add(1); + state.payload_cache.remove_archive(id); + } else { + slot.generation = current_generation; + } + state.unload_archive(id)?; *state.archive_mut(id)? = slot; - state.payload_cache.remove_archive(id); + state.load_archive(id)?; + state.evict_archives(id)?; return Ok(id); } let id = ArchiveId(u64::try_from(state.archives.len()).map_err(|_| { ResourceError::Format("too many open archives for handle space".to_string()) })?); - state.paths.insert(path.as_str().to_string(), id); + state.paths.insert(key, id); state.archives.push(slot); + state.load_archive(id)?; + state.evict_archives(id)?; Ok(id) } @@ -367,7 +418,8 @@ impl ResourceRepository for CachedResourceRepository { ) -> Result, ResourceError> { let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; let slot = state.archive(archive)?; - let local = match slot.document.as_ref() { + let document = slot.document.as_ref().ok_or(ResourceError::InvalidHandle)?; + let local = match document.as_ref() { ArchiveDocument::Nres(document) => document.find_bytes(&name.0).map(|id| id.0), ArchiveDocument::Rsli(document) => document.find_bytes(&name.0).map(|id| id.0), }; @@ -381,7 +433,8 @@ impl ResourceRepository for CachedResourceRepository { fn first_entry(&self, archive: ArchiveId) -> Result, ResourceError> { let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; let slot = state.archive(archive)?; - let local = match slot.document.as_ref() { + let document = slot.document.as_ref().ok_or(ResourceError::InvalidHandle)?; + let local = match document.as_ref() { ArchiveDocument::Nres(document) => document.entries().first().map(|entry| entry.id().0), ArchiveDocument::Rsli(document) => document.entry(fparkan_rsli::EntryId(0)).map(|_| 0), }; @@ -421,7 +474,8 @@ impl ResourceRepository for CachedResourceRepository { fn entry_info(&self, entry: EntryHandle) -> Result { let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; let slot = state.entry_archive(entry)?; - match slot.document.as_ref() { + let document = slot.document.as_ref().ok_or(ResourceError::InvalidHandle)?; + match document.as_ref() { ArchiveDocument::Nres(document) => { let local = usize::try_from(entry.local).map_err(|_| ResourceError::InvalidHandle)?; @@ -448,7 +502,7 @@ impl ResourceRepository for CachedResourceRepository { Ok(ResourceEntryInfo { key: ResourceKey { archive: slot.path.clone(), - name: ResourceName(meta.name_raw.to_vec()), + name: ResourceName(c_name_bytes(&meta.name_raw).to_vec()), type_id: None, }, attr1: u32::try_from(meta.flags).unwrap_or_default(), @@ -460,24 +514,6 @@ impl ResourceRepository for CachedResourceRepository { } } -impl CachedResourceRepository { - fn cached_id( - &self, - path: &NormalizedPath, - fingerprint: Sha256Digest, - ) -> Result, ResourceError> { - let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; - let Some(id) = state.paths.get(path.as_str()).copied() else { - return Ok(None); - }; - if state.archive(id)?.fingerprint == fingerprint { - Ok(Some(id)) - } else { - Ok(None) - } - } -} - impl DecodedPayloadCache { fn new(limits: PayloadCacheLimits) -> Self { Self { @@ -568,16 +604,77 @@ impl RepositoryState { fn payload_decode_task(&self, entry: EntryHandle) -> Result { let slot = self.entry_archive(entry)?; + let document = slot.document.as_ref().ok_or(ResourceError::InvalidHandle)?; Ok(PayloadDecodeTask { - document: Arc::clone(&slot.document), + document: Arc::clone(document), key: slot.entry_key(entry.local)?, }) } + + fn touch_archive(&mut self, id: ArchiveId) -> Result<(), ResourceError> { + self.archive_access_generation = self.archive_access_generation.saturating_add(1); + let access = self.archive_access_generation; + self.archive_mut(id)?.last_access = access; + Ok(()) + } + + fn load_archive(&mut self, id: ArchiveId) -> Result<(), ResourceError> { + let archive_bytes = self.archive(id)?.archive_bytes; + if self.archive(id)?.document.is_none() { + return Err(ResourceError::InvalidHandle); + } + self.current_open_archives = self.current_open_archives.saturating_add(1); + self.current_archive_bytes = self.current_archive_bytes.saturating_add(archive_bytes); + self.touch_archive(id) + } + + fn unload_archive(&mut self, id: ArchiveId) -> Result<(), ResourceError> { + let (was_loaded, archive_bytes) = { + let slot = self.archive(id)?; + (slot.document.is_some(), slot.archive_bytes) + }; + if was_loaded { + self.current_open_archives = self.current_open_archives.saturating_sub(1); + self.current_archive_bytes = self.current_archive_bytes.saturating_sub(archive_bytes); + self.payload_cache.remove_archive(id); + let slot = self.archive_mut(id)?; + slot.document = None; + slot.archive_bytes = 0; + slot.generation = slot.generation.saturating_add(1); + } + Ok(()) + } + + fn evict_archives(&mut self, protected: ArchiveId) -> Result<(), ResourceError> { + while self.current_open_archives > self.max_open_archives + || self.current_archive_bytes > self.max_archive_bytes + { + let Some(victim) = self + .archives + .iter() + .enumerate() + .filter_map(|(index, slot)| { + let id = ArchiveId(u64::try_from(index).ok()?); + if id == protected || slot.document.is_none() { + return None; + } + Some((id, slot.last_access)) + }) + .min_by_key(|(_, access)| *access) + .map(|(id, _)| id) + else { + break; + }; + self.unload_archive(victim)?; + } + Ok(()) + } } impl ArchiveSlot { fn entry_key(&self, local: u32) -> Result { - match self.document.as_ref() { + let document = self.document.as_ref().ok_or(ResourceError::InvalidHandle)?; + match document.as_ref() { ArchiveDocument::Nres(document) => { let local = usize::try_from(local).map_err(|_| ResourceError::InvalidHandle)?; let entry = document @@ -623,6 +720,7 @@ fn decode_archive( bytes: Arc<[u8]>, fingerprint: Sha256Digest, ) -> Result { + let archive_bytes = bytes.len(); if bytes.starts_with(b"NRes") { let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible) .map_err(|err| ResourceError::Format(err.to_string()))?; @@ -631,7 +729,9 @@ fn decode_archive( fingerprint, generation: 0, kind: ArchiveKind::Nres, - document: Arc::new(ArchiveDocument::Nres(document)), + archive_bytes, + last_access: 0, + document: Some(Arc::new(ArchiveDocument::Nres(document))), }); } if bytes.get(0..4) == Some(b"NL\0\x01") { @@ -642,7 +742,9 @@ fn decode_archive( fingerprint, generation: 0, kind: ArchiveKind::Rsli, - document: Arc::new(ArchiveDocument::Rsli(document)), + archive_bytes, + last_access: 0, + document: Some(Arc::new(ArchiveDocument::Rsli(document))), }); } Err(ResourceError::Format( @@ -780,7 +882,7 @@ mod tests { let state = repo.state.lock().expect("state"); assert_eq!(state.archives.len(), 1); assert_eq!(state.payload_cache.entries.len(), 1); - assert_eq!(state.paths.get(path.as_str()).copied(), Some(archive)); + assert_eq!(state.paths.get(path.identity_bytes()).copied(), Some(archive)); drop(state); assert_eq!(repo.open_archive(&path).expect("cached archive"), archive); @@ -865,7 +967,7 @@ mod tests { fn archive_cache_invalidates_when_vfs_bytes_change() { let root = temp_dir("archive-invalidate"); let path = archive_path(b"cache/test.lib").expect("path"); - let host_path = root.join(path.as_str()); + let host_path = root.join(path.as_path()); std::fs::create_dir_all(host_path.parent().expect("parent")).expect("cache dir"); std::fs::write(&host_path, build_nres(&[("a.bin", b"before".as_slice())])) .expect("initial archive"); @@ -926,6 +1028,90 @@ mod tests { } } + #[test] + fn lossy_equivalent_archive_paths_remain_distinct() { + let first_path = archive_path(b"DATA/\xFF.lib").expect("first path"); + let second_path = archive_path(b"DATA/\xFE.lib").expect("second path"); + let mut vfs = MemoryVfs::default(); + vfs.insert( + first_path.clone(), + Arc::from(build_nres(&[("same.bin", b"first".as_slice())]).into_boxed_slice()), + ); + vfs.insert( + second_path.clone(), + Arc::from(build_nres(&[("same.bin", b"second".as_slice())]).into_boxed_slice()), + ); + let repo = CachedResourceRepository::new(Arc::new(vfs)); + + let first_archive = repo.open_archive(&first_path).expect("first archive"); + let second_archive = repo.open_archive(&second_path).expect("second archive"); + + assert_ne!(first_archive, second_archive); + assert_eq!( + repo.read( + repo.find(first_archive, &resource_name(b"same.bin")) + .expect("find first") + .expect("first handle") + ) + .expect("read first") + .as_slice(), + b"first" + ); + assert_eq!( + repo.read( + repo.find(second_archive, &resource_name(b"same.bin")) + .expect("find second") + .expect("second handle") + ) + .expect("read second") + .as_slice(), + b"second" + ); + } + + #[test] + fn archive_cache_eviction_makes_old_handles_stale() { + let first_path = archive_path(b"cache/first.lib").expect("first path"); + let second_path = archive_path(b"cache/second.lib").expect("second path"); + let mut vfs = MemoryVfs::default(); + vfs.insert( + first_path.clone(), + Arc::from(build_nres(&[("a.bin", b"first".as_slice())]).into_boxed_slice()), + ); + vfs.insert( + second_path.clone(), + Arc::from(build_nres(&[("b.bin", b"second".as_slice())]).into_boxed_slice()), + ); + let repo = CachedResourceRepository::with_limits( + Arc::new(vfs), + RepositoryLimits { + max_open_archives: 1, + max_archive_bytes: usize::MAX, + max_decoded_payload_entries: 64, + max_decoded_payload_bytes: 64 * 1024 * 1024, + }, + ); + + let first_archive = repo.open_archive(&first_path).expect("open first"); + let first_handle = repo + .find(first_archive, &resource_name(b"a.bin")) + .expect("find first") + .expect("first handle"); + assert_eq!(repo.read(first_handle).expect("read first").as_slice(), b"first"); + + let _second_archive = repo.open_archive(&second_path).expect("open second"); + assert!(matches!(repo.read(first_handle), Err(ResourceError::StaleHandle))); + + let reopened = repo.open_archive(&first_path).expect("reopen first"); + let refreshed = repo + .find(reopened, &resource_name(b"a.bin")) + .expect("find refreshed") + .expect("refreshed handle"); + assert_eq!(reopened, first_archive); + assert_ne!(refreshed, first_handle); + assert_eq!(repo.read(refreshed).expect("read refreshed").as_slice(), b"first"); + } + #[test] fn resource_error_display_is_actionable() { let path = archive_path(b"bad/rsli.lib").expect("path"); @@ -974,7 +1160,7 @@ mod tests { let material_path = archive_path(b"Material.lib").map_err(|err| err.to_string())?; let material_bytes = - std::fs::read(root.join(material_path.as_str())).map_err(|err| err.to_string())?; + std::fs::read(root.join(material_path.as_path())).map_err(|err| err.to_string())?; let material_doc = fparkan_nres::decode( Arc::from(material_bytes.clone().into_boxed_slice()), fparkan_nres::ReadProfile::Compatible, @@ -1008,7 +1194,7 @@ mod tests { let font_path = archive_path(b"gamefont.rlb").map_err(|err| err.to_string())?; let font_bytes = - std::fs::read(root.join(font_path.as_str())).map_err(|err| err.to_string())?; + std::fs::read(root.join(font_path.as_path())).map_err(|err| err.to_string())?; let font_doc = fparkan_rsli::decode( Arc::from(font_bytes.into_boxed_slice()), fparkan_rsli::ReadProfile::Compatible, diff --git a/crates/fparkan-vfs/src/lib.rs b/crates/fparkan-vfs/src/lib.rs index cd359a3..68d812b 100644 --- a/crates/fparkan-vfs/src/lib.rs +++ b/crates/fparkan-vfs/src/lib.rs @@ -25,12 +25,13 @@ use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath}; use std::collections::BTreeMap; use std::fs; #[cfg(unix)] +use std::os::unix::ffi::OsStrExt; +#[cfg(unix)] use std::os::unix::fs::MetadataExt; #[cfg(windows)] use std::os::windows::fs::MetadataExt; use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; -use std::time::SystemTime; +use std::sync::Arc; /// VFS metadata. #[derive(Clone, Debug, Eq, PartialEq)] @@ -105,7 +106,6 @@ pub trait Vfs: Send + Sync { #[derive(Clone, Debug)] pub struct DirectoryVfs { root: PathBuf, - fingerprint_cache: Arc>>, } impl DirectoryVfs { @@ -114,29 +114,20 @@ impl DirectoryVfs { pub fn new(root: impl AsRef) -> Self { Self { root: root.as_ref().to_path_buf(), - fingerprint_cache: Arc::default(), } } fn host_path(&self, path: &NormalizedPath) -> Result { join_under(&self.root, path).map_err(|_| VfsError::Path)?; - resolve_casefolded(&self.root, path.as_str()) + resolve_casefolded(&self.root, path) } fn metadata_from_host_file(&self, path: &Path) -> Result { let metadata = fs::symlink_metadata(path).map_err(VfsError::Io)?; - metadata_from_host_file_with_cache(path, &metadata, &self.fingerprint_cache) + metadata_from_host_file(path, &metadata) } } -#[derive(Clone, Debug, Eq, PartialEq)] -struct CachedHostFingerprint { - len: u64, - modified: Option, - identity: Option, - fingerprint: Sha256Digest, -} - impl Vfs for DirectoryVfs { fn metadata(&self, path: &NormalizedPath) -> Result { self.metadata_from_host_file(&self.host_path(path)?) @@ -171,21 +162,60 @@ impl Vfs for DirectoryVfs { let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?; entries.push(VfsEntry { path: prefix.clone(), - metadata: metadata_from_host_file_with_cache( - &base, - &metadata, - &self.fingerprint_cache, - )?, + metadata: metadata_from_host_file(&base, &metadata)?, }); return Ok(entries); } - list_recursive(&self.root, &base, &self.fingerprint_cache, &mut entries)?; - entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str())); + list_recursive(&self.root, &base, &mut entries)?; + entries.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes())); Ok(entries) } } -fn resolve_casefolded(root: &Path, normalized: &str) -> Result { +fn resolve_casefolded(root: &Path, normalized: &NormalizedPath) -> Result { + #[cfg(unix)] + { + return resolve_casefolded_unix(root, normalized); + } + + #[cfg(not(unix))] + { + resolve_casefolded_text(root, normalized.display_lossy()) + } +} + +#[cfg(unix)] +fn resolve_casefolded_unix(root: &Path, normalized: &NormalizedPath) -> Result { + let mut current = root.to_path_buf(); + for segment in normalized.as_bytes().split(|byte| *byte == b'/') { + current = resolve_casefolded_segment(¤t, segment, normalized)?; + } + Ok(current) +} + +#[cfg(unix)] +fn resolve_casefolded_segment( + dir: &Path, + segment: &[u8], + normalized: &NormalizedPath, +) -> Result { + let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?; + let mut matches = Vec::new(); + for entry in read_dir { + let entry = entry.map_err(VfsError::Io)?; + let name = entry.file_name(); + if name.as_bytes().eq_ignore_ascii_case(segment) { + if entry.file_type().map_err(VfsError::Io)?.is_symlink() { + return Err(VfsError::Path); + } + matches.push(entry.path()); + } + } + select_casefolded_match(normalized.display_lossy(), dir, segment, matches) +} + +#[cfg(not(unix))] +fn resolve_casefolded_text(root: &Path, normalized: &str) -> Result { let mut current = root.to_path_buf(); for segment in normalized.split('/') { let read_dir = fs::read_dir(¤t).map_err(VfsError::Io)?; @@ -211,10 +241,11 @@ fn resolve_casefolded(root: &Path, normalized: &str) -> Result, mut matches: Vec, ) -> Result { matches.sort(); + let segment = String::from_utf8_lossy(segment.as_ref()); match matches.len() { 0 => Err(VfsError::NotFound(normalized.to_string())), 1 => Ok(matches.remove(0)), @@ -229,7 +260,6 @@ fn select_casefolded_match( fn list_recursive( root: &Path, dir: &Path, - fingerprint_cache: &Mutex>, out: &mut Vec, ) -> Result<(), VfsError> { let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?; @@ -245,68 +275,40 @@ fn list_recursive( return Err(VfsError::Path); } if metadata.is_dir() { - list_recursive(root, &child, fingerprint_cache, out)?; + list_recursive(root, &child, out)?; continue; } if !metadata.is_file() { continue; } let rel = child.strip_prefix(root).map_err(|_| VfsError::Path)?; - let rel_text = rel.to_str().ok_or(VfsError::Path)?; + #[cfg(unix)] + let rel_bytes = rel.as_os_str().as_bytes(); + #[cfg(not(unix))] + let rel_bytes = rel.to_str().ok_or(VfsError::Path)?.as_bytes(); let path = fparkan_path::normalize_relative( - rel_text.as_bytes(), + rel_bytes, fparkan_path::PathPolicy::HostCompatible, ) .map_err(|_| VfsError::Path)?; out.push(VfsEntry { path, - metadata: metadata_from_host_file_with_cache(&child, &metadata, fingerprint_cache)?, + metadata: metadata_from_host_file(&child, &metadata)?, }); } Ok(()) } -fn metadata_from_host_file_with_cache( +fn metadata_from_host_file( path: &Path, metadata: &fs::Metadata, - fingerprint_cache: &Mutex>, ) -> Result { if !metadata.is_file() { return Err(VfsError::Path); } let len = metadata.len(); - let modified = metadata.modified().ok(); - if let Some(cached) = fingerprint_cache - .lock() - .map_err(|_| VfsError::Path)? - .get(path) - .cloned() - .filter(|cached| { - cached.len == len - && cached.modified == modified - && cached.identity == file_identity(metadata) - }) - { - return Ok(VfsMetadata { - len, - fingerprint: cached.fingerprint, - }); - } - let bytes = fs::read(path).map_err(VfsError::Io)?; let fingerprint = sha256(&bytes); - fingerprint_cache - .lock() - .map_err(|_| VfsError::Path)? - .insert( - path.to_path_buf(), - CachedHostFingerprint { - len, - modified, - identity: file_identity(metadata), - fingerprint, - }, - ); Ok(VfsMetadata { len, fingerprint }) } @@ -344,11 +346,11 @@ impl MemoryVfs { let matches = self .lookup .get(&key) - .ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?; + .ok_or_else(|| VfsError::NotFound(path.display_lossy().to_string()))?; match matches.as_slice() { [single] => Ok(single.as_slice()), - [] => Err(VfsError::NotFound(path.as_str().to_string())), - _ => Err(VfsError::Ambiguous(path.as_str().to_string())), + [] => Err(VfsError::NotFound(path.display_lossy().to_string())), + _ => Err(VfsError::Ambiguous(path.display_lossy().to_string())), } } } @@ -380,7 +382,7 @@ impl Vfs for MemoryVfs { let bytes = self .files .get(resolved) - .ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?; + .ok_or_else(|| VfsError::NotFound(path.display_lossy().to_string()))?; Ok(VfsMetadata { len: bytes.len() as u64, fingerprint: sha256(bytes), @@ -392,7 +394,7 @@ impl Vfs for MemoryVfs { self.files .get(resolved) .cloned() - .ok_or_else(|| VfsError::NotFound(path.as_str().to_string())) + .ok_or_else(|| VfsError::NotFound(path.display_lossy().to_string())) } fn list(&self, prefix: &NormalizedPath) -> Result, VfsError> { @@ -476,7 +478,7 @@ impl Vfs for OverlayVfs { Err(err) => return Err(err), } } - Err(VfsError::NotFound(path.as_str().to_string())) + Err(VfsError::NotFound(path.display_lossy().to_string())) } fn read(&self, path: &NormalizedPath) -> Result, VfsError> { @@ -487,7 +489,7 @@ impl Vfs for OverlayVfs { Err(err) => return Err(err), } } - Err(VfsError::NotFound(path.as_str().to_string())) + Err(VfsError::NotFound(path.display_lossy().to_string())) } fn list(&self, prefix: &NormalizedPath) -> Result, VfsError> { @@ -496,7 +498,7 @@ impl Vfs for OverlayVfs { match layer.list(prefix) { Ok(entries) => { for entry in entries { - let key = entry.path.as_str().to_ascii_uppercase(); + let key = ascii_lookup_key(entry.path.as_bytes()).0; by_key.entry(key).or_insert(entry); } } @@ -505,7 +507,7 @@ impl Vfs for OverlayVfs { } } let mut entries: Vec<_> = by_key.into_values().collect(); - entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str())); + entries.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes())); Ok(entries) } } @@ -514,6 +516,10 @@ impl Vfs for OverlayVfs { mod tests { use super::*; use fparkan_path::{normalize_relative, PathPolicy}; + #[cfg(unix)] + use std::ffi::OsString; + #[cfg(unix)] + use std::os::unix::ffi::OsStringExt; #[test] fn directory_vfs_resolves_ascii_casefolded_segments() { @@ -634,6 +640,34 @@ mod tests { std::fs::remove_dir_all(outside).expect("cleanup outside"); } + #[cfg(unix)] + #[test] + fn directory_vfs_resolves_non_utf8_host_entries_by_raw_bytes() { + let root = unique_test_dir("non-utf8"); + let data_dir = root.join("DATA"); + std::fs::create_dir_all(&data_dir).expect("mkdir"); + let file_name = OsString::from_vec(vec![0xFF, b'.', b'b', b'i', b'n']); + let raw_path = data_dir.join(&file_name); + if let Err(err) = std::fs::write(&raw_path, b"raw") { + assert_eq!(err.kind(), std::io::ErrorKind::PermissionDenied); + std::fs::remove_dir_all(root).expect("cleanup"); + return; + } + + let vfs = DirectoryVfs::new(&root); + let path = + normalize_relative(b"data/\xFF.bin", PathPolicy::HostCompatible).expect("path"); + + assert_eq!(vfs.read(&path).expect("read raw path").as_ref(), b"raw"); + let entries = vfs + .list(&normalize_relative(b"DATA", PathPolicy::StrictLegacy).expect("prefix")) + .expect("list"); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path.identity_bytes(), b"DATA/\xFF.bin"); + + std::fs::remove_dir_all(root).expect("cleanup"); + } + #[test] fn casefold_selector_reports_ambiguous_segments() { let err = select_casefolded_match( @@ -714,6 +748,28 @@ mod tests { assert_eq!(entries[0].metadata.len, 4); } + #[test] + fn overlay_vfs_keeps_lossy_equivalent_entries_distinct() { + let prefix = normalize_relative(b"DATA", PathPolicy::StrictLegacy).expect("prefix"); + let mut high = MemoryVfs::default(); + let mut low = MemoryVfs::default(); + high.insert( + normalize_relative(b"DATA/\xFF.bin", PathPolicy::HostCompatible).expect("high path"), + Arc::from(b"high".as_slice()), + ); + low.insert( + normalize_relative(b"DATA/\xFE.bin", PathPolicy::HostCompatible).expect("low path"), + Arc::from(b"low".as_slice()), + ); + + let overlay = OverlayVfs::from_layers(vec![Arc::new(high), Arc::new(low)]); + let entries = overlay.list(&prefix).expect("list"); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].path.display_lossy(), entries[1].path.display_lossy()); + assert_ne!(entries[0].path.identity_bytes(), entries[1].path.identity_bytes()); + } + fn unique_test_dir(name: &str) -> PathBuf { let mut path = std::env::temp_dir(); path.push(format!("fparkan-vfs-{name}-{}", std::process::id()));