From 7337492c302f663d1b77130b3291ba891198bc65 Mon Sep 17 00:00:00 2001 From: Valentin Popov Date: Tue, 30 Jun 2026 01:54:57 +0400 Subject: [PATCH] fix: route archive inspection through byte-safe boundaries --- crates/fparkan-corpus/src/lib.rs | 148 +++++++++++++++------------ crates/fparkan-inspection/Cargo.toml | 2 + crates/fparkan-inspection/src/lib.rs | 144 ++++++++++++++++++++++++-- 3 files changed, 216 insertions(+), 78 deletions(-) diff --git a/crates/fparkan-corpus/src/lib.rs b/crates/fparkan-corpus/src/lib.rs index c2fad4d..91fd911 100644 --- a/crates/fparkan-corpus/src/lib.rs +++ b/crates/fparkan-corpus/src/lib.rs @@ -35,6 +35,8 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt; use std::fs; use std::io::Write; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -69,6 +71,8 @@ pub struct DiscoverOptions { pub struct ManifestEntry { /// Normalized relative path. pub path: String, + /// Byte-exact relative host path used for reopening corpus files. + pub host_rel_path: PathBuf, /// File size in bytes. pub size: u64, /// SHA-256 content fingerprint. @@ -188,7 +192,7 @@ pub fn discover(root: &Path, options: DiscoverOptions) -> Result Vec> { let mut grouped: BTreeMap, BTreeSet> = BTreeMap::new(); for file in files { grouped - .entry(ascii_lookup_key(file.path.as_bytes()).0) + .entry(ascii_lookup_key(path_identity_bytes(&file.host_rel_path)).0) .or_default() .insert(file.path.clone()); } @@ -353,7 +362,7 @@ fn inspect_report_file( ) -> CorpusFileRecord { let lower = entry.path.to_ascii_lowercase(); let mut variant = inspect_path_metrics(&lower, metrics); - let path = root.join(&entry.path); + let path = root.join(&entry.host_rel_path); let bytes = match fs::read(&path) { Ok(bytes) => bytes, Err(source) => { @@ -439,6 +448,17 @@ fn inspect_report_file( } } +fn path_identity_bytes(path: &Path) -> &[u8] { + #[cfg(unix)] + { + path.as_os_str().as_bytes() + } + #[cfg(not(unix))] + { + path.to_str().unwrap_or_default().as_bytes() + } +} + fn inspect_path_metrics(lower: &str, metrics: &mut BTreeMap) -> String { let mut variant = "file"; if lower.ends_with("data.tma") { @@ -767,11 +787,7 @@ mod tests { fn report_json_contains_metrics_and_hashes_not_paths_or_payloads() { let manifest = CorpusManifest { kind: CorpusKind::Part1, - files: vec![ManifestEntry { - path: "secret/payload.bin".to_string(), - size: 4, - hash: sha256(b"DATA"), - }], + files: vec![manifest_entry("secret/payload.bin", 4, sha256(b"DATA"))], casefold_collisions: Vec::new(), }; let report = report(Path::new("."), &manifest).expect("report"); @@ -791,11 +807,7 @@ mod tests { let root = temp_dir("report-missing"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "missing.lib".to_string(), - size: 1, - hash: sha256(b"missing"), - }], + files: vec![manifest_entry("missing.lib", 1, sha256(b"missing"))], casefold_collisions: Vec::new(), }; @@ -814,11 +826,7 @@ mod tests { fs::write(root.join("bad.lib"), b"NRes").expect("bad nres"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "bad.lib".to_string(), - size: 4, - hash: sha256(b"NRes"), - }], + files: vec![manifest_entry("bad.lib", 4, sha256(b"NRes"))], casefold_collisions: Vec::new(), }; @@ -857,11 +865,11 @@ mod tests { fs::write(root.join("archive.lib"), &archive).expect("archive"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "archive.lib".to_string(), - size: u64::try_from(archive.len()).expect("archive size"), - hash: sha256(&archive), - }], + files: vec![manifest_entry( + "archive.lib", + u64::try_from(archive.len()).expect("archive size"), + sha256(&archive), + )], casefold_collisions: Vec::new(), }; @@ -886,11 +894,7 @@ mod tests { fs::write(root.join("WORLD/MAP/land.map"), build_nres(&[])).expect("land map"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "WORLD/MAP/land.map".to_string(), - size: 16, - hash: sha256(b"land.map"), - }], + files: vec![manifest_entry("WORLD/MAP/land.map", 16, sha256(b"land.map"))], casefold_collisions: Vec::new(), }; @@ -909,11 +913,7 @@ mod tests { fs::write(root.join("WORLD/MAP/land.msh"), build_nres(&[])).expect("land msh"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "WORLD/MAP/land.msh".to_string(), - size: 16, - hash: sha256(b"land.msh"), - }], + files: vec![manifest_entry("WORLD/MAP/land.msh", 16, sha256(b"land.msh"))], casefold_collisions: Vec::new(), }; @@ -932,11 +932,11 @@ mod tests { fs::write(root.join("MISSIONS/test/data.tma"), b"malformed tma").expect("tma"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "MISSIONS/test/data.tma".to_string(), - size: 12, - hash: sha256(b"malformed tma"), - }], + files: vec![manifest_entry( + "MISSIONS/test/data.tma", + 12, + sha256(b"malformed tma"), + )], casefold_collisions: Vec::new(), }; @@ -955,11 +955,7 @@ mod tests { fs::write(root.join("units/unit.dat"), vec![0u8; 120]).expect("unit"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "units/unit.dat".to_string(), - size: 120, - hash: sha256(&[0u8; 120]), - }], + files: vec![manifest_entry("units/unit.dat", 120, sha256(&[0u8; 120]))], casefold_collisions: Vec::new(), }; @@ -977,11 +973,7 @@ mod tests { fs::write(root.join("patch.nl"), b"NL malformed").expect("rsli"); let manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "patch.nl".to_string(), - size: 12, - hash: sha256(b"NL malformed"), - }], + files: vec![manifest_entry("patch.nl", 12, sha256(b"NL malformed"))], casefold_collisions: Vec::new(), }; @@ -1052,16 +1044,8 @@ mod tests { let manifest = CorpusManifest { kind: CorpusKind::Unknown, files: vec![ - ManifestEntry { - path: "Textures/Foo.TEX".to_string(), - size: 1, - hash: sha256(b"first"), - }, - ManifestEntry { - path: "textures/foo.tex".to_string(), - size: 1, - hash: sha256(b"second"), - }, + manifest_entry("Textures/Foo.TEX", 1, sha256(b"first")), + manifest_entry("textures/foo.tex", 1, sha256(b"second")), ], casefold_collisions: Vec::new(), }; @@ -1081,11 +1065,7 @@ mod tests { fn fingerprint_changes() { let mut manifest = CorpusManifest { kind: CorpusKind::Unknown, - files: vec![ManifestEntry { - path: "a".to_string(), - size: 1, - hash: sha256(b"before"), - }], + files: vec![manifest_entry("a", 1, sha256(b"before"))], casefold_collisions: Vec::new(), }; let a = fingerprint(&manifest); @@ -1118,6 +1098,29 @@ mod tests { let _ = fs::remove_file(tmp); } + #[cfg(unix)] + #[test] + fn discover_supports_non_utf8_host_paths() { + use std::ffi::OsString; + use std::os::unix::ffi::OsStringExt; + + let root = temp_dir("non-utf8"); + let file_name = OsString::from_vec(vec![0xFF, b'.', b'b', b'i', b'n']); + let file_path = root.join(&file_name); + if let Err(err) = fs::write(&file_path, b"raw") { + assert_eq!(err.kind(), std::io::ErrorKind::PermissionDenied); + let _ = fs::remove_dir_all(root); + return; + } + + let manifest = discover(&root, DiscoverOptions::default()).expect("manifest"); + + assert_eq!(manifest.files.len(), 1); + assert_eq!(manifest.files[0].path, "\u{FFFD}.bin"); + assert_eq!(manifest.files[0].host_rel_path, PathBuf::from(&file_name)); + let _ = fs::remove_dir_all(root); + } + struct TestNresEntry<'a> { name: &'a str, type_id: u32, @@ -1164,6 +1167,15 @@ mod tests { out } + fn manifest_entry(path: &str, size: u64, hash: Sha256Digest) -> ManifestEntry { + ManifestEntry { + path: path.to_string(), + host_rel_path: PathBuf::from(path), + size, + hash, + } + } + fn push_u32(out: &mut Vec, value: u32) { out.extend_from_slice(&value.to_le_bytes()); } diff --git a/crates/fparkan-inspection/Cargo.toml b/crates/fparkan-inspection/Cargo.toml index 4f35ecd..5d51e37 100644 --- a/crates/fparkan-inspection/Cargo.toml +++ b/crates/fparkan-inspection/Cargo.toml @@ -6,8 +6,10 @@ license.workspace = true repository.workspace = true [dependencies] +fparkan-diagnostics = { path = "../fparkan-diagnostics" } fparkan-msh = { path = "../fparkan-msh" } fparkan-nres = { path = "../fparkan-nres" } +fparkan-path = { path = "../fparkan-path" } fparkan-rsli = { path = "../fparkan-rsli" } fparkan-resource = { path = "../fparkan-resource" } fparkan-terrain-format = { path = "../fparkan-terrain-format" } diff --git a/crates/fparkan-inspection/src/lib.rs b/crates/fparkan-inspection/src/lib.rs index ac63bb9..7fce95e 100644 --- a/crates/fparkan-inspection/src/lib.rs +++ b/crates/fparkan-inspection/src/lib.rs @@ -20,14 +20,20 @@ )] //! Shared inspection helpers for format-backed tooling. -use fparkan_msh::{decode_msh, validate_msh}; +use fparkan_diagnostics::{ + diagnostic, render_human, Diagnostic, DiagnosticCode, DiagnosticContext, Phase, +}; +use fparkan_msh::{decode_msh, validate_msh, ModelAsset}; use fparkan_nres::{decode as decode_nres, NresDocument, ReadProfile}; +use fparkan_path::{normalize_relative, PathPolicy}; use fparkan_resource::{archive_path, resource_name, CachedResourceRepository, ResourceRepository}; use fparkan_rsli::decode as decode_rsli; use fparkan_terrain_format::{decode_land_map, decode_land_msh}; use fparkan_texm::decode_texm; -use fparkan_vfs::DirectoryVfs; +use fparkan_vfs::{DirectoryVfs, Vfs}; use std::fs; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::sync::Arc; @@ -131,7 +137,70 @@ pub enum LandFileKind { /// /// Returns a string error when the archive cannot be read or decoded. pub fn inspect_archive_file(path: &Path, sample_limit: usize) -> Result { - let bytes = fs::read(path).map_err(|err| format!("{}: {err}", path.display()))?; + inspect_archive_file_diagnostic(path, sample_limit).map_err(|diagnostic| render_human(&diagnostic)) +} + +/// Inspects a format archive and returns a structured diagnostic on failure. +/// +/// # Errors +/// +/// Returns a [`Diagnostic`] when the archive cannot be read or decoded. +pub fn inspect_archive_file_diagnostic( + path: &Path, + sample_limit: usize, +) -> Result { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let file_name = path.file_name().ok_or_else(|| { + diagnostic( + DiagnosticCode("S1.VFS.PATH"), + format!("{}: archive path has no file name", path.display()), + ) + .with_context(DiagnosticContext { + phase: Some(Phase::Read), + path: Some(path.display().to_string()), + ..DiagnosticContext::default() + }) + })?; + #[cfg(unix)] + let raw_name = file_name.as_bytes(); + #[cfg(not(unix))] + let raw_name = file_name + .to_str() + .ok_or_else(|| { + diagnostic( + DiagnosticCode("S1.VFS.PATH"), + format!("{}: archive file name is not valid text", path.display()), + ) + .with_context(DiagnosticContext { + phase: Some(Phase::Read), + path: Some(path.display().to_string()), + ..DiagnosticContext::default() + }) + })? + .as_bytes(); + let normalized = normalize_relative(raw_name, PathPolicy::HostCompatible).map_err(|err| { + diagnostic( + DiagnosticCode("S1.VFS.PATH"), + format!("{}: {err}", path.display()), + ) + .with_context(DiagnosticContext { + phase: Some(Phase::Read), + path: Some(path.display().to_string()), + ..DiagnosticContext::default() + }) + })?; + let vfs = DirectoryVfs::new(parent); + let bytes = vfs.read(&normalized).map_err(|err| { + diagnostic( + DiagnosticCode("S1.VFS.READ"), + format!("{}: {err}", path.display()), + ) + .with_context(DiagnosticContext { + phase: Some(Phase::Read), + path: Some(path.display().to_string()), + ..DiagnosticContext::default() + }) + })?; inspect_archive_bytes(&bytes, sample_limit, Some(path)) } @@ -140,13 +209,13 @@ fn inspect_archive_bytes( bytes: &[u8], sample_limit: usize, source: Option<&Path>, -) -> Result { +) -> Result { if bytes.starts_with(b"NRes") { let document = decode_nres( Arc::from(bytes.to_vec().into_boxed_slice()), ReadProfile::Compatible, ) - .map_err(|err| err.to_string())?; + .map_err(|err| archive_parse_diagnostic("S1.NRES.DECODE", source, err.to_string()))?; let mut sample = Vec::new(); for entry in document.entries().iter().take(sample_limit) { sample.push(NresEntrySummary { @@ -165,15 +234,16 @@ fn inspect_archive_bytes( Arc::from(bytes.to_vec().into_boxed_slice()), fparkan_rsli::ReadProfile::Compatible, ) - .map_err(|err| err.to_string())?; + .map_err(|err| archive_parse_diagnostic("S1.RSLI.DECODE", source, err.to_string()))?; Ok(ArchiveInspection::Rsli { entries: document.entries().len(), }) } else { - match source { - Some(path) => Err(format!("{}: unsupported archive magic", path.display())), - None => Err("unsupported archive magic".to_string()), - } + Err(archive_parse_diagnostic( + "S1.RESOURCE.UNSUPPORTED_ARCHIVE", + source, + "unsupported archive magic".to_string(), + )) } } @@ -202,6 +272,22 @@ pub fn inspect_model_from_root( }) } +/// Loads and validates a model resource through repository-backed lookup. +/// +/// # Errors +/// +/// Returns a string error when the resource cannot be resolved or parsed as a +/// valid model payload. +pub fn load_model_from_root( + root: &Path, + archive: &str, + resource: &str, +) -> Result { + let document = load_model_document_from_root(root, archive, resource)?; + let msh = decode_msh(&document).map_err(|err| err.to_string())?; + validate_msh(&msh).map_err(|err| err.to_string()) +} + /// Inspects a texture through repository-backed resource lookup. /// /// # Errors @@ -288,6 +374,27 @@ fn read_resource_bytes(root: &Path, archive: &str, name: &str) -> Result Result { + let bytes = read_resource_bytes(root, archive, resource)?; + decode_nres(bytes, ReadProfile::Compatible).map_err(|err| err.to_string()) +} + +fn archive_parse_diagnostic( + code: &'static str, + source: Option<&Path>, + message: String, +) -> Diagnostic { + diagnostic(DiagnosticCode(code), message).with_context(DiagnosticContext { + phase: Some(Phase::Parse), + path: source.map(|path| path.display().to_string()), + ..DiagnosticContext::default() + }) +} + #[cfg(test)] mod tests { use super::*; @@ -306,6 +413,23 @@ mod tests { assert!(error.contains("entry table out of bounds")); } + #[test] + fn archive_diagnostic_preserves_source_path() { + let dir = temp_dir("inspect-diagnostic"); + let path = dir.join("broken.nres"); + fs::write(&path, b"NRes").expect("broken nres"); + + let diagnostic = + inspect_archive_file_diagnostic(&path, 0).expect_err("diagnostic failure"); + + assert_eq!(diagnostic.code.0, "S1.NRES.DECODE"); + let expected_path = path.display().to_string(); + assert_eq!( + diagnostic.context.path.as_deref(), + Some(expected_path.as_str()) + ); + } + #[test] fn nres_entry_summary_fields_are_readable() { let dir = temp_dir("inspect-nres");