fix: route archive inspection through byte-safe boundaries
This commit is contained in:
@@ -35,6 +35,8 @@ use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::fmt;
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -69,6 +71,8 @@ pub struct DiscoverOptions {
|
||||
pub struct ManifestEntry {
|
||||
/// Normalized relative path.
|
||||
pub path: String,
|
||||
/// Byte-exact relative host path used for reopening corpus files.
|
||||
pub host_rel_path: PathBuf,
|
||||
/// File size in bytes.
|
||||
pub size: u64,
|
||||
/// SHA-256 content fingerprint.
|
||||
@@ -188,7 +192,7 @@ pub fn discover(root: &Path, options: DiscoverOptions) -> Result<CorpusManifest,
|
||||
}
|
||||
let mut files = Vec::new();
|
||||
walk(root, root, options, &mut files)?;
|
||||
files.sort_by(|a, b| a.path.cmp(&b.path));
|
||||
files.sort_by(|a, b| a.host_rel_path.cmp(&b.host_rel_path));
|
||||
|
||||
let kind = classify(root, &files);
|
||||
let casefold_collisions = detect_casefold_collisions(&files);
|
||||
@@ -243,17 +247,22 @@ fn walk(
|
||||
let rel = path
|
||||
.strip_prefix(root)
|
||||
.map_err(|_| CorpusError::InvalidPath(path.display().to_string()))?;
|
||||
let rel_text = rel
|
||||
#[cfg(unix)]
|
||||
let rel_bytes = rel.as_os_str().as_bytes();
|
||||
#[cfg(not(unix))]
|
||||
let rel_bytes = rel
|
||||
.to_str()
|
||||
.ok_or_else(|| CorpusError::InvalidPath(path.display().to_string()))?;
|
||||
let normalized = normalize_relative(rel_text.as_bytes(), PathPolicy::HostCompatible)
|
||||
.map_err(|_| CorpusError::InvalidPath(rel_text.to_string()))?;
|
||||
.ok_or_else(|| CorpusError::InvalidPath(path.display().to_string()))?
|
||||
.as_bytes();
|
||||
let normalized = normalize_relative(rel_bytes, PathPolicy::HostCompatible)
|
||||
.map_err(|_| CorpusError::InvalidPath(path.display().to_string()))?;
|
||||
let bytes = fs::read(&path).map_err(|source| CorpusError::Io {
|
||||
path: path.clone(),
|
||||
source,
|
||||
})?;
|
||||
out.push(ManifestEntry {
|
||||
path: normalized.as_str().to_string(),
|
||||
path: normalized.display_lossy().to_string(),
|
||||
host_rel_path: rel.to_path_buf(),
|
||||
size: metadata.len(),
|
||||
hash: sha256(&bytes),
|
||||
});
|
||||
@@ -285,7 +294,7 @@ fn detect_casefold_collisions(files: &[ManifestEntry]) -> Vec<Vec<String>> {
|
||||
let mut grouped: BTreeMap<Vec<u8>, BTreeSet<String>> = BTreeMap::new();
|
||||
for file in files {
|
||||
grouped
|
||||
.entry(ascii_lookup_key(file.path.as_bytes()).0)
|
||||
.entry(ascii_lookup_key(path_identity_bytes(&file.host_rel_path)).0)
|
||||
.or_default()
|
||||
.insert(file.path.clone());
|
||||
}
|
||||
@@ -353,7 +362,7 @@ fn inspect_report_file(
|
||||
) -> CorpusFileRecord {
|
||||
let lower = entry.path.to_ascii_lowercase();
|
||||
let mut variant = inspect_path_metrics(&lower, metrics);
|
||||
let path = root.join(&entry.path);
|
||||
let path = root.join(&entry.host_rel_path);
|
||||
let bytes = match fs::read(&path) {
|
||||
Ok(bytes) => bytes,
|
||||
Err(source) => {
|
||||
@@ -439,6 +448,17 @@ fn inspect_report_file(
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the bytes that identify `path` when grouping entries by lookup key.
///
/// On Unix these are the raw `OsStr` bytes. On other platforms the encoded
/// byte form of the `OsStr` is used, so a path that is not valid Unicode keeps
/// a distinct identity instead of collapsing to an empty slice (which would
/// make every such path share one key).
fn path_identity_bytes(path: &Path) -> &[u8] {
    #[cfg(unix)]
    {
        path.as_os_str().as_bytes()
    }
    #[cfg(not(unix))]
    {
        // For valid Unicode paths this is the same UTF-8 text `to_str` would
        // have produced; for anything else it stays non-empty and distinct.
        path.as_os_str().as_encoded_bytes()
    }
}
|
||||
|
||||
fn inspect_path_metrics(lower: &str, metrics: &mut BTreeMap<String, u64>) -> String {
|
||||
let mut variant = "file";
|
||||
if lower.ends_with("data.tma") {
|
||||
@@ -767,11 +787,7 @@ mod tests {
|
||||
fn report_json_contains_metrics_and_hashes_not_paths_or_payloads() {
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Part1,
|
||||
files: vec![ManifestEntry {
|
||||
path: "secret/payload.bin".to_string(),
|
||||
size: 4,
|
||||
hash: sha256(b"DATA"),
|
||||
}],
|
||||
files: vec![manifest_entry("secret/payload.bin", 4, sha256(b"DATA"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
let report = report(Path::new("."), &manifest).expect("report");
|
||||
@@ -791,11 +807,7 @@ mod tests {
|
||||
let root = temp_dir("report-missing");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "missing.lib".to_string(),
|
||||
size: 1,
|
||||
hash: sha256(b"missing"),
|
||||
}],
|
||||
files: vec![manifest_entry("missing.lib", 1, sha256(b"missing"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -814,11 +826,7 @@ mod tests {
|
||||
fs::write(root.join("bad.lib"), b"NRes").expect("bad nres");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "bad.lib".to_string(),
|
||||
size: 4,
|
||||
hash: sha256(b"NRes"),
|
||||
}],
|
||||
files: vec![manifest_entry("bad.lib", 4, sha256(b"NRes"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -857,11 +865,11 @@ mod tests {
|
||||
fs::write(root.join("archive.lib"), &archive).expect("archive");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "archive.lib".to_string(),
|
||||
size: u64::try_from(archive.len()).expect("archive size"),
|
||||
hash: sha256(&archive),
|
||||
}],
|
||||
files: vec![manifest_entry(
|
||||
"archive.lib",
|
||||
u64::try_from(archive.len()).expect("archive size"),
|
||||
sha256(&archive),
|
||||
)],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -886,11 +894,7 @@ mod tests {
|
||||
fs::write(root.join("WORLD/MAP/land.map"), build_nres(&[])).expect("land map");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "WORLD/MAP/land.map".to_string(),
|
||||
size: 16,
|
||||
hash: sha256(b"land.map"),
|
||||
}],
|
||||
files: vec![manifest_entry("WORLD/MAP/land.map", 16, sha256(b"land.map"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -909,11 +913,7 @@ mod tests {
|
||||
fs::write(root.join("WORLD/MAP/land.msh"), build_nres(&[])).expect("land msh");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "WORLD/MAP/land.msh".to_string(),
|
||||
size: 16,
|
||||
hash: sha256(b"land.msh"),
|
||||
}],
|
||||
files: vec![manifest_entry("WORLD/MAP/land.msh", 16, sha256(b"land.msh"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -932,11 +932,11 @@ mod tests {
|
||||
fs::write(root.join("MISSIONS/test/data.tma"), b"malformed tma").expect("tma");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "MISSIONS/test/data.tma".to_string(),
|
||||
size: 12,
|
||||
hash: sha256(b"malformed tma"),
|
||||
}],
|
||||
files: vec![manifest_entry(
|
||||
"MISSIONS/test/data.tma",
|
||||
12,
|
||||
sha256(b"malformed tma"),
|
||||
)],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -955,11 +955,7 @@ mod tests {
|
||||
fs::write(root.join("units/unit.dat"), vec![0u8; 120]).expect("unit");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "units/unit.dat".to_string(),
|
||||
size: 120,
|
||||
hash: sha256(&[0u8; 120]),
|
||||
}],
|
||||
files: vec![manifest_entry("units/unit.dat", 120, sha256(&[0u8; 120]))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -977,11 +973,7 @@ mod tests {
|
||||
fs::write(root.join("patch.nl"), b"NL malformed").expect("rsli");
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "patch.nl".to_string(),
|
||||
size: 12,
|
||||
hash: sha256(b"NL malformed"),
|
||||
}],
|
||||
files: vec![manifest_entry("patch.nl", 12, sha256(b"NL malformed"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
|
||||
@@ -1052,16 +1044,8 @@ mod tests {
|
||||
let manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![
|
||||
ManifestEntry {
|
||||
path: "Textures/Foo.TEX".to_string(),
|
||||
size: 1,
|
||||
hash: sha256(b"first"),
|
||||
},
|
||||
ManifestEntry {
|
||||
path: "textures/foo.tex".to_string(),
|
||||
size: 1,
|
||||
hash: sha256(b"second"),
|
||||
},
|
||||
manifest_entry("Textures/Foo.TEX", 1, sha256(b"first")),
|
||||
manifest_entry("textures/foo.tex", 1, sha256(b"second")),
|
||||
],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
@@ -1081,11 +1065,7 @@ mod tests {
|
||||
fn fingerprint_changes() {
|
||||
let mut manifest = CorpusManifest {
|
||||
kind: CorpusKind::Unknown,
|
||||
files: vec![ManifestEntry {
|
||||
path: "a".to_string(),
|
||||
size: 1,
|
||||
hash: sha256(b"before"),
|
||||
}],
|
||||
files: vec![manifest_entry("a", 1, sha256(b"before"))],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
let a = fingerprint(&manifest);
|
||||
@@ -1118,6 +1098,29 @@ mod tests {
|
||||
let _ = fs::remove_file(tmp);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
#[test]
fn discover_supports_non_utf8_host_paths() {
    use std::ffi::OsString;
    use std::os::unix::ffi::OsStringExt;

    let root = temp_dir("non-utf8");
    let file_name = OsString::from_vec(vec![0xFF, b'.', b'b', b'i', b'n']);
    let file_path = root.join(&file_name);
    // Not every Unix filesystem can store this name: some reject file names
    // that are not valid UTF-8, and they do so with error kinds other than
    // `PermissionDenied`. When the fixture cannot be created there is nothing
    // to discover, so the test is skipped instead of failing on the error kind.
    if fs::write(&file_path, b"raw").is_err() {
        let _ = fs::remove_dir_all(root);
        return;
    }

    let manifest = discover(&root, DiscoverOptions::default()).expect("manifest");

    assert_eq!(manifest.files.len(), 1);
    // The display path is lossy, while the host path keeps the exact bytes.
    assert_eq!(manifest.files[0].path, "\u{FFFD}.bin");
    assert_eq!(manifest.files[0].host_rel_path, PathBuf::from(&file_name));
    let _ = fs::remove_dir_all(root);
}
|
||||
|
||||
struct TestNresEntry<'a> {
|
||||
name: &'a str,
|
||||
type_id: u32,
|
||||
@@ -1164,6 +1167,15 @@ mod tests {
|
||||
out
|
||||
}
|
||||
|
||||
fn manifest_entry(path: &str, size: u64, hash: Sha256Digest) -> ManifestEntry {
|
||||
ManifestEntry {
|
||||
path: path.to_string(),
|
||||
host_rel_path: PathBuf::from(path),
|
||||
size,
|
||||
hash,
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `value` to `out` as four little-endian bytes.
fn push_u32(out: &mut Vec<u8>, value: u32) {
    out.extend(value.to_le_bytes());
}
|
||||
|
||||
@@ -6,8 +6,10 @@ license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
fparkan-diagnostics = { path = "../fparkan-diagnostics" }
|
||||
fparkan-msh = { path = "../fparkan-msh" }
|
||||
fparkan-nres = { path = "../fparkan-nres" }
|
||||
fparkan-path = { path = "../fparkan-path" }
|
||||
fparkan-rsli = { path = "../fparkan-rsli" }
|
||||
fparkan-resource = { path = "../fparkan-resource" }
|
||||
fparkan-terrain-format = { path = "../fparkan-terrain-format" }
|
||||
|
||||
@@ -20,14 +20,20 @@
|
||||
)]
|
||||
//! Shared inspection helpers for format-backed tooling.
|
||||
|
||||
use fparkan_msh::{decode_msh, validate_msh};
|
||||
use fparkan_diagnostics::{
|
||||
diagnostic, render_human, Diagnostic, DiagnosticCode, DiagnosticContext, Phase,
|
||||
};
|
||||
use fparkan_msh::{decode_msh, validate_msh, ModelAsset};
|
||||
use fparkan_nres::{decode as decode_nres, NresDocument, ReadProfile};
|
||||
use fparkan_path::{normalize_relative, PathPolicy};
|
||||
use fparkan_resource::{archive_path, resource_name, CachedResourceRepository, ResourceRepository};
|
||||
use fparkan_rsli::decode as decode_rsli;
|
||||
use fparkan_terrain_format::{decode_land_map, decode_land_msh};
|
||||
use fparkan_texm::decode_texm;
|
||||
use fparkan_vfs::DirectoryVfs;
|
||||
use fparkan_vfs::{DirectoryVfs, Vfs};
|
||||
use std::fs;
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -131,7 +137,70 @@ pub enum LandFileKind {
|
||||
///
|
||||
/// Returns a string error when the archive cannot be read or decoded.
|
||||
pub fn inspect_archive_file(path: &Path, sample_limit: usize) -> Result<ArchiveInspection, String> {
|
||||
let bytes = fs::read(path).map_err(|err| format!("{}: {err}", path.display()))?;
|
||||
inspect_archive_file_diagnostic(path, sample_limit).map_err(|diagnostic| render_human(&diagnostic))
|
||||
}
|
||||
|
||||
/// Inspects a format archive and returns a structured diagnostic on failure.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a [`Diagnostic`] when the archive cannot be read or decoded.
|
||||
pub fn inspect_archive_file_diagnostic(
|
||||
path: &Path,
|
||||
sample_limit: usize,
|
||||
) -> Result<ArchiveInspection, Diagnostic> {
|
||||
let parent = path.parent().unwrap_or_else(|| Path::new("."));
|
||||
let file_name = path.file_name().ok_or_else(|| {
|
||||
diagnostic(
|
||||
DiagnosticCode("S1.VFS.PATH"),
|
||||
format!("{}: archive path has no file name", path.display()),
|
||||
)
|
||||
.with_context(DiagnosticContext {
|
||||
phase: Some(Phase::Read),
|
||||
path: Some(path.display().to_string()),
|
||||
..DiagnosticContext::default()
|
||||
})
|
||||
})?;
|
||||
#[cfg(unix)]
|
||||
let raw_name = file_name.as_bytes();
|
||||
#[cfg(not(unix))]
|
||||
let raw_name = file_name
|
||||
.to_str()
|
||||
.ok_or_else(|| {
|
||||
diagnostic(
|
||||
DiagnosticCode("S1.VFS.PATH"),
|
||||
format!("{}: archive file name is not valid text", path.display()),
|
||||
)
|
||||
.with_context(DiagnosticContext {
|
||||
phase: Some(Phase::Read),
|
||||
path: Some(path.display().to_string()),
|
||||
..DiagnosticContext::default()
|
||||
})
|
||||
})?
|
||||
.as_bytes();
|
||||
let normalized = normalize_relative(raw_name, PathPolicy::HostCompatible).map_err(|err| {
|
||||
diagnostic(
|
||||
DiagnosticCode("S1.VFS.PATH"),
|
||||
format!("{}: {err}", path.display()),
|
||||
)
|
||||
.with_context(DiagnosticContext {
|
||||
phase: Some(Phase::Read),
|
||||
path: Some(path.display().to_string()),
|
||||
..DiagnosticContext::default()
|
||||
})
|
||||
})?;
|
||||
let vfs = DirectoryVfs::new(parent);
|
||||
let bytes = vfs.read(&normalized).map_err(|err| {
|
||||
diagnostic(
|
||||
DiagnosticCode("S1.VFS.READ"),
|
||||
format!("{}: {err}", path.display()),
|
||||
)
|
||||
.with_context(DiagnosticContext {
|
||||
phase: Some(Phase::Read),
|
||||
path: Some(path.display().to_string()),
|
||||
..DiagnosticContext::default()
|
||||
})
|
||||
})?;
|
||||
inspect_archive_bytes(&bytes, sample_limit, Some(path))
|
||||
}
|
||||
|
||||
@@ -140,13 +209,13 @@ fn inspect_archive_bytes(
|
||||
bytes: &[u8],
|
||||
sample_limit: usize,
|
||||
source: Option<&Path>,
|
||||
) -> Result<ArchiveInspection, String> {
|
||||
) -> Result<ArchiveInspection, Diagnostic> {
|
||||
if bytes.starts_with(b"NRes") {
|
||||
let document = decode_nres(
|
||||
Arc::from(bytes.to_vec().into_boxed_slice()),
|
||||
ReadProfile::Compatible,
|
||||
)
|
||||
.map_err(|err| err.to_string())?;
|
||||
.map_err(|err| archive_parse_diagnostic("S1.NRES.DECODE", source, err.to_string()))?;
|
||||
let mut sample = Vec::new();
|
||||
for entry in document.entries().iter().take(sample_limit) {
|
||||
sample.push(NresEntrySummary {
|
||||
@@ -165,15 +234,16 @@ fn inspect_archive_bytes(
|
||||
Arc::from(bytes.to_vec().into_boxed_slice()),
|
||||
fparkan_rsli::ReadProfile::Compatible,
|
||||
)
|
||||
.map_err(|err| err.to_string())?;
|
||||
.map_err(|err| archive_parse_diagnostic("S1.RSLI.DECODE", source, err.to_string()))?;
|
||||
Ok(ArchiveInspection::Rsli {
|
||||
entries: document.entries().len(),
|
||||
})
|
||||
} else {
|
||||
match source {
|
||||
Some(path) => Err(format!("{}: unsupported archive magic", path.display())),
|
||||
None => Err("unsupported archive magic".to_string()),
|
||||
}
|
||||
Err(archive_parse_diagnostic(
|
||||
"S1.RESOURCE.UNSUPPORTED_ARCHIVE",
|
||||
source,
|
||||
"unsupported archive magic".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,6 +272,22 @@ pub fn inspect_model_from_root(
|
||||
})
|
||||
}
|
||||
|
||||
/// Loads and validates a model resource through repository-backed lookup.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a string error when the resource cannot be resolved or parsed as a
|
||||
/// valid model payload.
|
||||
pub fn load_model_from_root(
|
||||
root: &Path,
|
||||
archive: &str,
|
||||
resource: &str,
|
||||
) -> Result<ModelAsset, String> {
|
||||
let document = load_model_document_from_root(root, archive, resource)?;
|
||||
let msh = decode_msh(&document).map_err(|err| err.to_string())?;
|
||||
validate_msh(&msh).map_err(|err| err.to_string())
|
||||
}
|
||||
|
||||
/// Inspects a texture through repository-backed resource lookup.
|
||||
///
|
||||
/// # Errors
|
||||
@@ -288,6 +374,27 @@ fn read_resource_bytes(root: &Path, archive: &str, name: &str) -> Result<Arc<[u8
|
||||
Ok(Arc::from(bytes.into_owned()))
|
||||
}
|
||||
|
||||
fn load_model_document_from_root(
|
||||
root: &Path,
|
||||
archive: &str,
|
||||
resource: &str,
|
||||
) -> Result<NresDocument, String> {
|
||||
let bytes = read_resource_bytes(root, archive, resource)?;
|
||||
decode_nres(bytes, ReadProfile::Compatible).map_err(|err| err.to_string())
|
||||
}
|
||||
|
||||
fn archive_parse_diagnostic(
|
||||
code: &'static str,
|
||||
source: Option<&Path>,
|
||||
message: String,
|
||||
) -> Diagnostic {
|
||||
diagnostic(DiagnosticCode(code), message).with_context(DiagnosticContext {
|
||||
phase: Some(Phase::Parse),
|
||||
path: source.map(|path| path.display().to_string()),
|
||||
..DiagnosticContext::default()
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -306,6 +413,23 @@ mod tests {
|
||||
assert!(error.contains("entry table out of bounds"));
|
||||
}
|
||||
|
||||
#[test]
fn archive_diagnostic_preserves_source_path() {
    // A file holding only the NRes magic is expected to fail decoding.
    let dir = temp_dir("inspect-diagnostic");
    let archive_path = dir.join("broken.nres");
    fs::write(&archive_path, b"NRes").expect("broken nres");

    let failure =
        inspect_archive_file_diagnostic(&archive_path, 0).expect_err("diagnostic failure");

    assert_eq!(failure.code.0, "S1.NRES.DECODE");
    // The diagnostic context must carry the caller-supplied path verbatim.
    let expected = Some(archive_path.display().to_string());
    assert_eq!(failure.context.path, expected);
}
|
||||
|
||||
#[test]
|
||||
fn nres_entry_summary_fields_are_readable() {
|
||||
let dir = temp_dir("inspect-nres");
|
||||
|
||||
Reference in New Issue
Block a user