fix: strengthen resource fingerprints

This commit is contained in:
2026-06-22 16:31:57 +04:00
parent f69c893a40
commit aa1b809bd8
8 changed files with 313 additions and 75 deletions
Generated
+3
View File
@@ -81,6 +81,7 @@ dependencies = [
name = "fparkan-corpus" name = "fparkan-corpus"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"fparkan-binary",
"fparkan-path", "fparkan-path",
] ]
@@ -199,6 +200,7 @@ dependencies = [
name = "fparkan-resource" name = "fparkan-resource"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"fparkan-binary",
"fparkan-nres", "fparkan-nres",
"fparkan-path", "fparkan-path",
"fparkan-rsli", "fparkan-rsli",
@@ -263,6 +265,7 @@ dependencies = [
name = "fparkan-vfs" name = "fparkan-vfs"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"fparkan-binary",
"fparkan-path", "fparkan-path",
] ]
+192
View File
@@ -3,6 +3,9 @@
use std::fmt; use std::fmt;
/// SHA-256 digest bytes.
///
/// A SHA-256 digest is 256 bits wide, which is why the alias is a fixed
/// 32-byte array rather than a slice or vector.
pub type Sha256Digest = [u8; 32];
/// Parser limits shared by binary formats. /// Parser limits shared by binary formats.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub struct Limits { pub struct Limits {
@@ -262,6 +265,183 @@ pub fn read_lp_bytes(cursor: &mut Cursor<'_>, max: u32) -> Result<Vec<u8>, Decod
Ok(cursor.read_exact(len)?.to_vec()) Ok(cursor.read_exact(len)?.to_vec())
} }
/// Hashes `bytes` with SHA-256 using only the standard library.
///
/// Whole 64-byte blocks are compressed straight from the input; the trailing
/// partial block is copied into a scratch buffer, terminated with `0x80`,
/// zero-padded, and finished with the big-endian message length in bits.
/// The eight state words are returned in big-endian byte order.
#[must_use]
pub fn sha256(bytes: &[u8]) -> Sha256Digest {
    // Round constants, laid out four per row to keep the table compact;
    // rustfmt would otherwise expand it to one literal per line.
    #[rustfmt::skip]
    const K: [u32; 64] = [
        0x428a_2f98, 0x7137_4491, 0xb5c0_fbcf, 0xe9b5_dba5,
        0x3956_c25b, 0x59f1_11f1, 0x923f_82a4, 0xab1c_5ed5,
        0xd807_aa98, 0x1283_5b01, 0x2431_85be, 0x550c_7dc3,
        0x72be_5d74, 0x80de_b1fe, 0x9bdc_06a7, 0xc19b_f174,
        0xe49b_69c1, 0xefbe_4786, 0x0fc1_9dc6, 0x240c_a1cc,
        0x2de9_2c6f, 0x4a74_84aa, 0x5cb0_a9dc, 0x76f9_88da,
        0x983e_5152, 0xa831_c66d, 0xb003_27c8, 0xbf59_7fc7,
        0xc6e0_0bf3, 0xd5a7_9147, 0x06ca_6351, 0x1429_2967,
        0x27b7_0a85, 0x2e1b_2138, 0x4d2c_6dfc, 0x5338_0d13,
        0x650a_7354, 0x766a_0abb, 0x81c2_c92e, 0x9272_2c85,
        0xa2bf_e8a1, 0xa81a_664b, 0xc24b_8b70, 0xc76c_51a3,
        0xd192_e819, 0xd699_0624, 0xf40e_3585, 0x106a_a070,
        0x19a4_c116, 0x1e37_6c08, 0x2748_774c, 0x34b0_bcb5,
        0x391c_0cb3, 0x4ed8_aa4a, 0x5b9c_ca4f, 0x682e_6ff3,
        0x748f_82ee, 0x78a5_636f, 0x84c8_7814, 0x8cc7_0208,
        0x90be_fffa, 0xa450_6ceb, 0xbef9_a3f7, 0xc671_78f2,
    ];

    // Initial hash state.
    let mut state: [u32; 8] = [
        0x6a09_e667,
        0xbb67_ae85,
        0x3c6e_f372,
        0xa54f_f53a,
        0x510e_527f,
        0x9b05_688c,
        0x1f83_d9ab,
        0x5be0_cd19,
    ];

    // The length field is defined modulo 2^64 bits, hence the wrapping multiply.
    let message_bits = (bytes.len() as u64).wrapping_mul(8);

    // Consume every complete block directly from the caller's slice.
    let mut full_blocks = bytes.chunks_exact(64);
    for block in full_blocks.by_ref() {
        compress_sha256_chunk(&mut state, block, &K);
    }
    let leftover = full_blocks.remainder();

    // The terminator byte plus the 8-byte length need 9 bytes; a leftover of
    // 56 bytes or more therefore spills the padding into a second block.
    let padded_len = if leftover.len() >= 56 { 128 } else { 64 };
    let mut padding = [0u8; 128];
    padding[..leftover.len()].copy_from_slice(leftover);
    padding[leftover.len()] = 0x80;
    padding[padded_len - 8..padded_len].copy_from_slice(&message_bits.to_be_bytes());
    for block in padding[..padded_len].chunks_exact(64) {
        compress_sha256_chunk(&mut state, block, &K);
    }

    // Serialize the state words big-endian, four bytes apiece.
    let mut digest = [0u8; 32];
    for (dst, word) in digest.chunks_exact_mut(4).zip(state.iter()) {
        dst.copy_from_slice(&word.to_be_bytes());
    }
    digest
}
/// Formats a SHA-256 digest as a 64-character lowercase hexadecimal string.
///
/// Each byte contributes two characters, high nibble first.
#[must_use]
pub fn sha256_hex(digest: &Sha256Digest) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    // Maps a value in 0..=15 to its ASCII hex digit.
    let digit = |nibble: u8| char::from(DIGITS[usize::from(nibble)]);

    let mut hex = String::with_capacity(64);
    for &byte in digest {
        hex.push(digit(byte >> 4));
        hex.push(digit(byte & 0x0f));
    }
    hex
}
/// Folds one 64-byte message block into the running SHA-256 state `h`.
///
/// `chunk` supplies the block (only its first 64 bytes are read) and `k` the
/// 64 round constants. Panics if `chunk` holds fewer than 64 bytes.
// The working variables keep the single-letter names used by the SHA-256
// specification, which trips the single-char-names lint.
#[allow(clippy::many_single_char_names)]
fn compress_sha256_chunk(h: &mut [u32; 8], chunk: &[u8], k: &[u32; 64]) {
    // Message schedule: the first 16 words are the block read big-endian...
    let mut schedule = [0u32; 64];
    for (slot, quad) in schedule.iter_mut().zip(chunk[..64].chunks_exact(4)) {
        *slot = u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]);
    }
    // ...and the remaining 48 are derived from earlier schedule entries.
    for t in 16..64 {
        let near = schedule[t - 15];
        let far = schedule[t - 2];
        let sigma0 = near.rotate_right(7) ^ near.rotate_right(18) ^ (near >> 3);
        let sigma1 = far.rotate_right(17) ^ far.rotate_right(19) ^ (far >> 10);
        schedule[t] = schedule[t - 16]
            .wrapping_add(sigma0)
            .wrapping_add(schedule[t - 7])
            .wrapping_add(sigma1);
    }

    // Working variables start as a copy of the current state.
    let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = *h;

    for (&round_key, &word) in k.iter().zip(schedule.iter()) {
        let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
        let choose = (e & f) ^ (!e & g);
        let t1 = hh
            .wrapping_add(big_sigma1)
            .wrapping_add(choose)
            .wrapping_add(round_key)
            .wrapping_add(word);
        let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
        let majority = (a & b) ^ (a & c) ^ (b & c);
        let t2 = big_sigma0.wrapping_add(majority);

        // Rotate the working variables down by one position.
        hh = g;
        g = f;
        f = e;
        e = d.wrapping_add(t1);
        d = c;
        c = b;
        b = a;
        a = t1.wrapping_add(t2);
    }

    // Add the mixed working variables back into the state (mod 2^32).
    let mixed = [a, b, c, d, e, f, g, hh];
    for (acc, delta) in h.iter_mut().zip(mixed.iter()) {
        *acc = acc.wrapping_add(*delta);
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -305,4 +485,16 @@ mod tests {
); );
assert_eq!(cursor.offset(), 4); assert_eq!(cursor.offset(), 4);
} }
/// Checks `sha256` against published SHA-256 test vectors.
///
/// The empty and `"abc"` inputs fit in a single padded block. The 56-byte
/// input leaves no room for the terminator plus the 8-byte length field, so
/// it additionally exercises the path where padding spills into a second
/// block.
#[test]
fn sha256_matches_known_vectors() {
    assert_eq!(
        sha256_hex(&sha256(b"")),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    );
    assert_eq!(
        sha256_hex(&sha256(b"abc")),
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    );

    // Exactly 56 bytes: the smallest tail length that needs two padding blocks.
    let two_block = b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
    assert_eq!(two_block.len(), 56);
    assert_eq!(
        sha256_hex(&sha256(two_block)),
        "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"
    );
}
} }
+1
View File
@@ -6,6 +6,7 @@ license.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
fparkan-binary = { path = "../fparkan-binary" }
fparkan-path = { path = "../fparkan-path" } fparkan-path = { path = "../fparkan-path" }
[lints] [lints]
+20 -31
View File
@@ -1,6 +1,7 @@
#![forbid(unsafe_code)] #![forbid(unsafe_code)]
//! Licensed corpus discovery and aggregate reports. //! Licensed corpus discovery and aggregate reports.
use fparkan_binary::{sha256, sha256_hex, Sha256Digest};
use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy}; use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy};
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::fmt; use std::fmt;
@@ -39,8 +40,8 @@ pub struct ManifestEntry {
pub path: String, pub path: String,
/// File size in bytes. /// File size in bytes.
pub size: u64, pub size: u64,
/// Stable content fingerprint. /// SHA-256 content fingerprint.
pub hash: u64, pub hash: Sha256Digest,
} }
/// Corpus manifest. /// Corpus manifest.
@@ -70,7 +71,7 @@ pub struct CorpusReport {
/// Casefold collision count. /// Casefold collision count.
pub casefold_collisions: usize, pub casefold_collisions: usize,
/// Manifest fingerprint. /// Manifest fingerprint.
pub fingerprint: u64, pub fingerprint: Sha256Digest,
} }
/// Corpus error. /// Corpus error.
@@ -187,7 +188,7 @@ fn walk(
out.push(ManifestEntry { out.push(ManifestEntry {
path: normalized.as_str().to_string(), path: normalized.as_str().to_string(),
size: metadata.len(), size: metadata.len(),
hash: stable_hash(&bytes), hash: sha256(&bytes),
}); });
} }
Ok(()) Ok(())
@@ -352,27 +353,15 @@ fn inspect_nres_entries(bytes: &[u8]) -> Option<Vec<NresEntryBrief>> {
/// Computes stable manifest fingerprint. /// Computes stable manifest fingerprint.
#[must_use] #[must_use]
pub fn fingerprint(manifest: &CorpusManifest) -> u64 { pub fn fingerprint(manifest: &CorpusManifest) -> Sha256Digest {
let mut state = 0xcbf2_9ce4_8422_2325; let mut bytes = Vec::new();
for file in &manifest.files { for file in &manifest.files {
hash_into(&mut state, file.path.as_bytes()); bytes.extend_from_slice(file.path.as_bytes());
hash_into(&mut state, &file.size.to_le_bytes()); bytes.push(0);
hash_into(&mut state, &file.hash.to_le_bytes()); bytes.extend_from_slice(&file.size.to_le_bytes());
} bytes.extend_from_slice(&file.hash);
state
}
fn stable_hash(bytes: &[u8]) -> u64 {
let mut state = 0xcbf2_9ce4_8422_2325;
hash_into(&mut state, bytes);
state
}
fn hash_into(state: &mut u64, bytes: &[u8]) {
for byte in bytes {
*state ^= u64::from(*byte);
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
} }
sha256(&bytes)
} }
/// Writes report atomically. /// Writes report atomically.
@@ -413,13 +402,13 @@ pub fn write_report_atomic(path: &Path, report: &CorpusReport) -> Result<(), Cor
#[must_use] #[must_use]
pub fn render_report_json(report: &CorpusReport) -> String { pub fn render_report_json(report: &CorpusReport) -> String {
let mut out = format!( let mut out = format!(
"{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{:016x}\",\"metrics\":{{", "{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{}\",\"metrics\":{{",
report.schema, report.schema,
report.kind, report.kind,
report.files, report.files,
report.bytes, report.bytes,
report.casefold_collisions, report.casefold_collisions,
report.fingerprint sha256_hex(&report.fingerprint)
); );
for (idx, (key, value)) in report.metrics.iter().enumerate() { for (idx, (key, value)) in report.metrics.iter().enumerate() {
if idx > 0 { if idx > 0 {
@@ -528,7 +517,7 @@ mod tests {
files: vec![ManifestEntry { files: vec![ManifestEntry {
path: "secret/payload.bin".to_string(), path: "secret/payload.bin".to_string(),
size: 4, size: 4,
hash: stable_hash(b"DATA"), hash: sha256(b"DATA"),
}], }],
casefold_collisions: Vec::new(), casefold_collisions: Vec::new(),
}; };
@@ -604,12 +593,12 @@ mod tests {
ManifestEntry { ManifestEntry {
path: "Textures/Foo.TEX".to_string(), path: "Textures/Foo.TEX".to_string(),
size: 1, size: 1,
hash: 1, hash: sha256(b"first"),
}, },
ManifestEntry { ManifestEntry {
path: "textures/foo.tex".to_string(), path: "textures/foo.tex".to_string(),
size: 1, size: 1,
hash: 2, hash: sha256(b"second"),
}, },
], ],
casefold_collisions: Vec::new(), casefold_collisions: Vec::new(),
@@ -633,12 +622,12 @@ mod tests {
files: vec![ManifestEntry { files: vec![ManifestEntry {
path: "a".to_string(), path: "a".to_string(),
size: 1, size: 1,
hash: 1, hash: sha256(b"before"),
}], }],
casefold_collisions: Vec::new(), casefold_collisions: Vec::new(),
}; };
let a = fingerprint(&manifest); let a = fingerprint(&manifest);
manifest.files[0].hash = 2; manifest.files[0].hash = sha256(b"after");
assert_ne!(a, fingerprint(&manifest)); assert_ne!(a, fingerprint(&manifest));
} }
@@ -658,7 +647,7 @@ mod tests {
bytes: 0, bytes: 0,
metrics: BTreeMap::new(), metrics: BTreeMap::new(),
casefold_collisions: 0, casefold_collisions: 0,
fingerprint: 0, fingerprint: sha256(b"empty-report"),
}; };
write_report_atomic(&tmp, &report).expect("write"); write_report_atomic(&tmp, &report).expect("write");
assert!(tmp.is_file()); assert!(tmp.is_file());
+1
View File
@@ -6,6 +6,7 @@ license.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
fparkan-binary = { path = "../fparkan-binary" }
fparkan-nres = { path = "../fparkan-nres" } fparkan-nres = { path = "../fparkan-nres" }
fparkan-path = { path = "../fparkan-path" } fparkan-path = { path = "../fparkan-path" }
fparkan-rsli = { path = "../fparkan-rsli" } fparkan-rsli = { path = "../fparkan-rsli" }
+6 -5
View File
@@ -1,6 +1,7 @@
#![forbid(unsafe_code)] #![forbid(unsafe_code)]
//! Resource identity and repository ports. //! Resource identity and repository ports.
use fparkan_binary::Sha256Digest;
use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName}; use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName};
use fparkan_vfs::{Vfs, VfsError}; use fparkan_vfs::{Vfs, VfsError};
use std::collections::BTreeMap; use std::collections::BTreeMap;
@@ -188,7 +189,7 @@ struct RepositoryState {
struct ArchiveSlot { struct ArchiveSlot {
path: NormalizedPath, path: NormalizedPath,
fingerprint: u64, fingerprint: Sha256Digest,
generation: u64, generation: u64,
kind: ArchiveKind, kind: ArchiveKind,
document: ArchiveDocument, document: ArchiveDocument,
@@ -378,7 +379,7 @@ impl CachedResourceRepository {
fn cached_id( fn cached_id(
&self, &self,
path: &NormalizedPath, path: &NormalizedPath,
fingerprint: u64, fingerprint: Sha256Digest,
) -> Result<Option<ArchiveId>, ResourceError> { ) -> Result<Option<ArchiveId>, ResourceError> {
let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?; let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?;
let Some(id) = state.paths.get(path.as_str()).copied() else { let Some(id) = state.paths.get(path.as_str()).copied() else {
@@ -504,7 +505,7 @@ impl ArchiveSlot {
fn decode_archive( fn decode_archive(
path: NormalizedPath, path: NormalizedPath,
bytes: Arc<[u8]>, bytes: Arc<[u8]>,
fingerprint: u64, fingerprint: Sha256Digest,
) -> Result<ArchiveSlot, ResourceError> { ) -> Result<ArchiveSlot, ResourceError> {
if bytes.starts_with(b"NRes") { if bytes.starts_with(b"NRes") {
let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible) let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible)
@@ -693,7 +694,7 @@ mod tests {
b"before" b"before"
); );
std::fs::write(&host_path, build_nres(&[("a.bin", b"after".as_slice())])) std::fs::write(&host_path, build_nres(&[("a.bin", b"after!".as_slice())]))
.expect("updated archive"); .expect("updated archive");
let reopened = repo.open_archive(&path).expect("open updated archive"); let reopened = repo.open_archive(&path).expect("open updated archive");
let second = repo let second = repo
@@ -706,7 +707,7 @@ mod tests {
assert!(matches!(repo.read(first), Err(ResourceError::StaleHandle))); assert!(matches!(repo.read(first), Err(ResourceError::StaleHandle)));
assert_eq!( assert_eq!(
repo.read(second).expect("read updated").as_slice(), repo.read(second).expect("read updated").as_slice(),
b"after" b"after!"
); );
let _ = std::fs::remove_dir_all(root); let _ = std::fs::remove_dir_all(root);
} }
+1
View File
@@ -6,6 +6,7 @@ license.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
fparkan-binary = { path = "../fparkan-binary" }
fparkan-path = { path = "../fparkan-path" } fparkan-path = { path = "../fparkan-path" }
[lints] [lints]
+88 -38
View File
@@ -1,19 +1,21 @@
#![forbid(unsafe_code)] #![forbid(unsafe_code)]
//! Virtual filesystem ports for resource loading. //! Virtual filesystem ports for resource loading.
use fparkan_binary::{sha256, Sha256Digest};
use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath}; use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath};
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::{Arc, Mutex};
use std::time::SystemTime;
/// VFS metadata. /// VFS metadata.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub struct VfsMetadata { pub struct VfsMetadata {
/// Byte length. /// Byte length.
pub len: u64, pub len: u64,
/// Stable-enough source fingerprint for cache invalidation. /// SHA-256 content fingerprint for cache invalidation.
pub fingerprint: u64, pub fingerprint: Sha256Digest,
} }
/// VFS entry. /// VFS entry.
@@ -80,6 +82,7 @@ pub trait Vfs: Send + Sync {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct DirectoryVfs { pub struct DirectoryVfs {
root: PathBuf, root: PathBuf,
fingerprint_cache: Arc<Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>>,
} }
impl DirectoryVfs { impl DirectoryVfs {
@@ -88,6 +91,7 @@ impl DirectoryVfs {
pub fn new(root: impl AsRef<Path>) -> Self { pub fn new(root: impl AsRef<Path>) -> Self {
Self { Self {
root: root.as_ref().to_path_buf(), root: root.as_ref().to_path_buf(),
fingerprint_cache: Arc::default(),
} }
} }
@@ -95,12 +99,23 @@ impl DirectoryVfs {
join_under(&self.root, path).map_err(|_| VfsError::Path)?; join_under(&self.root, path).map_err(|_| VfsError::Path)?;
resolve_casefolded(&self.root, path.as_str()) resolve_casefolded(&self.root, path.as_str())
} }
/// Stats `path` without following symlinks, then builds its `VfsMetadata`
/// through this VFS's shared fingerprint cache.
///
/// A failed stat is reported as `VfsError::Io`.
fn metadata_from_host_file(&self, path: &Path) -> Result<VfsMetadata, VfsError> {
    fs::symlink_metadata(path)
        .map_err(VfsError::Io)
        .and_then(|host_meta| {
            metadata_from_host_file_with_cache(path, &host_meta, &self.fingerprint_cache)
        })
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
struct CachedHostFingerprint {
len: u64,
modified: Option<SystemTime>,
fingerprint: Sha256Digest,
} }
impl Vfs for DirectoryVfs { impl Vfs for DirectoryVfs {
fn metadata(&self, path: &NormalizedPath) -> Result<VfsMetadata, VfsError> { fn metadata(&self, path: &NormalizedPath) -> Result<VfsMetadata, VfsError> {
let meta = fs::symlink_metadata(self.host_path(path)?).map_err(VfsError::Io)?; self.metadata_from_host_file(&self.host_path(path)?)
Ok(metadata_from_fs(&meta))
} }
fn read(&self, path: &NormalizedPath) -> Result<Arc<[u8]>, VfsError> { fn read(&self, path: &NormalizedPath) -> Result<Arc<[u8]>, VfsError> {
@@ -123,11 +138,15 @@ impl Vfs for DirectoryVfs {
let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?; let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?;
entries.push(VfsEntry { entries.push(VfsEntry {
path: prefix.clone(), path: prefix.clone(),
metadata: metadata_from_fs(&metadata), metadata: metadata_from_host_file_with_cache(
&base,
&metadata,
&self.fingerprint_cache,
)?,
}); });
return Ok(entries); return Ok(entries);
} }
list_recursive(&self.root, &base, &mut entries)?; list_recursive(&self.root, &base, &self.fingerprint_cache, &mut entries)?;
entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str())); entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str()));
Ok(entries) Ok(entries)
} }
@@ -174,7 +193,12 @@ fn select_casefolded_match(
} }
} }
fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<(), VfsError> { fn list_recursive(
root: &Path,
dir: &Path,
fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
out: &mut Vec<VfsEntry>,
) -> Result<(), VfsError> {
let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?; let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?;
let mut children = Vec::new(); let mut children = Vec::new();
for entry in read_dir { for entry in read_dir {
@@ -188,7 +212,7 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
return Err(VfsError::Path); return Err(VfsError::Path);
} }
if metadata.is_dir() { if metadata.is_dir() {
list_recursive(root, &child, out)?; list_recursive(root, &child, fingerprint_cache, out)?;
continue; continue;
} }
if !metadata.is_file() { if !metadata.is_file() {
@@ -203,25 +227,49 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
.map_err(|_| VfsError::Path)?; .map_err(|_| VfsError::Path)?;
out.push(VfsEntry { out.push(VfsEntry {
path, path,
metadata: metadata_from_fs(&metadata), metadata: metadata_from_host_file_with_cache(&child, &metadata, fingerprint_cache)?,
}); });
} }
Ok(()) Ok(())
} }
fn metadata_from_fs(metadata: &fs::Metadata) -> VfsMetadata { fn metadata_from_host_file_with_cache(
let mut fingerprint = 0xcbf2_9ce4_8422_2325; path: &Path,
hash_u64(&mut fingerprint, metadata.len()); metadata: &fs::Metadata,
if let Ok(modified) = metadata.modified() { fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) { ) -> Result<VfsMetadata, VfsError> {
hash_u64(&mut fingerprint, duration.as_secs()); if !metadata.is_file() {
hash_u64(&mut fingerprint, u64::from(duration.subsec_nanos())); return Err(VfsError::Path);
} }
let len = metadata.len();
let modified = metadata.modified().ok();
if let Some(cached) = fingerprint_cache
.lock()
.map_err(|_| VfsError::Path)?
.get(path)
.cloned()
.filter(|cached| cached.len == len && cached.modified == modified)
{
return Ok(VfsMetadata {
len,
fingerprint: cached.fingerprint,
});
} }
VfsMetadata {
len: metadata.len(), let bytes = fs::read(path).map_err(VfsError::Io)?;
let fingerprint = sha256(&bytes);
fingerprint_cache
.lock()
.map_err(|_| VfsError::Path)?
.insert(
path.to_path_buf(),
CachedHostFingerprint {
len,
modified,
fingerprint, fingerprint,
} },
);
Ok(VfsMetadata { len, fingerprint })
} }
/// In-memory VFS. /// In-memory VFS.
@@ -276,7 +324,7 @@ impl Vfs for MemoryVfs {
.ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?; .ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?;
Ok(VfsMetadata { Ok(VfsMetadata {
len: bytes.len() as u64, len: bytes.len() as u64,
fingerprint: stable_hash(bytes), fingerprint: sha256(bytes),
}) })
} }
@@ -305,7 +353,7 @@ impl Vfs for MemoryVfs {
path: normalized, path: normalized,
metadata: VfsMetadata { metadata: VfsMetadata {
len: bytes.len() as u64, len: bytes.len() as u64,
fingerprint: stable_hash(bytes), fingerprint: sha256(bytes),
}, },
}); });
} }
@@ -314,22 +362,6 @@ impl Vfs for MemoryVfs {
} }
} }
fn stable_hash(bytes: &[u8]) -> u64 {
let mut state = 0xcbf2_9ce4_8422_2325;
for byte in bytes {
state ^= u64::from(*byte);
state = state.wrapping_mul(0x0000_0100_0000_01b3);
}
state
}
fn hash_u64(state: &mut u64, value: u64) {
for byte in value.to_le_bytes() {
*state ^= u64::from(byte);
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
}
}
/// Layered VFS with deterministic first-layer precedence. /// Layered VFS with deterministic first-layer precedence.
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct OverlayVfs { pub struct OverlayVfs {
@@ -475,6 +507,24 @@ mod tests {
std::fs::remove_dir_all(root).expect("cleanup"); std::fs::remove_dir_all(root).expect("cleanup");
} }
/// Rewriting a file with different bytes of the same length must change the
/// fingerprint reported by `DirectoryVfs::metadata`.
// NOTE(review): both writes happen back to back; this presumably relies on the
// second write getting a distinct modification time so the fingerprint is
// recomputed — confirm on filesystems with coarse timestamps.
#[test]
fn directory_vfs_fingerprint_changes_for_same_length_content() {
    let root = unique_test_dir("content-fingerprint");
    let data_dir = root.join("DATA");
    let host_file = data_dir.join("File.bin");

    std::fs::create_dir_all(&data_dir).expect("mkdir");
    std::fs::write(&host_file, b"before").expect("write before");

    let vfs = DirectoryVfs::new(&root);
    let path = normalize_relative(b"DATA/File.bin", PathPolicy::StrictLegacy).expect("path");

    let first = vfs.metadata(&path).expect("before metadata");
    // Same byte count as the original payload, different content.
    std::fs::write(&host_file, b"after!").expect("write after");
    let second = vfs.metadata(&path).expect("after metadata");

    assert_eq!(first.len, second.len);
    assert_ne!(first.fingerprint, second.fingerprint);

    std::fs::remove_dir_all(root).expect("cleanup");
}
#[cfg(unix)] #[cfg(unix)]
#[test] #[test]
fn directory_vfs_rejects_symlink_escape() { fn directory_vfs_rejects_symlink_escape() {