fix: strengthen resource fingerprints
This commit is contained in:
Generated
+3
@@ -81,6 +81,7 @@ dependencies = [
|
||||
name = "fparkan-corpus"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fparkan-binary",
|
||||
"fparkan-path",
|
||||
]
|
||||
|
||||
@@ -199,6 +200,7 @@ dependencies = [
|
||||
name = "fparkan-resource"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fparkan-binary",
|
||||
"fparkan-nres",
|
||||
"fparkan-path",
|
||||
"fparkan-rsli",
|
||||
@@ -263,6 +265,7 @@ dependencies = [
|
||||
name = "fparkan-vfs"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fparkan-binary",
|
||||
"fparkan-path",
|
||||
]
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// SHA-256 digest bytes.
|
||||
pub type Sha256Digest = [u8; 32];
|
||||
|
||||
/// Parser limits shared by binary formats.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Limits {
|
||||
@@ -262,6 +265,183 @@ pub fn read_lp_bytes(cursor: &mut Cursor<'_>, max: u32) -> Result<Vec<u8>, Decod
|
||||
Ok(cursor.read_exact(len)?.to_vec())
|
||||
}
|
||||
|
||||
/// Computes a SHA-256 content digest without external dependencies.
|
||||
#[must_use]
|
||||
pub fn sha256(bytes: &[u8]) -> Sha256Digest {
|
||||
const K: [u32; 64] = [
|
||||
0x428a_2f98,
|
||||
0x7137_4491,
|
||||
0xb5c0_fbcf,
|
||||
0xe9b5_dba5,
|
||||
0x3956_c25b,
|
||||
0x59f1_11f1,
|
||||
0x923f_82a4,
|
||||
0xab1c_5ed5,
|
||||
0xd807_aa98,
|
||||
0x1283_5b01,
|
||||
0x2431_85be,
|
||||
0x550c_7dc3,
|
||||
0x72be_5d74,
|
||||
0x80de_b1fe,
|
||||
0x9bdc_06a7,
|
||||
0xc19b_f174,
|
||||
0xe49b_69c1,
|
||||
0xefbe_4786,
|
||||
0x0fc1_9dc6,
|
||||
0x240c_a1cc,
|
||||
0x2de9_2c6f,
|
||||
0x4a74_84aa,
|
||||
0x5cb0_a9dc,
|
||||
0x76f9_88da,
|
||||
0x983e_5152,
|
||||
0xa831_c66d,
|
||||
0xb003_27c8,
|
||||
0xbf59_7fc7,
|
||||
0xc6e0_0bf3,
|
||||
0xd5a7_9147,
|
||||
0x06ca_6351,
|
||||
0x1429_2967,
|
||||
0x27b7_0a85,
|
||||
0x2e1b_2138,
|
||||
0x4d2c_6dfc,
|
||||
0x5338_0d13,
|
||||
0x650a_7354,
|
||||
0x766a_0abb,
|
||||
0x81c2_c92e,
|
||||
0x9272_2c85,
|
||||
0xa2bf_e8a1,
|
||||
0xa81a_664b,
|
||||
0xc24b_8b70,
|
||||
0xc76c_51a3,
|
||||
0xd192_e819,
|
||||
0xd699_0624,
|
||||
0xf40e_3585,
|
||||
0x106a_a070,
|
||||
0x19a4_c116,
|
||||
0x1e37_6c08,
|
||||
0x2748_774c,
|
||||
0x34b0_bcb5,
|
||||
0x391c_0cb3,
|
||||
0x4ed8_aa4a,
|
||||
0x5b9c_ca4f,
|
||||
0x682e_6ff3,
|
||||
0x748f_82ee,
|
||||
0x78a5_636f,
|
||||
0x84c8_7814,
|
||||
0x8cc7_0208,
|
||||
0x90be_fffa,
|
||||
0xa450_6ceb,
|
||||
0xbef9_a3f7,
|
||||
0xc671_78f2,
|
||||
];
|
||||
let mut h = [
|
||||
0x6a09_e667,
|
||||
0xbb67_ae85,
|
||||
0x3c6e_f372,
|
||||
0xa54f_f53a,
|
||||
0x510e_527f,
|
||||
0x9b05_688c,
|
||||
0x1f83_d9ab,
|
||||
0x5be0_cd19,
|
||||
];
|
||||
|
||||
let bit_len = (bytes.len() as u64).wrapping_mul(8);
|
||||
let mut chunks = bytes.chunks_exact(64);
|
||||
for chunk in &mut chunks {
|
||||
compress_sha256_chunk(&mut h, chunk, &K);
|
||||
}
|
||||
|
||||
let tail = chunks.remainder();
|
||||
let mut block = [0u8; 128];
|
||||
block[..tail.len()].copy_from_slice(tail);
|
||||
block[tail.len()] = 0x80;
|
||||
let padded_len = if tail.len() < 56 { 64 } else { 128 };
|
||||
block[padded_len - 8..padded_len].copy_from_slice(&bit_len.to_be_bytes());
|
||||
for chunk in block[..padded_len].chunks_exact(64) {
|
||||
compress_sha256_chunk(&mut h, chunk, &K);
|
||||
}
|
||||
|
||||
let mut out = [0u8; 32];
|
||||
for (idx, word) in h.iter().enumerate() {
|
||||
out[idx * 4..idx * 4 + 4].copy_from_slice(&word.to_be_bytes());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Renders a SHA-256 digest as lowercase hexadecimal.
|
||||
#[must_use]
|
||||
pub fn sha256_hex(digest: &Sha256Digest) -> String {
|
||||
const HEX: &[u8; 16] = b"0123456789abcdef";
|
||||
let mut out = String::with_capacity(64);
|
||||
for byte in digest {
|
||||
out.push(char::from(HEX[usize::from(byte >> 4)]));
|
||||
out.push(char::from(HEX[usize::from(byte & 0x0f)]));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Folds one 64-byte message block into the running SHA-256 state `h`.
///
/// `chunk` is read as sixteen big-endian 32-bit words which are expanded into
/// a 64-word schedule; `k` supplies the per-round constants. Bytes past the
/// first 64 are ignored.
///
/// # Panics
///
/// Panics if `chunk` holds fewer than 64 bytes.
// The single-letter names mirror the working variables of the SHA-256 round function.
#[allow(clippy::many_single_char_names)]
fn compress_sha256_chunk(h: &mut [u32; 8], chunk: &[u8], k: &[u32; 64]) {
    // Message schedule: 16 words taken from the block, 48 derived from them.
    let mut w = [0u32; 64];
    for (idx, word) in w.iter_mut().take(16).enumerate() {
        let base = idx * 4;
        *word = u32::from_be_bytes([
            chunk[base],
            chunk[base + 1],
            chunk[base + 2],
            chunk[base + 3],
        ]);
    }
    for idx in 16..64 {
        let s0 = w[idx - 15].rotate_right(7) ^ w[idx - 15].rotate_right(18) ^ (w[idx - 15] >> 3);
        let s1 = w[idx - 2].rotate_right(17) ^ w[idx - 2].rotate_right(19) ^ (w[idx - 2] >> 10);
        w[idx] = w[idx - 16]
            .wrapping_add(s0)
            .wrapping_add(w[idx - 7])
            .wrapping_add(s1);
    }

    let mut a = h[0];
    let mut b = h[1];
    let mut c = h[2];
    let mut d = h[3];
    let mut e = h[4];
    let mut f = h[5];
    let mut g = h[6];
    let mut hh = h[7];

    for idx in 0..64 {
        let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
        let ch = (e & f) ^ ((!e) & g);
        let temp1 = hh
            .wrapping_add(s1)
            .wrapping_add(ch)
            .wrapping_add(k[idx])
            .wrapping_add(w[idx]);
        let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
        let maj = (a & b) ^ (a & c) ^ (b & c);
        let temp2 = s0.wrapping_add(maj);

        hh = g;
        g = f;
        f = e;
        e = d.wrapping_add(temp1);
        d = c;
        c = b;
        b = a;
        a = temp1.wrapping_add(temp2);
    }

    // Feed the round output back into the chaining state.
    h[0] = h[0].wrapping_add(a);
    h[1] = h[1].wrapping_add(b);
    h[2] = h[2].wrapping_add(c);
    h[3] = h[3].wrapping_add(d);
    h[4] = h[4].wrapping_add(e);
    h[5] = h[5].wrapping_add(f);
    h[6] = h[6].wrapping_add(g);
    h[7] = h[7].wrapping_add(hh);
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -305,4 +485,16 @@ mod tests {
|
||||
);
|
||||
assert_eq!(cursor.offset(), 4);
|
||||
}
|
||||
|
||||
#[test]
fn sha256_matches_known_vectors() {
    // Empty message: padding only.
    assert_eq!(
        sha256_hex(&sha256(b"")),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    );
    // Short message: single padded block.
    assert_eq!(
        sha256_hex(&sha256(b"abc")),
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    );
    // 56-byte message: the length field no longer fits in the first block,
    // so this covers the two-block padding branch.
    assert_eq!(
        sha256_hex(&sha256(
            b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
        )),
        "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"
    );
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
fparkan-binary = { path = "../fparkan-binary" }
|
||||
fparkan-path = { path = "../fparkan-path" }
|
||||
|
||||
[lints]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#![forbid(unsafe_code)]
|
||||
//! Licensed corpus discovery and aggregate reports.
|
||||
|
||||
use fparkan_binary::{sha256, sha256_hex, Sha256Digest};
|
||||
use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy};
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::fmt;
|
||||
@@ -39,8 +40,8 @@ pub struct ManifestEntry {
|
||||
pub path: String,
|
||||
/// File size in bytes.
|
||||
pub size: u64,
|
||||
/// Stable content fingerprint.
|
||||
pub hash: u64,
|
||||
/// SHA-256 content fingerprint.
|
||||
pub hash: Sha256Digest,
|
||||
}
|
||||
|
||||
/// Corpus manifest.
|
||||
@@ -70,7 +71,7 @@ pub struct CorpusReport {
|
||||
/// Casefold collision count.
|
||||
pub casefold_collisions: usize,
|
||||
/// Manifest fingerprint.
|
||||
pub fingerprint: u64,
|
||||
pub fingerprint: Sha256Digest,
|
||||
}
|
||||
|
||||
/// Corpus error.
|
||||
@@ -187,7 +188,7 @@ fn walk(
|
||||
out.push(ManifestEntry {
|
||||
path: normalized.as_str().to_string(),
|
||||
size: metadata.len(),
|
||||
hash: stable_hash(&bytes),
|
||||
hash: sha256(&bytes),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
@@ -352,27 +353,15 @@ fn inspect_nres_entries(bytes: &[u8]) -> Option<Vec<NresEntryBrief>> {
|
||||
|
||||
/// Computes stable manifest fingerprint.
|
||||
#[must_use]
|
||||
pub fn fingerprint(manifest: &CorpusManifest) -> u64 {
|
||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
||||
pub fn fingerprint(manifest: &CorpusManifest) -> Sha256Digest {
|
||||
let mut bytes = Vec::new();
|
||||
for file in &manifest.files {
|
||||
hash_into(&mut state, file.path.as_bytes());
|
||||
hash_into(&mut state, &file.size.to_le_bytes());
|
||||
hash_into(&mut state, &file.hash.to_le_bytes());
|
||||
}
|
||||
state
|
||||
}
|
||||
|
||||
fn stable_hash(bytes: &[u8]) -> u64 {
|
||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
||||
hash_into(&mut state, bytes);
|
||||
state
|
||||
}
|
||||
|
||||
fn hash_into(state: &mut u64, bytes: &[u8]) {
|
||||
for byte in bytes {
|
||||
*state ^= u64::from(*byte);
|
||||
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
||||
bytes.extend_from_slice(file.path.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(&file.size.to_le_bytes());
|
||||
bytes.extend_from_slice(&file.hash);
|
||||
}
|
||||
sha256(&bytes)
|
||||
}
|
||||
|
||||
/// Writes report atomically.
|
||||
@@ -413,13 +402,13 @@ pub fn write_report_atomic(path: &Path, report: &CorpusReport) -> Result<(), Cor
|
||||
#[must_use]
|
||||
pub fn render_report_json(report: &CorpusReport) -> String {
|
||||
let mut out = format!(
|
||||
"{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{:016x}\",\"metrics\":{{",
|
||||
"{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{}\",\"metrics\":{{",
|
||||
report.schema,
|
||||
report.kind,
|
||||
report.files,
|
||||
report.bytes,
|
||||
report.casefold_collisions,
|
||||
report.fingerprint
|
||||
sha256_hex(&report.fingerprint)
|
||||
);
|
||||
for (idx, (key, value)) in report.metrics.iter().enumerate() {
|
||||
if idx > 0 {
|
||||
@@ -528,7 +517,7 @@ mod tests {
|
||||
files: vec![ManifestEntry {
|
||||
path: "secret/payload.bin".to_string(),
|
||||
size: 4,
|
||||
hash: stable_hash(b"DATA"),
|
||||
hash: sha256(b"DATA"),
|
||||
}],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
@@ -604,12 +593,12 @@ mod tests {
|
||||
ManifestEntry {
|
||||
path: "Textures/Foo.TEX".to_string(),
|
||||
size: 1,
|
||||
hash: 1,
|
||||
hash: sha256(b"first"),
|
||||
},
|
||||
ManifestEntry {
|
||||
path: "textures/foo.tex".to_string(),
|
||||
size: 1,
|
||||
hash: 2,
|
||||
hash: sha256(b"second"),
|
||||
},
|
||||
],
|
||||
casefold_collisions: Vec::new(),
|
||||
@@ -633,12 +622,12 @@ mod tests {
|
||||
files: vec![ManifestEntry {
|
||||
path: "a".to_string(),
|
||||
size: 1,
|
||||
hash: 1,
|
||||
hash: sha256(b"before"),
|
||||
}],
|
||||
casefold_collisions: Vec::new(),
|
||||
};
|
||||
let a = fingerprint(&manifest);
|
||||
manifest.files[0].hash = 2;
|
||||
manifest.files[0].hash = sha256(b"after");
|
||||
assert_ne!(a, fingerprint(&manifest));
|
||||
}
|
||||
|
||||
@@ -658,7 +647,7 @@ mod tests {
|
||||
bytes: 0,
|
||||
metrics: BTreeMap::new(),
|
||||
casefold_collisions: 0,
|
||||
fingerprint: 0,
|
||||
fingerprint: sha256(b"empty-report"),
|
||||
};
|
||||
write_report_atomic(&tmp, &report).expect("write");
|
||||
assert!(tmp.is_file());
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
fparkan-binary = { path = "../fparkan-binary" }
|
||||
fparkan-nres = { path = "../fparkan-nres" }
|
||||
fparkan-path = { path = "../fparkan-path" }
|
||||
fparkan-rsli = { path = "../fparkan-rsli" }
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#![forbid(unsafe_code)]
|
||||
//! Resource identity and repository ports.
|
||||
|
||||
use fparkan_binary::Sha256Digest;
|
||||
use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName};
|
||||
use fparkan_vfs::{Vfs, VfsError};
|
||||
use std::collections::BTreeMap;
|
||||
@@ -188,7 +189,7 @@ struct RepositoryState {
|
||||
|
||||
struct ArchiveSlot {
|
||||
path: NormalizedPath,
|
||||
fingerprint: u64,
|
||||
fingerprint: Sha256Digest,
|
||||
generation: u64,
|
||||
kind: ArchiveKind,
|
||||
document: ArchiveDocument,
|
||||
@@ -378,7 +379,7 @@ impl CachedResourceRepository {
|
||||
fn cached_id(
|
||||
&self,
|
||||
path: &NormalizedPath,
|
||||
fingerprint: u64,
|
||||
fingerprint: Sha256Digest,
|
||||
) -> Result<Option<ArchiveId>, ResourceError> {
|
||||
let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?;
|
||||
let Some(id) = state.paths.get(path.as_str()).copied() else {
|
||||
@@ -504,7 +505,7 @@ impl ArchiveSlot {
|
||||
fn decode_archive(
|
||||
path: NormalizedPath,
|
||||
bytes: Arc<[u8]>,
|
||||
fingerprint: u64,
|
||||
fingerprint: Sha256Digest,
|
||||
) -> Result<ArchiveSlot, ResourceError> {
|
||||
if bytes.starts_with(b"NRes") {
|
||||
let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible)
|
||||
@@ -693,7 +694,7 @@ mod tests {
|
||||
b"before"
|
||||
);
|
||||
|
||||
std::fs::write(&host_path, build_nres(&[("a.bin", b"after".as_slice())]))
|
||||
std::fs::write(&host_path, build_nres(&[("a.bin", b"after!".as_slice())]))
|
||||
.expect("updated archive");
|
||||
let reopened = repo.open_archive(&path).expect("open updated archive");
|
||||
let second = repo
|
||||
@@ -706,7 +707,7 @@ mod tests {
|
||||
assert!(matches!(repo.read(first), Err(ResourceError::StaleHandle)));
|
||||
assert_eq!(
|
||||
repo.read(second).expect("read updated").as_slice(),
|
||||
b"after"
|
||||
b"after!"
|
||||
);
|
||||
let _ = std::fs::remove_dir_all(root);
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
fparkan-binary = { path = "../fparkan-binary" }
|
||||
fparkan-path = { path = "../fparkan-path" }
|
||||
|
||||
[lints]
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
#![forbid(unsafe_code)]
|
||||
//! Virtual filesystem ports for resource loading.
|
||||
|
||||
use fparkan_binary::{sha256, Sha256Digest};
|
||||
use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath};
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::SystemTime;
|
||||
|
||||
/// VFS metadata.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct VfsMetadata {
|
||||
/// Byte length.
|
||||
pub len: u64,
|
||||
/// Stable-enough source fingerprint for cache invalidation.
|
||||
pub fingerprint: u64,
|
||||
/// SHA-256 content fingerprint for cache invalidation.
|
||||
pub fingerprint: Sha256Digest,
|
||||
}
|
||||
|
||||
/// VFS entry.
|
||||
@@ -80,6 +82,7 @@ pub trait Vfs: Send + Sync {
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DirectoryVfs {
|
||||
root: PathBuf,
|
||||
fingerprint_cache: Arc<Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>>,
|
||||
}
|
||||
|
||||
impl DirectoryVfs {
|
||||
@@ -88,6 +91,7 @@ impl DirectoryVfs {
|
||||
pub fn new(root: impl AsRef<Path>) -> Self {
|
||||
Self {
|
||||
root: root.as_ref().to_path_buf(),
|
||||
fingerprint_cache: Arc::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,12 +99,23 @@ impl DirectoryVfs {
|
||||
join_under(&self.root, path).map_err(|_| VfsError::Path)?;
|
||||
resolve_casefolded(&self.root, path.as_str())
|
||||
}
|
||||
|
||||
fn metadata_from_host_file(&self, path: &Path) -> Result<VfsMetadata, VfsError> {
|
||||
let metadata = fs::symlink_metadata(path).map_err(VfsError::Io)?;
|
||||
metadata_from_host_file_with_cache(path, &metadata, &self.fingerprint_cache)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct CachedHostFingerprint {
|
||||
len: u64,
|
||||
modified: Option<SystemTime>,
|
||||
fingerprint: Sha256Digest,
|
||||
}
|
||||
|
||||
impl Vfs for DirectoryVfs {
|
||||
fn metadata(&self, path: &NormalizedPath) -> Result<VfsMetadata, VfsError> {
|
||||
let meta = fs::symlink_metadata(self.host_path(path)?).map_err(VfsError::Io)?;
|
||||
Ok(metadata_from_fs(&meta))
|
||||
self.metadata_from_host_file(&self.host_path(path)?)
|
||||
}
|
||||
|
||||
fn read(&self, path: &NormalizedPath) -> Result<Arc<[u8]>, VfsError> {
|
||||
@@ -123,11 +138,15 @@ impl Vfs for DirectoryVfs {
|
||||
let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?;
|
||||
entries.push(VfsEntry {
|
||||
path: prefix.clone(),
|
||||
metadata: metadata_from_fs(&metadata),
|
||||
metadata: metadata_from_host_file_with_cache(
|
||||
&base,
|
||||
&metadata,
|
||||
&self.fingerprint_cache,
|
||||
)?,
|
||||
});
|
||||
return Ok(entries);
|
||||
}
|
||||
list_recursive(&self.root, &base, &mut entries)?;
|
||||
list_recursive(&self.root, &base, &self.fingerprint_cache, &mut entries)?;
|
||||
entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str()));
|
||||
Ok(entries)
|
||||
}
|
||||
@@ -174,7 +193,12 @@ fn select_casefolded_match(
|
||||
}
|
||||
}
|
||||
|
||||
fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<(), VfsError> {
|
||||
fn list_recursive(
|
||||
root: &Path,
|
||||
dir: &Path,
|
||||
fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
|
||||
out: &mut Vec<VfsEntry>,
|
||||
) -> Result<(), VfsError> {
|
||||
let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?;
|
||||
let mut children = Vec::new();
|
||||
for entry in read_dir {
|
||||
@@ -188,7 +212,7 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
|
||||
return Err(VfsError::Path);
|
||||
}
|
||||
if metadata.is_dir() {
|
||||
list_recursive(root, &child, out)?;
|
||||
list_recursive(root, &child, fingerprint_cache, out)?;
|
||||
continue;
|
||||
}
|
||||
if !metadata.is_file() {
|
||||
@@ -203,25 +227,49 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
|
||||
.map_err(|_| VfsError::Path)?;
|
||||
out.push(VfsEntry {
|
||||
path,
|
||||
metadata: metadata_from_fs(&metadata),
|
||||
metadata: metadata_from_host_file_with_cache(&child, &metadata, fingerprint_cache)?,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn metadata_from_fs(metadata: &fs::Metadata) -> VfsMetadata {
|
||||
let mut fingerprint = 0xcbf2_9ce4_8422_2325;
|
||||
hash_u64(&mut fingerprint, metadata.len());
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) {
|
||||
hash_u64(&mut fingerprint, duration.as_secs());
|
||||
hash_u64(&mut fingerprint, u64::from(duration.subsec_nanos()));
|
||||
fn metadata_from_host_file_with_cache(
|
||||
path: &Path,
|
||||
metadata: &fs::Metadata,
|
||||
fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
|
||||
) -> Result<VfsMetadata, VfsError> {
|
||||
if !metadata.is_file() {
|
||||
return Err(VfsError::Path);
|
||||
}
|
||||
let len = metadata.len();
|
||||
let modified = metadata.modified().ok();
|
||||
if let Some(cached) = fingerprint_cache
|
||||
.lock()
|
||||
.map_err(|_| VfsError::Path)?
|
||||
.get(path)
|
||||
.cloned()
|
||||
.filter(|cached| cached.len == len && cached.modified == modified)
|
||||
{
|
||||
return Ok(VfsMetadata {
|
||||
len,
|
||||
fingerprint: cached.fingerprint,
|
||||
});
|
||||
}
|
||||
VfsMetadata {
|
||||
len: metadata.len(),
|
||||
|
||||
let bytes = fs::read(path).map_err(VfsError::Io)?;
|
||||
let fingerprint = sha256(&bytes);
|
||||
fingerprint_cache
|
||||
.lock()
|
||||
.map_err(|_| VfsError::Path)?
|
||||
.insert(
|
||||
path.to_path_buf(),
|
||||
CachedHostFingerprint {
|
||||
len,
|
||||
modified,
|
||||
fingerprint,
|
||||
}
|
||||
},
|
||||
);
|
||||
Ok(VfsMetadata { len, fingerprint })
|
||||
}
|
||||
|
||||
/// In-memory VFS.
|
||||
@@ -276,7 +324,7 @@ impl Vfs for MemoryVfs {
|
||||
.ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?;
|
||||
Ok(VfsMetadata {
|
||||
len: bytes.len() as u64,
|
||||
fingerprint: stable_hash(bytes),
|
||||
fingerprint: sha256(bytes),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -305,7 +353,7 @@ impl Vfs for MemoryVfs {
|
||||
path: normalized,
|
||||
metadata: VfsMetadata {
|
||||
len: bytes.len() as u64,
|
||||
fingerprint: stable_hash(bytes),
|
||||
fingerprint: sha256(bytes),
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -314,22 +362,6 @@ impl Vfs for MemoryVfs {
|
||||
}
|
||||
}
|
||||
|
||||
fn stable_hash(bytes: &[u8]) -> u64 {
|
||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
||||
for byte in bytes {
|
||||
state ^= u64::from(*byte);
|
||||
state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
||||
}
|
||||
state
|
||||
}
|
||||
|
||||
fn hash_u64(state: &mut u64, value: u64) {
|
||||
for byte in value.to_le_bytes() {
|
||||
*state ^= u64::from(byte);
|
||||
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
||||
}
|
||||
}
|
||||
|
||||
/// Layered VFS with deterministic first-layer precedence.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct OverlayVfs {
|
||||
@@ -475,6 +507,24 @@ mod tests {
|
||||
std::fs::remove_dir_all(root).expect("cleanup");
|
||||
}
|
||||
|
||||
#[test]
fn directory_vfs_fingerprint_changes_for_same_length_content() {
    let root = unique_test_dir("content-fingerprint");
    let data_dir = root.join("DATA");
    let file = data_dir.join("File.bin");
    std::fs::create_dir_all(&data_dir).expect("mkdir");
    std::fs::write(&file, b"before").expect("write before");

    let vfs = DirectoryVfs::new(&root);
    let path = normalize_relative(b"DATA/File.bin", PathPolicy::StrictLegacy).expect("path");
    let before = vfs.metadata(&path).expect("before metadata");
    // Same byte count as "before": only the content differs.
    std::fs::write(&file, b"after!").expect("write after");
    let after = vfs.metadata(&path).expect("after metadata");

    assert_eq!(before.len, after.len);
    assert_ne!(before.fingerprint, after.fingerprint);

    std::fs::remove_dir_all(root).expect("cleanup");
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[test]
|
||||
fn directory_vfs_rejects_symlink_escape() {
|
||||
|
||||
Reference in New Issue
Block a user