fix: strengthen resource fingerprints
This commit is contained in:
Generated
+3
@@ -81,6 +81,7 @@ dependencies = [
|
|||||||
name = "fparkan-corpus"
|
name = "fparkan-corpus"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"fparkan-binary",
|
||||||
"fparkan-path",
|
"fparkan-path",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -199,6 +200,7 @@ dependencies = [
|
|||||||
name = "fparkan-resource"
|
name = "fparkan-resource"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"fparkan-binary",
|
||||||
"fparkan-nres",
|
"fparkan-nres",
|
||||||
"fparkan-path",
|
"fparkan-path",
|
||||||
"fparkan-rsli",
|
"fparkan-rsli",
|
||||||
@@ -263,6 +265,7 @@ dependencies = [
|
|||||||
name = "fparkan-vfs"
|
name = "fparkan-vfs"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"fparkan-binary",
|
||||||
"fparkan-path",
|
"fparkan-path",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
/// SHA-256 digest bytes.
|
||||||
|
pub type Sha256Digest = [u8; 32];
|
||||||
|
|
||||||
/// Parser limits shared by binary formats.
|
/// Parser limits shared by binary formats.
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
pub struct Limits {
|
pub struct Limits {
|
||||||
@@ -262,6 +265,183 @@ pub fn read_lp_bytes(cursor: &mut Cursor<'_>, max: u32) -> Result<Vec<u8>, Decod
|
|||||||
Ok(cursor.read_exact(len)?.to_vec())
|
Ok(cursor.read_exact(len)?.to_vec())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes a SHA-256 content digest without external dependencies.
|
||||||
|
#[must_use]
|
||||||
|
pub fn sha256(bytes: &[u8]) -> Sha256Digest {
|
||||||
|
const K: [u32; 64] = [
|
||||||
|
0x428a_2f98,
|
||||||
|
0x7137_4491,
|
||||||
|
0xb5c0_fbcf,
|
||||||
|
0xe9b5_dba5,
|
||||||
|
0x3956_c25b,
|
||||||
|
0x59f1_11f1,
|
||||||
|
0x923f_82a4,
|
||||||
|
0xab1c_5ed5,
|
||||||
|
0xd807_aa98,
|
||||||
|
0x1283_5b01,
|
||||||
|
0x2431_85be,
|
||||||
|
0x550c_7dc3,
|
||||||
|
0x72be_5d74,
|
||||||
|
0x80de_b1fe,
|
||||||
|
0x9bdc_06a7,
|
||||||
|
0xc19b_f174,
|
||||||
|
0xe49b_69c1,
|
||||||
|
0xefbe_4786,
|
||||||
|
0x0fc1_9dc6,
|
||||||
|
0x240c_a1cc,
|
||||||
|
0x2de9_2c6f,
|
||||||
|
0x4a74_84aa,
|
||||||
|
0x5cb0_a9dc,
|
||||||
|
0x76f9_88da,
|
||||||
|
0x983e_5152,
|
||||||
|
0xa831_c66d,
|
||||||
|
0xb003_27c8,
|
||||||
|
0xbf59_7fc7,
|
||||||
|
0xc6e0_0bf3,
|
||||||
|
0xd5a7_9147,
|
||||||
|
0x06ca_6351,
|
||||||
|
0x1429_2967,
|
||||||
|
0x27b7_0a85,
|
||||||
|
0x2e1b_2138,
|
||||||
|
0x4d2c_6dfc,
|
||||||
|
0x5338_0d13,
|
||||||
|
0x650a_7354,
|
||||||
|
0x766a_0abb,
|
||||||
|
0x81c2_c92e,
|
||||||
|
0x9272_2c85,
|
||||||
|
0xa2bf_e8a1,
|
||||||
|
0xa81a_664b,
|
||||||
|
0xc24b_8b70,
|
||||||
|
0xc76c_51a3,
|
||||||
|
0xd192_e819,
|
||||||
|
0xd699_0624,
|
||||||
|
0xf40e_3585,
|
||||||
|
0x106a_a070,
|
||||||
|
0x19a4_c116,
|
||||||
|
0x1e37_6c08,
|
||||||
|
0x2748_774c,
|
||||||
|
0x34b0_bcb5,
|
||||||
|
0x391c_0cb3,
|
||||||
|
0x4ed8_aa4a,
|
||||||
|
0x5b9c_ca4f,
|
||||||
|
0x682e_6ff3,
|
||||||
|
0x748f_82ee,
|
||||||
|
0x78a5_636f,
|
||||||
|
0x84c8_7814,
|
||||||
|
0x8cc7_0208,
|
||||||
|
0x90be_fffa,
|
||||||
|
0xa450_6ceb,
|
||||||
|
0xbef9_a3f7,
|
||||||
|
0xc671_78f2,
|
||||||
|
];
|
||||||
|
let mut h = [
|
||||||
|
0x6a09_e667,
|
||||||
|
0xbb67_ae85,
|
||||||
|
0x3c6e_f372,
|
||||||
|
0xa54f_f53a,
|
||||||
|
0x510e_527f,
|
||||||
|
0x9b05_688c,
|
||||||
|
0x1f83_d9ab,
|
||||||
|
0x5be0_cd19,
|
||||||
|
];
|
||||||
|
|
||||||
|
let bit_len = (bytes.len() as u64).wrapping_mul(8);
|
||||||
|
let mut chunks = bytes.chunks_exact(64);
|
||||||
|
for chunk in &mut chunks {
|
||||||
|
compress_sha256_chunk(&mut h, chunk, &K);
|
||||||
|
}
|
||||||
|
|
||||||
|
let tail = chunks.remainder();
|
||||||
|
let mut block = [0u8; 128];
|
||||||
|
block[..tail.len()].copy_from_slice(tail);
|
||||||
|
block[tail.len()] = 0x80;
|
||||||
|
let padded_len = if tail.len() < 56 { 64 } else { 128 };
|
||||||
|
block[padded_len - 8..padded_len].copy_from_slice(&bit_len.to_be_bytes());
|
||||||
|
for chunk in block[..padded_len].chunks_exact(64) {
|
||||||
|
compress_sha256_chunk(&mut h, chunk, &K);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut out = [0u8; 32];
|
||||||
|
for (idx, word) in h.iter().enumerate() {
|
||||||
|
out[idx * 4..idx * 4 + 4].copy_from_slice(&word.to_be_bytes());
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Renders a SHA-256 digest as lowercase hexadecimal.
|
||||||
|
#[must_use]
|
||||||
|
pub fn sha256_hex(digest: &Sha256Digest) -> String {
|
||||||
|
const HEX: &[u8; 16] = b"0123456789abcdef";
|
||||||
|
let mut out = String::with_capacity(64);
|
||||||
|
for byte in digest {
|
||||||
|
out.push(char::from(HEX[usize::from(byte >> 4)]));
|
||||||
|
out.push(char::from(HEX[usize::from(byte & 0x0f)]));
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Folds one 64-byte message block into the running SHA-256 state.
///
/// `h` is updated in place, `chunk` supplies the block bytes, and `k`
/// supplies the per-round constants. Only the first 64 bytes of `chunk` are
/// read.
///
/// # Panics
///
/// Panics if `chunk` holds fewer than 64 bytes.
// The single-letter working variables keep the names used by the SHA-256
// specification, which is easier to audit than invented names.
#[allow(clippy::many_single_char_names)]
fn compress_sha256_chunk(h: &mut [u32; 8], chunk: &[u8], k: &[u32; 64]) {
    // One slice operation performs the length check for the whole block.
    let block = &chunk[..64];

    // Message schedule: the first 16 words are the block read big-endian,
    // the remaining 48 are derived from earlier words.
    let mut w = [0u32; 64];
    for (word, quad) in w.iter_mut().zip(block.chunks_exact(4)) {
        *word = u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]);
    }
    for idx in 16..64 {
        let w15 = w[idx - 15];
        let w2 = w[idx - 2];
        let s0 = w15.rotate_right(7) ^ w15.rotate_right(18) ^ (w15 >> 3);
        let s1 = w2.rotate_right(17) ^ w2.rotate_right(19) ^ (w2 >> 10);
        w[idx] = w[idx - 16]
            .wrapping_add(s0)
            .wrapping_add(w[idx - 7])
            .wrapping_add(s1);
    }

    // Working variables start from the current state.
    let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = *h;

    for (&round_k, &round_w) in k.iter().zip(w.iter()) {
        let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
        let ch = (e & f) ^ ((!e) & g);
        let temp1 = hh
            .wrapping_add(s1)
            .wrapping_add(ch)
            .wrapping_add(round_k)
            .wrapping_add(round_w);
        let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
        let maj = (a & b) ^ (a & c) ^ (b & c);
        let temp2 = s0.wrapping_add(maj);

        hh = g;
        g = f;
        f = e;
        e = d.wrapping_add(temp1);
        d = c;
        c = b;
        b = a;
        a = temp1.wrapping_add(temp2);
    }

    // Add the working variables back into the state, word by word.
    for (state, working) in h.iter_mut().zip([a, b, c, d, e, f, g, hh].iter()) {
        *state = state.wrapping_add(*working);
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -305,4 +485,16 @@ mod tests {
|
|||||||
);
|
);
|
||||||
assert_eq!(cursor.offset(), 4);
|
assert_eq!(cursor.offset(), 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn sha256_matches_known_vectors() {
    // Empty input: a single padding-only block.
    assert_eq!(
        sha256_hex(&sha256(b"")),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    );
    // Short input: tail and padding share one block.
    assert_eq!(
        sha256_hex(&sha256(b"abc")),
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    );
    // 56-byte input: the length field no longer fits, so padding needs a second block.
    assert_eq!(
        sha256_hex(&sha256(
            b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
        )),
        "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"
    );
    // 112-byte input: one full block goes through the main loop before the padded tail.
    assert_eq!(
        sha256_hex(&sha256(
            b"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
        )),
        "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"
    );
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ license.workspace = true
|
|||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fparkan-binary = { path = "../fparkan-binary" }
|
||||||
fparkan-path = { path = "../fparkan-path" }
|
fparkan-path = { path = "../fparkan-path" }
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#![forbid(unsafe_code)]
|
#![forbid(unsafe_code)]
|
||||||
//! Licensed corpus discovery and aggregate reports.
|
//! Licensed corpus discovery and aggregate reports.
|
||||||
|
|
||||||
|
use fparkan_binary::{sha256, sha256_hex, Sha256Digest};
|
||||||
use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy};
|
use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy};
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
@@ -39,8 +40,8 @@ pub struct ManifestEntry {
|
|||||||
pub path: String,
|
pub path: String,
|
||||||
/// File size in bytes.
|
/// File size in bytes.
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
/// Stable content fingerprint.
|
/// SHA-256 content fingerprint.
|
||||||
pub hash: u64,
|
pub hash: Sha256Digest,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Corpus manifest.
|
/// Corpus manifest.
|
||||||
@@ -70,7 +71,7 @@ pub struct CorpusReport {
|
|||||||
/// Casefold collision count.
|
/// Casefold collision count.
|
||||||
pub casefold_collisions: usize,
|
pub casefold_collisions: usize,
|
||||||
/// Manifest fingerprint.
|
/// Manifest fingerprint.
|
||||||
pub fingerprint: u64,
|
pub fingerprint: Sha256Digest,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Corpus error.
|
/// Corpus error.
|
||||||
@@ -187,7 +188,7 @@ fn walk(
|
|||||||
out.push(ManifestEntry {
|
out.push(ManifestEntry {
|
||||||
path: normalized.as_str().to_string(),
|
path: normalized.as_str().to_string(),
|
||||||
size: metadata.len(),
|
size: metadata.len(),
|
||||||
hash: stable_hash(&bytes),
|
hash: sha256(&bytes),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -352,27 +353,15 @@ fn inspect_nres_entries(bytes: &[u8]) -> Option<Vec<NresEntryBrief>> {
|
|||||||
|
|
||||||
/// Computes stable manifest fingerprint.
|
/// Computes stable manifest fingerprint.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn fingerprint(manifest: &CorpusManifest) -> u64 {
|
pub fn fingerprint(manifest: &CorpusManifest) -> Sha256Digest {
|
||||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
let mut bytes = Vec::new();
|
||||||
for file in &manifest.files {
|
for file in &manifest.files {
|
||||||
hash_into(&mut state, file.path.as_bytes());
|
bytes.extend_from_slice(file.path.as_bytes());
|
||||||
hash_into(&mut state, &file.size.to_le_bytes());
|
bytes.push(0);
|
||||||
hash_into(&mut state, &file.hash.to_le_bytes());
|
bytes.extend_from_slice(&file.size.to_le_bytes());
|
||||||
}
|
bytes.extend_from_slice(&file.hash);
|
||||||
state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn stable_hash(bytes: &[u8]) -> u64 {
|
|
||||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
|
||||||
hash_into(&mut state, bytes);
|
|
||||||
state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn hash_into(state: &mut u64, bytes: &[u8]) {
|
|
||||||
for byte in bytes {
|
|
||||||
*state ^= u64::from(*byte);
|
|
||||||
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
|
||||||
}
|
}
|
||||||
|
sha256(&bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Writes report atomically.
|
/// Writes report atomically.
|
||||||
@@ -413,13 +402,13 @@ pub fn write_report_atomic(path: &Path, report: &CorpusReport) -> Result<(), Cor
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn render_report_json(report: &CorpusReport) -> String {
|
pub fn render_report_json(report: &CorpusReport) -> String {
|
||||||
let mut out = format!(
|
let mut out = format!(
|
||||||
"{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{:016x}\",\"metrics\":{{",
|
"{{\"schema_version\":\"fparkan-corpus-report-v1\",\"schema\":{},\"kind\":\"{:?}\",\"files\":{},\"bytes\":{},\"casefold_collisions\":{},\"fingerprint\":\"{}\",\"metrics\":{{",
|
||||||
report.schema,
|
report.schema,
|
||||||
report.kind,
|
report.kind,
|
||||||
report.files,
|
report.files,
|
||||||
report.bytes,
|
report.bytes,
|
||||||
report.casefold_collisions,
|
report.casefold_collisions,
|
||||||
report.fingerprint
|
sha256_hex(&report.fingerprint)
|
||||||
);
|
);
|
||||||
for (idx, (key, value)) in report.metrics.iter().enumerate() {
|
for (idx, (key, value)) in report.metrics.iter().enumerate() {
|
||||||
if idx > 0 {
|
if idx > 0 {
|
||||||
@@ -528,7 +517,7 @@ mod tests {
|
|||||||
files: vec![ManifestEntry {
|
files: vec![ManifestEntry {
|
||||||
path: "secret/payload.bin".to_string(),
|
path: "secret/payload.bin".to_string(),
|
||||||
size: 4,
|
size: 4,
|
||||||
hash: stable_hash(b"DATA"),
|
hash: sha256(b"DATA"),
|
||||||
}],
|
}],
|
||||||
casefold_collisions: Vec::new(),
|
casefold_collisions: Vec::new(),
|
||||||
};
|
};
|
||||||
@@ -604,12 +593,12 @@ mod tests {
|
|||||||
ManifestEntry {
|
ManifestEntry {
|
||||||
path: "Textures/Foo.TEX".to_string(),
|
path: "Textures/Foo.TEX".to_string(),
|
||||||
size: 1,
|
size: 1,
|
||||||
hash: 1,
|
hash: sha256(b"first"),
|
||||||
},
|
},
|
||||||
ManifestEntry {
|
ManifestEntry {
|
||||||
path: "textures/foo.tex".to_string(),
|
path: "textures/foo.tex".to_string(),
|
||||||
size: 1,
|
size: 1,
|
||||||
hash: 2,
|
hash: sha256(b"second"),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
casefold_collisions: Vec::new(),
|
casefold_collisions: Vec::new(),
|
||||||
@@ -633,12 +622,12 @@ mod tests {
|
|||||||
files: vec![ManifestEntry {
|
files: vec![ManifestEntry {
|
||||||
path: "a".to_string(),
|
path: "a".to_string(),
|
||||||
size: 1,
|
size: 1,
|
||||||
hash: 1,
|
hash: sha256(b"before"),
|
||||||
}],
|
}],
|
||||||
casefold_collisions: Vec::new(),
|
casefold_collisions: Vec::new(),
|
||||||
};
|
};
|
||||||
let a = fingerprint(&manifest);
|
let a = fingerprint(&manifest);
|
||||||
manifest.files[0].hash = 2;
|
manifest.files[0].hash = sha256(b"after");
|
||||||
assert_ne!(a, fingerprint(&manifest));
|
assert_ne!(a, fingerprint(&manifest));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -658,7 +647,7 @@ mod tests {
|
|||||||
bytes: 0,
|
bytes: 0,
|
||||||
metrics: BTreeMap::new(),
|
metrics: BTreeMap::new(),
|
||||||
casefold_collisions: 0,
|
casefold_collisions: 0,
|
||||||
fingerprint: 0,
|
fingerprint: sha256(b"empty-report"),
|
||||||
};
|
};
|
||||||
write_report_atomic(&tmp, &report).expect("write");
|
write_report_atomic(&tmp, &report).expect("write");
|
||||||
assert!(tmp.is_file());
|
assert!(tmp.is_file());
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ license.workspace = true
|
|||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fparkan-binary = { path = "../fparkan-binary" }
|
||||||
fparkan-nres = { path = "../fparkan-nres" }
|
fparkan-nres = { path = "../fparkan-nres" }
|
||||||
fparkan-path = { path = "../fparkan-path" }
|
fparkan-path = { path = "../fparkan-path" }
|
||||||
fparkan-rsli = { path = "../fparkan-rsli" }
|
fparkan-rsli = { path = "../fparkan-rsli" }
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#![forbid(unsafe_code)]
|
#![forbid(unsafe_code)]
|
||||||
//! Resource identity and repository ports.
|
//! Resource identity and repository ports.
|
||||||
|
|
||||||
|
use fparkan_binary::Sha256Digest;
|
||||||
use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName};
|
use fparkan_path::{normalize_relative, NormalizedPath, PathPolicy, ResourceName};
|
||||||
use fparkan_vfs::{Vfs, VfsError};
|
use fparkan_vfs::{Vfs, VfsError};
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
@@ -188,7 +189,7 @@ struct RepositoryState {
|
|||||||
|
|
||||||
struct ArchiveSlot {
|
struct ArchiveSlot {
|
||||||
path: NormalizedPath,
|
path: NormalizedPath,
|
||||||
fingerprint: u64,
|
fingerprint: Sha256Digest,
|
||||||
generation: u64,
|
generation: u64,
|
||||||
kind: ArchiveKind,
|
kind: ArchiveKind,
|
||||||
document: ArchiveDocument,
|
document: ArchiveDocument,
|
||||||
@@ -378,7 +379,7 @@ impl CachedResourceRepository {
|
|||||||
fn cached_id(
|
fn cached_id(
|
||||||
&self,
|
&self,
|
||||||
path: &NormalizedPath,
|
path: &NormalizedPath,
|
||||||
fingerprint: u64,
|
fingerprint: Sha256Digest,
|
||||||
) -> Result<Option<ArchiveId>, ResourceError> {
|
) -> Result<Option<ArchiveId>, ResourceError> {
|
||||||
let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?;
|
let state = self.state.lock().map_err(|_| ResourceError::Poisoned)?;
|
||||||
let Some(id) = state.paths.get(path.as_str()).copied() else {
|
let Some(id) = state.paths.get(path.as_str()).copied() else {
|
||||||
@@ -504,7 +505,7 @@ impl ArchiveSlot {
|
|||||||
fn decode_archive(
|
fn decode_archive(
|
||||||
path: NormalizedPath,
|
path: NormalizedPath,
|
||||||
bytes: Arc<[u8]>,
|
bytes: Arc<[u8]>,
|
||||||
fingerprint: u64,
|
fingerprint: Sha256Digest,
|
||||||
) -> Result<ArchiveSlot, ResourceError> {
|
) -> Result<ArchiveSlot, ResourceError> {
|
||||||
if bytes.starts_with(b"NRes") {
|
if bytes.starts_with(b"NRes") {
|
||||||
let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible)
|
let document = fparkan_nres::decode(bytes, fparkan_nres::ReadProfile::Compatible)
|
||||||
@@ -693,7 +694,7 @@ mod tests {
|
|||||||
b"before"
|
b"before"
|
||||||
);
|
);
|
||||||
|
|
||||||
std::fs::write(&host_path, build_nres(&[("a.bin", b"after".as_slice())]))
|
std::fs::write(&host_path, build_nres(&[("a.bin", b"after!".as_slice())]))
|
||||||
.expect("updated archive");
|
.expect("updated archive");
|
||||||
let reopened = repo.open_archive(&path).expect("open updated archive");
|
let reopened = repo.open_archive(&path).expect("open updated archive");
|
||||||
let second = repo
|
let second = repo
|
||||||
@@ -706,7 +707,7 @@ mod tests {
|
|||||||
assert!(matches!(repo.read(first), Err(ResourceError::StaleHandle)));
|
assert!(matches!(repo.read(first), Err(ResourceError::StaleHandle)));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
repo.read(second).expect("read updated").as_slice(),
|
repo.read(second).expect("read updated").as_slice(),
|
||||||
b"after"
|
b"after!"
|
||||||
);
|
);
|
||||||
let _ = std::fs::remove_dir_all(root);
|
let _ = std::fs::remove_dir_all(root);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ license.workspace = true
|
|||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fparkan-binary = { path = "../fparkan-binary" }
|
||||||
fparkan-path = { path = "../fparkan-path" }
|
fparkan-path = { path = "../fparkan-path" }
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
|
|||||||
@@ -1,19 +1,21 @@
|
|||||||
#![forbid(unsafe_code)]
|
#![forbid(unsafe_code)]
|
||||||
//! Virtual filesystem ports for resource loading.
|
//! Virtual filesystem ports for resource loading.
|
||||||
|
|
||||||
|
use fparkan_binary::{sha256, Sha256Digest};
|
||||||
use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath};
|
use fparkan_path::{ascii_lookup_key, join_under, NormalizedPath};
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::{Arc, Mutex};
|
||||||
|
use std::time::SystemTime;
|
||||||
|
|
||||||
/// VFS metadata.
|
/// VFS metadata.
|
||||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
pub struct VfsMetadata {
|
pub struct VfsMetadata {
|
||||||
/// Byte length.
|
/// Byte length.
|
||||||
pub len: u64,
|
pub len: u64,
|
||||||
/// Stable-enough source fingerprint for cache invalidation.
|
/// SHA-256 content fingerprint for cache invalidation.
|
||||||
pub fingerprint: u64,
|
pub fingerprint: Sha256Digest,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// VFS entry.
|
/// VFS entry.
|
||||||
@@ -80,6 +82,7 @@ pub trait Vfs: Send + Sync {
|
|||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct DirectoryVfs {
|
pub struct DirectoryVfs {
|
||||||
root: PathBuf,
|
root: PathBuf,
|
||||||
|
fingerprint_cache: Arc<Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DirectoryVfs {
|
impl DirectoryVfs {
|
||||||
@@ -88,6 +91,7 @@ impl DirectoryVfs {
|
|||||||
pub fn new(root: impl AsRef<Path>) -> Self {
|
pub fn new(root: impl AsRef<Path>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
root: root.as_ref().to_path_buf(),
|
root: root.as_ref().to_path_buf(),
|
||||||
|
fingerprint_cache: Arc::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,12 +99,23 @@ impl DirectoryVfs {
|
|||||||
join_under(&self.root, path).map_err(|_| VfsError::Path)?;
|
join_under(&self.root, path).map_err(|_| VfsError::Path)?;
|
||||||
resolve_casefolded(&self.root, path.as_str())
|
resolve_casefolded(&self.root, path.as_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn metadata_from_host_file(&self, path: &Path) -> Result<VfsMetadata, VfsError> {
|
||||||
|
let metadata = fs::symlink_metadata(path).map_err(VfsError::Io)?;
|
||||||
|
metadata_from_host_file_with_cache(path, &metadata, &self.fingerprint_cache)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
|
struct CachedHostFingerprint {
|
||||||
|
len: u64,
|
||||||
|
modified: Option<SystemTime>,
|
||||||
|
fingerprint: Sha256Digest,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Vfs for DirectoryVfs {
|
impl Vfs for DirectoryVfs {
|
||||||
fn metadata(&self, path: &NormalizedPath) -> Result<VfsMetadata, VfsError> {
|
fn metadata(&self, path: &NormalizedPath) -> Result<VfsMetadata, VfsError> {
|
||||||
let meta = fs::symlink_metadata(self.host_path(path)?).map_err(VfsError::Io)?;
|
self.metadata_from_host_file(&self.host_path(path)?)
|
||||||
Ok(metadata_from_fs(&meta))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read(&self, path: &NormalizedPath) -> Result<Arc<[u8]>, VfsError> {
|
fn read(&self, path: &NormalizedPath) -> Result<Arc<[u8]>, VfsError> {
|
||||||
@@ -123,11 +138,15 @@ impl Vfs for DirectoryVfs {
|
|||||||
let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?;
|
let metadata = fs::symlink_metadata(&base).map_err(VfsError::Io)?;
|
||||||
entries.push(VfsEntry {
|
entries.push(VfsEntry {
|
||||||
path: prefix.clone(),
|
path: prefix.clone(),
|
||||||
metadata: metadata_from_fs(&metadata),
|
metadata: metadata_from_host_file_with_cache(
|
||||||
|
&base,
|
||||||
|
&metadata,
|
||||||
|
&self.fingerprint_cache,
|
||||||
|
)?,
|
||||||
});
|
});
|
||||||
return Ok(entries);
|
return Ok(entries);
|
||||||
}
|
}
|
||||||
list_recursive(&self.root, &base, &mut entries)?;
|
list_recursive(&self.root, &base, &self.fingerprint_cache, &mut entries)?;
|
||||||
entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str()));
|
entries.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str()));
|
||||||
Ok(entries)
|
Ok(entries)
|
||||||
}
|
}
|
||||||
@@ -174,7 +193,12 @@ fn select_casefolded_match(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<(), VfsError> {
|
fn list_recursive(
|
||||||
|
root: &Path,
|
||||||
|
dir: &Path,
|
||||||
|
fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
|
||||||
|
out: &mut Vec<VfsEntry>,
|
||||||
|
) -> Result<(), VfsError> {
|
||||||
let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?;
|
let read_dir = fs::read_dir(dir).map_err(VfsError::Io)?;
|
||||||
let mut children = Vec::new();
|
let mut children = Vec::new();
|
||||||
for entry in read_dir {
|
for entry in read_dir {
|
||||||
@@ -188,7 +212,7 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
|
|||||||
return Err(VfsError::Path);
|
return Err(VfsError::Path);
|
||||||
}
|
}
|
||||||
if metadata.is_dir() {
|
if metadata.is_dir() {
|
||||||
list_recursive(root, &child, out)?;
|
list_recursive(root, &child, fingerprint_cache, out)?;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if !metadata.is_file() {
|
if !metadata.is_file() {
|
||||||
@@ -203,25 +227,49 @@ fn list_recursive(root: &Path, dir: &Path, out: &mut Vec<VfsEntry>) -> Result<()
|
|||||||
.map_err(|_| VfsError::Path)?;
|
.map_err(|_| VfsError::Path)?;
|
||||||
out.push(VfsEntry {
|
out.push(VfsEntry {
|
||||||
path,
|
path,
|
||||||
metadata: metadata_from_fs(&metadata),
|
metadata: metadata_from_host_file_with_cache(&child, &metadata, fingerprint_cache)?,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn metadata_from_fs(metadata: &fs::Metadata) -> VfsMetadata {
|
fn metadata_from_host_file_with_cache(
|
||||||
let mut fingerprint = 0xcbf2_9ce4_8422_2325;
|
path: &Path,
|
||||||
hash_u64(&mut fingerprint, metadata.len());
|
metadata: &fs::Metadata,
|
||||||
if let Ok(modified) = metadata.modified() {
|
fingerprint_cache: &Mutex<BTreeMap<PathBuf, CachedHostFingerprint>>,
|
||||||
if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) {
|
) -> Result<VfsMetadata, VfsError> {
|
||||||
hash_u64(&mut fingerprint, duration.as_secs());
|
if !metadata.is_file() {
|
||||||
hash_u64(&mut fingerprint, u64::from(duration.subsec_nanos()));
|
return Err(VfsError::Path);
|
||||||
}
|
}
|
||||||
|
let len = metadata.len();
|
||||||
|
let modified = metadata.modified().ok();
|
||||||
|
if let Some(cached) = fingerprint_cache
|
||||||
|
.lock()
|
||||||
|
.map_err(|_| VfsError::Path)?
|
||||||
|
.get(path)
|
||||||
|
.cloned()
|
||||||
|
.filter(|cached| cached.len == len && cached.modified == modified)
|
||||||
|
{
|
||||||
|
return Ok(VfsMetadata {
|
||||||
|
len,
|
||||||
|
fingerprint: cached.fingerprint,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
VfsMetadata {
|
|
||||||
len: metadata.len(),
|
let bytes = fs::read(path).map_err(VfsError::Io)?;
|
||||||
|
let fingerprint = sha256(&bytes);
|
||||||
|
fingerprint_cache
|
||||||
|
.lock()
|
||||||
|
.map_err(|_| VfsError::Path)?
|
||||||
|
.insert(
|
||||||
|
path.to_path_buf(),
|
||||||
|
CachedHostFingerprint {
|
||||||
|
len,
|
||||||
|
modified,
|
||||||
fingerprint,
|
fingerprint,
|
||||||
}
|
},
|
||||||
|
);
|
||||||
|
Ok(VfsMetadata { len, fingerprint })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// In-memory VFS.
|
/// In-memory VFS.
|
||||||
@@ -276,7 +324,7 @@ impl Vfs for MemoryVfs {
|
|||||||
.ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?;
|
.ok_or_else(|| VfsError::NotFound(path.as_str().to_string()))?;
|
||||||
Ok(VfsMetadata {
|
Ok(VfsMetadata {
|
||||||
len: bytes.len() as u64,
|
len: bytes.len() as u64,
|
||||||
fingerprint: stable_hash(bytes),
|
fingerprint: sha256(bytes),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -305,7 +353,7 @@ impl Vfs for MemoryVfs {
|
|||||||
path: normalized,
|
path: normalized,
|
||||||
metadata: VfsMetadata {
|
metadata: VfsMetadata {
|
||||||
len: bytes.len() as u64,
|
len: bytes.len() as u64,
|
||||||
fingerprint: stable_hash(bytes),
|
fingerprint: sha256(bytes),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -314,22 +362,6 @@ impl Vfs for MemoryVfs {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn stable_hash(bytes: &[u8]) -> u64 {
|
|
||||||
let mut state = 0xcbf2_9ce4_8422_2325;
|
|
||||||
for byte in bytes {
|
|
||||||
state ^= u64::from(*byte);
|
|
||||||
state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
|
||||||
}
|
|
||||||
state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn hash_u64(state: &mut u64, value: u64) {
|
|
||||||
for byte in value.to_le_bytes() {
|
|
||||||
*state ^= u64::from(byte);
|
|
||||||
*state = state.wrapping_mul(0x0000_0100_0000_01b3);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Layered VFS with deterministic first-layer precedence.
|
/// Layered VFS with deterministic first-layer precedence.
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct OverlayVfs {
|
pub struct OverlayVfs {
|
||||||
@@ -475,6 +507,24 @@ mod tests {
|
|||||||
std::fs::remove_dir_all(root).expect("cleanup");
|
std::fs::remove_dir_all(root).expect("cleanup");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn directory_vfs_fingerprint_changes_for_same_length_content() {
|
||||||
|
let root = unique_test_dir("content-fingerprint");
|
||||||
|
std::fs::create_dir_all(root.join("DATA")).expect("mkdir");
|
||||||
|
std::fs::write(root.join("DATA").join("File.bin"), b"before").expect("write before");
|
||||||
|
|
||||||
|
let vfs = DirectoryVfs::new(&root);
|
||||||
|
let path = normalize_relative(b"DATA/File.bin", PathPolicy::StrictLegacy).expect("path");
|
||||||
|
let before = vfs.metadata(&path).expect("before metadata");
|
||||||
|
std::fs::write(root.join("DATA").join("File.bin"), b"after!").expect("write after");
|
||||||
|
let after = vfs.metadata(&path).expect("after metadata");
|
||||||
|
|
||||||
|
assert_eq!(before.len, after.len);
|
||||||
|
assert_ne!(before.fingerprint, after.fingerprint);
|
||||||
|
|
||||||
|
std::fs::remove_dir_all(root).expect("cleanup");
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
#[test]
|
#[test]
|
||||||
fn directory_vfs_rejects_symlink_escape() {
|
fn directory_vfs_rejects_symlink_escape() {
|
||||||
|
|||||||
Reference in New Issue
Block a user