Initial vendor packages

Signed-off-by: Valentin Popov <valentin@popov.link>
This commit is contained in:
2024-01-08 01:21:28 +04:00
parent 5ecd8cf2cb
commit 1b6a04ca55
7309 changed files with 2160054 additions and 0 deletions

View File

@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"4f762219d91ec3f7b6f4f25c6c4f4a83a5c3a0db1503827d9e1541c5fb8b62a1","Cargo.toml":"1df396cf8d69ee6f333bae303d4051c9f189167fa1ce07791becc5d3f2bd14fe","LICENSE.md":"42a35170233e83e18856792e748de4c1ce4a63b2afce9a370c89ef3fe23f9f2d","README.md":"a0568aab174ff15c2252898c7520ce0bd539cfb75d799238acd34dec44ee772c","src/hash.rs":"b5d0aea3ad486cccb67a156606e932b857cf0db5014393df50f290a495836d17","src/imp/avx2.rs":"46a83702bfe182278851d75f83b916a9fe0cc1ff5d1244d1912960ee1fcd82b2","src/imp/avx512.rs":"6e96adfe3099319529c486a186c71c85d92e93b29ea73558b84d65c618367858","src/imp/mod.rs":"9e6a1e69c40b03922b66a939b60e4217886a8aed3757af8d5a1e168c888121dd","src/imp/neon.rs":"2edad3b532b3384c83607a8adc6a1a46b4428708d02cb213f6afd333f60cfc99","src/imp/scalar.rs":"950c9f1202110da14d4ce725591fb2bdab36b73e500d2654e3a75c1a69a4ca5d","src/imp/sse2.rs":"fabefe43151e989a8a24910c682ae2661c30ee39363f0903311e6da60e192658","src/imp/ssse3.rs":"12f719d17bd9044c374f217312ea094625f22436e27bd12a28c60469f9448608","src/imp/wasm.rs":"3b49cde90bde87548e5169c061bf1abdde328710f75cde807d0921130aa232f1","src/lib.rs":"94525f007da93fbb09bbcdc164bc86a13a9526464203eb508ce65e5752c1a3f2"},"package":"d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"}

12
vendor/simd-adler32/CHANGELOG.md vendored Normal file
View File

@ -0,0 +1,12 @@
# Changelog
## 0.3.3 - 2021-04-14
### Features
- **from_checksum**: add `Adler32::from_checksum`
### Performance Improvements
- **scalar**: improve scalar performance by 90-600%
- Defer modulo until right before u16 overflow

67
vendor/simd-adler32/Cargo.toml vendored Normal file
View File

@ -0,0 +1,67 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "simd-adler32"
version = "0.3.7"
authors = ["Marvin Countryman <me@maar.vin>"]
exclude = ["bench"]
description = "A SIMD-accelerated Adler-32 hash algorithm implementation."
readme = "README.md"
keywords = [
"simd",
"avx2",
"ssse3",
"adler",
"adler32",
]
categories = [
"algorithms",
"no-std",
]
license = "MIT"
repository = "https://github.com/mcountryman/simd-adler32"
[profile.release]
opt-level = 2
debug = 2
[[bench]]
name = "alts"
path = "bench/alts.rs"
harness = false
[[bench]]
name = "variants"
path = "bench/variants.rs"
harness = false
[dev-dependencies.adler]
version = "1.0.2"
[dev-dependencies.adler32]
version = "1.2.0"
[dev-dependencies.criterion]
version = "0.3"
[dev-dependencies.rand]
version = "0.8"
[features]
const-generics = []
default = [
"std",
"const-generics",
]
nightly = []
std = []

21
vendor/simd-adler32/LICENSE.md vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) [2021] [Marvin Countryman]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

131
vendor/simd-adler32/README.md vendored Normal file
View File

@ -0,0 +1,131 @@
<h1 align="center">simd-adler32</h1>
<p align="center">
<a href="https://docs.rs/simd-adler32">
<img alt="docs.rs badge" src="https://img.shields.io/docsrs/simd-adler32?style=flat-square">
</a>
<a href="https://crates.io/crates/simd-adler32">
<img alt="crates.io badge" src="https://img.shields.io/crates/v/simd-adler32?style=flat-square">
</a>
<a href="https://github.com/mcountryman/simd-adler32/blob/main/LICENSE.md">
<img alt="mit license badge" src="https://img.shields.io/github/license/mcountryman/simd-adler32?style=flat-square">
</a>
</p>
A SIMD-accelerated Adler-32 hash algorithm implementation.
## Features
- No dependencies
- Support `no_std` (with `default-features = false`)
- Runtime CPU feature detection (when `std` enabled)
- Blazing fast performance on as many targets as possible (currently only x86 and x86_64)
- Default to scalar implementation when simd not available
## Quick start
> Cargo.toml
```toml
[dependencies]
simd-adler32 = "*"
```
> example.rs
```rust
use simd_adler32::Adler32;
let mut adler = Adler32::new();
adler.write(b"rust is pretty cool, man");
let hash = adler.finish();
println!("{}", hash);
// 1921255656
```
## Support
**CPU Features**
| impl | arch | feature |
| ---- | ---------------- | ------- |
| ✅ | `x86`, `x86_64` | avx512 |
| ✅ | `x86`, `x86_64` | avx2 |
| ✅ | `x86`, `x86_64` | ssse3 |
| ✅ | `x86`, `x86_64` | sse2 |
| 🚧 | `arm`, `aarch64` | neon |
| ✅ | `wasm32` | simd128 |
**MSRV** `1.36.0`\*\*
Minimum supported rust version is tested before a new version is published. [**] Feature
`const-generics` needs to disabled to build on rustc versions `<1.51` which can be done
by updating your dependency definition to the following.
> Cargo.toml
```toml
[dependencies]
simd-adler32 = { version "*", default-features = false, features = ["std"] }
```
## Performance
Benchmarks listed display number of randomly generated bytes (10k / 100k) and library
name. Benchmarks sources can be found under the [bench](/bench) directory. Crates used for
comparison are [adler](https://crates.io/crates/adler) and
[adler32](https://crates.io/crates/adler32).
> Windows 10 Pro - Intel i5-8300H @ 2.30GHz
| name | avg. time | avg. thrpt |
| ----------------------- | --------------- | ------------------ |
| **10k/simd-adler32** | **212.61 ns** | **43.805 GiB/s** |
| 10k/wuffs | 3843 ns | 2.63 GiB/s\* |
| 10k/adler32 | 4.8084 us | 1.9369 GiB/s |
| 10k/adler | 17.979 us | 530.43 MiB/s |
| ----------------------- | --------------- | ------------------ |
| **100k/simd-adler32** | **2.7951 us** | **33.320 GiB/s** |
| 100k/wuffs | 34733 ns | 2.6814 GiB/s\* |
| 100k/adler32 | 48.488 us | 1.9207 GiB/s |
| 100k/adler | 178.36 us | 534.69 MiB/s |
\* wuffs ran using mingw64/gcc, ran with `wuffs bench -ccompilers=gcc -reps=1 -iterscale=300 std/adler32`.
> MacBookPro16,1 - Intel i9-9880H CPU @ 2.30GHz
| name | avg. time | avg. thrpt |
| ----------------------- | --------------- | ------------------ |
| **10k/simd-adler32** | **200.37 ns** | **46.480 GiB/s** |
| 10k/adler32 | 4.1516 us | 2.2433 GiB/s |
| 10k/adler | 10.220 us | 933.15 MiB/s |
| ----------------------- | --------------- | ------------------ |
| **100k/simd-adler32** | **2.3282 us** | **40.003 GiB/s** |
| 100k/adler32 | 41.130 us | 2.2643 GiB/s |
| 100k/adler | 83.776 us | 534.69 MiB/s |
## Safety
This crate contains a significant amount of `unsafe` code due to the requirement of `unsafe`
for simd intrinsics. Fuzzing is done on release and debug builds prior to publishing via
`afl`. Fuzzy tests can be found under [fuzz](/fuzz) the directory.
## Resources
- [LICENSE](./LICENSE.md) - MIT
- [CHANGELOG](./CHANGELOG.md)
## Credits
Thank you to the contributors of the following projects.
- [adler](https://github.com/jonas-schievink/adler)
- [adler32](https://github.com/remram44/adler32-rs)
- [crc32fast](https://github.com/srijs/rust-crc32fast)
- [wuffs](https://github.com/google/wuffs)
- [chromium](https://bugs.chromium.org/p/chromium/issues/detail?id=762564)
- [zlib](https://zlib.net/)
## Contributing
Feel free to submit a issue or pull request. :smile:

156
vendor/simd-adler32/src/hash.rs vendored Normal file
View File

@ -0,0 +1,156 @@
use crate::{Adler32, Adler32Hash};
impl Adler32Hash for &[u8] {
fn hash(&self) -> u32 {
let mut hash = Adler32::new();
hash.write(self);
hash.finish()
}
}
impl Adler32Hash for &str {
fn hash(&self) -> u32 {
let mut hash = Adler32::new();
hash.write(self.as_bytes());
hash.finish()
}
}
#[cfg(feature = "const-generics")]
impl<const SIZE: usize> Adler32Hash for [u8; SIZE] {
fn hash(&self) -> u32 {
let mut hash = Adler32::new();
hash.write(self);
hash.finish()
}
}
macro_rules! array_impl {
($s:expr, $($size:expr),+) => {
array_impl!($s);
$(array_impl!{$size})*
};
($size:expr) => {
#[cfg(not(feature = "const-generics"))]
impl Adler32Hash for [u8; $size] {
fn hash(&self) -> u32 {
let mut hash = Adler32::new();
hash.write(self);
hash.finish()
}
}
};
}
array_impl!(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
1024,
1024 * 1024,
1024 * 1024 * 1024,
2048,
4096
);

214
vendor/simd-adler32/src/imp/avx2.rs vendored Normal file
View File

@ -0,0 +1,214 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports avx2 instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
fn get_imp_inner() -> Option<Adler32Imp> {
if std::is_x86_feature_detected!("avx2") {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(
target_feature = "avx2",
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(all(
not(target_feature = "avx2"),
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
any(feature = "std", target_feature = "avx2")
))]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 32;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
unsafe { update_imp(a, b, data) }
}
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
#[inline]
unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
#[inline]
unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let one_v = _mm256_set1_epi16(1);
let zero_v = _mm256_setzero_si256();
let weights = get_weights();
let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _);
let mut a_v = _mm256_setzero_si256();
let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _);
for block in blocks {
let block_ptr = block.as_ptr() as *const _;
let block = _mm256_loadu_si256(block_ptr);
p_v = _mm256_add_epi32(p_v, a_v);
a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v));
let mad = _mm256_maddubs_epi16(block, weights);
b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v));
}
b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
unsafe fn reduce_add(v: __m256i) -> u32 {
let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1));
let hi = _mm_unpackhi_epi64(sum, sum);
let sum = _mm_add_epi32(hi, sum);
let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
let sum = _mm_add_epi32(sum, hi);
_mm_cvtsi128_si32(sum) as _
}
#[inline(always)]
unsafe fn get_weights() -> __m256i {
_mm256_set_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 1024 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 1024 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 1024 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..1024 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

242
vendor/simd-adler32/src/imp/avx512.rs vendored Normal file
View File

@ -0,0 +1,242 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports avx512f and avx512bw instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(all(
feature = "std",
feature = "nightly",
any(target_arch = "x86", target_arch = "x86_64")
))]
fn get_imp_inner() -> Option<Adler32Imp> {
let has_avx512f = std::is_x86_feature_detected!("avx512f");
let has_avx512bw = std::is_x86_feature_detected!("avx512bw");
if has_avx512f && has_avx512bw {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(
feature = "nightly",
all(target_feature = "avx512f", target_feature = "avx512bw"),
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(all(
not(all(feature = "nightly", target_feature = "avx512f", target_feature = "avx512bw")),
not(all(
feature = "std",
feature = "nightly",
any(target_arch = "x86", target_arch = "x86_64")
))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(all(
feature = "nightly",
any(target_arch = "x86", target_arch = "x86_64"),
any(
feature = "std",
all(target_feature = "avx512f", target_feature = "avx512bw")
)
))]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 64;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
unsafe { update_imp(a, b, data) }
}
#[inline]
#[target_feature(enable = "avx512f")]
#[target_feature(enable = "avx512bw")]
unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
#[inline]
unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
#[inline]
unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let one_v = _mm512_set1_epi16(1);
let zero_v = _mm512_setzero_si512();
let weights = get_weights();
let p_v = (*a * blocks.len() as u32) as _;
let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v);
let mut a_v = _mm512_setzero_si512();
let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _);
for block in blocks {
let block_ptr = block.as_ptr() as *const _;
let block = _mm512_loadu_si512(block_ptr);
p_v = _mm512_add_epi32(p_v, a_v);
a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v));
let mad = _mm512_maddubs_epi16(block, weights);
b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v));
}
b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
unsafe fn reduce_add(v: __m512i) -> u32 {
let v: [__m256i; 2] = core::mem::transmute(v);
reduce_add_256(v[0]) + reduce_add_256(v[1])
}
#[inline(always)]
unsafe fn reduce_add_256(v: __m256i) -> u32 {
let v: [__m128i; 2] = core::mem::transmute(v);
let sum = _mm_add_epi32(v[0], v[1]);
let hi = _mm_unpackhi_epi64(sum, sum);
let sum = _mm_add_epi32(hi, sum);
let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
let sum = _mm_add_epi32(sum, hi);
let sum = _mm_cvtsi128_si32(sum) as _;
sum
}
#[inline(always)]
unsafe fn get_weights() -> __m512i {
_mm512_set_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 1024 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 1024 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 1024 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..1024 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

23
vendor/simd-adler32/src/imp/mod.rs vendored Normal file
View File

@ -0,0 +1,23 @@
pub mod avx2;
pub mod avx512;
pub mod scalar;
pub mod sse2;
pub mod ssse3;
pub mod wasm;
pub type Adler32Imp = fn(u16, u16, &[u8]) -> (u16, u16);
#[inline]
#[allow(non_snake_case)]
pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
((z << 6) | (y << 4) | (x << 2) | w) as i32
}
pub fn get_imp() -> Adler32Imp {
avx512::get_imp()
.or_else(avx2::get_imp)
.or_else(ssse3::get_imp)
.or_else(sse2::get_imp)
.or_else(wasm::get_imp)
.unwrap_or(scalar::update)
}

241
vendor/simd-adler32/src/imp/neon.rs vendored Normal file
View File

@ -0,0 +1,241 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports avx512f and avx512bw instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(all(feature = "std", feature = "nightly", target_arch = "arm"))]
fn get_imp_inner() -> Option<Adler32Imp> {
if std::is_arm_feature_detected("neon") {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(feature = "std", feature = "nightly", target_arch = "aarch64"))]
fn get_imp_inner() -> Option<Adler32Imp> {
if std::is_aarch64_feature_detected("neon") {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(
feature = "nightly",
target_feature = "neon",
not(all(feature = "std", any(target_arch = "arm", target_arch = "aarch64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(all(
not(target_feature = "neon"),
not(all(
feature = "std",
feature = "nightly",
any(target_arch = "arm", target_arch = "aarch64")
))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(all(
feature = "nightly",
any(target_arch = "arm", target_arch = "aarch64"),
any(feature = "std", target_feature = "neon")
))]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 64;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
unsafe { update_imp(a, b, data) }
}
#[inline]
#[target_feature(enable = "neon")]
unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
#[inline]
unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
#[inline]
unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let one_v = _mm512_set1_epi16(1);
let zero_v = _mm512_setzero_si512();
let weights = get_weights();
let p_v = (*a * blocks.len() as u32) as _;
let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v);
let mut a_v = _mm512_setzero_si512();
let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _);
for block in blocks {
let block_ptr = block.as_ptr() as *const _;
let block = _mm512_loadu_si512(block_ptr);
p_v = _mm512_add_epi32(p_v, a_v);
a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v));
let mad = _mm512_maddubs_epi16(block, weights);
b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v));
}
b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
unsafe fn reduce_add(v: __m512i) -> u32 {
let v: [__m256i; 2] = core::mem::transmute(v);
reduce_add_256(v[0]) + reduce_add_256(v[1])
}
#[inline(always)]
unsafe fn reduce_add_256(v: __m256i) -> u32 {
let v: [__m128i; 2] = core::mem::transmute(v);
let sum = _mm_add_epi32(v[0], v[1]);
let hi = _mm_unpackhi_epi64(sum, sum);
let sum = _mm_add_epi32(hi, sum);
let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
let sum = _mm_add_epi32(sum, hi);
let sum = _mm_cvtsi128_si32(sum) as _;
sum
}
#[inline(always)]
unsafe fn get_weights() -> __m512i {
_mm512_set_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 1024 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 1024 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 1024 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..1024 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

69
vendor/simd-adler32/src/imp/scalar.rs vendored Normal file
View File

@ -0,0 +1,69 @@
const MOD: u32 = 65521;
const NMAX: usize = 5552;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(NMAX);
let remainder = chunks.remainder();
for chunk in chunks {
for byte in chunk {
a = a.wrapping_add(*byte as _);
b = b.wrapping_add(a);
}
a %= MOD;
b %= MOD;
}
for byte in remainder {
a = a.wrapping_add(*byte as _);
b = b.wrapping_add(a);
}
a %= MOD;
b %= MOD;
(a as u16, b as u16)
}
#[cfg(test)]
mod tests {
#[test]
fn zeroes() {
assert_eq!(adler32(&[]), 1);
assert_eq!(adler32(&[0]), 1 | 1 << 16);
assert_eq!(adler32(&[0, 0]), 1 | 2 << 16);
assert_eq!(adler32(&[0; 100]), 0x00640001);
assert_eq!(adler32(&[0; 1024]), 0x04000001);
assert_eq!(adler32(&[0; 1024 * 1024]), 0x00f00001);
}
#[test]
fn ones() {
assert_eq!(adler32(&[0xff; 1024]), 0x79a6fc2e);
assert_eq!(adler32(&[0xff; 1024 * 1024]), 0x8e88ef11);
}
#[test]
fn mixed() {
assert_eq!(adler32(&[1]), 2 | 2 << 16);
assert_eq!(adler32(&[40]), 41 | 41 << 16);
assert_eq!(adler32(&[0xA5; 1024 * 1024]), 0xd5009ab1);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_eq!(adler32(b"Wikipedia"), 0x11E60398);
}
fn adler32(data: &[u8]) -> u32 {
let (a, b) = super::update(1, 0, data);
u32::from(b) << 16 | u32::from(a)
}
}

233
vendor/simd-adler32/src/imp/sse2.rs vendored Normal file
View File

@ -0,0 +1,233 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports sse2 instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
fn get_imp_inner() -> Option<Adler32Imp> {
if std::is_x86_feature_detected!("sse2") {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(
target_feature = "sse2",
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(all(
not(target_feature = "sse2"),
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
any(feature = "std", target_feature = "sse2")
))]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 32;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
unsafe { update_imp(a, b, data) }
}
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let zero_v = _mm_setzero_si128();
let weight_hi_v = get_weight_hi();
let weight_lo_v = get_weight_lo();
let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
let mut a_v = _mm_setzero_si128();
let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);
for block in blocks {
let block_ptr = block.as_ptr() as *const _;
let left_v = _mm_loadu_si128(block_ptr);
let right_v = _mm_loadu_si128(block_ptr.add(1));
p_v = _mm_add_epi32(p_v, a_v);
a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
let mad = maddubs(left_v, weight_hi_v);
b_v = _mm_add_epi32(b_v, mad);
a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
let mad = maddubs(right_v, weight_lo_v);
b_v = _mm_add_epi32(b_v, mad);
}
b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
unsafe fn maddubs(a: __m128i, b: __m128i) -> __m128i {
let a_lo = _mm_unpacklo_epi8(a, _mm_setzero_si128());
let a_hi = _mm_unpackhi_epi8(a, _mm_setzero_si128());
let b_lo = _mm_unpacklo_epi8(b, _mm_setzero_si128());
let b_hi = _mm_unpackhi_epi8(b, _mm_setzero_si128());
let lo = _mm_madd_epi16(a_lo, b_lo);
let hi = _mm_madd_epi16(a_hi, b_hi);
_mm_add_epi32(lo, hi)
}
#[inline(always)]
unsafe fn reduce_add(v: __m128i) -> u32 {
let hi = _mm_unpackhi_epi64(v, v);
let sum = _mm_add_epi32(hi, v);
let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
let sum = _mm_add_epi32(sum, hi);
_mm_cvtsi128_si32(sum) as _
}
#[inline(always)]
unsafe fn get_weight_lo() -> __m128i {
_mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
}
#[inline(always)]
unsafe fn get_weight_hi() -> __m128i {
_mm_set_epi8(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 1024 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 1024 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 1024 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..1024 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

219
vendor/simd-adler32/src/imp/ssse3.rs vendored Normal file
View File

@ -0,0 +1,219 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports ssse3 instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
fn get_imp_inner() -> Option<Adler32Imp> {
if std::is_x86_feature_detected!("ssse3") {
Some(imp::update)
} else {
None
}
}
#[inline]
#[cfg(all(
target_feature = "ssse3",
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(all(
not(target_feature = "ssse3"),
not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
any(feature = "std", target_feature = "ssse3")
))]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 32;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
unsafe { update_imp(a, b, data) }
}
#[inline]
#[target_feature(enable = "ssse3")]
unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let one_v = _mm_set1_epi16(1);
let zero_v = _mm_set1_epi16(0);
let weight_hi_v = get_weight_hi();
let weight_lo_v = get_weight_lo();
let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
let mut a_v = _mm_set_epi32(0, 0, 0, 0);
let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);
for block in blocks {
let block_ptr = block.as_ptr() as *const _;
let left_v = _mm_loadu_si128(block_ptr);
let right_v = _mm_loadu_si128(block_ptr.add(1));
p_v = _mm_add_epi32(p_v, a_v);
a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
let mad = _mm_maddubs_epi16(left_v, weight_hi_v);
b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
let mad = _mm_maddubs_epi16(right_v, weight_lo_v);
b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
}
b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
unsafe fn reduce_add(v: __m128i) -> u32 {
let hi = _mm_unpackhi_epi64(v, v);
let sum = _mm_add_epi32(hi, v);
let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
let sum = _mm_add_epi32(sum, hi);
_mm_cvtsi128_si32(sum) as _
}
#[inline(always)]
unsafe fn get_weight_lo() -> __m128i {
_mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
}
#[inline(always)]
unsafe fn get_weight_hi() -> __m128i {
_mm_set_epi8(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 1024 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 1024 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 1024 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..1024 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

217
vendor/simd-adler32/src/imp/wasm.rs vendored Normal file
View File

@ -0,0 +1,217 @@
use super::Adler32Imp;
/// Resolves update implementation if CPU supports simd128 instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
}
#[inline]
#[cfg(target_feature = "simd128")]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
}
#[inline]
#[cfg(not(target_feature = "simd128"))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
}
#[cfg(target_feature = "simd128")]
mod imp {
const MOD: u32 = 65521;
const NMAX: usize = 5552;
const BLOCK_SIZE: usize = 32;
const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;
#[cfg(target_arch = "wasm64")]
use core::arch::wasm64::*;
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
update_imp(a, b, data)
}
#[inline]
#[target_feature(enable = "simd128")]
fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;
let chunks = data.chunks_exact(CHUNK_SIZE);
let remainder = chunks.remainder();
for chunk in chunks {
update_chunk_block(&mut a, &mut b, chunk);
}
update_block(&mut a, &mut b, remainder);
(a as u16, b as u16)
}
fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert_eq!(
chunk.len(),
CHUNK_SIZE,
"Unexpected chunk size (expected {}, got {})",
CHUNK_SIZE,
chunk.len()
);
reduce_add_blocks(a, b, chunk);
*a %= MOD;
*b %= MOD;
}
fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
debug_assert!(
chunk.len() <= CHUNK_SIZE,
"Unexpected chunk size (expected <= {}, got {})",
CHUNK_SIZE,
chunk.len()
);
for byte in reduce_add_blocks(a, b, chunk) {
*a += *byte as u32;
*b += *a;
}
*a %= MOD;
*b %= MOD;
}
#[inline(always)]
fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
if chunk.len() < BLOCK_SIZE {
return chunk;
}
let blocks = chunk.chunks_exact(BLOCK_SIZE);
let blocks_remainder = blocks.remainder();
let weight_hi_v = get_weight_hi();
let weight_lo_v = get_weight_lo();
let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
let mut a_v = u32x4(0, 0, 0, 0);
let mut b_v = u32x4(*b, 0, 0, 0);
for block in blocks {
let block_ptr = block.as_ptr() as *const v128;
let v_lo = unsafe { block_ptr.read_unaligned() };
let v_hi = unsafe { block_ptr.add(1).read_unaligned() };
p_v = u32x4_add(p_v, a_v);
a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
b_v = u32x4_add(b_v, mad);
a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
b_v = u32x4_add(b_v, mad);
}
b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));
*a += reduce_add(a_v);
*b = reduce_add(b_v);
blocks_remainder
}
#[inline(always)]
fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
let a_lo = u16x8_extend_low_u8x16(a);
let a_hi = u16x8_extend_high_u8x16(a);
let b_lo = u16x8_extend_low_u8x16(b);
let b_hi = u16x8_extend_high_u8x16(b);
let lo = i32x4_dot_i16x8(a_lo, b_lo);
let hi = i32x4_dot_i16x8(a_hi, b_hi);
i32x4_add(lo, hi)
}
#[inline(always)]
fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
}
#[inline(always)]
fn reduce_add(v: v128) -> u32 {
let arr: [u32; 4] = unsafe { std::mem::transmute(v) };
let mut sum = 0u32;
for val in arr {
sum = sum.wrapping_add(val);
}
sum
}
#[inline(always)]
fn get_weight_lo() -> v128 {
u8x16(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
)
}
#[inline(always)]
fn get_weight_hi() -> v128 {
u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
}
}
#[cfg(test)]
mod tests {
use rand::Rng;
#[test]
fn zeroes() {
assert_sum_eq(&[]);
assert_sum_eq(&[0]);
assert_sum_eq(&[0, 0]);
assert_sum_eq(&[0; 100]);
assert_sum_eq(&[0; 1024]);
assert_sum_eq(&[0; 512 * 1024]);
}
#[test]
fn ones() {
assert_sum_eq(&[]);
assert_sum_eq(&[1]);
assert_sum_eq(&[1, 1]);
assert_sum_eq(&[1; 100]);
assert_sum_eq(&[1; 1024]);
assert_sum_eq(&[1; 512 * 1024]);
}
#[test]
fn random() {
let mut random = [0; 512 * 1024];
rand::thread_rng().fill(&mut random[..]);
assert_sum_eq(&random[..1]);
assert_sum_eq(&random[..100]);
assert_sum_eq(&random[..1024]);
assert_sum_eq(&random[..512 * 1024]);
}
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
#[test]
fn wiki() {
assert_sum_eq(b"Wikipedia");
}
fn assert_sum_eq(data: &[u8]) {
if let Some(update) = super::get_imp() {
let (a, b) = update(1, 0, data);
let left = u32::from(b) << 16 | u32::from(a);
let right = adler::adler32_slice(data);
assert_eq!(left, right, "len({})", data.len());
}
}
}

310
vendor/simd-adler32/src/lib.rs vendored Normal file
View File

@ -0,0 +1,310 @@
//! # simd-adler32
//!
//! A SIMD-accelerated Adler-32 hash algorithm implementation.
//!
//! ## Features
//!
//! - No dependencies
//! - Support `no_std` (with `default-features = false`)
//! - Runtime CPU feature detection (when `std` enabled)
//! - Blazing fast performance on as many targets as possible (currently only x86 and x86_64)
//! - Default to scalar implementation when simd not available
//!
//! ## Quick start
//!
//! > Cargo.toml
//!
//! ```toml
//! [dependencies]
//! simd-adler32 = "*"
//! ```
//!
//! > example.rs
//!
//! ```rust
//! use simd_adler32::Adler32;
//!
//! let mut adler = Adler32::new();
//! adler.write(b"rust is pretty cool, man");
//! let hash = adler.finish();
//!
//! println!("{}", hash);
//! // 1921255656
//! ```
//!
//! ## Feature flags
//!
//! * `std` - Enabled by default
//!
//! Enables std support, see [CPU Feature Detection](#cpu-feature-detection) for runtime
//! detection support.
//! * `nightly`
//!
//! Enables nightly features required for avx512 support.
//!
//! * `const-generics` - Enabled by default
//!
//! Enables const-generics support allowing for user-defined array hashing by value. See
//! [`Adler32Hash`] for details.
//!
//! ## Support
//!
//! **CPU Features**
//!
//! | impl | arch | feature |
//! | ---- | ---------------- | ------- |
//! | ✅ | `x86`, `x86_64` | avx512 |
//! | ✅ | `x86`, `x86_64` | avx2 |
//! | ✅ | `x86`, `x86_64` | ssse3 |
//! | ✅ | `x86`, `x86_64` | sse2 |
//! | 🚧 | `arm`, `aarch64` | neon |
//! | | `wasm32` | simd128 |
//!
//! **MSRV** `1.36.0`\*\*
//!
//! Minimum supported rust version is tested before a new version is published. [**] Feature
//! `const-generics` needs to disabled to build on rustc versions `<1.51` which can be done
//! by updating your dependency definition to the following.
//!
//! ## CPU Feature Detection
//! simd-adler32 supports both runtime and compile time CPU feature detection using the
//! `std::is_x86_feature_detected` macro when the `Adler32` struct is instantiated with
//! the `new` fn.
//!
//! Without `std` feature enabled simd-adler32 falls back to compile time feature detection
//! using `target-feature` or `target-cpu` flags supplied to rustc. See [https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html)
//! for more information.
//!
//! Feature detection tries to use the fastest supported feature first.
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "nightly", feature(stdsimd, avx512_target_feature))]
#[doc(hidden)]
pub mod hash;
#[doc(hidden)]
pub mod imp;
pub use hash::*;
use imp::{get_imp, Adler32Imp};
/// An adler32 hash generator type.
#[derive(Clone)]
pub struct Adler32 {
a: u16,
b: u16,
update: Adler32Imp,
}
impl Adler32 {
/// Constructs a new `Adler32`.
///
/// Potential overhead here due to runtime feature detection although in testing on 100k
/// and 10k random byte arrays it was not really noticeable.
///
/// # Examples
/// ```rust
/// use simd_adler32::Adler32;
///
/// let mut adler = Adler32::new();
/// ```
pub fn new() -> Self {
Default::default()
}
/// Constructs a new `Adler32` using existing checksum.
///
/// Potential overhead here due to runtime feature detection although in testing on 100k
/// and 10k random byte arrays it was not really noticeable.
///
/// # Examples
/// ```rust
/// use simd_adler32::Adler32;
///
/// let mut adler = Adler32::from_checksum(0xdeadbeaf);
/// ```
pub fn from_checksum(checksum: u32) -> Self {
Self {
a: checksum as u16,
b: (checksum >> 16) as u16,
update: get_imp(),
}
}
/// Computes hash for supplied data and stores results in internal state.
pub fn write(&mut self, data: &[u8]) {
let (a, b) = (self.update)(self.a, self.b, data);
self.a = a;
self.b = b;
}
/// Returns the hash value for the values written so far.
///
/// Despite its name, the method does not reset the hashers internal state. Additional
/// writes will continue from the current value. If you need to start a fresh hash
/// value, you will have to use `reset`.
pub fn finish(&self) -> u32 {
(u32::from(self.b) << 16) | u32::from(self.a)
}
/// Resets the internal state.
pub fn reset(&mut self) {
self.a = 1;
self.b = 0;
}
}
/// Compute Adler-32 hash on `Adler32Hash` type.
///
/// # Arguments
/// * `hash` - A Adler-32 hash-able type.
///
/// # Examples
/// ```rust
/// use simd_adler32::adler32;
///
/// let hash = adler32(b"Adler-32");
/// println!("{}", hash); // 800813569
/// ```
pub fn adler32<H: Adler32Hash>(hash: &H) -> u32 {
hash.hash()
}
/// A Adler-32 hash-able type.
pub trait Adler32Hash {
/// Feeds this value into `Adler32`.
fn hash(&self) -> u32;
}
impl Default for Adler32 {
fn default() -> Self {
Self {
a: 1,
b: 0,
update: get_imp(),
}
}
}
#[cfg(feature = "std")]
pub mod read {
//! Reader-based hashing.
//!
//! # Example
//! ```rust
//! use std::io::Cursor;
//! use simd_adler32::read::adler32;
//!
//! let mut reader = Cursor::new(b"Hello there");
//! let hash = adler32(&mut reader).unwrap();
//!
//! println!("{}", hash) // 800813569
//! ```
use crate::Adler32;
use std::io::{Read, Result};
/// Compute Adler-32 hash on reader until EOF.
///
/// # Example
/// ```rust
/// use std::io::Cursor;
/// use simd_adler32::read::adler32;
///
/// let mut reader = Cursor::new(b"Hello there");
/// let hash = adler32(&mut reader).unwrap();
///
/// println!("{}", hash) // 800813569
/// ```
pub fn adler32<R: Read>(reader: &mut R) -> Result<u32> {
let mut hash = Adler32::new();
let mut buf = [0; 4096];
loop {
match reader.read(&mut buf) {
Ok(0) => return Ok(hash.finish()),
Ok(n) => {
hash.write(&buf[..n]);
}
Err(err) => return Err(err),
}
}
}
}
#[cfg(feature = "std")]
pub mod bufread {
//! BufRead-based hashing.
//!
//! Separate `BufRead` trait implemented to allow for custom buffer size optimization.
//!
//! # Example
//! ```rust
//! use std::io::{Cursor, BufReader};
//! use simd_adler32::bufread::adler32;
//!
//! let mut reader = Cursor::new(b"Hello there");
//! let mut reader = BufReader::new(reader);
//! let hash = adler32(&mut reader).unwrap();
//!
//! println!("{}", hash) // 800813569
//! ```
use crate::Adler32;
use std::io::{BufRead, ErrorKind, Result};
/// Compute Adler-32 hash on buf reader until EOF.
///
/// # Example
/// ```rust
/// use std::io::{Cursor, BufReader};
/// use simd_adler32::bufread::adler32;
///
/// let mut reader = Cursor::new(b"Hello there");
/// let mut reader = BufReader::new(reader);
/// let hash = adler32(&mut reader).unwrap();
///
/// println!("{}", hash) // 800813569
/// ```
pub fn adler32<R: BufRead>(reader: &mut R) -> Result<u32> {
let mut hash = Adler32::new();
loop {
let consumed = match reader.fill_buf() {
Ok(buf) => {
if buf.is_empty() {
return Ok(hash.finish());
}
hash.write(buf);
buf.len()
}
Err(err) => match err.kind() {
ErrorKind::Interrupted => continue,
ErrorKind::UnexpectedEof => return Ok(hash.finish()),
_ => return Err(err),
},
};
reader.consume(consumed);
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn test_from_checksum() {
let buf = b"rust is pretty cool man";
let sum = 0xdeadbeaf;
let mut simd = super::Adler32::from_checksum(sum);
let mut adler = adler::Adler32::from_checksum(sum);
simd.write(buf);
adler.write_slice(buf);
let simd = simd.finish();
let scalar = adler.checksum();
assert_eq!(simd, scalar);
}
}