Initial vendor packages

Signed-off-by: Valentin Popov <valentin@popov.link>
This commit is contained in:
2024-01-08 01:21:28 +04:00
parent 5ecd8cf2cb
commit 1b6a04ca55
7309 changed files with 2160054 additions and 0 deletions

View File

@ -0,0 +1,412 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Fallback implementation using global locks.
//
// This implementation uses seqlock for global locks.
//
// This is basically based on global locks in crossbeam-utils's `AtomicCell`,
// but seqlock is implemented in a way that does not depend on UB
// (see comments in optimistic_read method in atomic! macro for details).
//
// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
// type and the value type must be the same.
// Allow `dead_code` in this module whenever any one of the three target configurations
// listed in the `any(...)` below applies.
//
// NOTE(review): all three arms involve the outline-atomics cfgs, so presumably only part of
// this module ends up being used in those builds — confirm against the callers.
#![cfg_attr(
any(
// Arm 1: x86_64 (excluding SGX and Miri).
all(
target_arch = "x86_64",
not(portable_atomic_no_cmpxchg16b_target_feature),
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
),
// Arm 2: powerpc64 with the `fallback` feature on Linux, Android, or FreeBSD
// (excluding Miri and ThreadSanitizer builds).
all(
target_arch = "powerpc64",
feature = "fallback",
not(portable_atomic_no_outline_atomics),
portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
any(
all(
target_os = "linux",
any(
target_env = "gnu",
all(
any(target_env = "musl", target_env = "ohos"),
not(target_feature = "crt-static"),
),
portable_atomic_outline_atomics,
),
),
target_os = "android",
target_os = "freebsd",
),
not(any(miri, portable_atomic_sanitize_thread)),
),
// Arm 3: arm on Linux or Android, when inline assembly is not disabled.
all(
target_arch = "arm",
not(portable_atomic_no_asm),
any(target_os = "linux", target_os = "android"),
not(portable_atomic_no_outline_atomics),
),
),
allow(dead_code)
)]
#[macro_use]
pub(crate) mod utils;
// Use the "wide" sequence lock if the pointer width is <= 32, to protect its counter against
// wraparound.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32 bits wide and may
// be vulnerable to wraparound. That is mostly okay, since on such primitive hardware the
// counter will not be incremented that quickly.
//
// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement normal sequence lock.
cfg_has_fast_atomic_64! {
mod seq_lock;
}
cfg_no_fast_atomic_64! {
#[path = "seq_lock_wide.rs"]
mod seq_lock;
}
use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
use seq_lock::{SeqLock, SeqLockWriteGuard};
use utils::CachePadded;
// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce chunks of byte-wise atomic memcpy.
use seq_lock::{AtomicChunk, Chunk};
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
/// Returns the global sequence lock responsible for the atomic located at address `addr`.
///
/// The lock is chosen by reducing `addr` modulo the size of a fixed, static lock table, so
/// distinct addresses may share a lock.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The table size is prime so that `addr % LOCK_COUNT` is dispersed across every slot.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so its actual concurrency level is smaller.
    const LOCK_COUNT: usize = 67;
    // Initializer for one table slot: a cache-padded, unlocked sequence lock.
    #[allow(clippy::declare_interior_mutable_const)]
    const INIT: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static TABLE: [CachePadded<SeqLock>; LOCK_COUNT] = [
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT,
        INIT, INIT, INIT, INIT, INIT, INIT, INIT,
    ];
    // With a constant modulus the compiler can lower this to a short sequence of cheap
    // arithmetic operations instead of a slow modulo instruction.
    let slot = addr % LOCK_COUNT;
    &TABLE[slot]
}
// Defines `$atomic_type`, a lock-based atomic wrapper around `$int_type` that is aligned to
// `$align` bytes.
//
// Every operation selects a global sequence lock from the address of the value (see `lock`).
// The value itself is always accessed as an array of `AtomicChunk`s with relaxed atomic
// loads/stores, so racing readers and writers never perform non-atomic accesses.
macro_rules! atomic {
($atomic_type:ident, $int_type:ident, $align:literal) => {
// `repr(C)` plus the explicit alignment; the only field is the wrapped integer.
#[repr(C, align($align))]
pub(crate) struct $atomic_type {
v: UnsafeCell<$int_type>,
}
// Private helpers: chunk-wise access to the stored value.
impl $atomic_type {
// Number of `Chunk`s that make up one `$int_type`.
const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();
// Reinterprets the storage of the value as an array of `Self::LEN` atomic chunks.
//
// The static assertions require that the value spans more than one chunk and that its
// size is an exact multiple of the chunk size.
#[inline]
unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
static_assert!($atomic_type::LEN > 1);
static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);
// SAFETY: the caller must uphold the safety contract for `chunks`.
unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
}
// Reads the value chunk by chunk with relaxed atomic loads, without taking the lock.
//
// The result may be torn if a writer runs concurrently; callers either validate the
// read against the lock's stamp (see `load`) or hold the write lock (see `read`).
#[inline]
fn optimistic_read(&self) -> $int_type {
// Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
// SAFETY:
// - There are no threads that perform non-atomic concurrent write operations.
// - There is no writer that updates the value using atomic operations of different granularity.
//
// If the atomic operation is not used here, it will cause a data race
// when `write` performs concurrent write operation.
// Such a data race is sometimes considered virtually unproblematic
// in SeqLock implementations:
//
// - https://github.com/Amanieu/seqlock/issues/2
// - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
// - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
//
// However, in our use case, the implementation that loads/stores the value as
// chunks of usize is fast enough and sound, so we use that implementation.
//
// See also atomic-memcpy crate, a generic implementation of this pattern:
// https://github.com/taiki-e/atomic-memcpy
let chunks = unsafe { self.chunks() };
for i in 0..Self::LEN {
dst[i] = chunks[i].load(Ordering::Relaxed);
}
// SAFETY: integers are plain old data types so we can always transmute to them.
unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
}
// Reads the value while the write lock is held; the guard parameter is only a witness
// that the caller owns the lock.
#[inline]
fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
// This calls optimistic_read, which can return a torn value, but the resulting value
// is guaranteed not to be torn because we hold the lock to write.
self.optimistic_read()
}
// Stores `val` chunk by chunk with relaxed atomic stores; the guard parameter is only a
// witness that the caller owns the write lock.
#[inline]
fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
// SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
// SAFETY:
// - The guard guarantees that we hold the lock to write.
// - There are no threads that perform non-atomic concurrent read or write operations.
//
// See optimistic_read for the reason that atomic operations are used here.
let chunks = unsafe { self.chunks() };
for i in 0..Self::LEN {
chunks[i].store(val[i], Ordering::Relaxed);
}
}
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by the lock and atomic operation.
unsafe impl Sync for $atomic_type {}
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
// Crate-visible API.
//
// Apart from the ordering assertions in `load`, `store`, and `compare_exchange`, the
// `Ordering` arguments are not used: every operation goes through the sequence lock.
impl $atomic_type {
// Creates a new atomic holding `v`.
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: UnsafeCell::new(v) }
}
// Always `false`: this implementation is lock-based.
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::is_always_lock_free()
}
// Always `false`: this implementation is lock-based.
#[inline]
pub(crate) const fn is_always_lock_free() -> bool {
false
}
// Returns a mutable reference to the value; no locking is needed because `&mut self`
// guarantees exclusive access.
#[inline]
pub(crate) fn get_mut(&mut self) -> &mut $int_type {
// SAFETY: the mutable reference guarantees unique ownership.
// (UnsafeCell::get_mut requires Rust 1.50)
unsafe { &mut *self.v.get() }
}
// Consumes the atomic and returns the contained value.
#[inline]
pub(crate) fn into_inner(self) -> $int_type {
self.v.into_inner()
}
// Loads the value.
//
// First attempts a lock-free read validated against the lock's stamp; if the lock is
// held or the stamp changed in the meantime, falls back to reading under the write lock.
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
let lock = lock(self.v.get() as usize);
// Try doing an optimistic read first.
if let Some(stamp) = lock.optimistic_read() {
let val = self.optimistic_read();
if lock.validate_read(stamp) {
return val;
}
}
// Grab a regular write lock so that writers don't starve this load.
let guard = lock.write();
let val = self.read(&guard);
// The value hasn't been changed. Drop the guard without incrementing the stamp.
guard.abort();
val
}
// Stores `val` under the write lock; the guard is dropped on return, which releases the
// lock and advances the stamp.
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let guard = lock(self.v.get() as usize).write();
self.write(val, &guard)
}
// Replaces the value with `val` and returns the previous value.
#[inline]
pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(val, &guard);
prev
}
// Stores `new` if the current value equals `current`.
//
// Returns `Ok(previous)` if the store happened and `Err(previous)` otherwise.
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
if prev == current {
self.write(new, &guard);
Ok(prev)
} else {
// The value hasn't been changed. Drop the guard without incrementing the stamp.
guard.abort();
Err(prev)
}
}
// Same as `compare_exchange`: this simply forwards to the strong version.
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
self.compare_exchange(current, new, success, failure)
}
// Wrapping addition; returns the previous value.
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev.wrapping_add(val), &guard);
prev
}
// Wrapping subtraction; returns the previous value.
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev.wrapping_sub(val), &guard);
prev
}
// Bitwise AND; returns the previous value.
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev & val, &guard);
prev
}
// Bitwise NAND, i.e. `!(prev & val)`; returns the previous value.
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(!(prev & val), &guard);
prev
}
// Bitwise OR; returns the previous value.
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev | val, &guard);
prev
}
// Bitwise XOR; returns the previous value.
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev ^ val, &guard);
prev
}
// Stores the maximum of the current value and `val`; returns the previous value.
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(core::cmp::max(prev, val), &guard);
prev
}
// Stores the minimum of the current value and `val`; returns the previous value.
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(core::cmp::min(prev, val), &guard);
prev
}
// Bitwise NOT of the current value; returns the previous value.
#[inline]
pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(!prev, &guard);
prev
}
// Like `fetch_not`, but discards the previous value.
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
// Wrapping negation of the current value; returns the previous value.
#[inline]
pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
let guard = lock(self.v.get() as usize).write();
let prev = self.read(&guard);
self.write(prev.wrapping_neg(), &guard);
prev
}
// Like `fetch_neg`, but discards the previous value.
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
// Returns a raw pointer to the underlying integer.
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
self.v.get()
}
}
};
}
// 64-bit fallback types: compiled only for tests or when the target has no native 64-bit
// atomics. Which cfg expresses "no native 64-bit atomics" depends on whether
// `cfg(target_has_atomic)` is usable (`portable_atomic_no_cfg_target_has_atomic`).
// The definitions are additionally wrapped in `cfg_no_fast_atomic_64!`.
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(test, not(target_has_atomic = "64")))
)]
cfg_no_fast_atomic_64! {
atomic!(AtomicI64, i64, 8);
atomic!(AtomicU64, u64, 8);
}
// 128-bit fallback types are defined unconditionally, with 16-byte alignment.
atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);
// Unit tests: instantiate the shared test macros for the fallback integer types.
#[cfg(test)]
mod tests {
use super::*;
// The 64-bit types are tested under the same `cfg_no_fast_atomic_64!` wrapper that is used
// for their definitions.
cfg_no_fast_atomic_64! {
test_atomic_int!(i64);
test_atomic_int!(u64);
}
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
cfg_no_fast_atomic_64! {
stress_test!(u64);
}
stress_test!(u128);
}

View File

@ -0,0 +1,172 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Helper for outline-atomics.
//
// On architectures where DW atomics are not supported on older CPUs, we use
// fallback implementation when DW atomic instructions are not supported and
// outline-atomics is enabled.
//
// This module provides helpers to implement them.
use core::sync::atomic::Ordering;
// Double-width ("DW") integer type and the matching unsigned/signed fallback atomics for the
// current architecture: 128-bit on x86_64 and powerpc64, 64-bit on arm.
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
pub(crate) type Udw = u128;
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
pub(crate) type AtomicUdw = super::super::fallback::AtomicU128;
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
pub(crate) type AtomicIdw = super::super::fallback::AtomicI128;
#[cfg(target_arch = "arm")]
pub(crate) type Udw = u64;
#[cfg(target_arch = "arm")]
pub(crate) type AtomicUdw = super::super::fallback::AtomicU64;
#[cfg(target_arch = "arm")]
pub(crate) type AtomicIdw = super::super::fallback::AtomicI64;
// Asserts that the function is called in the correct context.
//
// In debug builds, the arm for the current architecture asserts that the corresponding
// runtime query (`has_cmpxchg16b`, `has_quadword_atomics`, or `has_kuser_cmpxchg64`)
// returns `false`.
// NOTE(review): presumably these helpers are only meant to run when the native DW atomic
// path is unavailable — confirm at the call sites.
macro_rules! debug_assert_outline_atomics {
() => {
#[cfg(target_arch = "x86_64")]
{
debug_assert!(!super::detect::detect().has_cmpxchg16b());
}
#[cfg(target_arch = "powerpc64")]
{
debug_assert!(!super::detect::detect().has_quadword_atomics());
}
#[cfg(target_arch = "arm")]
{
debug_assert!(!super::has_kuser_cmpxchg64());
}
};
}
/// Loads the double-width value at `src` through the lock-based fallback atomic.
///
/// # Safety
///
/// `src` is dereferenced as an `AtomicUdw`; the caller must uphold the safety contract
/// that makes that dereference valid.
#[cold]
pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw {
    debug_assert_outline_atomics!();
    #[allow(clippy::cast_ptr_alignment)]
    // SAFETY: the caller must uphold the safety contract.
    let cell = unsafe { &*(src as *const AtomicUdw) };
    cell.load(order)
}
// Aliases of `atomic_load` with the ordering argument fixed.
// NOTE(review): `fn_alias!` is defined elsewhere; its exact expansion is not visible here.
fn_alias! {
#[cold]
pub(crate) unsafe fn(src: *mut Udw) -> Udw;
// fallback's atomic load has at least acquire semantics.
// The non-SeqCst alias is compiled out on arm and x86_64.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
atomic_load_non_seqcst = atomic_load(Ordering::Acquire);
atomic_load_seqcst = atomic_load(Ordering::SeqCst);
}
/// Stores `val` to the double-width location `dst` through the lock-based fallback atomic.
///
/// # Safety
///
/// `dst` is dereferenced as an `AtomicUdw`; the caller must uphold the safety contract
/// that makes that dereference valid.
#[cold]
pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) {
    debug_assert_outline_atomics!();
    #[allow(clippy::cast_ptr_alignment)]
    // SAFETY: the caller must uphold the safety contract.
    let cell = unsafe { &*(dst as *const AtomicUdw) };
    cell.store(val, order);
}
// Aliases of `atomic_store` with the ordering argument fixed.
// NOTE(review): `fn_alias!` is defined elsewhere; its exact expansion is not visible here.
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, val: Udw);
// fallback's atomic store has at least release semantics.
// The non-SeqCst alias is compiled out on arm only (unlike the load/CAS/RMW aliases, which
// are also compiled out on x86_64).
#[cfg(not(target_arch = "arm"))]
atomic_store_non_seqcst = atomic_store(Ordering::Release);
atomic_store_seqcst = atomic_store(Ordering::SeqCst);
}
/// Compare-and-exchange on the double-width location `dst` through the lock-based fallback
/// atomic.
///
/// Returns the previous value together with a flag that is `true` when `new` was stored
/// (i.e. the previous value equalled `old`) and `false` otherwise.
///
/// # Safety
///
/// `dst` is dereferenced as an `AtomicUdw`; the caller must uphold the safety contract
/// that makes that dereference valid.
#[cold]
pub(crate) unsafe fn atomic_compare_exchange(
    dst: *mut Udw,
    old: Udw,
    new: Udw,
    success: Ordering,
    failure: Ordering,
) -> (Udw, bool) {
    debug_assert_outline_atomics!();
    #[allow(clippy::cast_ptr_alignment)]
    // SAFETY: the caller must uphold the safety contract.
    let cell = unsafe { &*(dst as *const AtomicUdw) };
    let outcome = cell.compare_exchange(old, new, success, failure);
    let exchanged = outcome.is_ok();
    match outcome {
        // Both variants carry the previous value.
        Ok(previous) | Err(previous) => (previous, exchanged),
    }
}
// Aliases of `atomic_compare_exchange` with the success/failure orderings fixed.
// NOTE(review): `fn_alias!` is defined elsewhere; its exact expansion is not visible here.
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool);
// fallback's atomic CAS has at least AcqRel semantics.
// The non-SeqCst alias is compiled out on arm and x86_64.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
atomic_compare_exchange_non_seqcst
= atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire);
atomic_compare_exchange_seqcst
= atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst);
}
// Defines a read-modify-write helper that takes an operand:
// `$name(dst, val, order)` forwards to `$atomic_type::$method_name(val, order)` on the
// fallback atomic at `dst` and returns the result as `Udw`.
//
// `val` is cast with `as _` and the result with `as Udw`, so `$atomic_type` may be the
// signed variant. Two fixed-ordering aliases are generated as well; the non-SeqCst one is
// compiled out on arm and x86_64.
macro_rules! atomic_rmw_3 {
(
$name:ident($atomic_type:ident::$method_name:ident),
$non_seqcst_alias:ident, $seqcst_alias:ident
) => {
#[cold]
pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw;
// fallback's atomic RMW has at least AcqRel semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
$non_seqcst_alias = $name(Ordering::AcqRel);
$seqcst_alias = $name(Ordering::SeqCst);
}
};
}
// Defines a read-modify-write helper without an operand:
// `$name(dst, order)` forwards to `$atomic_type::$method_name(order)` on the fallback atomic
// at `dst` and returns the result as `Udw`.
//
// Two fixed-ordering aliases are generated as well; the non-SeqCst one is compiled out on
// arm and x86_64.
macro_rules! atomic_rmw_2 {
(
$name:ident($atomic_type:ident::$method_name:ident),
$non_seqcst_alias:ident, $seqcst_alias:ident
) => {
#[cold]
pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(dst as *const $atomic_type)).$method_name(order) as Udw
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw) -> Udw;
// fallback's atomic RMW has at least AcqRel semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
$non_seqcst_alias = $name(Ordering::AcqRel);
$seqcst_alias = $name(Ordering::SeqCst);
}
};
}
// Read-modify-write helpers that take an operand.
atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst);
atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst);
atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst);
atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst);
atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst);
atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst);
atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst);
// Signed max/min go through `AtomicIdw`; the `u`-prefixed variants use `AtomicUdw`.
atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst);
atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst);
atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst);
atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst);
// Read-modify-write helpers without an operand.
atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst);
atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst);

View File

@ -0,0 +1,147 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs.
use core::{
mem::ManuallyDrop,
sync::atomic::{self, Ordering},
};
use super::utils::Backoff;
// See mod.rs for details.
// The stamp is 64 bits wide when the pointer width is 16 or 32 bits, and pointer-sized
// otherwise.
#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp;
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp;
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
pub(super) type Stamp = usize;
#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
pub(super) type Stamp = u64;
// See mod.rs for details.
// Data chunks use the same integer type as the stamp.
pub(super) type AtomicChunk = AtomicStamp;
pub(super) type Chunk = Stamp;
/// A simple stamped lock.
///
/// `new` starts the state at 0 and dropping a write guard advances the saved stamp by 2, so
/// an unlocked state is always even, while the value 1 marks the lock as held.
pub(super) struct SeqLock {
/// The current state of the lock.
///
/// All bits except the least significant one hold the current stamp. When locked, the state
/// equals 1 and doesn't contain a valid stamp.
state: AtomicStamp,
}
impl SeqLock {
#[inline]
pub(super) const fn new() -> Self {
Self { state: AtomicStamp::new(0) }
}
/// If not locked, returns the current stamp.
///
/// This method should be called before optimistic reads.
#[inline]
pub(super) fn optimistic_read(&self) -> Option<Stamp> {
let state = self.state.load(Ordering::Acquire);
if state == 1 {
None
} else {
Some(state)
}
}
/// Returns `true` if the current stamp is equal to `stamp`.
///
/// This method should be called after optimistic reads to check whether they are valid. The
/// argument `stamp` should correspond to the one returned by method `optimistic_read`.
#[inline]
pub(super) fn validate_read(&self, stamp: Stamp) -> bool {
atomic::fence(Ordering::Acquire);
self.state.load(Ordering::Relaxed) == stamp
}
/// Grabs the lock for writing.
#[inline]
pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
let mut backoff = Backoff::new();
loop {
let previous = self.state.swap(1, Ordering::Acquire);
if previous != 1 {
atomic::fence(Ordering::Release);
return SeqLockWriteGuard { lock: self, state: previous };
}
while self.state.load(Ordering::Relaxed) == 1 {
backoff.snooze();
}
}
}
}
/// An RAII guard that releases the lock and increments the stamp when dropped.
///
/// Call `abort` instead of dropping the guard to release the lock with the stamp unchanged.
#[must_use]
pub(super) struct SeqLockWriteGuard<'a> {
/// The parent lock.
lock: &'a SeqLock,
/// The stamp before locking.
state: Stamp,
}
impl SeqLockWriteGuard<'_> {
    /// Releases the lock and puts back the stamp that was current before locking.
    #[inline]
    pub(super) fn abort(self) {
        // `Drop::drop` is what increments the stamp, so keep it from running.
        let guard = ManuallyDrop::new(self);
        let saved_stamp = guard.state;
        // Put the original stamp back.
        //
        // Release ordering for synchronizing with `optimistic_read`.
        guard.lock.state.store(saved_stamp, Ordering::Release);
    }
}
impl Drop for SeqLockWriteGuard<'_> {
    /// Unlocks and publishes a new stamp: the saved stamp advanced by 2 (wrapping).
    #[inline]
    fn drop(&mut self) {
        let next_stamp = self.state.wrapping_add(2);
        // Release ordering for synchronizing with `optimistic_read`.
        self.lock.state.store(next_stamp, Ordering::Release);
    }
}
#[cfg(test)]
mod tests {
    use super::SeqLock;

    /// A completed write must invalidate any stamp taken before it.
    #[test]
    fn smoke() {
        let seq = SeqLock::new();
        let stamp_before = seq.optimistic_read().unwrap();
        assert!(seq.validate_read(stamp_before));
        // Acquire the write lock and release it right away.
        drop(seq.write());
        assert!(!seq.validate_read(stamp_before));
        let stamp_after = seq.optimistic_read().unwrap();
        assert_ne!(stamp_before, stamp_after);
    }

    /// An aborted write must leave the stamp untouched.
    #[test]
    fn test_abort() {
        let seq = SeqLock::new();
        let stamp_before = seq.optimistic_read().unwrap();
        seq.write().abort();
        let stamp_after = seq.optimistic_read().unwrap();
        assert_eq!(stamp_before, stamp_after, "aborted write does not update the stamp");
    }
}

View File

@ -0,0 +1,180 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs.
use core::{
mem::ManuallyDrop,
sync::atomic::{self, AtomicUsize, Ordering},
};
use super::utils::Backoff;
// See mod.rs for details.
// In this wide variant, data chunks are pointer-sized.
pub(super) type AtomicChunk = AtomicUsize;
pub(super) type Chunk = usize;
/// A simple stamped lock.
///
/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low
/// bits.
///
/// Dropping a write guard advances `state_lo` by 2 and, when that wraps around to 0, also
/// advances `state_hi` by 1.
pub(super) struct SeqLock {
/// The high bits of the current state of the lock.
state_hi: AtomicUsize,
/// The low bits of the current state of the lock.
///
/// All bits except the least significant one hold the current stamp. When locked, the state_lo
/// equals 1 and doesn't contain a valid stamp.
state_lo: AtomicUsize,
}
impl SeqLock {
/// Creates an unlocked lock with both halves of the state set to zero.
#[inline]
pub(super) const fn new() -> Self {
Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) }
}
/// If not locked, returns the current stamp.
///
/// This method should be called before optimistic reads.
///
/// The stamp is the pair `(state_hi, state_lo)`.
#[inline]
pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> {
// The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in
// `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`.
//
// As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1`
// happen before now; and therefore, (2) if `state_lo` is even, all writes within the
// critical section of (`state_hi`, `state_lo`) happen before now.
let state_hi = self.state_hi.load(Ordering::Acquire);
let state_lo = self.state_lo.load(Ordering::Acquire);
if state_lo == 1 {
None
} else {
Some((state_hi, state_lo))
}
}
/// Returns `true` if the current stamp is equal to `stamp`.
///
/// This method should be called after optimistic reads to check whether they are valid. The
/// argument `stamp` should correspond to the one returned by method `optimistic_read`.
#[inline]
pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool {
// Thanks to the fence, if we're noticing any modification to the data at the critical
// section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to state_lo should be
// visible.
atomic::fence(Ordering::Acquire);
// So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification
// to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped
// around.
//
// If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`.
let state_lo = self.state_lo.load(Ordering::Acquire);
// If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` also wrapped
// around, which we give up to correctly validate the read.
let state_hi = self.state_hi.load(Ordering::Relaxed);
// Except for the case that both `state_hi` and `state_lo` wrapped around, the following
// condition implies that we're noticing no modification to the data after the critical
// section of `(stamp.0, stamp.1)`.
(state_hi, state_lo) == stamp
}
/// Grabs the lock for writing.
///
/// Only `state_lo` is used as the lock word: it is swapped to 1, and the previous value is
/// kept in the returned guard.
#[inline]
pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
let mut backoff = Backoff::new();
loop {
let previous = self.state_lo.swap(1, Ordering::Acquire);
if previous != 1 {
// To synchronize with the acquire fence in `validate_read` via any modification to
// the data at the critical section of `(state_hi, previous)`.
atomic::fence(Ordering::Release);
return SeqLockWriteGuard { lock: self, state_lo: previous };
}
// The lock is held by someone else: back off until it no longer reads as locked, then
// retry the swap.
while self.state_lo.load(Ordering::Relaxed) == 1 {
backoff.snooze();
}
}
}
}
/// An RAII guard that releases the lock and increments the stamp when dropped.
///
/// Call `abort` instead of dropping the guard to release the lock with the stamp unchanged.
#[must_use]
pub(super) struct SeqLockWriteGuard<'a> {
/// The parent lock.
lock: &'a SeqLock,
/// The stamp before locking.
///
/// This is the value of the lock's `state_lo` that was replaced when the lock was taken.
state_lo: usize,
}
impl SeqLockWriteGuard<'_> {
    /// Releases the lock and puts back the low stamp bits that were current before locking.
    #[inline]
    pub(super) fn abort(self) {
        // `Drop::drop` is what increments the stamp, so keep it from running.
        let guard = ManuallyDrop::new(self);
        let saved_lo = guard.state_lo;
        // Put the original stamp back.
        //
        // Release ordering for synchronizing with `optimistic_read`.
        guard.lock.state_lo.store(saved_lo, Ordering::Release);
    }
}
impl Drop for SeqLockWriteGuard<'_> {
    /// Unlocks and publishes a new stamp: the saved low bits advanced by 2, carrying into
    /// the high bits when the low bits wrap around.
    #[inline]
    fn drop(&mut self) {
        let seq = self.lock;
        let next_lo = self.state_lo.wrapping_add(2);
        if next_lo == 0 {
            // The low bits wrapped around, so bump the high bits first.
            //
            // Release ordering for synchronizing with `optimistic_read`.
            let bumped_hi = seq.state_hi.load(Ordering::Relaxed).wrapping_add(1);
            seq.state_hi.store(bumped_hi, Ordering::Release);
        }
        // Storing the new low bits is what releases the lock.
        //
        // Release ordering for synchronizing with `optimistic_read`.
        seq.state_lo.store(next_lo, Ordering::Release);
    }
}
#[cfg(test)]
mod tests {
    use super::SeqLock;

    /// A completed write must invalidate any stamp taken before it.
    #[test]
    fn smoke() {
        let seq = SeqLock::new();
        let stamp_before = seq.optimistic_read().unwrap();
        assert!(seq.validate_read(stamp_before));
        // Acquire the write lock and release it right away.
        drop(seq.write());
        assert!(!seq.validate_read(stamp_before));
        let stamp_after = seq.optimistic_read().unwrap();
        assert_ne!(stamp_before, stamp_after);
    }

    /// An aborted write must leave the stamp untouched.
    #[test]
    fn test_abort() {
        let seq = SeqLock::new();
        let stamp_before = seq.optimistic_read().unwrap();
        seq.write().abort();
        let stamp_after = seq.optimistic_read().unwrap();
        assert_eq!(stamp_before, stamp_after, "aborted write does not update the stamp");
    }
}

View File

@ -0,0 +1,141 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
use core::ops;
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/9384f1eb2b356364e201ad38545e03c837d55f3a/crossbeam-utils/src/cache_padded.rs.
/// Pads and aligns a value to the length of a cache line.
///
/// The alignment is selected per target architecture by the `cfg_attr`s below; the final,
/// catch-all attribute excludes every architecture named by the earlier ones, so exactly one
/// `repr(align(N))` applies to any given target.
// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache
// lines at a time, so we have to align to 128 bytes rather than 64.
//
// Sources:
// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
//
// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size.
//
// Sources:
// - https://www.mono-project.com/news/2016/09/12/arm64-icache/
//
// powerpc64 has 128-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
#[cfg_attr(
any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"),
repr(align(128))
)]
// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
#[cfg_attr(
any(
target_arch = "arm",
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "sparc",
target_arch = "hexagon",
),
repr(align(32))
)]
// m68k has 16-byte cache line size.
//
// Sources:
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
#[cfg_attr(target_arch = "m68k", repr(align(16)))]
// s390x has 256-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
#[cfg_attr(target_arch = "s390x", repr(align(256)))]
// x86, wasm, riscv, and sparc64 have 64-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
//
// All others are assumed to have 64-byte cache line size.
#[cfg_attr(
not(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64",
target_arch = "arm",
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "sparc",
target_arch = "hexagon",
target_arch = "m68k",
target_arch = "s390x",
)),
repr(align(64))
)]
pub(crate) struct CachePadded<T> {
// The wrapped value; reachable through `Deref`.
value: T,
}
impl<T> CachePadded<T> {
#[inline]
pub(crate) const fn new(value: T) -> Self {
Self { value }
}
}
impl<T> ops::Deref for CachePadded<T> {
type Target = T;
#[inline]
fn deref(&self) -> &Self::Target {
&self.value
}
}
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs.
// Adjusted to reduce spinning.
/// Exponential backoff helper for spin loops.
pub(crate) struct Backoff {
    /// Current backoff level; each `snooze` spins `1 << step` times while ramping up.
    step: u32,
}

// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48
/// Highest `step` at which `snooze` still ramps up.
const SPIN_LIMIT: u32 = 4;

impl Backoff {
    /// Creates a backoff at its lowest level.
    #[inline]
    pub(crate) const fn new() -> Self {
        Backoff { step: 0 }
    }

    /// Waits a little before the caller retries.
    ///
    /// While `step` is at most `SPIN_LIMIT`, this spins `1 << step` times and then raises
    /// `step` by one. Past the limit, `step` stays put: with the `std` feature the thread
    /// yields to the scheduler, otherwise it keeps spinning `1 << step` times.
    #[inline]
    pub(crate) fn snooze(&mut self) {
        if self.step > SPIN_LIMIT {
            #[cfg(feature = "std")]
            std::thread::yield_now();
            #[cfg(not(feature = "std"))]
            Self::spin(self.step);
            return;
        }
        Self::spin(self.step);
        self.step += 1;
    }

    /// Issues `1 << step` spin-loop hints.
    #[inline]
    #[allow(deprecated)]
    fn spin(step: u32) {
        for _ in 0..1 << step {
            core::sync::atomic::spin_loop_hint();
        }
    }
}