Restaged repo; implemented allocator and runtime; added io_uring-backed async fs/net/channel/timer primitives

2026-03-19 17:54:29 -04:00
commit 3fd8209420
51 changed files with 11471 additions and 0 deletions

.gitignore vendored Normal file (+1 line)

@@ -0,0 +1 @@
/target

Cargo.lock generated Normal file (+199 lines)

@@ -0,0 +1,199 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "bytes"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
[[package]]
name = "futures-channel"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
dependencies = [
"futures-core",
]
[[package]]
name = "futures-core"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
[[package]]
name = "http"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
dependencies = [
"bytes",
"itoa",
]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
]
[[package]]
name = "http-body-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
"futures-core",
"http",
"http-body",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "hyper"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
dependencies = [
"atomic-waker",
"bytes",
"futures-channel",
"futures-core",
"http",
"http-body",
"httparse",
"itoa",
"pin-project-lite",
"pin-utils",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "itoa"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "libc"
version = "0.2.183"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
[[package]]
name = "pin-project-lite"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ruin-runtime"
version = "0.1.0"
dependencies = [
"bytes",
"http-body-util",
"hyper",
"libc",
"ruin-runtime-proc-macros",
]
[[package]]
name = "ruin-runtime-proc-macros"
version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
dependencies = [
"pin-project-lite",
]
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]

Cargo.toml Normal file (+3 lines)

@@ -0,0 +1,3 @@
[workspace]
resolver = "3"
members = ["lib/*"]

lib/runtime/Cargo.toml Normal file (+13 lines)

@@ -0,0 +1,13 @@
[package]
name = "ruin-runtime"
version = "0.1.0"
edition = "2024"
[dependencies]
hyper = { version = "1.8", default-features = false, features = ["client", "http1"] }
libc = "0.2"
ruin_runtime_proc_macros = { package = "ruin-runtime-proc-macros", path = "../runtime_proc_macros" }
[dev-dependencies]
bytes = "1"
http-body-util = "0.1"


@@ -0,0 +1,81 @@
use ruin_runtime::fs::{self, File};
use std::path::PathBuf;
fn preview(bytes: &[u8]) -> String {
String::from_utf8_lossy(bytes).replace('\n', "\\n")
}
#[ruin_runtime::async_main]
async fn main() {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let cargo_toml = manifest_dir.join("Cargo.toml");
let src_dir = manifest_dir.join("src");
println!("manifest dir: {}", manifest_dir.display());
let cargo_meta = fs::metadata(&cargo_toml)
.await
.expect("Cargo.toml metadata should load");
println!(
"Cargo.toml: {} bytes, file={}, empty={}",
cargo_meta.len(),
cargo_meta.is_file(),
cargo_meta.is_empty()
);
let mut file = File::open(&cargo_toml)
.await
.expect("Cargo.toml should open for reading");
let file_meta = file
.metadata()
.await
.expect("opened file metadata should load");
println!("opened file metadata size: {}", file_meta.len());
let mut sequential = vec![0; 96];
let sequential_read = file
.read(&mut sequential)
.await
.expect("sequential read should succeed");
sequential.truncate(sequential_read);
println!(
"sequential read ({sequential_read} bytes): {}",
preview(&sequential)
);
let cloned = file.try_clone().await.expect("file clone should succeed");
let mut positioned = [0u8; 48];
let positioned_read = cloned
.read_at(0, &mut positioned)
.await
.expect("positioned read should succeed");
println!(
"positioned read ({positioned_read} bytes): {}",
preview(&positioned[..positioned_read])
);
let cargo_text = fs::read_to_string(&cargo_toml)
.await
.expect("read_to_string should succeed");
println!("Cargo.toml line count: {}", cargo_text.lines().count());
let mut dir = fs::read_dir(&src_dir)
.await
.expect("src directory should be readable");
let mut entries = Vec::new();
while let Some(entry) = dir
.next_entry()
.await
.expect("read_dir stream should succeed")
{
let metadata = entry.metadata().await.expect("entry metadata should load");
let kind = if metadata.is_dir() { "dir" } else { "file" };
entries.push((entry.file_name().to_string_lossy().into_owned(), kind));
}
entries.sort_by(|left, right| left.0.cmp(&right.0));
println!("src entries:");
for (name, kind) in entries.iter().take(8) {
println!(" - {name} ({kind})");
}
}


@@ -0,0 +1,160 @@
use ruin_runtime::channel::{mpsc, oneshot};
use ruin_runtime::{queue_future, spawn_worker, time::sleep};
use std::fmt;
use std::sync::OnceLock;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
static START: OnceLock<Instant> = OnceLock::new();
static ACTUAL_ORDER: AtomicUsize = AtomicUsize::new(1);
macro_rules! log_event {
($expected:literal, $($arg:tt)*) => {{
log_event_impl($expected, format_args!($($arg)*));
}};
}
fn log_event_impl(expected: usize, message: fmt::Arguments<'_>) {
let actual = ACTUAL_ORDER.fetch_add(1, Ordering::SeqCst);
let elapsed = START
.get()
.expect("showcase start time should be initialized")
.elapsed()
.as_millis();
println!(
"[actual {actual:02} | expected {expected:02} | +{elapsed:04}ms | ts {}] {message}",
unix_timestamp_millis(),
);
}
fn unix_timestamp_millis() -> String {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("system clock should be after the Unix epoch");
format!("{}.{:03}", now.as_secs(), now.subsec_millis())
}
enum WorkerEvent {
Log(String),
PresentRequest {
frame: &'static str,
ack: oneshot::Sender<&'static str>,
},
}
#[ruin_runtime::async_main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
START.get_or_init(Instant::now);
let (job_tx, mut job_rx) = mpsc::channel::<&'static str>(1);
let (event_tx, mut event_rx) = mpsc::unbounded_channel::<WorkerEvent>();
let worker = spawn_worker(
move || {
queue_future(async move {
while let Some(job) = job_rx.recv().await {
event_tx
.send(WorkerEvent::Log(format!(
"[worker] accepted job `{job}` from main thread"
)))
.unwrap_or_else(|_| {
panic!("worker should be able to report accepted jobs")
});
sleep(Duration::from_millis(20)).await;
if job == "upload-frame" {
let (ack_tx, mut ack_rx) = oneshot::channel();
event_tx
.send(WorkerEvent::PresentRequest {
frame: job,
ack: ack_tx,
})
.unwrap_or_else(|_| {
panic!("worker should be able to request presentation")
});
let ack = ack_rx
.recv()
.await
.expect("main thread should acknowledge frame");
event_tx
.send(WorkerEvent::Log(format!(
"[worker] got oneshot ack `{ack}` for `{job}`"
)))
.unwrap_or_else(|_| {
panic!("worker should be able to report ack reception")
});
}
}
event_tx
.send(WorkerEvent::Log(
"[worker] bounded command channel closed; worker is done".into(),
))
.unwrap_or_else(|_| panic!("worker should be able to report shutdown"));
});
},
|| log_event!(12, "[main] worker exited"),
);
queue_future(async move {
log_event!(1, "[main] bounded mpsc send: enqueue `prepare-scene`");
job_tx
.send("prepare-scene")
.await
.expect("prepare-scene should be sent");
log_event!(
2,
"[main] bounded mpsc send: enqueue `upload-frame` (fits once worker drains capacity)"
);
job_tx
.send("upload-frame")
.await
.expect("upload-frame should be sent");
log_event!(
3,
"[main] bounded mpsc send: enqueue `flush-stats` (waits for capacity/backpressure)"
);
job_tx
.send("flush-stats")
.await
.expect("flush-stats should be sent");
log_event!(
5,
"[main] drop bounded sender to close worker command stream"
);
drop(job_tx);
});
let mut event_count = 0usize;
while let Some(event) = event_rx.recv().await {
event_count += 1;
match event {
WorkerEvent::Log(message) => {
let expected = match event_count {
1 => 4,
2 => 6,
4 => 9,
5 => 10,
6 => 11,
_ => 10 + event_count,
};
log_event_impl(expected, format_args!("{message}"));
}
WorkerEvent::PresentRequest { frame, ack } => {
log_event!(
7,
"[main] unbounded mpsc recv: worker requests presentation for `{frame}`"
);
ack.send("presented")
.expect("main thread should be able to answer oneshot");
log_event!(8, "[main] oneshot send: acknowledged frame presentation");
}
}
}
let _ = worker;
Ok(())
}


@@ -0,0 +1,75 @@
use std::io::{Read as _, Write as _};
use std::net::TcpListener as StdTcpListener;
use std::thread;
use std::time::Duration;
use bytes::Bytes;
use http_body_util::{BodyExt, Empty};
use hyper::Request;
use ruin_runtime::time::sleep;
use ruin_runtime::{clear_interval, queue_future, set_interval};
fn spawn_demo_server() -> std::io::Result<(std::net::SocketAddr, thread::JoinHandle<()>)> {
let listener = StdTcpListener::bind(("127.0.0.1", 0))?;
let address = listener.local_addr()?;
let handle = thread::Builder::new()
.name("hyper-demo-server".into())
.spawn(move || {
let (mut stream, peer) = listener.accept().expect("demo server should accept");
let mut request = [0; 1024];
let read = stream.read(&mut request).expect("demo server should read");
println!("[server] accepted {peer}, saw {} request bytes", read);
let response = concat!(
"HTTP/1.1 200 OK\r\n",
"content-type: text/plain; charset=utf-8\r\n",
"content-length: 24\r\n",
"connection: close\r\n",
"\r\n",
"hello from ruin runtime!"
);
stream
.write_all(response.as_bytes())
.expect("demo server should reply");
})
.map_err(std::io::Error::other)?;
Ok((address, handle))
}
#[ruin_runtime::async_main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let (address, server) = spawn_demo_server()?;
let stream = ruin_runtime::net::TcpStream::connect(address).await?;
let (mut sender, connection) = hyper::client::conn::http1::handshake(stream).await?;
queue_future(async move {
if let Err(error) = connection.await {
eprintln!("[runtime] hyper connection ended with error: {error}");
}
});
println!("Sleeping a moment to let the server start...");
let interval = set_interval(Duration::from_millis(400), || println!("..."));
sleep(Duration::from_secs(2)).await;
clear_interval(&interval);
println!("Let's go!");
let request = Request::builder()
.method("GET")
.uri(format!("http://{address}/demo"))
.header("host", address.to_string())
.body(Empty::<Bytes>::new())?;
let response = sender.send_request(request).await?;
let status = response.status();
let body = response.into_body().collect().await?.to_bytes();
println!(
"[client] status={status}, body={}",
String::from_utf8_lossy(&body)
);
server
.join()
.expect("demo server thread should exit cleanly");
Ok(())
}


@@ -0,0 +1,228 @@
use ruin_runtime::{
IntervalHandle, ThreadHandle, clear_interval, current_thread_handle, queue_future,
queue_microtask, queue_task, set_interval, set_timeout, spawn_worker, yield_now,
};
use std::cell::{Cell, RefCell};
use std::fmt;
use std::rc::Rc;
use std::sync::OnceLock;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
static START: OnceLock<Instant> = OnceLock::new();
static ACTUAL_ORDER: AtomicUsize = AtomicUsize::new(1);
macro_rules! log_event {
($expected:literal, $($arg:tt)*) => {{
log_event_impl($expected, format_args!($($arg)*));
}};
}
fn log_event_impl(expected: usize, message: fmt::Arguments<'_>) {
let actual = ACTUAL_ORDER.fetch_add(1, Ordering::SeqCst);
let elapsed = START
.get()
.expect("showcase start time should be initialized")
.elapsed()
.as_millis();
println!(
"[actual {actual:02} | expected {expected:02} | +{elapsed:04}ms | ts {}] {message}",
unix_timestamp_millis(),
);
}
fn unix_timestamp_millis() -> String {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("system clock should be after the Unix epoch");
format!("{}.{:03}", now.as_secs(), now.subsec_millis())
}
fn queue_log(handle: &ThreadHandle, expected: usize, message: impl Into<String>) {
let message = message.into();
let queued = handle.queue_task(move || {
log_event_impl(expected, format_args!("{message}"));
});
assert!(queued, "main thread should accept log task {expected}");
}
fn queue_log_microtask(handle: &ThreadHandle, expected: usize, message: impl Into<String>) {
let message = message.into();
let queued = handle.queue_microtask(move || {
log_event_impl(expected, format_args!("{message}"));
});
assert!(queued, "main thread should accept log microtask {expected}");
}
#[ruin_runtime::main]
fn main() {
START.get_or_init(Instant::now);
queue_microtask(|| log_event!(1, "[main] boot microtask: prime UI state"));
queue_future(async {
log_event!(2, "[main] future: fetch scene metadata");
yield_now().await;
log_event!(4, "[main] future: scene metadata cached");
});
queue_microtask(|| {
log_event!(3, "[main] microtask queued immediately");
});
let main_handle = current_thread_handle();
queue_task(move || {
log_event!(
5,
"[main] boot task: paint first frame and start background worker"
);
let dashboard_interval = Rc::new(RefCell::new(None::<IntervalHandle>));
let dashboard_ticks = Rc::new(Cell::new(0usize));
{
let slot = Rc::clone(&dashboard_interval);
let ticks = Rc::clone(&dashboard_ticks);
set_dashboard_interval(slot, ticks);
}
set_timeout(Duration::from_millis(30), || {
log_event!(11, "[main] timeout: network snapshot ready");
});
let main_for_worker = main_handle.clone();
let worker = spawn_worker(
move || {
queue_log(
&main_for_worker,
6,
"[worker->main] startup task: prepare upload queue",
);
{
let main_for_microtask = main_for_worker.clone();
queue_microtask(move || {
queue_log(
&main_for_microtask,
7,
"[worker->main] microtask: inspect staging buffers",
);
});
}
{
let main_for_future = main_for_worker.clone();
queue_future(async move {
queue_log(
&main_for_future,
8,
"[worker->main] future: compile shader variants",
);
yield_now().await;
queue_log(
&main_for_future,
9,
"[worker->main] future: shader cache is warm",
);
});
}
{
let main_for_task = main_for_worker.clone();
queue_task(move || {
queue_log(
&main_for_task,
10,
"[worker->main] task: upload static geometry",
);
});
}
let sample_interval = Rc::new(RefCell::new(None::<IntervalHandle>));
let sample_count = Rc::new(Cell::new(0usize));
{
let slot = Rc::clone(&sample_interval);
let count = Rc::clone(&sample_count);
let main_for_samples = main_for_worker.clone();
let handle = set_interval(Duration::from_millis(40), move || {
let next = count.get() + 1;
count.set(next);
queue_log(
&main_for_samples,
if next == 1 { 12 } else { 17 },
format!("[worker->main] interval: sample batch {next} ready"),
);
if next == 2 {
let interval = slot.borrow_mut().take().expect("interval should exist");
clear_interval(&interval);
queue_log(&main_for_samples, 18, "[worker->main] interval stopped");
}
});
*sample_interval.borrow_mut() = Some(handle);
}
{
let main_for_flush = main_for_worker.clone();
set_timeout(Duration::from_millis(110), move || {
queue_log_microtask(
&main_for_flush,
20,
"[worker->main] timeout: flushed final upload batch",
);
});
}
},
|| log_event!(21, "[main] worker exited"),
);
set_timeout(Duration::from_millis(70), move || {
let queued = worker.queue_task({
let main_from_remote_task = main_handle.clone();
move || {
queue_log(
&main_from_remote_task,
15,
"[worker->main] remote task: upload late texture atlas",
);
let main_from_remote_microtask = main_from_remote_task.clone();
queue_microtask(move || {
queue_log(
&main_from_remote_microtask,
16,
"[worker->main] remote microtask: retire staging pages",
);
});
}
});
log_event!(
14,
"[main] timeout: queue late texture upload on worker (queued={queued})"
);
});
set_timeout(Duration::from_millis(140), || {
log_event!(22, "[main] final timeout: commit frame statistics");
});
});
}
fn set_dashboard_interval(slot: Rc<RefCell<Option<IntervalHandle>>>, ticks: Rc<Cell<usize>>) {
let slot_for_callback = Rc::clone(&slot);
let handle = set_interval(Duration::from_millis(50), move || {
let next = ticks.get() + 1;
ticks.set(next);
if next == 1 {
log_event!(13, "[main] interval: dashboard tick 1");
return;
}
let interval = slot_for_callback
.borrow_mut()
.take()
.expect("interval should exist");
clear_interval(&interval);
log_event!(19, "[main] interval: dashboard tick 2 and stop");
});
*slot.borrow_mut() = Some(handle);
}


@@ -0,0 +1,4 @@
//! Async channels for inter-thread communication.
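//!
//! A minimal, hypothetical sketch of both flavors; the values and the capacity
//! of 1 are illustrative, and the code assumes it runs inside a future queued
//! on the runtime:
//!
//! ```no_run
//! use ruin_runtime::channel::{mpsc, oneshot};
//!
//! # async fn demo() {
//! // Bounded mpsc: `send` awaits until the receiver frees capacity.
//! let (tx, mut rx) = mpsc::channel::<u32>(1);
//! tx.send(1).await.expect("receiver should be open");
//! assert_eq!(rx.recv().await, Some(1));
//!
//! // Oneshot: `send` consumes the sender and delivers a single value.
//! let (ack_tx, mut ack_rx) = oneshot::channel::<&'static str>();
//! ack_tx.send("done").expect("receiver should be open");
//! assert_eq!(ack_rx.recv().await, Ok("done"));
//! # }
//! ```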
pub mod mpsc;
pub mod oneshot;


@@ -0,0 +1,575 @@
use std::collections::VecDeque;
use std::future::poll_fn;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use crate::op::completion::{CompletionFuture, CompletionHandle};
use crate::sys::linux::channel::runtime_waiter;
pub fn channel<T: Send + 'static>(capacity: usize) -> (Sender<T>, Receiver<T>) {
assert!(capacity > 0, "bounded channels require capacity > 0");
let shared = Arc::new(Mutex::new(State::new(Some(capacity))));
(
Sender {
shared: Arc::clone(&shared),
},
Receiver { shared },
)
}
pub fn unbounded_channel<T: Send + 'static>() -> (UnboundedSender<T>, Receiver<T>) {
let shared = Arc::new(Mutex::new(State::new(None)));
(
UnboundedSender {
shared: Arc::clone(&shared),
},
Receiver { shared },
)
}
pub struct Sender<T: Send + 'static> {
shared: Arc<Mutex<State<T>>>,
}
pub struct UnboundedSender<T: Send + 'static> {
shared: Arc<Mutex<State<T>>>,
}
pub struct Receiver<T: Send + 'static> {
shared: Arc<Mutex<State<T>>>,
}
struct State<T: Send + 'static> {
queue: VecDeque<T>,
capacity: Option<usize>,
sender_count: usize,
receiver_closed: bool,
recv_waiter: Option<CompletionHandle<Option<T>>>,
send_waiters: VecDeque<SendWaiter<T>>,
next_waiter_id: usize,
}
struct SendWaiter<T: Send + 'static> {
id: usize,
value: T,
handle: CompletionHandle<Result<(), SendError<T>>>,
}
#[derive(Debug, Eq, PartialEq)]
pub struct SendError<T>(pub T);
#[derive(Debug, Eq, PartialEq)]
pub enum TrySendError<T> {
Full(T),
Closed(T),
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TryRecvError {
Empty,
Disconnected,
}
impl<T: Send + 'static> State<T> {
fn new(capacity: Option<usize>) -> Self {
Self {
queue: VecDeque::new(),
capacity,
sender_count: 1,
receiver_closed: false,
recv_waiter: None,
send_waiters: VecDeque::new(),
next_waiter_id: 1,
}
}
fn try_send_now(&mut self, value: T) -> Result<(), TrySendError<T>> {
if self.receiver_closed {
return Err(TrySendError::Closed(value));
}
if let Some(waiter) = self.recv_waiter.take() {
waiter.complete(Some(value));
return Ok(());
}
if self
.capacity
.is_some_and(|capacity| self.queue.len() >= capacity)
{
return Err(TrySendError::Full(value));
}
self.queue.push_back(value);
Ok(())
}
fn enqueue_send_waiter(
&mut self,
value: T,
handle: CompletionHandle<Result<(), SendError<T>>>,
) -> usize {
let id = self.next_waiter_id;
self.next_waiter_id = self.next_waiter_id.wrapping_add(1);
self.send_waiters
.push_back(SendWaiter { id, value, handle });
id
}
fn remove_send_waiter(&mut self, waiter_id: usize) -> bool {
let Some(index) = self
.send_waiters
.iter()
.position(|waiter| waiter.id == waiter_id)
else {
return false;
};
self.send_waiters.remove(index);
true
}
fn pump_senders(&mut self) {
loop {
if self.receiver_closed {
self.fail_pending_senders();
break;
}
let has_capacity = self
.capacity
.is_none_or(|capacity| self.queue.len() < capacity);
if !has_capacity {
break;
}
let Some(waiter) = self.send_waiters.pop_front() else {
break;
};
if let Some(receiver) = self.recv_waiter.take() {
receiver.complete(Some(waiter.value));
} else {
self.queue.push_back(waiter.value);
}
waiter.handle.complete(Ok(()));
}
if self.queue.is_empty()
&& self.sender_count == 0
&& let Some(waiter) = self.recv_waiter.take()
{
waiter.complete(None);
}
}
fn fail_pending_senders(&mut self) {
while let Some(waiter) = self.send_waiters.pop_front() {
waiter.handle.complete(Err(SendError(waiter.value)));
}
}
fn close_receiver(&mut self) {
self.receiver_closed = true;
self.fail_pending_senders();
if self.queue.is_empty()
&& let Some(waiter) = self.recv_waiter.take()
{
waiter.complete(None);
}
}
fn drop_sender(&mut self) {
self.sender_count = self.sender_count.saturating_sub(1);
if self.sender_count == 0
&& self.queue.is_empty()
&& let Some(waiter) = self.recv_waiter.take()
{
waiter.complete(None);
}
}
}
impl<T: Send + 'static> Clone for Sender<T> {
fn clone(&self) -> Self {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.sender_count += 1;
Self {
shared: Arc::clone(&self.shared),
}
}
}
impl<T: Send + 'static> Clone for UnboundedSender<T> {
fn clone(&self) -> Self {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.sender_count += 1;
Self {
shared: Arc::clone(&self.shared),
}
}
}
impl<T: Send + 'static> Sender<T> {
pub async fn send(&self, value: T) -> Result<(), SendError<T>> {
let mut value = Some(value);
let mut wait = None;
poll_fn(|cx| self.poll_send(cx, &mut value, &mut wait)).await
}
pub fn try_send(&self, value: T) -> Result<(), TrySendError<T>> {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.try_send_now(value)
}
pub fn is_closed(&self) -> bool {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.receiver_closed
}
fn poll_send(
&self,
cx: &mut Context<'_>,
value_slot: &mut Option<T>,
wait: &mut Option<CompletionFuture<Result<(), SendError<T>>>>,
) -> Poll<Result<(), SendError<T>>> {
if let Some(future) = wait.as_mut() {
match Pin::new(future).poll(cx) {
Poll::Ready(result) => {
wait.take();
Poll::Ready(result)
}
Poll::Pending => Poll::Pending,
}
} else {
let mut state = self
.shared
.lock()
.expect("mpsc state should not be poisoned");
match state.try_send_now(value_slot.take().expect("send value should be present")) {
Ok(()) => Poll::Ready(Ok(())),
Err(TrySendError::Closed(value)) => Poll::Ready(Err(SendError(value))),
Err(TrySendError::Full(returned)) => {
drop(state);
let (future, handle) = runtime_waiter::<Result<(), SendError<T>>>();
let state_shared = Arc::clone(&self.shared);
let registration = {
let mut state = state_shared
.lock()
.expect("mpsc state should not be poisoned");
match state.try_send_now(returned) {
Ok(()) => Ok(None),
Err(TrySendError::Closed(value)) => Err(SendError(value)),
Err(TrySendError::Full(value)) => {
Ok(Some(state.enqueue_send_waiter(value, handle.clone())))
}
}
};
match registration {
Ok(None) => {
handle.complete(Ok(()));
*wait = Some(future);
self.poll_send(cx, value_slot, wait)
}
Err(error) => {
handle.complete(Err(error));
*wait = Some(future);
self.poll_send(cx, value_slot, wait)
}
Ok(Some(waiter_id)) => {
let cancel_shared = Arc::clone(&self.shared);
let cancel_handle = handle.clone();
handle.set_cancel(move || {
let mut state = cancel_shared
.lock()
.expect("mpsc state should not be poisoned");
let _ = state.remove_send_waiter(waiter_id);
drop(state);
cancel_handle.finish(None);
});
*wait = Some(future);
self.poll_send(cx, value_slot, wait)
}
}
}
}
}
}
}
impl<T: Send + 'static> UnboundedSender<T> {
pub fn send(&self, value: T) -> Result<(), SendError<T>> {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.try_send_now(value)
.map_err(|error| match error {
TrySendError::Full(value) | TrySendError::Closed(value) => SendError(value),
})
}
pub fn is_closed(&self) -> bool {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.receiver_closed
}
}
impl<T: Send + 'static> Receiver<T> {
pub async fn recv(&mut self) -> Option<T> {
let mut wait = None;
poll_fn(|cx| self.poll_recv(cx, &mut wait)).await
}
pub fn try_recv(&mut self) -> Result<T, TryRecvError> {
let mut state = self
.shared
.lock()
.expect("mpsc state should not be poisoned");
if let Some(value) = state.queue.pop_front() {
state.pump_senders();
Ok(value)
} else if state.sender_count == 0 || state.receiver_closed {
Err(TryRecvError::Disconnected)
} else {
Err(TryRecvError::Empty)
}
}
pub fn close(&mut self) {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.close_receiver();
}
pub fn is_closed(&self) -> bool {
let state = self
.shared
.lock()
.expect("mpsc state should not be poisoned");
state.receiver_closed || state.sender_count == 0
}
fn poll_recv(
&mut self,
cx: &mut Context<'_>,
wait: &mut Option<CompletionFuture<Option<T>>>,
) -> Poll<Option<T>> {
if let Some(future) = wait.as_mut() {
match Pin::new(future).poll(cx) {
Poll::Ready(result) => {
wait.take();
Poll::Ready(result)
}
Poll::Pending => Poll::Pending,
}
} else {
let (future, handle) = runtime_waiter::<Option<T>>();
let cancel_shared = Arc::clone(&self.shared);
let cancel_handle = handle.clone();
handle.set_cancel(move || {
let mut state = cancel_shared
.lock()
.expect("mpsc state should not be poisoned");
let _ = state.recv_waiter.take();
drop(state);
cancel_handle.finish(None);
});
{
let mut state = self
.shared
.lock()
.expect("mpsc state should not be poisoned");
if let Some(value) = state.queue.pop_front() {
state.pump_senders();
handle.complete(Some(value));
} else if state.receiver_closed || state.sender_count == 0 {
handle.complete(None);
} else {
assert!(
state.recv_waiter.is_none(),
"only one mpsc receive operation may wait at a time"
);
state.recv_waiter = Some(handle.clone());
}
}
*wait = Some(future);
self.poll_recv(cx, wait)
}
}
}
impl<T: Send + 'static> Drop for Sender<T> {
fn drop(&mut self) {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.drop_sender();
}
}
impl<T: Send + 'static> Drop for UnboundedSender<T> {
fn drop(&mut self) {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.drop_sender();
}
}
impl<T: Send + 'static> Drop for Receiver<T> {
fn drop(&mut self) {
self.shared
.lock()
.expect("mpsc state should not be poisoned")
.close_receiver();
}
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use std::time::Duration;
use crate::time::sleep;
use crate::{queue_future, queue_task, run, spawn_worker};
use super::{TryRecvError, TrySendError, channel, unbounded_channel};
#[test]
fn bounded_channel_applies_backpressure() {
let log = Arc::new(Mutex::new(Vec::<String>::new()));
let log_for_task = Arc::clone(&log);
queue_task(move || {
let (sender, mut receiver) = channel(1);
let log_for_sender = Arc::clone(&log_for_task);
let log_for_receiver = Arc::clone(&log_for_task);
queue_future(async move {
sender
.send("first")
.await
.expect("first send should succeed");
log_for_sender
.lock()
.unwrap()
.push("sent first".to_string());
sender
.send("second")
.await
.expect("second send should succeed");
log_for_sender
.lock()
.unwrap()
.push("sent second".to_string());
});
queue_future(async move {
sleep(Duration::from_millis(5)).await;
let first = receiver.recv().await.expect("first recv should succeed");
log_for_receiver
.lock()
.unwrap()
.push(format!("received {first}"));
let second = receiver.recv().await.expect("second recv should succeed");
log_for_receiver
.lock()
.unwrap()
.push(format!("received {second}"));
});
});
run();
let log = log.lock().unwrap();
let sent_first = log.iter().position(|entry| entry == "sent first").unwrap();
let received_first = log
.iter()
.position(|entry| entry == "received first")
.unwrap();
let sent_second = log.iter().position(|entry| entry == "sent second").unwrap();
let received_second = log
.iter()
.position(|entry| entry == "received second")
.unwrap();
assert!(
sent_first < received_first,
"first send should happen before first recv"
);
assert!(
received_first < sent_second,
"second send should not complete before capacity is freed"
);
assert!(
received_first < received_second,
"receiver should observe messages in FIFO order"
);
}
#[test]
fn unbounded_channel_moves_messages_across_worker_threads() {
let log = Arc::new(Mutex::new(Vec::new()));
let log_for_task = Arc::clone(&log);
queue_task(move || {
let (sender, mut receiver) = unbounded_channel::<String>();
let worker_sender = sender.clone();
let log_for_receiver = Arc::clone(&log_for_task);
let _worker = spawn_worker(
move || {
queue_task(move || {
worker_sender
.send("worker boot".into())
.expect("worker boot send should succeed");
worker_sender
.send("worker done".into())
.expect("worker done send should succeed");
});
},
|| {},
);
drop(sender);
queue_future(async move {
while let Some(message) = receiver.recv().await {
log_for_receiver.lock().unwrap().push(message);
}
});
});
run();
assert_eq!(
log.lock().unwrap().as_slice(),
["worker boot", "worker done"]
);
}
#[test]
fn try_send_try_recv_and_close_semantics_work() {
let (sender, mut receiver) = channel(1);
sender
.try_send(1usize)
.expect("initial send should succeed");
assert_eq!(sender.try_send(2usize), Err(TrySendError::Full(2)));
assert_eq!(receiver.try_recv(), Ok(1));
assert_eq!(receiver.try_recv(), Err(TryRecvError::Empty));
receiver.close();
assert!(sender.is_closed(), "sender should observe closed receiver");
assert_eq!(sender.try_send(3usize), Err(TrySendError::Closed(3)));
assert_eq!(receiver.try_recv(), Err(TryRecvError::Disconnected));
}
}


@@ -0,0 +1,281 @@
use std::future::poll_fn;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use crate::op::completion::{CompletionFuture, CompletionHandle};
use crate::sys::linux::channel::runtime_waiter;
pub fn channel<T: Send + 'static>() -> (Sender<T>, Receiver<T>) {
let shared = Arc::new(Mutex::new(State {
value: None,
sender_alive: true,
receiver_closed: false,
waiter: None,
}));
(
Sender {
shared: Some(Arc::clone(&shared)),
},
Receiver {
shared,
consumed: false,
},
)
}
pub struct Sender<T: Send + 'static> {
shared: Option<Arc<Mutex<State<T>>>>,
}
pub struct Receiver<T: Send + 'static> {
shared: Arc<Mutex<State<T>>>,
consumed: bool,
}
struct State<T: Send + 'static> {
value: Option<T>,
sender_alive: bool,
receiver_closed: bool,
waiter: Option<CompletionHandle<Result<T, RecvError>>>,
}
#[derive(Debug, Eq, PartialEq)]
pub struct SendError<T>(pub T);
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RecvError;
#[derive(Debug, Eq, PartialEq)]
pub enum TryRecvError {
Empty,
Closed,
}
impl<T: Send + 'static> Sender<T> {
pub fn send(mut self, value: T) -> Result<(), SendError<T>> {
let Some(shared) = self.shared.take() else {
return Err(SendError(value));
};
let waiter = {
let mut state = shared.lock().expect("oneshot state should not be poisoned");
state.sender_alive = false;
if state.receiver_closed {
return Err(SendError(value));
}
state.waiter.take()
};
if let Some(waiter) = waiter {
waiter.complete(Ok(value));
} else {
shared
.lock()
.expect("oneshot state should not be poisoned")
.value = Some(value);
}
Ok(())
}
pub fn is_closed(&self) -> bool {
self.shared.as_ref().is_none_or(|shared| {
shared
.lock()
.expect("oneshot state should not be poisoned")
.receiver_closed
})
}
}
impl<T: Send + 'static> Receiver<T> {
pub async fn recv(&mut self) -> Result<T, RecvError> {
let mut wait = None;
poll_fn(|cx| self.poll_recv(cx, &mut wait)).await
}
pub fn try_recv(&mut self) -> Result<T, TryRecvError> {
if self.consumed {
return Err(TryRecvError::Closed);
}
let mut state = self
.shared
.lock()
.expect("oneshot state should not be poisoned");
if let Some(value) = state.value.take() {
self.consumed = true;
return Ok(value);
}
if state.receiver_closed || !state.sender_alive {
self.consumed = true;
Err(TryRecvError::Closed)
} else {
Err(TryRecvError::Empty)
}
}
pub fn close(&mut self) {
let mut state = self
.shared
.lock()
.expect("oneshot state should not be poisoned");
state.receiver_closed = true;
}
pub fn is_closed(&self) -> bool {
let state = self
.shared
.lock()
.expect("oneshot state should not be poisoned");
state.receiver_closed || !state.sender_alive
}
fn poll_recv(
&mut self,
cx: &mut Context<'_>,
wait: &mut Option<CompletionFuture<Result<T, RecvError>>>,
) -> Poll<Result<T, RecvError>> {
if self.consumed {
return Poll::Ready(Err(RecvError));
}
if let Some(future) = wait.as_mut() {
match Pin::new(future).poll(cx) {
Poll::Ready(result) => {
wait.take();
self.consumed = true;
Poll::Ready(result)
}
Poll::Pending => Poll::Pending,
}
} else {
let (future, handle) = runtime_waiter::<Result<T, RecvError>>();
let cancel_shared = Arc::clone(&self.shared);
let cancel_handle = handle.clone();
handle.set_cancel(move || {
let mut state = cancel_shared
.lock()
.expect("oneshot state should not be poisoned");
let _ = state.waiter.take();
drop(state);
cancel_handle.finish(None);
});
let mut immediate = None;
{
let mut state = self
.shared
.lock()
.expect("oneshot state should not be poisoned");
if let Some(value) = state.value.take() {
immediate = Some(Ok(value));
} else if state.receiver_closed || !state.sender_alive {
immediate = Some(Err(RecvError));
} else {
assert!(
state.waiter.is_none(),
"only one oneshot receive operation may wait at a time"
);
state.waiter = Some(handle.clone());
}
}
if let Some(result) = immediate {
handle.complete(result);
}
*wait = Some(future);
self.poll_recv(cx, wait)
}
}
}
impl<T: Send + 'static> Drop for Sender<T> {
fn drop(&mut self) {
let Some(shared) = self.shared.take() else {
return;
};
let waiter = {
let mut state = shared.lock().expect("oneshot state should not be poisoned");
if !state.sender_alive {
return;
}
state.sender_alive = false;
if state.value.is_none() {
state.waiter.take()
} else {
None
}
};
if let Some(waiter) = waiter {
waiter.complete(Err(RecvError));
}
}
}
impl<T: Send + 'static> Drop for Receiver<T> {
fn drop(&mut self) {
let mut state = self
.shared
.lock()
.expect("oneshot state should not be poisoned");
state.receiver_closed = true;
let _ = state.waiter.take();
}
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use crate::{queue_future, queue_task, run, spawn_worker};
use super::{TryRecvError, channel};
#[test]
fn oneshot_cross_thread_round_trip() {
let result = Arc::new(Mutex::new(None::<usize>));
let result_for_task = Arc::clone(&result);
queue_task(move || {
let (sender, mut receiver) = channel();
let result_for_task = Arc::clone(&result_for_task);
let _worker = spawn_worker(
move || {
queue_task(move || {
sender.send(42usize).expect("oneshot send should succeed");
});
},
|| {},
);
queue_future(async move {
let value = receiver.recv().await.expect("oneshot recv should succeed");
*result_for_task.lock().unwrap() = Some(value);
});
});
run();
assert_eq!(*result.lock().unwrap(), Some(42));
}
#[test]
fn oneshot_try_recv_and_close() {
let (sender, mut receiver) = channel::<usize>();
assert_eq!(receiver.try_recv(), Err(TryRecvError::Empty));
receiver.close();
assert!(
sender.send(7).is_err(),
"closed receiver should reject send"
);
assert_eq!(receiver.try_recv(), Err(TryRecvError::Closed));
}
}

lib/runtime/src/fs.rs Normal file (+552 lines)

@@ -0,0 +1,552 @@
//! Portable async filesystem API.
//!
//! Cancellation semantics:
//! - Dropping an I/O future cancels interest in the result.
//! - The runtime issues best-effort kernel cancellation where supported.
//! - The underlying OS operation may still complete after the future is dropped.
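//!
//! A minimal, hypothetical sketch of what that means for callers; the path and
//! buffer size are illustrative, and the code assumes it runs inside the
//! runtime:
//!
//! ```no_run
//! use ruin_runtime::fs::File;
//!
//! # async fn cancel_example() -> std::io::Result<()> {
//! let mut file = File::open("data.bin").await?;
//! let mut buf = vec![0u8; 4096];
//! // Dropping the future before it completes cancels interest in the result;
//! // the runtime issues a best-effort kernel cancellation, but the underlying
//! // OS read may still run to completion.
//! let pending_read = file.read(&mut buf);
//! drop(pending_read);
//! # Ok(())
//! # }
//! ```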
use std::ffi::OsStr;
use std::io;
use std::os::fd::{AsRawFd, OwnedFd};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crate::op::fs::{
FileType as RawFileType, FsOp, MetadataTarget, OpenOptions as OpOpenOptions,
RawDirEntry as OpDirEntry, RawMetadata,
};
use crate::sys::linux::fs as sys_fs;
struct FileInner {
fd: OwnedFd,
}
pub struct File {
inner: Arc<FileInner>,
}
pub struct OpenOptions {
inner: OpOpenOptions,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Metadata {
inner: RawMetadata,
}
pub struct ReadDir {
inner: sys_fs::ReadDirStream,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DirEntry {
inner: OpDirEntry,
}
impl File {
pub async fn open(path: impl AsRef<Path>) -> io::Result<Self> {
OpenOptions::new().read(true).open(path).await
}
pub async fn create(path: impl AsRef<Path>) -> io::Result<Self> {
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(path)
.await
}
pub async fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.read_impl(None, buf).await
}
pub async fn read_exact(&mut self, mut buf: &mut [u8]) -> io::Result<()> {
while !buf.is_empty() {
let read = self.read(buf).await?;
if read == 0 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"failed to fill whole buffer",
));
}
buf = &mut buf[read..];
}
Ok(())
}
pub async fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.write_impl(None, buf).await
}
pub async fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> {
while !buf.is_empty() {
let written = self.write(buf).await?;
if written == 0 {
return Err(io::Error::new(
io::ErrorKind::WriteZero,
"failed to write whole buffer",
));
}
buf = &buf[written..];
}
Ok(())
}
pub async fn flush(&mut self) -> io::Result<()> {
Ok(())
}
pub async fn sync_all(&self) -> io::Result<()> {
sys_fs::sync_all(FsOp::SyncAll { fd: self.raw_fd() }).await
}
pub async fn sync_data(&self) -> io::Result<()> {
sys_fs::sync_data(FsOp::SyncData { fd: self.raw_fd() }).await
}
pub async fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<usize> {
self.read_impl(Some(offset), buf).await
}
pub async fn read_exact_at(&self, mut offset: u64, mut buf: &mut [u8]) -> io::Result<()> {
while !buf.is_empty() {
let read = self.read_at(offset, buf).await?;
if read == 0 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"failed to fill whole buffer",
));
}
offset = offset.saturating_add(read as u64);
buf = &mut buf[read..];
}
Ok(())
}
pub async fn write_at(&self, offset: u64, buf: &[u8]) -> io::Result<usize> {
self.write_impl(Some(offset), buf).await
}
pub async fn write_all_at(&self, mut offset: u64, mut buf: &[u8]) -> io::Result<()> {
while !buf.is_empty() {
let written = self.write_at(offset, buf).await?;
if written == 0 {
return Err(io::Error::new(
io::ErrorKind::WriteZero,
"failed to write whole buffer",
));
}
offset = offset.saturating_add(written as u64);
buf = &buf[written..];
}
Ok(())
}
pub async fn metadata(&self) -> io::Result<Metadata> {
sys_fs::metadata(FsOp::Metadata {
target: MetadataTarget::File(self.raw_fd()),
follow_symlinks: true,
})
.await
.map(Metadata::from_raw)
}
pub async fn set_len(&self, len: u64) -> io::Result<()> {
sys_fs::set_len(FsOp::SetLen {
fd: self.raw_fd(),
len,
})
.await
}
pub async fn try_clone(&self) -> io::Result<Self> {
sys_fs::try_clone(FsOp::Duplicate { fd: self.raw_fd() })
.await
.map(File::from_owned_fd)
}
fn from_owned_fd(fd: OwnedFd) -> Self {
Self {
inner: Arc::new(FileInner { fd }),
}
}
fn raw_fd(&self) -> i32 {
self.inner.fd.as_raw_fd()
}
async fn read_impl(&self, offset: Option<u64>, buf: &mut [u8]) -> io::Result<usize> {
let data = sys_fs::read(FsOp::Read {
fd: self.raw_fd(),
offset,
len: buf.len(),
})
.await?;
let read = data.len();
buf[..read].copy_from_slice(&data);
Ok(read)
}
async fn write_impl(&self, offset: Option<u64>, buf: &[u8]) -> io::Result<usize> {
sys_fs::write(FsOp::Write {
fd: self.raw_fd(),
offset,
data: buf.to_vec(),
})
.await
}
}
impl OpenOptions {
pub fn new() -> Self {
Self {
inner: OpOpenOptions::default(),
}
}
pub fn read(&mut self, value: bool) -> &mut Self {
self.inner.read = value;
self
}
pub fn write(&mut self, value: bool) -> &mut Self {
self.inner.write = value;
self
}
pub fn append(&mut self, value: bool) -> &mut Self {
self.inner.append = value;
self
}
pub fn truncate(&mut self, value: bool) -> &mut Self {
self.inner.truncate = value;
self
}
pub fn create(&mut self, value: bool) -> &mut Self {
self.inner.create = value;
self
}
pub fn create_new(&mut self, value: bool) -> &mut Self {
self.inner.create_new = value;
self
}
pub async fn open(&self, path: impl AsRef<Path>) -> io::Result<File> {
sys_fs::open(FsOp::Open {
path: path.as_ref().to_path_buf(),
options: self.inner.clone(),
})
.await
.map(File::from_owned_fd)
}
}
impl Default for OpenOptions {
fn default() -> Self {
Self::new()
}
}
impl Metadata {
fn from_raw(inner: RawMetadata) -> Self {
Self { inner }
}
pub fn len(&self) -> u64 {
self.inner.len
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn is_file(&self) -> bool {
self.inner.file_type == RawFileType::File
}
pub fn is_dir(&self) -> bool {
self.inner.file_type == RawFileType::Directory
}
pub fn is_symlink(&self) -> bool {
self.inner.file_type == RawFileType::Symlink
}
pub fn mode(&self) -> u16 {
self.inner.mode
}
}
impl ReadDir {
pub async fn next_entry(&mut self) -> io::Result<Option<DirEntry>> {
self.inner
.next_entry()
.await
.map(|entry| entry.map(|inner| DirEntry { inner }))
}
}
impl DirEntry {
pub fn path(&self) -> PathBuf {
self.inner.path.clone()
}
pub fn file_name(&self) -> &OsStr {
self.inner.file_name.as_os_str()
}
pub async fn metadata(&self) -> io::Result<Metadata> {
metadata(self.path()).await
}
}
pub async fn read(path: impl AsRef<Path>) -> io::Result<Vec<u8>> {
let mut file = File::open(path.as_ref()).await?;
let mut output = Vec::new();
let mut chunk = vec![0; 8192];
loop {
let read = file.read(&mut chunk).await?;
if read == 0 {
return Ok(output);
}
output.extend_from_slice(&chunk[..read]);
}
}
pub async fn read_to_string(path: impl AsRef<Path>) -> io::Result<String> {
let bytes = read(path).await?;
String::from_utf8(bytes).map_err(|error| io::Error::new(io::ErrorKind::InvalidData, error))
}
pub async fn write(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> io::Result<()> {
let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(path)
.await?;
file.write_all(data.as_ref()).await
}
pub async fn metadata(path: impl AsRef<Path>) -> io::Result<Metadata> {
sys_fs::metadata(FsOp::Metadata {
target: MetadataTarget::Path(path.as_ref().to_path_buf()),
follow_symlinks: true,
})
.await
.map(Metadata::from_raw)
}
pub async fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
sys_fs::create_dir(FsOp::CreateDir {
path: path.as_ref().to_path_buf(),
recursive: false,
mode: 0o777,
})
.await
}
pub async fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {
let path = path.as_ref();
let mut current = PathBuf::new();
for component in path.components() {
current.push(component.as_os_str());
if current.as_os_str().is_empty() {
continue;
}
match create_dir(&current).await {
Ok(()) => {}
Err(error) if error.kind() == io::ErrorKind::AlreadyExists => {}
Err(error) => return Err(error),
}
}
Ok(())
}
pub async fn remove_file(path: impl AsRef<Path>) -> io::Result<()> {
sys_fs::remove_file(FsOp::RemoveFile {
path: path.as_ref().to_path_buf(),
})
.await
}
pub async fn remove_dir(path: impl AsRef<Path>) -> io::Result<()> {
sys_fs::remove_dir(FsOp::RemoveDir {
path: path.as_ref().to_path_buf(),
})
.await
}
pub async fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> io::Result<()> {
sys_fs::rename(FsOp::Rename {
from: from.as_ref().to_path_buf(),
to: to.as_ref().to_path_buf(),
})
.await
}
pub async fn read_dir(path: impl AsRef<Path>) -> io::Result<ReadDir> {
sys_fs::read_dir(FsOp::ReadDir {
path: path.as_ref().to_path_buf(),
})
.map(|inner| ReadDir { inner })
}
#[cfg(test)]
mod tests {
use super::{
OpenOptions, create_dir_all, metadata, read, read_dir, read_to_string, remove_dir,
remove_file, rename, write,
};
use crate::queue_future;
use crate::{queue_task, run};
use std::collections::BTreeSet;
use std::ffi::OsString;
use std::path::PathBuf;
use std::process;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::OnceLock;
use std::time::{SystemTime, UNIX_EPOCH};
fn test_lock() -> &'static Mutex<()> {
static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
LOCK.get_or_init(|| Mutex::new(()))
}
fn unique_path(label: &str) -> PathBuf {
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("system time should be after epoch")
.as_nanos();
std::env::temp_dir().join(format!("ruin-runtime-{label}-{}-{nanos}", process::id()))
}
#[test]
fn async_fs_round_trip() {
let _guard = test_lock().lock().unwrap();
let root = unique_path("fs-round-trip");
let nested = root.join("nested");
let file_path = nested.join("hello.txt");
let renamed_path = nested.join("renamed.txt");
let output = Arc::new(Mutex::new(None::<String>));
{
let output = Arc::clone(&output);
queue_task(move || {
queue_future(async move {
create_dir_all(&nested)
.await
.expect("dir creation should succeed");
write(&file_path, b"hello world")
.await
.expect("initial write should succeed");
let file = OpenOptions::new()
.read(true)
.write(true)
.open(&file_path)
.await
.expect("open should succeed");
file.write_at(6, b"runtime")
.await
.expect("positioned write should succeed");
file.sync_all().await.expect("sync should succeed");
let mut prefix = [0u8; 5];
file.read_exact_at(0, &mut prefix)
.await
.expect("positioned read should succeed");
assert_eq!(&prefix, b"hello");
let meta = file.metadata().await.expect("metadata should succeed");
assert!(meta.is_file());
assert!(meta.len() >= 13);
let cloned = file.try_clone().await.expect("clone should succeed");
cloned.set_len(13).await.expect("truncate should succeed");
rename(&file_path, &renamed_path)
.await
.expect("rename should succeed");
let text = read_to_string(&renamed_path)
.await
.expect("read_to_string should succeed");
assert_eq!(text, "hello runtime");
let bytes = read(&renamed_path).await.expect("read should succeed");
assert_eq!(bytes, b"hello runtime");
let path_meta = metadata(&renamed_path)
.await
.expect("path metadata should work");
assert!(path_meta.is_file());
*output.lock().unwrap() = Some(text);
remove_file(&renamed_path)
.await
.expect("remove_file should succeed");
remove_dir(&nested)
.await
.expect("remove nested dir should succeed");
remove_dir(&root)
.await
.expect("remove root dir should succeed");
});
});
}
run();
assert_eq!(output.lock().unwrap().as_deref(), Some("hello runtime"));
}
#[test]
fn async_read_dir_streams_entries() {
let _guard = test_lock().lock().unwrap();
let root = unique_path("fs-read-dir");
let one = root.join("one.txt");
let two = root.join("two.txt");
let seen: Arc<Mutex<BTreeSet<OsString>>> = Arc::new(Mutex::new(BTreeSet::new()));
{
let seen = Arc::clone(&seen);
queue_task(move || {
queue_future(async move {
create_dir_all(&root)
.await
.expect("dir creation should succeed");
write(&one, b"1").await.expect("write one should succeed");
write(&two, b"2").await.expect("write two should succeed");
let mut dir = read_dir(&root).await.expect("read_dir should succeed");
while let Some(entry) = dir.next_entry().await.expect("stream should succeed") {
seen.lock()
.unwrap()
.insert(entry.file_name().to_os_string());
}
remove_file(&one).await.expect("remove one should succeed");
remove_file(&two).await.expect("remove two should succeed");
remove_dir(&root).await.expect("remove root should succeed");
});
});
}
run();
let seen = seen.lock().unwrap();
assert!(seen.contains(&OsString::from("one.txt")));
assert!(seen.contains(&OsString::from("two.txt")));
}
}

lib/runtime/src/lib.rs Normal file (+78 lines)

@@ -0,0 +1,78 @@
//! RUIN runtime foundations.
//!
//! This crate provides a Linux x86_64 runtime substrate: the mesh allocator, the reactor, and a
//! single-threaded runtime loop with worker-thread task forwarding.
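//!
//! A minimal sketch of driving the loop directly, following the queue-then-run
//! pattern the test suite in this commit uses (the `#[ruin_runtime::main]` and
//! `#[ruin_runtime::async_main]` attribute macros are the usual front door):
//!
//! ```no_run
//! use ruin_runtime::{queue_future, queue_task, run};
//!
//! // Queue an initial task; futures queued from it are polled by the same
//! // single-threaded loop once `run()` is called.
//! queue_task(|| {
//!     queue_future(async {
//!         println!("hello from the runtime loop");
//!     });
//! });
//! run();
//! ```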
#![feature(thread_local)]
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
compile_error!("ruin-runtime currently supports only Linux x86_64.");
extern crate alloc;
pub mod channel;
pub mod fs;
pub mod net;
pub mod op;
pub mod platform;
pub mod sys;
pub mod time;
pub use ruin_runtime_proc_macros::{async_main, main};
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub use platform::linux_x86_64::mesh_alloc::{
ActiveMeshGuard, Arena, AtomicBitmap, BitIter, CLASS_TO_SIZE, CompactionAdvice,
CompactionEstimate, CompactionRecommendation, CompactionSkipReason,
DEFAULT_GLOBAL_MINIHEAP_CAPACITY, FutexMutex, GlobalMeshAllocator, MeshAllocator, MeshStats,
MiniHeap, MiniHeapFlags, MiniHeapId, Mwc, Mwc64, NUM_SIZE_CLASSES, PageConfig, PlatformHooks,
PlatformInstallError, RelaxedBitmap, RuntimeCompactionPolicy, RuntimeCompactionResult,
ShuffleEntry, ShuffleVector, Span, ThreadLocalHeap, byte_size_for_class,
ensure_fault_mediation_installed, install_platform_hooks, ok_to_proceed, page_count,
page_shift, page_size, retry_on_efault, retry_on_efault_ptrs, round_up_to_page,
runtime_slots_per_span, size_class_for,
};
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub use platform::linux_x86_64::mesh_alloc::{FreelistId, bitmaps_meshable};
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub use platform::linux_x86_64::reactor::{
Reactor, ReadyEvents, ThreadNotifier, create, create_reactor, monotonic_now,
};
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub use platform::linux_x86_64::runtime::{
IntervalHandle, JoinHandle, ThreadHandle, TimeoutHandle, WorkerHandle, clear_interval,
clear_timeout, current_thread_handle, queue_future, queue_microtask, queue_task, run,
set_interval, set_timeout, spawn_worker, yield_now,
};
pub const fn default_global_allocator() -> GlobalMeshAllocator {
GlobalMeshAllocator::with_default_config()
}
#[cfg(test)]
mod tests {
use super::{MeshAllocator, page_size};
#[test]
fn mesh_allocator_smoke_test() {
let mut allocator =
MeshAllocator::new(page_size() * 1024, 256).expect("allocator should initialize");
let small = allocator
.allocate(64)
.expect("small allocation should succeed");
unsafe {
small.write_bytes(0xAB, 64);
}
allocator.deallocate(small);
let large_size = page_size() * 2;
let large = allocator
.allocate(large_size)
.expect("large allocation should succeed");
unsafe {
large.write_bytes(0xCD, large_size);
}
allocator.deallocate(large);
}
}

lib/runtime/src/net.rs Normal file (+963 lines)

@@ -0,0 +1,963 @@
//! Portable async networking API.
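//!
//! A minimal, hypothetical sketch of a client round trip; the address and
//! payload are illustrative, and the code assumes it runs inside the runtime:
//!
//! ```no_run
//! use ruin_runtime::net::TcpStream;
//!
//! # async fn demo() -> std::io::Result<()> {
//! // `connect` resolves the address and tries each endpoint in turn.
//! let mut stream = TcpStream::connect("127.0.0.1:8080").await?;
//! stream.write_all(b"ping").await?;
//! let mut buf = [0u8; 64];
//! let read = stream.read(&mut buf).await?;
//! println!("received {read} bytes");
//! # Ok(())
//! # }
//! ```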
use std::future::Future;
use std::io;
use std::net::{Shutdown, SocketAddr, ToSocketAddrs};
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use std::time::Duration;
use hyper::rt::{Read as HyperRead, ReadBufCursor, Write as HyperWrite};
use crate::op::net::NetOp;
#[derive(Debug)]
struct TcpStreamInner {
fd: OwnedFd,
timeouts: Mutex<SocketTimeouts>,
}
#[derive(Debug)]
struct TcpListenerInner {
fd: OwnedFd,
}
#[derive(Debug)]
struct UdpSocketInner {
fd: OwnedFd,
timeouts: Mutex<SocketTimeouts>,
}
#[derive(Clone, Copy, Debug, Default)]
struct SocketTimeouts {
read: Option<Duration>,
write: Option<Duration>,
}
type PendingRead = Pin<Box<dyn Future<Output = io::Result<Vec<u8>>> + 'static>>;
type PendingWrite = Pin<Box<dyn Future<Output = io::Result<usize>> + 'static>>;
type PendingShutdown = Pin<Box<dyn Future<Output = io::Result<()>> + 'static>>;
pub struct TcpStream {
inner: Arc<TcpStreamInner>,
pending_read: Option<PendingRead>,
pending_write: Option<PendingWrite>,
pending_shutdown: Option<PendingShutdown>,
}
#[derive(Clone, Debug)]
pub struct TcpListener {
inner: Arc<TcpListenerInner>,
}
#[derive(Debug)]
pub struct UdpSocket {
inner: Arc<UdpSocketInner>,
}
impl TcpStream {
pub async fn connect<A>(addr: A) -> io::Result<Self>
where
A: ToSocketAddrs + Send + 'static,
{
let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
let mut last_error = None;
for addr in addrs {
match crate::sys::linux::net::connect_stream(addr).await {
Ok(fd) => return Ok(Self::from_owned_fd(fd)),
Err(error) => last_error = Some(error),
}
}
Err(last_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::AddrNotAvailable,
"address resolution returned no usable TCP endpoints",
)
}))
}
pub async fn connect_timeout(addr: &SocketAddr, timeout: Duration) -> io::Result<Self> {
validate_timeout(timeout)?;
crate::sys::linux::net::connect_stream_timeout(*addr, timeout)
.await
.map(Self::from_owned_fd)
}
pub async fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let data = match self.read_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::recv_timeout(self.raw_fd(), buf.len(), 0, timeout).await?
}
None => {
crate::sys::linux::net::recv(NetOp::Recv {
fd: self.raw_fd(),
len: buf.len(),
flags: 0,
})
.await?
}
};
let read = data.len();
buf[..read].copy_from_slice(&data);
Ok(read)
}
pub async fn read_exact(&mut self, mut buf: &mut [u8]) -> io::Result<()> {
while !buf.is_empty() {
let read = self.read(buf).await?;
if read == 0 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"failed to fill whole buffer",
));
}
buf = &mut buf[read..];
}
Ok(())
}
pub async fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match self.write_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::send_timeout(self.raw_fd(), buf.to_vec(), 0, timeout).await
}
None => {
crate::sys::linux::net::send(NetOp::Send {
fd: self.raw_fd(),
data: buf.to_vec(),
flags: 0,
})
.await
}
}
}
pub async fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> {
while !buf.is_empty() {
let written = self.write(buf).await?;
if written == 0 {
return Err(io::Error::new(
io::ErrorKind::WriteZero,
"failed to write whole buffer",
));
}
buf = &buf[written..];
}
Ok(())
}
pub async fn shutdown(&self, how: Shutdown) -> io::Result<()> {
crate::sys::linux::net::shutdown(NetOp::Shutdown {
fd: self.raw_fd(),
how,
})
.await
}
pub async fn try_clone(&self) -> io::Result<Self> {
crate::sys::linux::net::duplicate(self.raw_fd())
.await
.map(Self::from_owned_fd)
}
pub fn local_addr(&self) -> io::Result<SocketAddr> {
crate::sys::linux::net::local_addr(self.raw_fd())
}
pub fn peer_addr(&self) -> io::Result<SocketAddr> {
crate::sys::linux::net::peer_addr(self.raw_fd())
}
pub fn nodelay(&self) -> io::Result<bool> {
crate::sys::linux::net::nodelay(self.raw_fd())
}
pub fn set_nodelay(&self, enabled: bool) -> io::Result<()> {
crate::sys::linux::net::set_nodelay(self.raw_fd(), enabled)
}
pub fn ttl(&self) -> io::Result<u32> {
crate::sys::linux::net::ttl(self.raw_fd())
}
pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
}
pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
Ok(self.read_timeout_value())
}
pub fn set_read_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
validate_optional_timeout(timeout)?;
self.inner.timeouts.lock().unwrap().read = timeout;
Ok(())
}
pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
Ok(self.write_timeout_value())
}
pub fn set_write_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
validate_optional_timeout(timeout)?;
self.inner.timeouts.lock().unwrap().write = timeout;
Ok(())
}
fn from_owned_fd(fd: OwnedFd) -> Self {
Self {
inner: Arc::new(TcpStreamInner {
fd,
timeouts: Mutex::new(SocketTimeouts::default()),
}),
pending_read: None,
pending_write: None,
pending_shutdown: None,
}
}
fn raw_fd(&self) -> RawFd {
self.inner.fd.as_raw_fd()
}
fn read_timeout_value(&self) -> Option<Duration> {
self.inner.timeouts.lock().unwrap().read
}
fn write_timeout_value(&self) -> Option<Duration> {
self.inner.timeouts.lock().unwrap().write
}
}
impl TcpListener {
pub async fn bind<A>(addr: A) -> io::Result<Self>
where
A: ToSocketAddrs + Send + 'static,
{
let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
let mut last_error = None;
for addr in addrs {
match crate::sys::linux::net::bind_listener(addr, None).await {
Ok(fd) => return Ok(Self::from_owned_fd(fd)),
Err(error) => last_error = Some(error),
}
}
Err(last_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::AddrNotAvailable,
"address resolution returned no usable listener endpoints",
)
}))
}
pub async fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> {
let accepted = crate::sys::linux::net::accept(NetOp::Accept { fd: self.raw_fd() }).await?;
let stream = TcpStream::from_owned_fd(unsafe { OwnedFd::from_raw_fd(accepted.fd) });
Ok((stream, accepted.peer_addr))
}
pub fn local_addr(&self) -> io::Result<SocketAddr> {
crate::sys::linux::net::local_addr(self.raw_fd())
}
pub fn ttl(&self) -> io::Result<u32> {
crate::sys::linux::net::ttl(self.raw_fd())
}
pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
}
fn from_owned_fd(fd: OwnedFd) -> Self {
Self {
inner: Arc::new(TcpListenerInner { fd }),
}
}
fn raw_fd(&self) -> RawFd {
self.inner.fd.as_raw_fd()
}
}
impl UdpSocket {
pub async fn bind<A>(addr: A) -> io::Result<Self>
where
A: ToSocketAddrs + Send + 'static,
{
let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
let mut last_error = None;
for addr in addrs {
match crate::sys::linux::net::bind_datagram(addr).await {
Ok(fd) => return Ok(Self::from_owned_fd(fd)),
Err(error) => last_error = Some(error),
}
}
Err(last_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::AddrNotAvailable,
"address resolution returned no usable UDP endpoints",
)
}))
}
pub async fn connect<A>(&self, addr: A) -> io::Result<()>
where
A: ToSocketAddrs + Send + 'static,
{
let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
let mut last_error = None;
for addr in addrs {
match crate::sys::linux::net::connect(NetOp::Connect {
fd: self.raw_fd(),
addr,
})
.await
{
Ok(()) => return Ok(()),
Err(error) => last_error = Some(error),
}
}
Err(last_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::AddrNotAvailable,
"address resolution returned no usable UDP peers",
)
}))
}
pub async fn send(&self, buf: &[u8]) -> io::Result<usize> {
match self.write_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::send_timeout(self.raw_fd(), buf.to_vec(), 0, timeout).await
}
None => {
crate::sys::linux::net::send(NetOp::Send {
fd: self.raw_fd(),
data: buf.to_vec(),
flags: 0,
})
.await
}
}
}
pub async fn recv(&self, buf: &mut [u8]) -> io::Result<usize> {
let data = match self.read_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::recv_timeout(self.raw_fd(), buf.len(), 0, timeout).await?
}
None => {
crate::sys::linux::net::recv(NetOp::Recv {
fd: self.raw_fd(),
len: buf.len(),
flags: 0,
})
.await?
}
};
let read = data.len();
buf[..read].copy_from_slice(&data);
Ok(read)
}
pub async fn peek(&self, buf: &mut [u8]) -> io::Result<usize> {
let data = match self.read_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::recv_timeout(
self.raw_fd(),
buf.len(),
libc::MSG_PEEK,
timeout,
)
.await?
}
None => {
crate::sys::linux::net::recv(NetOp::Recv {
fd: self.raw_fd(),
len: buf.len(),
flags: libc::MSG_PEEK,
})
.await?
}
};
let read = data.len();
buf[..read].copy_from_slice(&data);
Ok(read)
}
pub async fn send_to<A>(&self, buf: &[u8], addr: A) -> io::Result<usize>
where
A: ToSocketAddrs + Send + 'static,
{
let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
let mut last_error = None;
let timeout = self.write_timeout_value();
for addr in addrs {
let result = match timeout {
Some(timeout) => {
crate::sys::linux::net::send_to_timeout(
self.raw_fd(),
buf.to_vec(),
addr,
0,
timeout,
)
.await
}
None => {
crate::sys::linux::net::send_to(NetOp::SendTo {
fd: self.raw_fd(),
target: addr,
data: buf.to_vec(),
flags: 0,
})
.await
}
};
match result {
Ok(sent) => return Ok(sent),
Err(error) => last_error = Some(error),
}
}
Err(last_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::AddrNotAvailable,
"address resolution returned no usable UDP destinations",
)
}))
}
pub async fn recv_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
let datagram = match self.read_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::recv_from_timeout(self.raw_fd(), buf.len(), 0, timeout)
.await?
}
None => {
crate::sys::linux::net::recv_from(NetOp::RecvFrom {
fd: self.raw_fd(),
len: buf.len(),
flags: 0,
})
.await?
}
};
let read = datagram.data.len();
buf[..read].copy_from_slice(&datagram.data);
Ok((read, datagram.peer_addr))
}
pub async fn peek_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
let datagram = match self.read_timeout_value() {
Some(timeout) => {
crate::sys::linux::net::recv_from_timeout(
self.raw_fd(),
buf.len(),
libc::MSG_PEEK,
timeout,
)
.await?
}
None => {
crate::sys::linux::net::recv_from(NetOp::RecvFrom {
fd: self.raw_fd(),
len: buf.len(),
flags: libc::MSG_PEEK,
})
.await?
}
};
let read = datagram.data.len();
buf[..read].copy_from_slice(&datagram.data);
Ok((read, datagram.peer_addr))
}
pub async fn try_clone(&self) -> io::Result<Self> {
crate::sys::linux::net::duplicate(self.raw_fd())
.await
.map(Self::from_owned_fd)
}
pub fn local_addr(&self) -> io::Result<SocketAddr> {
crate::sys::linux::net::local_addr(self.raw_fd())
}
pub fn peer_addr(&self) -> io::Result<SocketAddr> {
crate::sys::linux::net::peer_addr(self.raw_fd())
}
pub fn broadcast(&self) -> io::Result<bool> {
crate::sys::linux::net::broadcast(self.raw_fd())
}
pub fn set_broadcast(&self, enabled: bool) -> io::Result<()> {
crate::sys::linux::net::set_broadcast(self.raw_fd(), enabled)
}
pub fn ttl(&self) -> io::Result<u32> {
crate::sys::linux::net::ttl(self.raw_fd())
}
pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
}
pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
Ok(self.read_timeout_value())
}
pub fn set_read_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
validate_optional_timeout(timeout)?;
self.inner.timeouts.lock().unwrap().read = timeout;
Ok(())
}
pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
Ok(self.write_timeout_value())
}
pub fn set_write_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
validate_optional_timeout(timeout)?;
self.inner.timeouts.lock().unwrap().write = timeout;
Ok(())
}
fn from_owned_fd(fd: OwnedFd) -> Self {
Self {
inner: Arc::new(UdpSocketInner {
fd,
timeouts: Mutex::new(SocketTimeouts::default()),
}),
}
}
fn raw_fd(&self) -> RawFd {
self.inner.fd.as_raw_fd()
}
fn read_timeout_value(&self) -> Option<Duration> {
self.inner.timeouts.lock().unwrap().read
}
fn write_timeout_value(&self) -> Option<Duration> {
self.inner.timeouts.lock().unwrap().write
}
}
impl HyperRead for TcpStream {
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
mut buf: ReadBufCursor<'_>,
) -> Poll<Result<(), io::Error>> {
let this = self.get_mut();
if buf.remaining() == 0 {
return Poll::Ready(Ok(()));
}
if this.pending_read.is_none() {
this.pending_read = Some(match this.read_timeout_value() {
Some(timeout) => Box::pin(crate::sys::linux::net::recv_timeout(
this.raw_fd(),
buf.remaining(),
0,
timeout,
)),
None => crate::sys::linux::net::recv_future(this.raw_fd(), buf.remaining()),
});
}
let poll = this
.pending_read
.as_mut()
.expect("pending read future should exist")
.as_mut()
.poll(cx);
match poll {
Poll::Ready(Ok(data)) => {
this.pending_read = None;
buf.put_slice(&data);
Poll::Ready(Ok(()))
}
Poll::Ready(Err(error)) => {
this.pending_read = None;
Poll::Ready(Err(error))
}
Poll::Pending => Poll::Pending,
}
}
}
impl HyperWrite for TcpStream {
fn poll_write(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, io::Error>> {
let this = self.get_mut();
if buf.is_empty() {
return Poll::Ready(Ok(0));
}
if this.pending_write.is_none() {
this.pending_write = Some(match this.write_timeout_value() {
Some(timeout) => Box::pin(crate::sys::linux::net::send_timeout(
this.raw_fd(),
buf.to_vec(),
0,
timeout,
)),
None => crate::sys::linux::net::send_future(this.raw_fd(), buf.to_vec()),
});
}
let poll = this
.pending_write
.as_mut()
.expect("pending write future should exist")
.as_mut()
.poll(cx);
match poll {
Poll::Ready(Ok(written)) => {
this.pending_write = None;
Poll::Ready(Ok(written))
}
Poll::Ready(Err(error)) => {
this.pending_write = None;
Poll::Ready(Err(error))
}
Poll::Pending => Poll::Pending,
}
}
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
Poll::Ready(Ok(()))
}
fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
let this = self.get_mut();
if this.pending_shutdown.is_none() {
this.pending_shutdown = Some(crate::sys::linux::net::shutdown_future(
this.raw_fd(),
Shutdown::Write,
));
}
let poll = this
.pending_shutdown
.as_mut()
.expect("pending shutdown future should exist")
.as_mut()
.poll(cx);
match poll {
Poll::Ready(Ok(())) => {
this.pending_shutdown = None;
Poll::Ready(Ok(()))
}
Poll::Ready(Err(error)) => {
this.pending_shutdown = None;
Poll::Ready(Err(error))
}
Poll::Pending => Poll::Pending,
}
}
}
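// Illustrative sketch (exposition only; assumes hyper's `server`/`http1` features and the
// workspace's `http-body-util` crate are enabled for this crate): the Read/Write impls above
// let hyper's HTTP/1 connection state machine drive a TcpStream directly, no adapter needed.
#[allow(dead_code)]
async fn serve_one_connection_sketch(stream: TcpStream) -> Result<(), hyper::Error> {
    use http_body_util::Full;
    use hyper::body::Bytes;
    use hyper::service::service_fn;
    hyper::server::conn::http1::Builder::new()
        .serve_connection(
            stream,
            service_fn(|_request| async {
                // Answer every request with a fixed body; a real handler would route on the URI.
                Ok::<_, std::convert::Infallible>(hyper::Response::new(Full::new(
                    Bytes::from_static(b"hello from the ioring runtime"),
                )))
            }),
        )
        .await
}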
fn validate_optional_timeout(timeout: Option<Duration>) -> io::Result<()> {
if let Some(timeout) = timeout {
validate_timeout(timeout)?;
}
Ok(())
}
fn validate_timeout(timeout: Duration) -> io::Result<()> {
if timeout.is_zero() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"zero-duration timeouts are not supported",
));
}
Ok(())
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use std::time::Duration;
use crate::{queue_future, queue_task, run};
use super::{TcpListener, TcpStream, UdpSocket};
use std::io::ErrorKind;
use std::net::SocketAddr;
#[test]
fn tcp_listener_and_stream_round_trip() {
let received = Arc::new(Mutex::new(None::<Vec<u8>>));
let received_for_task = Arc::clone(&received);
queue_task(move || {
let received_for_task = Arc::clone(&received_for_task);
queue_future(async move {
let listener = Arc::new(
TcpListener::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("listener should bind"),
);
let local_addr = listener
.local_addr()
.expect("listener should expose address");
let listener_for_accept = Arc::clone(&listener);
let server = queue_future(async move {
let (mut stream, peer_addr) = listener_for_accept
.accept()
.await
.expect("listener should accept");
assert_eq!(peer_addr.ip().to_string(), "127.0.0.1");
let mut buffer = [0; 32];
let read = stream
.read(&mut buffer)
.await
.expect("server read should succeed");
stream
.write_all(b"pong")
.await
.expect("server write should succeed");
buffer[..read].to_vec()
});
let mut client = TcpStream::connect(local_addr)
.await
.expect("client should connect");
client
.set_nodelay(true)
.expect("setting TCP_NODELAY should succeed");
assert!(
client
.nodelay()
.expect("reading TCP_NODELAY should succeed"),
"TCP_NODELAY should be enabled",
);
client
.write_all(b"ping")
.await
.expect("client write should succeed");
let mut response = [0; 4];
client
.read_exact(&mut response)
.await
.expect("client read should succeed");
assert_eq!(&response, b"pong");
let server_bytes = server.await;
*received_for_task
.lock()
.expect("received buffer should not be poisoned") = Some(server_bytes);
});
});
run();
let received = received
.lock()
.expect("received buffer should not be poisoned");
assert_eq!(received.as_deref(), Some(b"ping".as_slice()));
}
#[test]
fn tcp_connect_resolves_localhost() {
let peer = Arc::new(Mutex::new(None::<String>));
let peer_for_task = Arc::clone(&peer);
queue_task(move || {
let peer_for_task = Arc::clone(&peer_for_task);
queue_future(async move {
let listener = Arc::new(
TcpListener::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("listener should bind"),
);
let port = listener
.local_addr()
.expect("listener should expose address")
.port();
let listener_for_accept = Arc::clone(&listener);
let server = queue_future(async move {
let (stream, peer_addr) = listener_for_accept
.accept()
.await
.expect("listener should accept");
drop(stream);
peer_addr
});
let _client = TcpStream::connect(format!("localhost:{port}"))
.await
.expect("localhost DNS connect should succeed");
let peer_addr = server.await;
*peer_for_task
.lock()
.expect("peer buffer should not be poisoned") =
Some(peer_addr.ip().to_string());
});
});
run();
let peer = peer.lock().expect("peer buffer should not be poisoned");
assert_eq!(peer.as_deref(), Some("127.0.0.1"));
}
#[test]
fn udp_send_to_and_recv_from_round_trip() {
let server_received = Arc::new(Mutex::new(None::<Vec<u8>>));
let server_received_for_task = Arc::clone(&server_received);
queue_task(move || {
let server_received_for_task = Arc::clone(&server_received_for_task);
queue_future(async move {
let server = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("server udp socket should bind");
let client = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("client udp socket should bind");
server
.set_broadcast(true)
.expect("enabling broadcast should succeed");
assert!(
server
.broadcast()
.expect("reading broadcast should succeed"),
"broadcast should be enabled",
);
client.set_ttl(42).expect("setting ttl should succeed");
assert_eq!(client.ttl().expect("reading ttl should succeed"), 42);
let server_addr = server.local_addr().expect("server should expose address");
let client_addr = client.local_addr().expect("client should expose address");
let server_task = queue_future(async move {
let mut peek_buffer = [0; 32];
let (peeked, peek_peer) = server
.peek_from(&mut peek_buffer)
.await
.expect("server peek_from should succeed");
assert_eq!(&peek_buffer[..peeked], b"ping");
assert_eq!(peek_peer, client_addr);
let mut buffer = [0; 32];
let (read, peer) = server
.recv_from(&mut buffer)
.await
.expect("server recv_from should succeed");
assert_eq!(peer, client_addr);
server
.send_to(b"pong", peer)
.await
.expect("server send_to should succeed");
buffer[..read].to_vec()
});
client
.send_to(b"ping", server_addr)
.await
.expect("client send_to should succeed");
let mut response = [0; 32];
let (read, peer) = client
.recv_from(&mut response)
.await
.expect("client recv_from should succeed");
assert_eq!(peer, server_addr);
assert_eq!(&response[..read], b"pong");
let received = server_task.await;
*server_received_for_task.lock().unwrap() = Some(received);
});
});
run();
let server_received = server_received.lock().unwrap();
assert_eq!(server_received.as_deref(), Some(b"ping".as_slice()));
}
#[test]
fn udp_connected_sockets_and_timeouts_work() {
let observed = Arc::new(Mutex::new(Vec::new()));
let observed_for_task = Arc::clone(&observed);
queue_task(move || {
let observed_for_task = Arc::clone(&observed_for_task);
queue_future(async move {
let server = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("server udp socket should bind");
let client = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
.await
.expect("client udp socket should bind");
let server_addr = server.local_addr().expect("server should expose address");
let client_addr = client.local_addr().expect("client should expose address");
client
.connect(server_addr)
.await
.expect("client udp connect should succeed");
server
.connect(client_addr)
.await
.expect("server udp connect should succeed");
client
.set_read_timeout(Some(Duration::from_millis(5)))
.expect("setting read timeout should succeed");
assert_eq!(
client
.read_timeout()
.expect("reading read timeout should succeed"),
Some(Duration::from_millis(5))
);
let mut buffer = [0; 16];
let error = client
.recv(&mut buffer)
.await
.expect_err("recv should time out before any datagram arrives");
assert_eq!(error.kind(), ErrorKind::TimedOut);
observed_for_task
.lock()
.unwrap()
.push("timed out".to_string());
server
.send(b"hello")
.await
.expect("server send should succeed");
let peeked = client.peek(&mut buffer).await.expect("peek should succeed");
assert_eq!(&buffer[..peeked], b"hello");
let read = client.recv(&mut buffer).await.expect("recv should succeed");
assert_eq!(&buffer[..read], b"hello");
observed_for_task
.lock()
.unwrap()
.push("received".to_string());
});
});
run();
let observed = observed.lock().unwrap();
assert_eq!(observed.as_slice(), ["timed out", "received"]);
}
}

@@ -0,0 +1,147 @@
#![allow(dead_code)]
use std::future::Future;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll, Waker};
use crate::platform::linux_x86_64::runtime::{ThreadHandle, current_thread_handle};
type CancelCallback = Box<dyn FnOnce() + Send + 'static>;
struct CompletionState<T> {
owner: ThreadHandle,
interested: AtomicBool,
finished: AtomicBool,
wake_queued: AtomicBool,
result: Mutex<Option<T>>,
waker: Mutex<Option<Waker>>,
cancel: Mutex<Option<CancelCallback>>,
}
impl<T: Send + 'static> CompletionState<T> {
fn queue_wake(self: &Arc<Self>) {
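        // Coalesce wakes: at most one wake microtask is queued at a time. If the owner thread
        // refuses the microtask (it has shut down), roll the flag back so a later completion
        // can retry.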
if self.wake_queued.swap(true, Ordering::AcqRel) {
return;
}
let state = Arc::clone(self);
if !self.owner.queue_microtask(move || {
state.wake_queued.store(false, Ordering::Release);
if let Some(waker) = state.waker.lock().unwrap().take() {
waker.wake();
}
}) {
self.wake_queued.store(false, Ordering::Release);
}
}
}
pub(crate) struct CompletionFuture<T> {
state: Arc<CompletionState<T>>,
}
pub(crate) struct CompletionHandle<T> {
state: Arc<CompletionState<T>>,
}
impl<T> Clone for CompletionHandle<T> {
fn clone(&self) -> Self {
Self {
state: Arc::clone(&self.state),
}
}
}
pub(crate) fn completion<T: Send + 'static>(
owner: ThreadHandle,
) -> (CompletionFuture<T>, CompletionHandle<T>) {
owner.begin_async_operation();
let state = Arc::new(CompletionState {
owner,
interested: AtomicBool::new(true),
finished: AtomicBool::new(false),
wake_queued: AtomicBool::new(false),
result: Mutex::new(None),
waker: Mutex::new(None),
cancel: Mutex::new(None),
});
(
CompletionFuture {
state: Arc::clone(&state),
},
CompletionHandle { state },
)
}
pub(crate) fn completion_for_current_thread<T: Send + 'static>()
-> (CompletionFuture<T>, CompletionHandle<T>) {
completion(current_thread_handle())
}
impl<T: Send + 'static> CompletionHandle<T> {
pub(crate) fn complete(self, value: T) {
self.finish(Some(value));
}
pub(crate) fn finish(self, value: Option<T>) {
if self.state.finished.swap(true, Ordering::AcqRel) {
return;
}
let interested = self.state.interested.load(Ordering::Acquire);
if interested {
*self.state.result.lock().unwrap() = value;
self.state.queue_wake();
}
let _ = self.state.cancel.lock().unwrap().take();
self.state.owner.finish_async_operation();
}
pub(crate) fn set_cancel(&self, cancel: impl FnOnce() + Send + 'static) {
*self.state.cancel.lock().unwrap() = Some(Box::new(cancel));
}
pub(crate) fn is_interested(&self) -> bool {
self.state.interested.load(Ordering::Acquire)
}
}
impl<T> Future for CompletionFuture<T> {
type Output = T;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
if let Some(value) = self.state.result.lock().unwrap().take() {
return Poll::Ready(value);
}
*self.state.waker.lock().unwrap() = Some(cx.waker().clone());
if let Some(value) = self.state.result.lock().unwrap().take() {
let _ = self.state.waker.lock().unwrap().take();
return Poll::Ready(value);
}
Poll::Pending
}
}
impl<T> Drop for CompletionFuture<T> {
fn drop(&mut self) {
if !self.state.interested.swap(false, Ordering::AcqRel) {
return;
}
let _ = self.state.result.lock().unwrap().take();
let _ = self.state.waker.lock().unwrap().take();
if !self.state.finished.load(Ordering::Acquire)
&& let Some(cancel) = self.state.cancel.lock().unwrap().take()
{
cancel();
}
}
}
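// Illustrative sketch (exposition only; assumes it is polled from a runtime worker thread so
// `current_thread_handle()` resolves): a backend completes an operation through the handle
// while the issuing task awaits the paired future.
async fn completion_usage_sketch() -> u64 {
    let (future, handle) = completion_for_current_thread::<u64>();
    // A real backend would stash `handle` in its completion path (e.g. an ioring CQE handler)
    // and arm cancellation for the case where the future is dropped before the result arrives.
    handle.set_cancel(|| { /* tell the backend to abandon the in-flight operation */ });
    handle.complete(42);
    future.await
}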

lib/runtime/src/op/fs.rs Normal file
@@ -0,0 +1,105 @@
//! Logical filesystem operations.
//!
//! This layer owns request data so the public API can keep borrowed buffers while platform
//! backends pin, stage, or offload as needed.
use std::ffi::OsString;
use std::os::fd::RawFd;
use std::path::PathBuf;
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct OpenOptions {
pub read: bool,
pub write: bool,
pub append: bool,
pub truncate: bool,
pub create: bool,
pub create_new: bool,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MetadataTarget {
Path(PathBuf),
File(RawFd),
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileType {
File,
Directory,
Symlink,
BlockDevice,
CharacterDevice,
Fifo,
Socket,
Unknown,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RawMetadata {
pub file_type: FileType,
pub mode: u16,
pub len: u64,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RawDirEntry {
pub path: PathBuf,
pub file_name: OsString,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FsOp {
Open {
path: PathBuf,
options: OpenOptions,
},
Read {
fd: RawFd,
offset: Option<u64>,
len: usize,
},
Write {
fd: RawFd,
offset: Option<u64>,
data: Vec<u8>,
},
Metadata {
target: MetadataTarget,
follow_symlinks: bool,
},
SetLen {
fd: RawFd,
len: u64,
},
SyncAll {
fd: RawFd,
},
SyncData {
fd: RawFd,
},
Duplicate {
fd: RawFd,
},
CreateDir {
path: PathBuf,
recursive: bool,
mode: u32,
},
RemoveFile {
path: PathBuf,
},
RemoveDir {
path: PathBuf,
},
Rename {
from: PathBuf,
to: PathBuf,
},
ReadDir {
path: PathBuf,
},
Close {
fd: RawFd,
},
}
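// Illustrative sketch (exposition only): the logical request an open-for-read call is lowered
// to before the platform backend stages it. The helper is not referenced elsewhere.
#[allow(dead_code)]
fn open_read_only_request(path: PathBuf) -> FsOp {
    FsOp::Open {
        path,
        options: OpenOptions {
            read: true,
            ..OpenOptions::default()
        },
    }
}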

@@ -0,0 +1,8 @@
//! Internal and public operation-layer building blocks.
//!
//! The operation layer defines logical work units that bridge user-facing APIs and platform
//! backends without leaking platform details upward.
pub(crate) mod completion;
pub mod fs;
pub mod net;

lib/runtime/src/op/net.rs Normal file
@@ -0,0 +1,69 @@
//! Logical networking operations shared between the public API and Linux backend.
use std::net::{Shutdown, SocketAddr};
use std::os::fd::RawFd;
#[derive(Debug)]
pub enum NetOp {
Socket {
domain: i32,
socket_type: i32,
protocol: i32,
flags: u32,
},
Connect {
fd: RawFd,
addr: SocketAddr,
},
Bind {
fd: RawFd,
addr: SocketAddr,
},
Listen {
fd: RawFd,
backlog: i32,
},
Accept {
fd: RawFd,
},
Send {
fd: RawFd,
data: Vec<u8>,
flags: i32,
},
SendTo {
fd: RawFd,
target: SocketAddr,
data: Vec<u8>,
flags: i32,
},
Recv {
fd: RawFd,
len: usize,
flags: i32,
},
RecvFrom {
fd: RawFd,
len: usize,
flags: i32,
},
Shutdown {
fd: RawFd,
how: Shutdown,
},
Close {
fd: RawFd,
},
}
#[derive(Clone, Debug)]
pub struct AcceptedSocket {
pub fd: RawFd,
pub peer_addr: SocketAddr,
}
#[derive(Clone, Debug)]
pub struct ReceivedDatagram {
pub data: Vec<u8>,
pub peer_addr: SocketAddr,
}
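// Illustrative sketch (exposition only): the logical operation a TCP/UDP send is lowered to;
// the op owns a copy of the bytes so the backend can hold them across the ring submission.
#[allow(dead_code)]
fn send_request(fd: RawFd, payload: &[u8]) -> NetOp {
    NetOp::Send {
        fd,
        data: payload.to_vec(),
        flags: 0,
    }
}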

@@ -0,0 +1,864 @@
use core::alloc::Layout;
use core::mem::size_of;
use core::ptr::copy_nonoverlapping;
use core::sync::atomic::{AtomicU32, Ordering};
use super::arena::Arena;
use super::constants::{
MAX_ATTACHED_MINIHEAPS_PER_CLASS, MAX_SMALL_ALLOCATION, MIN_SHUFFLE_VECTOR_LENGTH,
MINIHEAP_REFILL_GOAL_SIZE, NUM_SIZE_CLASSES, is_below_partial_threshold,
};
use super::fault::{self, ActiveMeshGuard};
use super::meshing::bitmaps_meshable;
use super::miniheap::{MiniHeap, MiniHeapId};
use super::page::{page_count, page_size, runtime_slots_per_span};
use super::platform;
use super::pool::MiniHeapPool;
use super::raw_sys;
use super::rng::Mwc;
use super::shuffle::ShuffleEntry;
use super::size_map::{byte_size_for_class, size_class_for};
use super::stats::{MeshStats, StatsState};
use super::sync::{FutexMutex, futex_wait_for_value, futex_wake_all};
use super::thread_local_heap::ThreadLocalHeap;
#[derive(Debug)]
pub struct MeshAllocator {
arena: Arena,
pool: MiniHeapPool,
bootstrap_thread: *mut ThreadLocalHeap,
compaction_candidates: *mut MiniHeapId,
meshing_rng: Mwc,
mesh_epoch: AtomicU32,
pool_lock: FutexMutex,
stats: StatsState,
}
#[derive(Clone, Copy, Debug)]
struct ResolvedPtr {
owner_id: MiniHeapId,
slot: usize,
}
impl MeshAllocator {
pub fn new(arena_size: usize, miniheap_capacity: u32) -> raw_sys::Result<Self> {
fault::ensure_fault_mediation_installed()?;
let bootstrap_thread = unsafe {
platform::map_anonymous(
size_of::<ThreadLocalHeap>(),
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
)? as *mut ThreadLocalHeap
};
unsafe {
bootstrap_thread.write(ThreadLocalHeap::new()?);
}
let compaction_candidates = unsafe {
platform::map_anonymous(
miniheap_capacity as usize * size_of::<MiniHeapId>(),
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
)? as *mut MiniHeapId
};
Ok(Self {
arena: Arena::with_size(arena_size)?,
pool: MiniHeapPool::with_capacity(miniheap_capacity)?,
bootstrap_thread,
compaction_candidates,
meshing_rng: Mwc::from_os_seed()?,
mesh_epoch: AtomicU32::new(0),
pool_lock: FutexMutex::new(),
stats: StatsState::new(),
})
}
#[inline(always)]
pub fn arena(&self) -> &Arena {
&self.arena
}
#[inline(always)]
pub fn pool(&self) -> &MiniHeapPool {
&self.pool
}
#[inline(always)]
pub fn live_miniheap_count(&self) -> u32 {
self.pool.live_len()
}
pub fn stats(&self) -> MeshStats {
let page = page_size();
let (reusable_span_count, reusable_pages) = self.arena.reusable_span_stats();
let counters = self.stats.snapshot();
let mut stats = MeshStats {
arena_size: self.arena.arena_size(),
reserved_bytes: self.arena.reserved_pages() as usize * page,
reusable_span_count,
reusable_span_bytes: reusable_pages as usize * page,
live_miniheaps: self.pool.live_len(),
small_allocations: counters.small_allocations,
small_deallocations: counters.small_deallocations,
large_allocations: counters.large_allocations,
large_deallocations: counters.large_deallocations,
compact_calls: counters.compact_calls,
meshes_performed: counters.meshes_performed,
meshed_pages: counters.meshed_pages,
meshed_bytes: counters.meshed_bytes,
..MeshStats::default()
};
let mut candidate_heaps_by_class = [0u32; NUM_SIZE_CLASSES];
let mut candidate_pages_by_class = [0u32; NUM_SIZE_CLASSES];
let mut candidate_free_bytes_by_class = [0usize; NUM_SIZE_CLASSES];
let mut candidate_span_bytes_by_class = [0usize; NUM_SIZE_CLASSES];
let len = self.pool.len();
let mut id_value = 1u32;
while id_value <= len {
let id = MiniHeapId::new(id_value);
if let Some(heap) = self.pool.get(id) {
if heap.is_large_alloc() {
stats.live_large_allocations += 1;
stats.live_large_bytes += heap.span_size();
stats.retained_large_span_bytes += heap.span_size();
id_value += 1;
continue;
}
stats.live_small_heaps += 1;
stats.live_small_bytes += heap.in_use_count() as usize * heap.object_size();
stats.virtual_small_span_bytes += heap.span_size();
if heap.is_meshed() {
stats.meshed_small_heaps += 1;
} else {
stats.retained_small_span_bytes += heap.span_size();
}
if heap.is_full() {
stats.full_small_heaps += 1;
} else if !heap.is_empty() {
stats.partial_small_heaps += 1;
}
if !heap.is_attached() && !heap.is_full() && !heap.is_meshed() {
stats.reusable_small_heaps += 1;
}
if self.heap_is_compaction_candidate(heap.size_class(), heap) {
let class = heap.size_class() as usize;
stats.compaction.candidate_heaps += 1;
stats.compaction.candidate_pages += heap.span().length;
stats.compaction.candidate_free_bytes += heap.bytes_free();
candidate_heaps_by_class[class] += 1;
candidate_pages_by_class[class] += heap.span().length;
candidate_free_bytes_by_class[class] += heap.bytes_free();
candidate_span_bytes_by_class[class] = heap.span_size();
}
}
id_value += 1;
}
let mut class = 1usize;
while class < NUM_SIZE_CLASSES {
let span_bytes = candidate_span_bytes_by_class[class];
if let Some(pair_bound_by_free) =
candidate_free_bytes_by_class[class].checked_div(span_bytes)
{
let pair_bound_by_count = candidate_heaps_by_class[class] / 2;
let best_case_meshes = pair_bound_by_count.min(pair_bound_by_free as u32);
let pages_per_mesh =
candidate_pages_by_class[class] / candidate_heaps_by_class[class].max(1);
stats.compaction.best_case_meshes += best_case_meshes;
stats.compaction.best_case_reclaimable_pages += best_case_meshes * pages_per_mesh;
stats.compaction.best_case_reclaimable_bytes +=
best_case_meshes as usize * span_bytes;
}
class += 1;
}
stats
}
pub fn allocate(&mut self, size: usize) -> Option<*mut u8> {
let thread_heap = unsafe { &mut *self.bootstrap_thread };
self.allocate_with_thread(size, thread_heap)
}
pub fn allocate_with_thread(
&mut self,
size: usize,
thread_heap: &mut ThreadLocalHeap,
) -> Option<*mut u8> {
let size = size.max(1);
if size <= MAX_SMALL_ALLOCATION {
let class = size_class_for(size)?;
if let Some(ptr) = self.try_allocate_small_local(thread_heap, class) {
return Some(ptr);
}
return self.allocate_small_with_thread(thread_heap, class);
}
self.allocate_large(size)
}
pub fn allocate_layout(&mut self, layout: Layout) -> Option<*mut u8> {
let thread_heap = unsafe { &mut *self.bootstrap_thread };
self.allocate_layout_with_thread(thread_heap, layout)
}
pub fn allocate_layout_with_thread(
&mut self,
thread_heap: &mut ThreadLocalHeap,
layout: Layout,
) -> Option<*mut u8> {
let aligned_size = round_up_to_alignment(layout.size().max(1), layout.align())?;
if aligned_size <= MAX_SMALL_ALLOCATION && layout.align() <= page_size() {
let class = size_class_for(aligned_size)?;
if byte_size_for_class(class).is_multiple_of(layout.align()) {
if let Some(ptr) = self.try_allocate_small_local(thread_heap, class) {
return Some(ptr);
}
return self.allocate_small_with_thread(thread_heap, class);
}
}
self.allocate_large_aligned(aligned_size, layout.align())
}
pub fn deallocate(&mut self, ptr: *mut u8) {
let thread_heap = unsafe { &mut *self.bootstrap_thread };
self.deallocate_with_thread(ptr, thread_heap);
}
pub fn deallocate_with_thread(&mut self, ptr: *mut u8, thread_heap: &mut ThreadLocalHeap) {
if ptr.is_null() {
return;
}
let Some(resolved) = self.resolve_pointer(ptr) else {
return;
};
let id = resolved.owner_id;
let Some(heap) = self.pool.get(id) else {
return;
};
if heap.is_large_alloc() {
let span = heap.span();
self.stats.record_large_deallocation();
let _ = heap.free_offset(0);
self.arena.clear_miniheap(span);
self.arena.release_span(span);
let _ = {
let _guard = self.pool_lock.lock();
self.pool.release(id)
};
return;
}
let slot = resolved.slot;
let class = heap.size_class();
let thread_id = thread_heap.thread_id();
self.stats.record_small_deallocation();
if heap.current_thread() == thread_id && heap.is_attached() && !heap.is_meshed() {
let state = thread_heap.class_mut(class);
let attached_idx = state.find_attached(id);
if let Some(attached_idx) = attached_idx
&& !state.shuffle.is_full()
{
let cached = state.shuffle.count_entries_for_offset(attached_idx as u16);
if cached + 1 == heap.max_count() as usize {
self.release_class_attached(thread_heap, class);
if let Some(heap) = self.pool.get(id) {
let _ = heap.free_offset(slot);
}
self.reclaim_empty_detached_heap(id);
return;
}
if cached + 1 < heap.max_count() as usize {
state
.shuffle
.push(ShuffleEntry::new(attached_idx as u16, slot as u16));
return;
}
}
}
let state = thread_heap.class_mut(class);
let _ = heap.free_offset(slot);
if heap.is_attached()
&& is_below_partial_threshold(heap.in_use_count(), heap.max_count() as u32)
{
heap.unset_attached();
if let Some(attached_idx) = state.find_attached(id) {
state.attached_ids[attached_idx as usize] = MiniHeapId::new(0);
state.attached_heaps[attached_idx as usize] = core::ptr::null();
}
}
if heap.is_empty() && !heap.is_meshed() {
self.reclaim_empty_detached_heap(id);
}
}
pub fn deallocate_layout(&mut self, ptr: *mut u8, _layout: Layout) {
self.deallocate(ptr);
}
pub fn try_deallocate_local(&self, ptr: *mut u8, thread_heap: &mut ThreadLocalHeap) -> bool {
if ptr.is_null() {
return true;
}
if self.mesh_epoch.load(Ordering::Acquire) & 1 != 0 {
return false;
}
let Some(resolved) = self.resolve_pointer(ptr) else {
return true;
};
let id = resolved.owner_id;
let Some(heap) = self.pool.get(id) else {
return true;
};
if heap.is_large_alloc()
|| heap.current_thread() != thread_heap.thread_id()
|| heap.is_meshed()
|| !heap.contains_ptr(self.arena.base_ptr() as usize, ptr)
{
return false;
}
let class = heap.size_class();
let slot = resolved.slot;
let state = thread_heap.class_mut(class);
let Some(attached_idx) = state.find_attached(id) else {
return false;
};
if state.shuffle.is_full() {
return false;
}
let cached = state.shuffle.count_entries_for_offset(attached_idx as u16);
if cached + 1 >= heap.max_count() as usize {
return false;
}
state
.shuffle
.push(ShuffleEntry::new(attached_idx as u16, slot as u16));
self.stats.record_small_deallocation();
true
}
/// # Safety
///
/// `ptr` must have been allocated by this allocator with `layout`, and must not be used
/// after this call if a new allocation is returned.
pub unsafe fn reallocate(
&mut self,
ptr: *mut u8,
layout: Layout,
new_size: usize,
) -> Option<*mut u8> {
let thread_heap = unsafe { &mut *self.bootstrap_thread };
unsafe { self.reallocate_with_thread(ptr, layout, new_size, thread_heap) }
}
/// # Safety
///
/// `ptr` must have been allocated by this allocator with `layout`, and must not be used
/// after this call if a new allocation is returned.
pub unsafe fn reallocate_with_thread(
&mut self,
ptr: *mut u8,
layout: Layout,
new_size: usize,
thread_heap: &mut ThreadLocalHeap,
) -> Option<*mut u8> {
if ptr.is_null() {
return self.allocate_layout_with_thread(
thread_heap,
Layout::from_size_align(new_size.max(1), layout.align()).ok()?,
);
}
if new_size == 0 {
self.deallocate_with_thread(ptr, thread_heap);
return None;
}
let new_layout = Layout::from_size_align(new_size, layout.align()).ok()?;
let new_ptr = self.allocate_layout_with_thread(thread_heap, new_layout)?;
unsafe {
copy_nonoverlapping(ptr, new_ptr, layout.size().min(new_size));
}
self.deallocate_with_thread(ptr, thread_heap);
Some(new_ptr)
}
pub fn compact(&mut self) -> usize {
let thread_heap = unsafe { &mut *self.bootstrap_thread };
self.compact_with_thread(thread_heap)
}
pub fn compact_with_thread(&mut self, thread_heap: &mut ThreadLocalHeap) -> usize {
let _epoch_guard = MeshingEpochGuard::new(core::ptr::addr_of!(self.mesh_epoch));
self.stats.record_compact_call();
self.shutdown_thread(thread_heap);
let mut meshes = 0usize;
for class_idx in 1..NUM_SIZE_CLASSES {
meshes += self.mesh_class_candidates(class_idx as u8);
}
meshes
}
pub fn try_allocate_small_local(
&self,
thread_heap: &mut ThreadLocalHeap,
class: u8,
) -> Option<*mut u8> {
if self.mesh_epoch.load(Ordering::Acquire) & 1 != 0 {
return None;
}
if thread_heap.class(class).shuffle.is_exhausted() && !self.local_refill(thread_heap, class)
{
return None;
}
let state = thread_heap.class_mut(class);
let entry = state.shuffle.pop()?;
let heap = state.heap_at(entry.miniheap_offset as usize)?;
self.stats.record_small_allocation();
Some(heap.ptr_from_offset(self.arena.base_ptr() as usize, entry.slot_index as usize))
}
fn allocate_small_with_thread(
&mut self,
thread_heap: &mut ThreadLocalHeap,
class: u8,
) -> Option<*mut u8> {
self.global_refill(thread_heap, class)?;
self.try_allocate_small_local(thread_heap, class)
}
fn allocate_large(&mut self, size: usize) -> Option<*mut u8> {
self.allocate_large_aligned(size, 1)
}
fn allocate_large_aligned(&mut self, size: usize, align: usize) -> Option<*mut u8> {
let page_align = page_alignment_for(align)?;
let (_, span) = self.arena.allocate_bytes(size, page_align)?;
let (id, heap) = {
let _guard = self.pool_lock.lock();
self.pool.allocate(span, 1, size)?
};
self.arena.track_miniheap(span, id);
self.stats.record_large_allocation();
heap.malloc_at(self.arena.base_ptr() as usize, 0)
}
fn local_refill(&self, thread_heap: &mut ThreadLocalHeap, class: u8) -> bool {
let state = thread_heap.class_mut(class);
let count = state.attached_len as usize;
if count == 0 {
return false;
}
let mut scanned = 0usize;
while scanned < count && state.shuffle.is_exhausted() {
let idx = (state.attached_cursor as usize) % count;
state.attached_cursor = ((idx + 1) % count) as u8;
let heap_ptr = state.attached_heaps[idx];
if !heap_ptr.is_null() {
let heap = unsafe { &*heap_ptr };
if !heap.is_full() {
let _ = state.shuffle.refill_from_heap(idx as u16, heap);
}
}
scanned += 1;
}
!state.shuffle.is_exhausted()
}
fn global_refill(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) -> Option<()> {
self.release_class_attached(thread_heap, class);
let object_size = byte_size_for_class(class);
let object_count = miniheap_object_count(object_size);
let page_count = page_count(object_size * object_count) as u32;
let thread_id = thread_heap.thread_id();
let mut bytes_free = self.attach_reusable_heaps(thread_heap, class);
while bytes_free < MINIHEAP_REFILL_GOAL_SIZE && !thread_heap.class(class).attached_full() {
let (_, span) = self.arena.page_alloc(page_count, 1)?;
let (id, heap) = {
let _guard = self.pool_lock.lock();
self.pool.allocate(span, object_count as u16, object_size)?
};
self.arena.track_miniheap(span, id);
let slot = thread_heap
.class_mut(class)
.push_attached(id, heap as *const MiniHeap)?;
heap.set_attached(thread_id);
heap.set_shuffle_vector_offset(slot);
bytes_free += heap.bytes_free();
}
thread_heap.class_mut(class).shuffle.clear();
if self.local_refill(thread_heap, class) {
Some(())
} else {
None
}
}
fn attach_reusable_heaps(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) -> usize {
let mut bytes_free = 0usize;
let len = self.pool.len();
let thread_id = thread_heap.thread_id();
let mut id_val = 1u32;
while id_val <= len
&& bytes_free < MINIHEAP_REFILL_GOAL_SIZE
&& !thread_heap.class(class).attached_full()
{
let id = MiniHeapId::new(id_val);
if let Some(heap) = self.pool.get(id)
&& heap.size_class() == class
&& !heap.is_attached()
&& !heap.is_full()
&& !heap.is_meshed()
&& let Some(slot) = thread_heap
.class_mut(class)
.push_attached(id, heap as *const MiniHeap)
{
heap.set_attached(thread_id);
heap.set_shuffle_vector_offset(slot);
bytes_free += heap.bytes_free();
}
id_val += 1;
}
bytes_free
}
fn release_all_attached(&mut self, thread_heap: &mut ThreadLocalHeap) {
for class in 1..NUM_SIZE_CLASSES as u8 {
self.release_class_attached(thread_heap, class);
}
}
pub fn shutdown_thread(&mut self, thread_heap: &mut ThreadLocalHeap) {
self.release_all_attached(thread_heap);
}
fn reclaim_empty_detached_heap(&mut self, id: MiniHeapId) {
let Some(heap) = self.pool.get(id) else {
return;
};
if !heap.is_empty() || heap.is_meshed() || heap.is_attached() || heap.has_meshed_partner() {
return;
}
let span = heap.span();
self.arena.clear_miniheap(span);
self.arena.release_span(span);
let _ = {
let _guard = self.pool_lock.lock();
self.pool.release(id)
};
}
fn release_class_attached(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) {
let mut released_ids = [MiniHeapId::new(0); MAX_ATTACHED_MINIHEAPS_PER_CLASS];
let released_len;
{
let state = thread_heap.class_mut(class);
for entry in state.shuffle.active_entries() {
let attached_idx = entry.miniheap_offset as usize;
if attached_idx >= state.attached_len as usize {
continue;
}
if let Some(heap) = state.heap_at(attached_idx) {
let _ = heap.free_offset(entry.slot_index as usize);
}
}
released_len = state.attached_len as usize;
for (idx, id) in released_ids.iter_mut().enumerate().take(released_len) {
*id = state.attached_ids[idx];
if let Some(heap) = state.heap_at(idx) {
heap.unset_attached();
}
}
state.clear_attached();
}
for id in released_ids.into_iter().take(released_len) {
if id != MiniHeapId::new(0) {
self.reclaim_empty_detached_heap(id);
}
}
}
fn heap_is_compaction_candidate(&self, class: u8, heap: &MiniHeap) -> bool {
heap.size_class() == class
&& !heap.is_attached()
&& !heap.is_full()
&& !heap.is_meshed()
&& heap.object_size() < page_size()
&& is_below_partial_threshold(heap.in_use_count(), heap.max_count() as u32)
}
fn mesh_pair(&mut self, dst_id: MiniHeapId, src_id: MiniHeapId) -> raw_sys::Result<()> {
let dst = self.pool.get(dst_id).expect("valid dst id");
let src = self.pool.get(src_id).expect("valid src id");
let span_size = dst.span_size();
let arena_base = self.arena.base_ptr() as usize;
let object_size = dst.object_size();
let src_snapshot = src.bitmap().snapshot();
let keep = dst.ptr_from_offset(arena_base, 0);
let remove = src.ptr_from_offset(arena_base, 0);
let barrier = ActiveMeshGuard::begin(remove, span_size)?;
let scratch = match unsafe { self.arena.begin_mesh(remove, span_size) } {
Ok(scratch) => scratch,
Err(error) => {
barrier.finish();
return Err(error);
}
};
for slot in src_snapshot.iter_set_bits() {
let src_ptr = unsafe { scratch.add(slot * object_size) };
let dst_ptr = dst.ptr_from_offset(arena_base, slot);
unsafe {
copy_nonoverlapping(src_ptr, dst_ptr, object_size);
}
let _ = dst.bitmap().try_set(slot);
let _ = src.free_offset(slot);
}
let previous_family_head = dst.next_meshed();
src.track_meshed_span(previous_family_head);
dst.track_meshed_span(src_id);
src.set_meshed();
if let Err(error) = unsafe { self.arena.finalize_mesh(keep, remove, scratch, span_size) } {
let _ = unsafe { self.arena.abort_mesh(remove, scratch, span_size) };
barrier.finish();
return Err(error);
}
barrier.finish();
self.arena.free_phys(remove, span_size)?;
self.stats.record_mesh(dst.span().length, span_size);
Ok(())
}
fn mesh_class_candidates(&mut self, class: u8) -> usize {
let mut candidate_len = self.collect_compaction_candidates(class);
if candidate_len < 2 {
return 0;
}
self.shuffle_compaction_candidates(candidate_len);
let mut meshes = 0usize;
while candidate_len > 1 {
let left_index = candidate_len - 1;
let left_id = unsafe { *self.compaction_candidates.add(left_index) };
candidate_len -= 1;
let mut match_index = 0usize;
while match_index < candidate_len {
let right_id = unsafe { *self.compaction_candidates.add(match_index) };
let mesh_result = if let (Some(left), Some(right)) =
(self.pool.get(left_id), self.pool.get(right_id))
{
if self.heap_is_compaction_candidate(class, left)
&& self.heap_is_compaction_candidate(class, right)
&& bitmaps_meshable(left.bitmap(), right.bitmap())
{
if left.has_meshed_partner() && !right.has_meshed_partner() {
self.mesh_pair(left_id, right_id)
} else if right.has_meshed_partner() && !left.has_meshed_partner() {
self.mesh_pair(right_id, left_id)
} else if !right.has_meshed_partner() {
self.mesh_pair(left_id, right_id)
} else if !left.has_meshed_partner() {
self.mesh_pair(right_id, left_id)
} else {
Err(raw_sys::Error(raw_sys::EAGAIN))
}
} else {
Err(raw_sys::Error(raw_sys::EAGAIN))
}
} else {
Err(raw_sys::Error(raw_sys::EAGAIN))
};
if mesh_result.is_ok() {
meshes += 1;
if match_index != candidate_len - 1 {
unsafe {
let replacement = *self.compaction_candidates.add(candidate_len - 1);
self.compaction_candidates
.add(match_index)
.write(replacement);
}
}
candidate_len -= 1;
break;
}
match_index += 1;
}
}
meshes
}
fn collect_compaction_candidates(&mut self, class: u8) -> usize {
let len = self.pool.len();
let mut candidate_len = 0usize;
let mut id_val = 1u32;
while id_val <= len {
let id = MiniHeapId::new(id_val);
if let Some(heap) = self.pool.get(id)
&& self.heap_is_compaction_candidate(class, heap)
{
unsafe {
self.compaction_candidates.add(candidate_len).write(id);
}
candidate_len += 1;
}
id_val += 1;
}
candidate_len
}
fn shuffle_compaction_candidates(&mut self, len: usize) {
if len <= 1 {
return;
}
let mut index = len - 1;
while index > 0 {
let swap_index = self.meshing_rng.in_range(0, index);
unsafe {
let left = *self.compaction_candidates.add(index);
let right = *self.compaction_candidates.add(swap_index);
self.compaction_candidates.add(index).write(right);
self.compaction_candidates.add(swap_index).write(left);
}
index -= 1;
}
}
fn resolve_pointer(&self, ptr: *mut u8) -> Option<ResolvedPtr> {
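        // Seqlock-style read: compaction holds `mesh_epoch` odd while spans are being
        // remapped, so park on the futex for odd epochs and retry if the epoch changed
        // between loading the owner id and re-reading the epoch.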
loop {
let start_epoch = self.mesh_epoch.load(Ordering::Acquire);
if start_epoch & 1 != 0 {
futex_wait_for_value(&self.mesh_epoch, start_epoch);
continue;
}
let owner_id = self.arena.miniheap_id_for_ptr(ptr)?;
let slot = self.resolve_family_slot(owner_id, ptr)?;
let end_epoch = self.mesh_epoch.load(Ordering::Acquire);
if start_epoch == end_epoch {
return Some(ResolvedPtr { owner_id, slot });
}
}
}
fn resolve_family_slot(&self, owner_id: MiniHeapId, ptr: *mut u8) -> Option<usize> {
let arena_base = self.arena.base_ptr() as usize;
let owner = self.pool.get(owner_id)?;
if owner.contains_ptr(arena_base, ptr) {
return Some(owner.slot_for_ptr(arena_base, ptr));
}
let mut current = owner.next_meshed();
while current.has_value() {
let heap = self.pool.get(current)?;
if heap.contains_ptr(arena_base, ptr) {
return Some(heap.slot_for_ptr(arena_base, ptr));
}
current = heap.next_meshed();
}
None
}
}
fn miniheap_object_count(object_size: usize) -> usize {
let bitmap_limit = runtime_slots_per_span();
(page_size() / object_size)
.max(MIN_SHUFFLE_VECTOR_LENGTH)
.min(bitmap_limit)
}
#[inline(always)]
fn round_up_to_alignment(size: usize, align: usize) -> Option<usize> {
debug_assert!(align.is_power_of_two());
let mask = align - 1;
size.checked_add(mask).map(|value| value & !mask)
}
#[inline(always)]
fn page_alignment_for(align: usize) -> Option<u32> {
let page = page_size();
if align <= page {
return Some(1);
}
let pages = align / page;
if pages * page != align {
return None;
}
u32::try_from(pages).ok()
}
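/// RAII guard that keeps `mesh_epoch` odd for the duration of a compaction pass and wakes any
/// readers parked in `resolve_pointer` once it flips back to an even value.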
struct MeshingEpochGuard {
epoch: *const AtomicU32,
}
impl MeshingEpochGuard {
fn new(epoch: *const AtomicU32) -> MeshingEpochGuard {
let epoch_ref = unsafe { &*epoch };
let previous = epoch_ref.fetch_add(1, Ordering::AcqRel);
debug_assert_eq!(previous & 1, 0);
MeshingEpochGuard { epoch }
}
}
impl Drop for MeshingEpochGuard {
fn drop(&mut self) {
let epoch = unsafe { &*self.epoch };
let previous = epoch.fetch_add(1, Ordering::AcqRel);
debug_assert_eq!(previous & 1, 1);
futex_wake_all(epoch);
}
}
impl Drop for MeshAllocator {
fn drop(&mut self) {
unsafe {
core::ptr::drop_in_place(self.bootstrap_thread);
let _ = platform::munmap(
self.bootstrap_thread.cast::<u8>(),
size_of::<ThreadLocalHeap>(),
);
let _ = platform::munmap(
self.compaction_candidates.cast::<u8>(),
self.pool.capacity() as usize * size_of::<MiniHeapId>(),
);
}
}
}
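// Illustrative usage sketch (exposition only; the 4096-page arena and 1024-miniheap pool are
// assumed example capacities, not tuned defaults): allocate one small object through the
// bootstrap thread heap, take a stats snapshot, and release the object.
#[allow(dead_code)]
fn mesh_allocator_usage_sketch() {
    let Ok(mut allocator) = MeshAllocator::new(page_size() * 4096, 1024) else {
        return;
    };
    let Some(ptr) = allocator.allocate(64) else {
        return;
    };
    // The snapshot reflects the allocation above through `small_allocations`.
    let _stats = allocator.stats();
    allocator.deallocate(ptr);
}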

@@ -0,0 +1,428 @@
use core::cell::UnsafeCell;
use core::mem::size_of;
use core::ptr::null_mut;
use core::sync::atomic::{AtomicU32, Ordering};
use super::constants::DEFAULT_ARENA_SIZE;
use super::miniheap::MiniHeapId;
use super::page::{PageConfig, page_count, round_up_to_page};
use super::platform;
use super::raw_sys;
use super::span::Span;
use super::sync::FutexMutex;
const MAX_FREE_SPANS: usize = 4096;
#[derive(Debug)]
pub struct Arena {
config: PageConfig,
arena_size: usize,
page_count: u32,
fd: i32,
base: *mut u8,
owners: *mut AtomicU32,
next_page: AtomicU32,
free_spans: *mut Span,
free_span_count: UnsafeCell<u32>,
free_span_lock: FutexMutex,
}
impl Arena {
#[inline]
pub fn new() -> raw_sys::Result<Self> {
Self::with_size(DEFAULT_ARENA_SIZE)
}
#[inline]
pub fn with_size(arena_size: usize) -> raw_sys::Result<Self> {
let config = PageConfig::get();
assert!(arena_size > 0);
assert_eq!(arena_size % config.size(), 0);
let page_count = page_count(arena_size) as u32;
let fd = platform::memfd_create(c"rust-mesh-alloc".as_ptr().cast(), raw_sys::MFD_CLOEXEC)?;
platform::ftruncate(fd, arena_size as u64)?;
let base = unsafe {
platform::mmap(
null_mut(),
arena_size,
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
raw_sys::MAP_SHARED,
fd,
0,
)?
};
let owner_bytes = page_count as usize * size_of::<AtomicU32>();
let owners = unsafe {
platform::map_anonymous(owner_bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
as *mut AtomicU32
};
let free_span_bytes = MAX_FREE_SPANS * size_of::<Span>();
let free_spans = unsafe {
platform::map_anonymous(free_span_bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
as *mut Span
};
Ok(Self {
config,
arena_size,
page_count,
fd,
base,
owners,
next_page: AtomicU32::new(0),
free_spans,
free_span_count: UnsafeCell::new(0),
free_span_lock: FutexMutex::new(),
})
}
#[inline(always)]
pub const fn config(&self) -> PageConfig {
self.config
}
#[inline(always)]
pub const fn arena_size(&self) -> usize {
self.arena_size
}
#[inline(always)]
pub const fn base_ptr(&self) -> *mut u8 {
self.base
}
#[inline(always)]
pub fn contains(&self, ptr: *const u8) -> bool {
let start = self.base as usize;
let end = start + self.arena_size;
let value = ptr as usize;
start <= value && value < end
}
#[inline]
pub fn reserve_pages(&self, page_count: u32, page_alignment: u32) -> Option<Span> {
assert!(page_count > 0);
assert!(page_alignment > 0);
let alignment = page_alignment.next_power_of_two();
if let Some(span) = self.take_free_span(page_count, alignment) {
return Some(span);
}
loop {
let current = self.next_page.load(Ordering::Acquire);
let aligned = align_up_u32(current, alignment);
let end = aligned.checked_add(page_count)?;
if end > self.page_count {
return None;
}
match self
.next_page
.compare_exchange(current, end, Ordering::AcqRel, Ordering::Acquire)
{
Ok(_) => return Some(Span::new(aligned, page_count)),
Err(_) => continue,
}
}
}
#[inline]
pub fn page_alloc(&self, page_count: u32, page_alignment: u32) -> Option<(*mut u8, Span)> {
let span = self.reserve_pages(page_count, page_alignment)?;
Some((self.ptr_from_offset(span.offset as usize), span))
}
#[inline]
pub fn allocate_bytes(&self, size: usize, page_alignment: u32) -> Option<(*mut u8, Span)> {
let pages = page_count(size) as u32;
self.page_alloc(pages, page_alignment)
}
#[inline(always)]
pub fn reserved_pages(&self) -> u32 {
self.next_page.load(Ordering::Acquire)
}
pub fn reusable_span_stats(&self) -> (u32, u32) {
let _guard = self.free_span_lock.lock();
let count = unsafe { *self.free_span_count.get() };
let mut pages = 0u32;
let mut index = 0usize;
while index < count as usize {
let span = unsafe { *self.free_spans.add(index) };
pages += span.length;
index += 1;
}
(count, pages)
}
#[inline]
pub fn track_miniheap(&self, span: Span, id: MiniHeapId) {
for page in 0..span.length {
self.owner_at_offset(span.offset + page)
.store(id.value(), Ordering::Release);
}
}
#[inline]
pub fn clear_miniheap(&self, span: Span) {
for page in 0..span.length {
self.owner_at_offset(span.offset + page)
.store(0, Ordering::Release);
}
}
#[inline]
pub fn release_span(&self, span: Span) {
if span.empty() {
return;
}
let _guard = self.free_span_lock.lock();
let count = unsafe { &mut *self.free_span_count.get() };
let mut merged = span;
let mut index = 0usize;
while index < *count as usize {
let other = unsafe { *self.free_spans.add(index) };
if other.offset + other.length == merged.offset {
merged = Span::new(other.offset, other.length + merged.length);
self.remove_free_span_at(index, count);
continue;
}
if merged.offset + merged.length == other.offset {
merged = Span::new(merged.offset, merged.length + other.length);
self.remove_free_span_at(index, count);
continue;
}
index += 1;
}
self.push_free_span(merged, count);
}
#[inline]
pub fn miniheap_id_for_ptr(&self, ptr: *const u8) -> Option<MiniHeapId> {
if !self.contains(ptr) {
return None;
}
let off = self.offset_for(ptr);
let value = self.owner_at_offset(off).load(Ordering::Acquire);
if value == 0 {
None
} else {
Some(MiniHeapId::new(value))
}
}
/// # Safety
///
/// `remove..remove+size` must describe a valid, page-aligned mapping within this arena.
/// The returned alias is a private scratch mapping of the old source backing. The original
/// `remove` range is protected with `PROT_NONE` and must be restored or remapped by the
/// caller before any blocked mutators are allowed to resume.
#[inline]
pub unsafe fn begin_mesh(&self, remove: *mut u8, size: usize) -> raw_sys::Result<*mut u8> {
let rounded = round_up_to_page(size);
let remove_off = self.offset_for(remove);
unsafe {
platform::mprotect(remove, rounded, raw_sys::PROT_NONE)?;
platform::mmap(
core::ptr::null_mut(),
rounded,
raw_sys::PROT_READ,
raw_sys::MAP_SHARED,
self.fd,
(remove_off as usize * self.config.size()) as u64,
)
}
}
/// # Safety
///
/// Restores the source mapping to its original backing after a failed mesh attempt.
#[inline]
pub unsafe fn abort_mesh(
&self,
remove: *mut u8,
scratch: *mut u8,
size: usize,
) -> raw_sys::Result<()> {
let rounded = round_up_to_page(size);
let remove_off = self.offset_for(remove);
unsafe {
if !scratch.is_null() {
platform::munmap(scratch, rounded)?;
}
platform::mmap(
remove,
rounded,
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
self.fd,
(remove_off as usize * self.config.size()) as u64,
)?;
}
Ok(())
}
/// # Safety
///
/// `keep` and `remove` must each point to valid page-aligned ranges of at least `size`
/// bytes within this arena. The caller must ensure that aliasing these ranges is valid for
/// the current allocator state and that any required object copying has already completed.
#[inline]
pub unsafe fn finalize_mesh(
&self,
keep: *mut u8,
remove: *mut u8,
scratch: *mut u8,
size: usize,
) -> raw_sys::Result<()> {
let rounded = round_up_to_page(size);
let keep_off = self.offset_for(keep);
let remove_off = self.offset_for(remove);
let pages = page_count(rounded);
unsafe {
platform::mmap(
remove,
rounded,
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
self.fd,
(keep_off as usize * self.config.size()) as u64,
)?;
if !scratch.is_null() {
platform::munmap(scratch, rounded)?;
}
}
let keep_id = self.owner_at_offset(keep_off).load(Ordering::Acquire);
for page in 0..pages {
self.owner_at_offset(remove_off + page as u32)
.store(keep_id, Ordering::Release);
}
Ok(())
}
#[inline]
pub fn free_phys(&self, ptr: *mut u8, size: usize) -> raw_sys::Result<()> {
let rounded = round_up_to_page(size);
let offset = (ptr as usize).wrapping_sub(self.base as usize);
platform::fallocate(
self.fd,
raw_sys::FALLOC_FL_PUNCH_HOLE | raw_sys::FALLOC_FL_KEEP_SIZE,
offset as u64,
rounded as u64,
)
}
#[inline]
pub fn reset_identity_mapping(&self, span: Span) -> raw_sys::Result<()> {
let ptr = self.ptr_from_offset(span.offset as usize);
unsafe {
platform::mmap(
ptr,
span.byte_length_for_page_size(self.config.size()),
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
self.fd,
(span.offset as usize * self.config.size()) as u64,
)?;
}
Ok(())
}
#[inline(always)]
pub fn offset_for(&self, ptr: *const u8) -> u32 {
let delta = (ptr as usize).wrapping_sub(self.base as usize);
(delta >> self.config.shift()) as u32
}
#[inline(always)]
pub fn ptr_from_offset(&self, offset: usize) -> *mut u8 {
unsafe { self.base.add(offset << self.config.shift()) }
}
#[inline(always)]
fn owner_at_offset(&self, offset: u32) -> &AtomicU32 {
assert!(offset < self.page_count);
unsafe { &*self.owners.add(offset as usize) }
}
}
impl Drop for Arena {
fn drop(&mut self) {
let owner_bytes = self.page_count as usize * size_of::<AtomicU32>();
let free_span_bytes = MAX_FREE_SPANS * size_of::<Span>();
unsafe {
let _ = platform::munmap(self.free_spans as *mut u8, free_span_bytes);
let _ = platform::munmap(self.owners as *mut u8, owner_bytes);
let _ = platform::munmap(self.base, self.arena_size);
}
let _ = platform::close(self.fd);
}
}
#[inline(always)]
fn align_up_u32(value: u32, alignment: u32) -> u32 {
debug_assert!(alignment.is_power_of_two());
(value + alignment - 1) & !(alignment - 1)
}
impl Arena {
fn take_free_span(&self, page_count: u32, alignment: u32) -> Option<Span> {
let _guard = self.free_span_lock.lock();
let count = unsafe { &mut *self.free_span_count.get() };
let mut index = 0usize;
while index < *count as usize {
let span = unsafe { *self.free_spans.add(index) };
let aligned = align_up_u32(span.offset, alignment);
let prefix = aligned.checked_sub(span.offset)?;
let total = prefix.checked_add(page_count)?;
if total <= span.length {
self.remove_free_span_at(index, count);
if prefix > 0 {
self.push_free_span(Span::new(span.offset, prefix), count);
}
let suffix_offset = aligned + page_count;
let suffix_length = span.length - total;
if suffix_length > 0 {
self.push_free_span(Span::new(suffix_offset, suffix_length), count);
}
return Some(Span::new(aligned, page_count));
}
index += 1;
}
None
}
fn push_free_span(&self, span: Span, count: &mut u32) {
assert!((*count as usize) < MAX_FREE_SPANS);
unsafe {
self.free_spans.add(*count as usize).write(span);
}
*count += 1;
}
fn remove_free_span_at(&self, index: usize, count: &mut u32) {
debug_assert!(index < *count as usize);
let last = *count as usize - 1;
if index != last {
let replacement = unsafe { *self.free_spans.add(last) };
unsafe {
self.free_spans.add(index).write(replacement);
}
}
*count -= 1;
}
}
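// Illustrative sketch (exposition only; the 64-page arena is an assumed example size): reserve
// a two-page span, release it, and observe that the next same-sized reservation reuses the
// freed span instead of bumping `next_page`.
#[allow(dead_code)]
fn arena_span_recycling_sketch() {
    let page = PageConfig::get().size();
    let Ok(arena) = Arena::with_size(page * 64) else {
        return;
    };
    let Some((_, first)) = arena.page_alloc(2, 1) else {
        return;
    };
    arena.release_span(first);
    if let Some(second) = arena.reserve_pages(2, 1) {
        debug_assert_eq!(second.offset, first.offset);
    }
}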

@@ -0,0 +1,236 @@
use core::sync::atomic::{AtomicUsize, Ordering};
use super::constants::MAX_OBJECT_SLOTS_PER_SPAN;
const USIZE_BITS: usize = usize::BITS as usize;
const BITMAP_WORDS: usize = MAX_OBJECT_SLOTS_PER_SPAN / USIZE_BITS;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RelaxedBitmap {
bit_count: u16,
words: [usize; BITMAP_WORDS],
}
impl RelaxedBitmap {
#[inline]
pub fn new(bit_count: usize) -> Self {
assert!(bit_count <= MAX_OBJECT_SLOTS_PER_SPAN);
Self {
bit_count: bit_count as u16,
words: [0; BITMAP_WORDS],
}
}
#[inline(always)]
pub const fn bit_count(&self) -> usize {
self.bit_count as usize
}
#[inline(always)]
pub fn words(&self) -> &[usize; BITMAP_WORDS] {
&self.words
}
#[inline(always)]
pub fn words_mut(&mut self) -> &mut [usize; BITMAP_WORDS] {
&mut self.words
}
#[inline]
pub fn clear(&mut self) {
self.words = [0; BITMAP_WORDS];
}
#[inline]
pub fn set_all(&mut self) {
self.words = [usize::MAX; BITMAP_WORDS];
self.mask_unused_bits();
}
#[inline]
pub fn invert_masked(&mut self) {
for word in &mut self.words {
*word = !*word;
}
self.mask_unused_bits();
}
#[inline(always)]
pub fn try_set(&mut self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
let old = self.words[word];
self.words[word] = old | mask;
old & mask == 0
}
#[inline(always)]
pub fn unset(&mut self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
let old = self.words[word];
self.words[word] = old & !mask;
old & mask != 0
}
#[inline(always)]
pub fn is_set(&self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
self.words[word] & mask != 0
}
#[inline]
pub fn in_use_count(&self) -> u32 {
self.words.iter().map(|word| word.count_ones()).sum()
}
#[inline]
pub fn iter_set_bits(&self) -> BitIter {
BitIter::new(self.words, self.bit_count())
}
#[inline]
fn mask_unused_bits(&mut self) {
let valid_bits = self.bit_count();
if valid_bits == MAX_OBJECT_SLOTS_PER_SPAN {
return;
}
let used_words = valid_bits / USIZE_BITS;
let remainder = valid_bits % USIZE_BITS;
for word in self
.words
.iter_mut()
.skip(used_words + usize::from(remainder != 0))
{
*word = 0;
}
if remainder != 0 {
self.words[used_words] &= (1usize << remainder) - 1;
}
}
}
#[derive(Debug)]
pub struct AtomicBitmap {
bit_count: u16,
words: [AtomicUsize; BITMAP_WORDS],
}
impl AtomicBitmap {
#[inline]
pub fn new(bit_count: usize) -> Self {
assert!(bit_count <= MAX_OBJECT_SLOTS_PER_SPAN);
Self {
bit_count: bit_count as u16,
words: [const { AtomicUsize::new(0) }; BITMAP_WORDS],
}
}
#[inline(always)]
pub const fn bit_count(&self) -> usize {
self.bit_count as usize
}
#[inline(always)]
pub fn try_set(&self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
let old = self.words[word].fetch_or(mask, Ordering::AcqRel);
old & mask == 0
}
#[inline(always)]
pub fn unset(&self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
let old = self.words[word].fetch_and(!mask, Ordering::AcqRel);
old & mask != 0
}
#[inline(always)]
pub fn is_set(&self, index: usize) -> bool {
let (word, mask) = word_and_mask(index);
self.words[word].load(Ordering::Acquire) & mask != 0
}
#[inline]
pub fn in_use_count(&self) -> u32 {
self.words
.iter()
.map(|word| word.load(Ordering::Acquire).count_ones())
.sum()
}
#[inline]
pub fn swap_words(&self, new_words: &[usize; BITMAP_WORDS]) -> [usize; BITMAP_WORDS] {
let mut old_words = [0usize; BITMAP_WORDS];
let mut i = 0;
while i < BITMAP_WORDS {
old_words[i] = self.words[i].swap(new_words[i], Ordering::AcqRel);
i += 1;
}
old_words
}
#[inline]
pub fn snapshot(&self) -> RelaxedBitmap {
let mut bitmap = RelaxedBitmap::new(self.bit_count());
let mut i = 0;
while i < BITMAP_WORDS {
bitmap.words_mut()[i] = self.words[i].load(Ordering::Acquire);
i += 1;
}
bitmap
}
#[inline]
pub fn take_free_bits(&self) -> RelaxedBitmap {
let mut all_used = RelaxedBitmap::new(self.bit_count());
all_used.set_all();
let previous = self.swap_words(all_used.words());
let mut free_bits = RelaxedBitmap::new(self.bit_count());
*free_bits.words_mut() = previous;
free_bits.invert_masked();
free_bits
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BitIter {
words: [usize; BITMAP_WORDS],
bit_count: usize,
next_index: usize,
}
impl BitIter {
#[inline(always)]
pub fn new(words: [usize; BITMAP_WORDS], bit_count: usize) -> Self {
Self {
words,
bit_count,
next_index: 0,
}
}
}
impl Iterator for BitIter {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
while self.next_index < self.bit_count {
let current = self.next_index;
self.next_index += 1;
let (word, mask) = word_and_mask(current);
if self.words[word] & mask != 0 {
return Some(current);
}
}
None
}
}
#[inline(always)]
fn word_and_mask(index: usize) -> (usize, usize) {
debug_assert!(index < MAX_OBJECT_SLOTS_PER_SPAN);
let word = index / USIZE_BITS;
let bit = index % USIZE_BITS;
(word, 1usize << bit)
}
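// Minimal behavioural sketch (not part of the original commit): exercises the public
// bitmap API defined above; the slot numbers are arbitrary.
#[cfg(test)]
mod bitmap_sketch_tests {
    use super::{AtomicBitmap, RelaxedBitmap};

    #[test]
    fn relaxed_set_unset_roundtrip() {
        let mut bits = RelaxedBitmap::new(8);
        assert!(bits.try_set(3));
        assert!(!bits.try_set(3)); // a second set of the same slot reports it as taken
        assert_eq!(bits.in_use_count(), 1);
        assert_eq!(bits.iter_set_bits().next(), Some(3));
        assert!(bits.unset(3));
        assert_eq!(bits.in_use_count(), 0);
    }

    #[test]
    fn atomic_take_free_bits_inverts_occupancy() {
        let bitmap = AtomicBitmap::new(4);
        assert!(bitmap.try_set(0));
        assert!(bitmap.try_set(2));
        let free = bitmap.take_free_bits();
        // Slots 1 and 3 were free, so they are the set bits of the returned snapshot,
        // and the atomic bitmap is left fully marked as in-use.
        assert!(free.is_set(1) && free.is_set(3));
        assert!(!free.is_set(0) && !free.is_set(2));
        assert_eq!(bitmap.in_use_count(), 4);
    }
}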

View File

@@ -0,0 +1,19 @@
pub const MIN_SUPPORTED_PAGE_SIZE: usize = 4096;
pub const MAX_SUPPORTED_PAGE_SIZE: usize = 16384;
pub const MIN_OBJECT_SIZE: usize = 16;
pub const MAX_SMALL_ALLOCATION: usize = 16_384;
pub const NUM_SIZE_CLASSES: usize = 25;
pub const OCCUPANCY_CUTOFF_NUMERATOR: u32 = 4;
pub const OCCUPANCY_CUTOFF_DENOMINATOR: u32 = 5;
pub const MIN_SHUFFLE_VECTOR_LENGTH: usize = 8;
pub const MAX_ATTACHED_MINIHEAPS_PER_CLASS: usize = 48;
pub const MAX_OBJECT_SLOTS_PER_SPAN: usize = MAX_SUPPORTED_PAGE_SIZE / MIN_OBJECT_SIZE;
pub const MAX_SHUFFLE_VECTOR_LENGTH: usize = MAX_OBJECT_SLOTS_PER_SPAN;
pub const DEFAULT_ARENA_SIZE: usize = 64 * 1024 * 1024 * 1024;
pub const MINIHEAP_REFILL_GOAL_SIZE: usize = 16 * 1024;
#[inline(always)]
pub const fn is_below_partial_threshold(in_use: u32, max_count: u32) -> bool {
in_use.saturating_mul(OCCUPANCY_CUTOFF_DENOMINATOR)
< max_count.saturating_mul(OCCUPANCY_CUTOFF_NUMERATOR)
}
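// Minimal sketch (not part of the original commit): with the 4/5 cutoff above, a span is
// treated as "partial" only while it is strictly below 80% occupancy.
#[cfg(test)]
mod occupancy_cutoff_tests {
    use super::is_below_partial_threshold;

    #[test]
    fn cutoff_sits_at_eighty_percent() {
        assert!(is_below_partial_threshold(7, 10)); // 70% full
        assert!(!is_below_partial_threshold(8, 10)); // exactly 80% full
        assert!(!is_below_partial_threshold(10, 10)); // completely full
    }
}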

View File

@@ -0,0 +1,132 @@
use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use super::raw_sys;
use super::sync::{futex_wait_for_value, futex_wake_all};
const INSTALL_UNINITIALIZED: u32 = 0;
const INSTALL_READY: u32 = 1;
const INSTALL_FAILED: u32 = 2;
static INSTALL_STATE: AtomicU32 = AtomicU32::new(INSTALL_UNINITIALIZED);
static ACTIVE_MESH_SEQ: AtomicU32 = AtomicU32::new(0);
static ACTIVE_MESH_START: AtomicUsize = AtomicUsize::new(0);
static ACTIVE_MESH_LEN: AtomicUsize = AtomicUsize::new(0);
#[derive(Debug)]
pub struct ActiveMeshGuard {
active: bool,
}
impl ActiveMeshGuard {
pub fn begin(start: *mut u8, len: usize) -> raw_sys::Result<Self> {
ensure_fault_mediation_installed()?;
loop {
let seq = ACTIVE_MESH_SEQ.load(Ordering::Acquire);
if seq & 1 == 0 {
break;
}
futex_wait_for_value(&ACTIVE_MESH_SEQ, seq);
}
ACTIVE_MESH_START.store(start as usize, Ordering::Release);
ACTIVE_MESH_LEN.store(len, Ordering::Release);
let previous = ACTIVE_MESH_SEQ.fetch_add(1, Ordering::AcqRel);
debug_assert_eq!(previous & 1, 0);
Ok(Self { active: true })
}
pub fn finish(mut self) {
self.release();
}
fn release(&mut self) {
if !self.active {
return;
}
ACTIVE_MESH_START.store(0, Ordering::Release);
ACTIVE_MESH_LEN.store(0, Ordering::Release);
let previous = ACTIVE_MESH_SEQ.fetch_add(1, Ordering::AcqRel);
debug_assert_eq!(previous & 1, 1);
futex_wake_all(&ACTIVE_MESH_SEQ);
self.active = false;
}
}
impl Drop for ActiveMeshGuard {
fn drop(&mut self) {
self.release();
}
}
pub fn ensure_fault_mediation_installed() -> raw_sys::Result<()> {
match INSTALL_STATE.compare_exchange(
INSTALL_UNINITIALIZED,
INSTALL_READY,
Ordering::AcqRel,
Ordering::Acquire,
) {
Ok(_) | Err(INSTALL_READY) => Ok(()),
Err(_) => {
INSTALL_STATE.store(INSTALL_FAILED, Ordering::Release);
Err(raw_sys::Error(raw_sys::EAGAIN))
}
}
}
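/// Returns `true` only if the caller had to wait out an active mesh window covering `ptr`;
/// the retry helpers below use that signal to decide whether a faulted operation is worth
/// re-issuing.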
pub fn ok_to_proceed(ptr: *const u8) -> bool {
let address = ptr as usize;
let mut waited = false;
loop {
let seq = ACTIVE_MESH_SEQ.load(Ordering::Acquire);
if seq & 1 == 0 {
return waited;
}
let start = ACTIVE_MESH_START.load(Ordering::Acquire);
let len = ACTIVE_MESH_LEN.load(Ordering::Acquire);
let end = start.saturating_add(len);
if address < start || address >= end {
return waited;
}
waited = true;
futex_wait_for_value(&ACTIVE_MESH_SEQ, seq);
}
}
pub fn retry_on_efault<T, F>(ptr: *const u8, mut op: F) -> raw_sys::Result<T>
where
F: FnMut() -> raw_sys::Result<T>,
{
loop {
match op() {
Ok(value) => return Ok(value),
Err(error) if error.errno() == raw_sys::EFAULT && ok_to_proceed(ptr) => continue,
Err(error) => return Err(error),
}
}
}
pub fn retry_on_efault_ptrs<T, F>(ptrs: &[*const u8], mut op: F) -> raw_sys::Result<T>
where
F: FnMut() -> raw_sys::Result<T>,
{
loop {
match op() {
Ok(value) => return Ok(value),
Err(error) if error.errno() == raw_sys::EFAULT => {
let mut waited = false;
for &ptr in ptrs {
if !ptr.is_null() {
waited |= ok_to_proceed(ptr);
}
}
if waited {
continue;
}
return Err(error);
}
Err(error) => return Err(error),
}
}
}
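// Illustrative usage sketch (not part of the original commit; `read_into_arena`, `span_ptr`,
// `span_len`, `buf`, `fd` and `len` are hypothetical): the meshing thread brackets its
// MAP_FIXED remap window with an ActiveMeshGuard, while threads issuing kernel I/O into
// arena memory wrap the call in retry_on_efault so an EFAULT raced against that window is
// retried once the guard is released.
//
//     // mesher side
//     let guard = ActiveMeshGuard::begin(span_ptr, span_len)?;
//     // ... remap the span with platform::mmap(MAP_SHARED | MAP_FIXED, ...) ...
//     guard.finish();
//
//     // I/O side, `buf` points into the arena
//     retry_on_efault(buf, || read_into_arena(fd, buf, len))?;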

View File

@@ -0,0 +1,453 @@
use core::alloc::{GlobalAlloc, Layout};
use core::cell::UnsafeCell;
use core::mem::MaybeUninit;
use core::ptr::{addr_of_mut, drop_in_place, null, null_mut};
use core::sync::atomic::{AtomicU32, Ordering};
use super::allocator::MeshAllocator;
use super::constants::DEFAULT_ARENA_SIZE;
use super::stats::{
CompactionAdvice, CompactionSkipReason, MeshStats, RuntimeCompactionPolicy,
RuntimeCompactionResult,
};
use super::sync::{FutexMutex, futex_wait_for_value, futex_wake_all};
use super::thread_local_heap::ThreadLocalHeap;
const INIT_UNINITIALIZED: u32 = 0;
const INIT_IN_PROGRESS: u32 = 1;
const INIT_READY: u32 = 2;
const INIT_FAILED: u32 = 3;
pub const DEFAULT_GLOBAL_MINIHEAP_CAPACITY: u32 = 4096;
const TLS_UNINITIALIZED: u32 = 0;
const TLS_READY: u32 = 1;
const TLS_FAILED: u32 = 2;
const SAFEPOINT_INACTIVE: u32 = 0;
const SAFEPOINT_ACTIVE: u32 = 1;
#[thread_local]
static mut THREAD_HEAP_STATE: u32 = TLS_UNINITIALIZED;
#[thread_local]
static mut THREAD_HEAP: MaybeUninit<ThreadLocalHeap> = MaybeUninit::uninit();
#[thread_local]
static mut THREAD_SAFEPOINT_STATE: u32 = SAFEPOINT_INACTIVE;
#[thread_local]
static mut THREAD_HEAP_OWNER: *const GlobalMeshAllocator = null();
#[derive(Debug)]
pub struct GlobalMeshAllocator {
arena_size: usize,
miniheap_capacity: u32,
init_state: AtomicU32,
registered_threads: AtomicU32,
quiescent_threads: AtomicU32,
lock: FutexMutex,
allocator: UnsafeCell<MaybeUninit<MeshAllocator>>,
}
impl GlobalMeshAllocator {
pub const fn new(arena_size: usize, miniheap_capacity: u32) -> Self {
Self {
arena_size,
miniheap_capacity,
init_state: AtomicU32::new(INIT_UNINITIALIZED),
registered_threads: AtomicU32::new(0),
quiescent_threads: AtomicU32::new(0),
lock: FutexMutex::new(),
allocator: UnsafeCell::new(MaybeUninit::uninit()),
}
}
pub const fn with_default_config() -> Self {
Self::new(DEFAULT_ARENA_SIZE, DEFAULT_GLOBAL_MINIHEAP_CAPACITY)
}
pub fn init_thread(&self) -> bool {
self.thread_heap().is_some()
}
pub fn shutdown_thread(&self) {
unsafe {
let state_ptr = addr_of_mut!(THREAD_HEAP_STATE);
let heap_ptr = addr_of_mut!(THREAD_HEAP);
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
let owner_ptr = addr_of_mut!(THREAD_HEAP_OWNER);
if *safepoint_ptr == SAFEPOINT_ACTIVE {
self.quiescent_threads.fetch_sub(1, Ordering::AcqRel);
*safepoint_ptr = SAFEPOINT_INACTIVE;
}
if *state_ptr == TLS_READY {
let _ = self.with_existing_allocator_mut(|allocator| {
allocator.shutdown_thread((*heap_ptr).assume_init_mut());
});
drop_in_place((*heap_ptr).as_mut_ptr());
self.registered_threads.fetch_sub(1, Ordering::AcqRel);
}
*state_ptr = TLS_UNINITIALIZED;
*owner_ptr = null();
}
}
/// Marks the current thread quiescent for cooperative compaction.
///
/// This is **not** true concurrent compaction: arbitrary loads/stores through existing raw
/// pointers remain outside allocator control, so compaction is only safe once all registered
/// allocator threads have voluntarily entered this state.
pub fn enter_quiescent_compaction_state(&self) -> bool {
if self.thread_heap().is_none() {
return false;
}
unsafe {
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
if *safepoint_ptr == SAFEPOINT_ACTIVE {
return true;
}
*safepoint_ptr = SAFEPOINT_ACTIVE;
}
self.quiescent_threads.fetch_add(1, Ordering::AcqRel);
true
}
/// Leaves the cooperative quiescent compaction state for the current thread.
pub fn leave_quiescent_compaction_state(&self) {
unsafe {
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
if *safepoint_ptr == SAFEPOINT_ACTIVE {
*safepoint_ptr = SAFEPOINT_INACTIVE;
self.quiescent_threads.fetch_sub(1, Ordering::AcqRel);
}
}
}
/// Returns true only when every registered allocator thread is quiescent.
///
/// This is a cooperative global-quiescence check, not a proof that active mutators can safely
/// race with remap/migration.
pub fn quiescent_compaction_ready(&self) -> bool {
let registered = self.registered_threads.load(Ordering::Acquire);
registered != 0 && registered == self.quiescent_threads.load(Ordering::Acquire)
}
/// Runs cooperative quiescent compaction when the current thread and every other registered
/// allocator thread have voluntarily stopped allocator-visible activity.
///
/// This API intentionally does **not** claim to provide concurrent compaction with active
/// mutators. Achieving that would require heavier machinery such as page-fault mediation,
/// signal handling, syscall retry/interposition, or equivalent runtime coordination for raw
/// pointer accesses and kernel I/O into moving pages.
pub fn compact_when_quiescent(
&self,
policy: RuntimeCompactionPolicy,
) -> RuntimeCompactionResult {
if !self.current_thread_in_safepoint() {
return RuntimeCompactionResult::Skipped {
reason: CompactionSkipReason::NotAtSafepoint,
advice: self.compaction_advice(),
};
}
if !self.quiescent_compaction_ready() {
return RuntimeCompactionResult::Skipped {
reason: CompactionSkipReason::ThreadsActive,
advice: self.compaction_advice(),
};
}
let Some(thread_heap) = self.thread_heap() else {
return RuntimeCompactionResult::Skipped {
reason: CompactionSkipReason::ThreadUnavailable,
advice: self.compaction_advice(),
};
};
let Some(result) = self.with_existing_allocator_mut(|allocator| {
allocator.shutdown_thread(thread_heap);
let advice = allocator.stats().compaction_advice();
if !policy.should_compact(&advice) {
return RuntimeCompactionResult::Skipped {
reason: CompactionSkipReason::Policy,
advice: Some(advice),
};
}
let meshes = allocator.compact_with_thread(thread_heap);
RuntimeCompactionResult::Compacted { meshes, advice }
}) else {
return RuntimeCompactionResult::Skipped {
reason: CompactionSkipReason::AllocatorUnavailable,
advice: None,
};
};
result
}
/// Compatibility alias for `enter_quiescent_compaction_state`.
pub fn enter_compaction_safepoint(&self) -> bool {
self.enter_quiescent_compaction_state()
}
/// Compatibility alias for `leave_quiescent_compaction_state`.
pub fn leave_compaction_safepoint(&self) {
self.leave_quiescent_compaction_state();
}
/// Compatibility alias for `quiescent_compaction_ready`.
pub fn compaction_safepoint_ready(&self) -> bool {
self.quiescent_compaction_ready()
}
/// Compatibility alias for `compact_when_quiescent`.
pub fn compact_at_safepoint(&self, policy: RuntimeCompactionPolicy) -> RuntimeCompactionResult {
self.compact_when_quiescent(policy)
}
pub fn compact(&self) -> usize {
let Some(thread_heap) = self.thread_heap() else {
return 0;
};
self.with_allocator(|allocator| allocator.compact_with_thread(thread_heap))
.unwrap_or_default()
}
pub fn stats(&self) -> Option<MeshStats> {
self.with_existing_allocator(|allocator| allocator.stats())
}
pub fn compaction_advice(&self) -> Option<CompactionAdvice> {
self.stats().map(|stats| stats.compaction_advice())
}
fn with_allocator<R>(&self, f: impl FnOnce(&mut MeshAllocator) -> R) -> Option<R> {
if !self.ensure_initialized() {
return None;
}
let _guard = self.lock.lock();
let allocator = unsafe { (&mut *self.allocator.get()).assume_init_mut() };
Some(f(allocator))
}
fn with_existing_allocator<R>(&self, f: impl FnOnce(&MeshAllocator) -> R) -> Option<R> {
if self.init_state.load(Ordering::Acquire) != INIT_READY {
return None;
}
let _guard = self.lock.lock();
if self.init_state.load(Ordering::Acquire) != INIT_READY {
return None;
}
let allocator = unsafe { (&*self.allocator.get()).assume_init_ref() };
Some(f(allocator))
}
fn with_existing_allocator_mut<R>(&self, f: impl FnOnce(&mut MeshAllocator) -> R) -> Option<R> {
if self.init_state.load(Ordering::Acquire) != INIT_READY {
return None;
}
let _guard = self.lock.lock();
if self.init_state.load(Ordering::Acquire) != INIT_READY {
return None;
}
let allocator = unsafe { (&mut *self.allocator.get()).assume_init_mut() };
Some(f(allocator))
}
fn ensure_initialized(&self) -> bool {
loop {
match self.init_state.load(Ordering::Acquire) {
INIT_READY => return true,
INIT_FAILED => return false,
INIT_UNINITIALIZED => {
if self
.init_state
.compare_exchange(
INIT_UNINITIALIZED,
INIT_IN_PROGRESS,
Ordering::AcqRel,
Ordering::Acquire,
)
.is_ok()
{
let result = MeshAllocator::new(self.arena_size, self.miniheap_capacity);
match result {
Ok(allocator) => unsafe {
(*self.allocator.get()).write(allocator);
self.init_state.store(INIT_READY, Ordering::Release);
futex_wake_all(&self.init_state);
return true;
},
Err(_) => {
self.init_state.store(INIT_FAILED, Ordering::Release);
futex_wake_all(&self.init_state);
return false;
}
}
}
}
INIT_IN_PROGRESS => futex_wait_for_value(&self.init_state, INIT_IN_PROGRESS),
_ => return false,
}
}
}
fn thread_heap(&self) -> Option<&'static mut ThreadLocalHeap> {
unsafe {
let owner_ptr = addr_of_mut!(THREAD_HEAP_OWNER);
let heap_ptr = addr_of_mut!(THREAD_HEAP);
match THREAD_HEAP_STATE {
TLS_READY if core::ptr::eq(*owner_ptr, self) => Some((*heap_ptr).assume_init_mut()),
TLS_READY => {
self.reset_foreign_thread_heap();
self.thread_heap()
}
TLS_FAILED => None,
TLS_UNINITIALIZED => match ThreadLocalHeap::new() {
Ok(heap) => {
(*heap_ptr).write(heap);
*addr_of_mut!(THREAD_SAFEPOINT_STATE) = SAFEPOINT_INACTIVE;
*owner_ptr = self as *const _;
self.registered_threads.fetch_add(1, Ordering::AcqRel);
THREAD_HEAP_STATE = TLS_READY;
Some((*heap_ptr).assume_init_mut())
}
Err(_) => {
THREAD_HEAP_STATE = TLS_FAILED;
None
}
},
_ => None,
}
}
}
fn current_thread_in_safepoint(&self) -> bool {
let _ = self;
unsafe {
core::ptr::eq(THREAD_HEAP_OWNER, self) && THREAD_SAFEPOINT_STATE == SAFEPOINT_ACTIVE
}
}
fn reset_foreign_thread_heap(&self) {
let _ = self;
unsafe {
if THREAD_HEAP_STATE == TLS_READY {
drop_in_place(addr_of_mut!(THREAD_HEAP).cast::<ThreadLocalHeap>());
}
THREAD_HEAP_STATE = TLS_UNINITIALIZED;
THREAD_SAFEPOINT_STATE = SAFEPOINT_INACTIVE;
THREAD_HEAP_OWNER = null();
}
}
}
impl Drop for GlobalMeshAllocator {
fn drop(&mut self) {
unsafe {
if core::ptr::eq(THREAD_HEAP_OWNER, self) {
self.shutdown_thread();
}
}
if self.init_state.load(Ordering::Acquire) == INIT_READY {
let _guard = self.lock.lock();
if self.init_state.load(Ordering::Acquire) == INIT_READY {
unsafe {
drop_in_place((&mut *self.allocator.get()).as_mut_ptr());
}
self.init_state.store(INIT_FAILED, Ordering::Release);
}
}
}
}
unsafe impl Sync for GlobalMeshAllocator {}
unsafe impl Send for GlobalMeshAllocator {}
unsafe impl GlobalAlloc for GlobalMeshAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
if !self.ensure_initialized() {
return null_mut();
}
let Some(thread_heap) = self.thread_heap() else {
return null_mut();
};
let allocator_ref = unsafe { (&*self.allocator.get()).assume_init_ref() };
if let Some(class) = size_class_for_layout(layout)
&& let Some(ptr) = allocator_ref.try_allocate_small_local(thread_heap, class)
{
return ptr;
}
self.with_allocator(|allocator| {
allocator
.allocate_layout_with_thread(thread_heap, layout)
.unwrap_or(null_mut())
})
.unwrap_or(null_mut())
}
unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
if !self.ensure_initialized() {
return;
}
let Some(thread_heap) = self.thread_heap() else {
return;
};
let allocator_ref = unsafe { (&*self.allocator.get()).assume_init_ref() };
if allocator_ref.try_deallocate_local(ptr, thread_heap) {
return;
}
let _ = self.with_allocator(|allocator| allocator.deallocate_with_thread(ptr, thread_heap));
}
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
let ptr = unsafe { self.alloc(layout) };
if !ptr.is_null() {
unsafe {
ptr.write_bytes(0, layout.size());
}
}
ptr
}
unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
if !self.ensure_initialized() {
return null_mut();
}
let Some(thread_heap) = self.thread_heap() else {
return null_mut();
};
self.with_allocator(|allocator| unsafe {
allocator
.reallocate_with_thread(ptr, layout, new_size, thread_heap)
.unwrap_or(null_mut())
})
.unwrap_or(null_mut())
}
}
fn size_class_for_layout(layout: Layout) -> Option<u8> {
if layout.align() > super::page::page_size() {
return None;
}
let aligned_size = layout
.size()
.max(1)
.checked_add(layout.align() - 1)
.map(|value| value & !(layout.align() - 1))?;
if aligned_size > super::constants::MAX_SMALL_ALLOCATION {
return None;
}
let class = super::size_map::size_class_for(aligned_size)?;
if super::size_map::byte_size_for_class(class).is_multiple_of(layout.align()) {
Some(class)
} else {
None
}
}
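// Illustrative usage sketch (not part of the original commit): wiring the allocator up as
// the process-wide global allocator and driving cooperative compaction from application
// code. All names below are the public items defined in this module and in `stats`.
//
//     #[global_allocator]
//     static GLOBAL: GlobalMeshAllocator = GlobalMeshAllocator::with_default_config();
//
//     // Each participating thread parks itself before compaction is attempted:
//     GLOBAL.enter_quiescent_compaction_state();
//     let outcome = GLOBAL.compact_when_quiescent(RuntimeCompactionPolicy::default());
//     GLOBAL.leave_quiescent_compaction_state();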

View File

@@ -0,0 +1,15 @@
use super::bitmap::AtomicBitmap;
#[inline]
pub fn bitmaps_meshable(left: &AtomicBitmap, right: &AtomicBitmap) -> bool {
let left_words = left.snapshot();
let right_words = right.snapshot();
for (lhs, rhs) in left_words.words().iter().zip(right_words.words().iter()) {
if lhs & rhs != 0 {
return false;
}
}
true
}
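// Minimal sketch (not part of the original commit): two spans are meshable exactly when
// no slot is live in both occupancy bitmaps.
#[cfg(test)]
mod meshable_sketch_tests {
    use super::{AtomicBitmap, bitmaps_meshable};

    #[test]
    fn disjoint_bitmaps_mesh_overlapping_ones_do_not() {
        let left = AtomicBitmap::new(8);
        let right = AtomicBitmap::new(8);
        assert!(left.try_set(0));
        assert!(right.try_set(1));
        assert!(bitmaps_meshable(&left, &right));
        assert!(right.try_set(0)); // slot 0 is now live on both sides
        assert!(!bitmaps_meshable(&left, &right));
    }
}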

View File

@@ -0,0 +1,374 @@
use core::sync::atomic::{AtomicU32, Ordering};
use super::bitmap::AtomicBitmap;
use super::page::page_size;
use super::size_map::{byte_size_for_class, size_class_for};
use super::span::Span;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum FreelistId {
Full = 0,
Partial = 1,
Empty = 2,
Attached = 3,
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct MiniHeapId(u32);
impl MiniHeapId {
#[inline(always)]
pub const fn new(id: u32) -> Self {
Self(id)
}
#[inline(always)]
pub const fn value(self) -> u32 {
self.0
}
#[inline(always)]
pub const fn has_value(self) -> bool {
self.0 != 0
}
}
#[derive(Debug)]
pub struct MiniHeapFlags {
bits: AtomicU32,
}
impl MiniHeapFlags {
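    // Packed layout of `bits`, low to high: size class (6 bits), freelist id (2 bits),
    // shuffle-vector offset (8 bits), max object count (11 bits), pending flag (bit 27)
    // and meshed flag (bit 30); bits 28, 29 and 31 are currently unused.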
const SIZE_CLASS_SHIFT: u32 = 0;
const FREELIST_ID_SHIFT: u32 = 6;
const SHUFFLE_OFFSET_SHIFT: u32 = 8;
const MAX_COUNT_SHIFT: u32 = 16;
const PENDING_OFFSET: u32 = 27;
const MESHED_OFFSET: u32 = 30;
#[inline]
pub fn new(
max_count: u16,
size_class: u8,
shuffle_offset: u8,
freelist_id: FreelistId,
) -> Self {
let bits = ((max_count as u32) << Self::MAX_COUNT_SHIFT)
| ((shuffle_offset as u32) << Self::SHUFFLE_OFFSET_SHIFT)
| ((freelist_id as u32) << Self::FREELIST_ID_SHIFT)
| ((size_class as u32) << Self::SIZE_CLASS_SHIFT);
Self {
bits: AtomicU32::new(bits),
}
}
#[inline(always)]
fn load(&self) -> u32 {
self.bits.load(Ordering::Acquire)
}
#[inline(always)]
fn update_masked(&self, mask: u32, value: u32) {
let mut old = self.bits.load(Ordering::Relaxed);
loop {
let new = (old & mask) | value;
match self
.bits
.compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Relaxed)
{
Ok(_) => return,
Err(next) => old = next,
}
}
}
#[inline(always)]
pub fn max_count(&self) -> u16 {
((self.load() >> Self::MAX_COUNT_SHIFT) & 0x7ff) as u16
}
#[inline(always)]
pub fn size_class(&self) -> u8 {
((self.load() >> Self::SIZE_CLASS_SHIFT) & 0x3f) as u8
}
#[inline(always)]
pub fn freelist_id(&self) -> FreelistId {
match (self.load() >> Self::FREELIST_ID_SHIFT) & 0x3 {
0 => FreelistId::Full,
1 => FreelistId::Partial,
2 => FreelistId::Empty,
_ => FreelistId::Attached,
}
}
#[inline(always)]
pub fn set_freelist_id(&self, id: FreelistId) {
let mask = !(0x3 << Self::FREELIST_ID_SHIFT);
self.update_masked(mask, (id as u32) << Self::FREELIST_ID_SHIFT);
}
#[inline(always)]
pub fn shuffle_vector_offset(&self) -> u8 {
((self.load() >> Self::SHUFFLE_OFFSET_SHIFT) & 0xff) as u8
}
#[inline(always)]
pub fn set_shuffle_vector_offset(&self, offset: u8) {
let mask = !(0xff << Self::SHUFFLE_OFFSET_SHIFT);
self.update_masked(mask, (offset as u32) << Self::SHUFFLE_OFFSET_SHIFT);
}
#[inline(always)]
pub fn is_pending(&self) -> bool {
self.load() & (1 << Self::PENDING_OFFSET) != 0
}
#[inline(always)]
pub fn clear_pending(&self) {
self.bits
.fetch_and(!(1 << Self::PENDING_OFFSET), Ordering::AcqRel);
}
#[inline(always)]
pub fn try_set_pending_from_full(&self) -> bool {
let full = (FreelistId::Full as u32) << Self::FREELIST_ID_SHIFT;
let pending = 1 << Self::PENDING_OFFSET;
let freelist_mask = 0x3 << Self::FREELIST_ID_SHIFT;
let mut old = self.bits.load(Ordering::Relaxed);
loop {
if (old & freelist_mask) != full || (old & pending) != 0 {
return false;
}
let new = old | pending;
match self
.bits
.compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Relaxed)
{
Ok(_) => return true,
Err(next) => old = next,
}
}
}
#[inline(always)]
pub fn is_meshed(&self) -> bool {
self.load() & (1 << Self::MESHED_OFFSET) != 0
}
#[inline(always)]
pub fn set_meshed(&self) {
self.bits
.fetch_or(1 << Self::MESHED_OFFSET, Ordering::AcqRel);
}
}
#[derive(Debug)]
pub struct MiniHeap {
span: Span,
current_thread: AtomicU32,
flags: MiniHeapFlags,
next_meshed: AtomicU32,
pending_next: AtomicU32,
bitmap: AtomicBitmap,
}
impl MiniHeap {
#[inline]
pub fn new(span: Span, object_count: u16, object_size: usize) -> Self {
let size_class = if object_count > 1 {
size_class_for(object_size).unwrap_or(1)
} else {
1
};
Self {
span,
current_thread: AtomicU32::new(0),
flags: MiniHeapFlags::new(object_count, size_class, 0, FreelistId::Attached),
next_meshed: AtomicU32::new(0),
pending_next: AtomicU32::new(0),
bitmap: AtomicBitmap::new(object_count as usize),
}
}
#[inline(always)]
pub const fn span(&self) -> Span {
self.span
}
#[inline(always)]
pub fn flags(&self) -> &MiniHeapFlags {
&self.flags
}
#[inline(always)]
pub fn bitmap(&self) -> &AtomicBitmap {
&self.bitmap
}
#[inline(always)]
pub fn max_count(&self) -> u16 {
self.flags.max_count()
}
#[inline(always)]
pub fn size_class(&self) -> u8 {
self.flags.size_class()
}
#[inline(always)]
pub fn is_large_alloc(&self) -> bool {
self.max_count() == 1
}
#[inline(always)]
pub fn object_size(&self) -> usize {
if self.is_large_alloc() {
self.span.byte_length_for_page_size(page_size())
} else {
byte_size_for_class(self.size_class())
}
}
#[inline(always)]
pub fn span_size(&self) -> usize {
self.span.byte_length_for_page_size(page_size())
}
#[inline(always)]
pub fn in_use_count(&self) -> u32 {
self.bitmap.in_use_count()
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.in_use_count() == 0
}
#[inline(always)]
pub fn is_full(&self) -> bool {
self.in_use_count() == self.max_count() as u32
}
#[inline(always)]
pub fn bytes_free(&self) -> usize {
(self.max_count() as usize - self.in_use_count() as usize) * self.object_size()
}
#[inline(always)]
pub fn current_thread(&self) -> u32 {
self.current_thread.load(Ordering::Acquire)
}
#[inline(always)]
pub fn set_attached(&self, thread_id: u32) {
self.current_thread.store(thread_id, Ordering::Release);
self.flags.set_freelist_id(FreelistId::Attached);
}
#[inline(always)]
pub fn unset_attached(&self) {
self.current_thread.store(0, Ordering::Release);
}
#[inline(always)]
pub fn is_attached(&self) -> bool {
self.current_thread() != 0
}
#[inline(always)]
pub fn set_shuffle_vector_offset(&self, offset: u8) {
self.flags.set_shuffle_vector_offset(offset);
}
#[inline(always)]
pub fn shuffle_vector_offset(&self) -> u8 {
self.flags.shuffle_vector_offset()
}
#[inline(always)]
pub fn set_pending_next(&self, next: MiniHeapId) {
self.pending_next.store(next.value(), Ordering::Release);
}
#[inline(always)]
pub fn pending_next(&self) -> MiniHeapId {
MiniHeapId::new(self.pending_next.load(Ordering::Acquire))
}
#[inline(always)]
pub fn track_meshed_span(&self, next: MiniHeapId) {
self.next_meshed.store(next.value(), Ordering::Release);
}
#[inline(always)]
pub fn next_meshed(&self) -> MiniHeapId {
MiniHeapId::new(self.next_meshed.load(Ordering::Acquire))
}
#[inline(always)]
pub fn has_meshed_partner(&self) -> bool {
self.next_meshed().has_value()
}
#[inline(always)]
pub fn set_meshed(&self) {
self.flags.set_meshed();
}
#[inline(always)]
pub fn is_meshed(&self) -> bool {
self.flags.is_meshed()
}
#[inline(always)]
pub fn is_meshing_candidate(&self) -> bool {
!self.is_attached() && self.object_size() < page_size()
}
#[inline(always)]
pub fn fullness(&self) -> f32 {
self.in_use_count() as f32 / self.max_count() as f32
}
#[inline(always)]
pub fn malloc_at(&self, arena_begin: usize, slot: usize) -> Option<*mut u8> {
if !self.bitmap.try_set(slot) {
return None;
}
Some(self.ptr_from_offset(arena_begin, slot))
}
#[inline(always)]
pub fn ptr_from_offset(&self, arena_begin: usize, slot: usize) -> *mut u8 {
let span_start = arena_begin + (self.span.offset as usize * page_size());
(span_start + slot * self.object_size()) as *mut u8
}
#[inline(always)]
pub fn contains_ptr(&self, arena_begin: usize, ptr: *const u8) -> bool {
let span_start = arena_begin + (self.span.offset as usize * page_size());
let span_end = span_start + self.span_size();
let ptr = ptr as usize;
span_start <= ptr && ptr < span_end
}
#[inline(always)]
pub fn free_offset(&self, slot: usize) -> bool {
self.bitmap.unset(slot)
}
#[inline(always)]
pub fn clear_if_not_free(&self, slot: usize) -> bool {
self.bitmap.unset(slot)
}
#[inline(always)]
pub fn slot_for_ptr(&self, arena_begin: usize, ptr: *const u8) -> usize {
let span_start = arena_begin + (self.span.offset as usize * page_size());
((ptr as usize) - span_start) / self.object_size()
}
}
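// Minimal sketch (not part of the original commit): round-trips the slot/pointer
// arithmetic above. The arena base address is arbitrary and never dereferenced, and the
// test assumes the host page size is one accepted by `page.rs` (4 KiB..=16 KiB).
#[cfg(test)]
mod miniheap_addressing_tests {
    use super::{MiniHeap, Span, page_size};

    #[test]
    fn slot_pointer_round_trip() {
        let slots = (page_size() / 16) as u16;
        let heap = MiniHeap::new(Span::new(0, 1), slots, 16);
        let arena_begin = 1usize << 30;
        let ptr = heap.malloc_at(arena_begin, 3).expect("slot 3 starts free");
        assert!(heap.contains_ptr(arena_begin, ptr));
        assert_eq!(heap.slot_for_ptr(arena_begin, ptr), 3);
        assert!(heap.free_offset(3));
        assert!(heap.is_empty());
    }
}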

View File

@@ -0,0 +1,45 @@
pub mod allocator;
pub mod arena;
pub mod bitmap;
pub mod constants;
pub mod fault;
pub mod global;
pub mod meshing;
pub mod miniheap;
pub mod page;
pub mod platform;
pub mod pool;
pub mod raw_sys;
pub mod rng;
pub mod shuffle;
pub mod size_map;
pub mod span;
pub mod stats;
pub mod sync;
pub mod thread_local_heap;
pub use allocator::MeshAllocator;
pub use arena::Arena;
pub use bitmap::{AtomicBitmap, BitIter, RelaxedBitmap};
pub use constants::*;
pub use fault::{
ActiveMeshGuard, ensure_fault_mediation_installed, ok_to_proceed, retry_on_efault,
retry_on_efault_ptrs,
};
pub use global::{DEFAULT_GLOBAL_MINIHEAP_CAPACITY, GlobalMeshAllocator};
pub use meshing::bitmaps_meshable;
pub use miniheap::{FreelistId, MiniHeap, MiniHeapFlags, MiniHeapId};
pub use page::{
PageConfig, page_count, page_shift, page_size, round_up_to_page, runtime_slots_per_span,
};
pub use platform::{PlatformHooks, PlatformInstallError, install_platform_hooks};
pub use rng::{Mwc, Mwc64};
pub use shuffle::{ShuffleEntry, ShuffleVector};
pub use size_map::{CLASS_TO_SIZE, NUM_SIZE_CLASSES, byte_size_for_class, size_class_for};
pub use span::Span;
pub use stats::{
CompactionAdvice, CompactionEstimate, CompactionRecommendation, CompactionSkipReason,
MeshStats, RuntimeCompactionPolicy, RuntimeCompactionResult,
};
pub use sync::FutexMutex;
pub use thread_local_heap::ThreadLocalHeap;

View File

@@ -0,0 +1,94 @@
use core::sync::atomic::{AtomicUsize, Ordering};
use super::constants::{MAX_SUPPORTED_PAGE_SIZE, MIN_OBJECT_SIZE, MIN_SUPPORTED_PAGE_SIZE};
use super::platform;
static PAGE_SIZE_CACHE: AtomicUsize = AtomicUsize::new(0);
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct PageConfig {
size: usize,
shift: u32,
slots_per_span: usize,
}
impl PageConfig {
#[inline(always)]
pub fn get() -> Self {
let size = page_size();
Self {
size,
shift: size.trailing_zeros(),
slots_per_span: size / MIN_OBJECT_SIZE,
}
}
#[inline(always)]
pub const fn size(self) -> usize {
self.size
}
#[inline(always)]
pub const fn shift(self) -> u32 {
self.shift
}
#[inline(always)]
pub const fn slots_per_span(self) -> usize {
self.slots_per_span
}
}
#[inline]
pub fn page_size() -> usize {
let cached = PAGE_SIZE_CACHE.load(Ordering::Acquire);
if cached != 0 {
return cached;
}
let size = query_page_size();
match PAGE_SIZE_CACHE.compare_exchange(0, size, Ordering::AcqRel, Ordering::Acquire) {
Ok(_) => size,
Err(existing) => existing,
}
}
#[inline(always)]
pub fn page_shift() -> u32 {
page_size().trailing_zeros()
}
#[inline(always)]
pub fn page_count(size: usize) -> usize {
let page = page_size();
size.div_ceil(page)
}
#[inline(always)]
pub fn round_up_to_page(size: usize) -> usize {
page_count(size) * page_size()
}
#[inline(always)]
pub fn runtime_slots_per_span() -> usize {
page_size() / MIN_OBJECT_SIZE
}
fn query_page_size() -> usize {
let size = platform::page_size();
assert!(
size.is_power_of_two(),
"page size is not a power of two: {size}"
);
assert!(
(MIN_SUPPORTED_PAGE_SIZE..=MAX_SUPPORTED_PAGE_SIZE).contains(&size),
"unsupported page size {size}; supported range is {}..={}",
MIN_SUPPORTED_PAGE_SIZE,
MAX_SUPPORTED_PAGE_SIZE
);
assert_eq!(
size % MIN_OBJECT_SIZE,
0,
"page size {size} is not MIN_OBJECT_SIZE-aligned"
);
size
}
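// Minimal sketch (not part of the original commit): page rounding is driven by the page
// size reported by the platform hooks, so these relations hold for any supported size.
#[cfg(test)]
mod page_rounding_tests {
    use super::{page_count, page_size, round_up_to_page};

    #[test]
    fn rounding_is_page_granular() {
        let page = page_size();
        assert_eq!(page_count(0), 0);
        assert_eq!(page_count(1), 1);
        assert_eq!(page_count(page + 1), 2);
        assert_eq!(round_up_to_page(page), page);
        assert_eq!(round_up_to_page(page + 1), 2 * page);
    }
}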

View File

@@ -0,0 +1,168 @@
use core::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
use super::raw_sys;
#[derive(Clone, Copy, Debug)]
pub struct PlatformHooks {
pub page_size: fn() -> usize,
pub gettid: fn() -> raw_sys::Result<u32>,
pub getrandom: fn(&mut [u8], u32) -> raw_sys::Result<usize>,
pub memfd_create: fn(*const u8, u32) -> raw_sys::Result<i32>,
pub ftruncate: fn(i32, u64) -> raw_sys::Result<()>,
pub fallocate: fn(i32, u32, u64, u64) -> raw_sys::Result<()>,
pub close: fn(i32) -> raw_sys::Result<()>,
pub futex_wait: unsafe fn(*const u32, u32, u32) -> raw_sys::Result<()>,
pub futex_wake: unsafe fn(*const u32, u32, u32) -> raw_sys::Result<u32>,
pub mmap: unsafe fn(*mut u8, usize, u32, u32, i32, u64) -> raw_sys::Result<*mut u8>,
pub map_anonymous: unsafe fn(usize, u32) -> raw_sys::Result<*mut u8>,
pub mprotect: unsafe fn(*mut u8, usize, u32) -> raw_sys::Result<()>,
pub munmap: unsafe fn(*mut u8, usize) -> raw_sys::Result<()>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PlatformInstallError {
AlreadyConfigured,
AlreadyInUse,
}
static DEFAULT_PLATFORM_HOOKS: PlatformHooks = PlatformHooks {
page_size: default_page_size,
gettid: raw_sys::gettid,
getrandom: raw_sys::getrandom,
memfd_create: raw_sys::memfd_create,
ftruncate: raw_sys::ftruncate,
fallocate: raw_sys::fallocate,
close: raw_sys::close,
futex_wait: raw_sys::futex_wait,
futex_wake: raw_sys::futex_wake,
mmap: raw_sys::mmap,
map_anonymous: raw_sys::map_anonymous,
mprotect: raw_sys::mprotect,
munmap: raw_sys::munmap,
};
static PLATFORM_HOOKS: AtomicPtr<PlatformHooks> =
AtomicPtr::new((&DEFAULT_PLATFORM_HOOKS as *const PlatformHooks).cast_mut());
static PLATFORM_FROZEN: AtomicBool = AtomicBool::new(false);
pub fn install_platform_hooks(hooks: &'static PlatformHooks) -> Result<(), PlatformInstallError> {
if PLATFORM_FROZEN.load(Ordering::Acquire) {
return Err(PlatformInstallError::AlreadyInUse);
}
let default_ptr = (&DEFAULT_PLATFORM_HOOKS as *const PlatformHooks).cast_mut();
let hooks_ptr = (hooks as *const PlatformHooks).cast_mut();
match PLATFORM_HOOKS.compare_exchange(
default_ptr,
hooks_ptr,
Ordering::AcqRel,
Ordering::Acquire,
) {
Ok(_) => Ok(()),
Err(existing) if existing == hooks_ptr => Ok(()),
Err(_) => Err(PlatformInstallError::AlreadyConfigured),
}
}
#[inline(always)]
pub fn page_size() -> usize {
(platform_hooks().page_size)()
}
#[inline(always)]
pub fn gettid() -> raw_sys::Result<u32> {
(platform_hooks().gettid)()
}
#[inline(always)]
pub fn getrandom(buf: &mut [u8], flags: u32) -> raw_sys::Result<usize> {
(platform_hooks().getrandom)(buf, flags)
}
#[inline(always)]
pub fn memfd_create(name: *const u8, flags: u32) -> raw_sys::Result<i32> {
(platform_hooks().memfd_create)(name, flags)
}
#[inline(always)]
pub fn ftruncate(fd: i32, len: u64) -> raw_sys::Result<()> {
(platform_hooks().ftruncate)(fd, len)
}
#[inline(always)]
pub fn fallocate(fd: i32, mode: u32, offset: u64, len: u64) -> raw_sys::Result<()> {
(platform_hooks().fallocate)(fd, mode, offset, len)
}
#[inline(always)]
pub fn close(fd: i32) -> raw_sys::Result<()> {
(platform_hooks().close)(fd)
}
#[inline(always)]
/// # Safety
///
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
pub unsafe fn futex_wait(uaddr: *const u32, op: u32, expected: u32) -> raw_sys::Result<()> {
unsafe { (platform_hooks().futex_wait)(uaddr, op, expected) }
}
#[inline(always)]
/// # Safety
///
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
pub unsafe fn futex_wake(uaddr: *const u32, op: u32, count: u32) -> raw_sys::Result<u32> {
unsafe { (platform_hooks().futex_wake)(uaddr, op, count) }
}
#[inline(always)]
/// # Safety
///
/// The caller must ensure the mapping arguments satisfy the platform `mmap(2)` contract.
pub unsafe fn mmap(
addr: *mut u8,
len: usize,
prot: u32,
flags: u32,
fd: i32,
offset: u64,
) -> raw_sys::Result<*mut u8> {
unsafe { (platform_hooks().mmap)(addr, len, prot, flags, fd, offset) }
}
#[inline(always)]
/// # Safety
///
/// The caller must later unmap the returned memory exactly once.
pub unsafe fn map_anonymous(len: usize, prot: u32) -> raw_sys::Result<*mut u8> {
unsafe { (platform_hooks().map_anonymous)(len, prot) }
}
#[inline(always)]
/// # Safety
///
/// `addr..addr+len` must refer to a valid mapped region.
pub unsafe fn mprotect(addr: *mut u8, len: usize, prot: u32) -> raw_sys::Result<()> {
unsafe { (platform_hooks().mprotect)(addr, len, prot) }
}
#[inline(always)]
/// # Safety
///
/// `addr..addr+len` must refer to a valid mapping that may be unmapped exactly once.
pub unsafe fn munmap(addr: *mut u8, len: usize) -> raw_sys::Result<()> {
unsafe { (platform_hooks().munmap)(addr, len) }
}
#[inline(always)]
fn platform_hooks() -> &'static PlatformHooks {
PLATFORM_FROZEN.store(true, Ordering::Release);
let hooks = PLATFORM_HOOKS.load(Ordering::Acquire);
unsafe { &*hooks }
}
fn default_page_size() -> usize {
let size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
assert!(size > 0, "sysconf(_SC_PAGESIZE) failed");
size as usize
}
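// Illustrative usage sketch (not part of the original commit; `fixed_page_size` and
// `TEST_HOOKS` are hypothetical): embedders may substitute the syscall surface, but only
// before the allocator's first platform call, since `platform_hooks()` freezes the
// configuration on first use.
//
//     fn fixed_page_size() -> usize { 4096 }
//     static TEST_HOOKS: PlatformHooks = PlatformHooks {
//         page_size: fixed_page_size,
//         // ...remaining fields wired to the raw_sys defaults...
//     };
//     install_platform_hooks(&TEST_HOOKS).expect("installed before first allocator use");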

View File

@@ -0,0 +1,152 @@
use core::mem::size_of;
use core::sync::atomic::{AtomicU8, AtomicU32, Ordering};
use super::miniheap::{MiniHeap, MiniHeapId};
use super::platform;
use super::raw_sys;
use super::span::Span;
#[derive(Debug)]
pub struct MiniHeapPool {
base: *mut MiniHeap,
live: *mut AtomicU8,
free_ids: *mut u32,
capacity: u32,
len: AtomicU32,
live_len: AtomicU32,
free_len: u32,
}
impl MiniHeapPool {
#[inline]
pub fn with_capacity(capacity: u32) -> raw_sys::Result<Self> {
assert!(capacity > 0);
let bytes = capacity as usize * size_of::<MiniHeap>();
let live = unsafe {
platform::map_anonymous(capacity as usize, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
};
let free_ids = unsafe {
platform::map_anonymous(
capacity as usize * size_of::<u32>(),
raw_sys::PROT_READ | raw_sys::PROT_WRITE,
)? as *mut u32
};
let base = unsafe {
platform::map_anonymous(bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
as *mut MiniHeap
};
Ok(Self {
base,
live: live as *mut AtomicU8,
free_ids,
capacity,
len: AtomicU32::new(0),
live_len: AtomicU32::new(0),
free_len: 0,
})
}
#[inline(always)]
pub fn len(&self) -> u32 {
self.len.load(Ordering::Acquire)
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline(always)]
pub const fn capacity(&self) -> u32 {
self.capacity
}
#[inline(always)]
pub fn live_len(&self) -> u32 {
self.live_len.load(Ordering::Acquire)
}
#[inline]
pub fn allocate(
&mut self,
span: Span,
object_count: u16,
object_size: usize,
) -> Option<(MiniHeapId, &MiniHeap)> {
let (id, index) = if self.free_len > 0 {
self.free_len -= 1;
let id = unsafe { *self.free_ids.add(self.free_len as usize) };
(MiniHeapId::new(id), (id - 1) as usize)
} else {
let len = self.len.load(Ordering::Relaxed);
if len >= self.capacity {
return None;
}
let id = len + 1;
self.len.store(id, Ordering::Release);
(MiniHeapId::new(id), (id - 1) as usize)
};
self.live_len.fetch_add(1, Ordering::AcqRel);
unsafe {
let ptr = self.base.add(index);
ptr.write(MiniHeap::new(span, object_count, object_size));
(&*self.live.add(index)).store(1, Ordering::Release);
Some((id, &*ptr))
}
}
#[inline]
pub fn get(&self, id: MiniHeapId) -> Option<&MiniHeap> {
if !id.has_value() {
return None;
}
let index = id.value() - 1;
if index >= self.len.load(Ordering::Acquire) {
return None;
}
if unsafe { (&*self.live.add(index as usize)).load(Ordering::Acquire) } == 0 {
return None;
}
unsafe { Some(&*self.base.add(index as usize)) }
}
#[inline]
pub fn release(&mut self, id: MiniHeapId) -> bool {
if !id.has_value() {
return false;
}
let index = id.value() - 1;
if index >= self.len.load(Ordering::Acquire) {
return false;
}
if unsafe { (&*self.live.add(index as usize)).load(Ordering::Acquire) } == 0 {
return false;
}
unsafe {
(&*self.live.add(index as usize)).store(0, Ordering::Release);
self.free_ids.add(self.free_len as usize).write(id.value());
}
self.free_len += 1;
self.live_len.fetch_sub(1, Ordering::AcqRel);
true
}
}
impl Drop for MiniHeapPool {
fn drop(&mut self) {
let bytes = self.capacity as usize * size_of::<MiniHeap>();
let live_bytes = self.capacity as usize;
let free_bytes = self.capacity as usize * size_of::<u32>();
unsafe {
let _ = platform::munmap(self.free_ids as *mut u8, free_bytes);
let _ = platform::munmap(self.live as *mut u8, live_bytes);
let _ = platform::munmap(self.base as *mut u8, bytes);
}
}
}
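// Minimal sketch (not part of the original commit): allocate/release round trip through
// the id-stable pool; it needs anonymous mmap, so it only runs on the Linux targets this
// module already assumes.
#[cfg(test)]
mod pool_sketch_tests {
    use super::{MiniHeapPool, Span};

    #[test]
    fn allocate_release_recycles_ids() {
        let mut pool = MiniHeapPool::with_capacity(4).expect("anonymous mappings");
        let first = pool.allocate(Span::new(0, 1), 4, 16).expect("capacity left").0;
        assert!(pool.get(first).is_some());
        assert_eq!(pool.live_len(), 1);
        assert!(pool.release(first));
        assert!(pool.get(first).is_none());
        let second = pool.allocate(Span::new(1, 1), 4, 16).expect("capacity left").0;
        assert_eq!(second, first); // freed ids are handed out again before new ones
    }
}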

View File

@@ -0,0 +1,218 @@
use core::ptr::null_mut;
pub type Result<T> = core::result::Result<T, Error>;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Error(pub i32);
impl Error {
#[inline(always)]
pub const fn errno(self) -> i32 {
self.0
}
}
pub const PROT_NONE: u32 = libc::PROT_NONE as u32;
pub const PROT_READ: u32 = libc::PROT_READ as u32;
pub const PROT_WRITE: u32 = libc::PROT_WRITE as u32;
pub const MAP_SHARED: u32 = libc::MAP_SHARED as u32;
pub const MAP_PRIVATE: u32 = libc::MAP_PRIVATE as u32;
pub const MAP_FIXED: u32 = libc::MAP_FIXED as u32;
pub const MAP_ANONYMOUS: u32 = libc::MAP_ANONYMOUS as u32;
pub const MFD_CLOEXEC: u32 = libc::MFD_CLOEXEC;
pub const FALLOC_FL_KEEP_SIZE: u32 = libc::FALLOC_FL_KEEP_SIZE as u32;
pub const FALLOC_FL_PUNCH_HOLE: u32 = libc::FALLOC_FL_PUNCH_HOLE as u32;
pub const FUTEX_WAIT: u32 = libc::FUTEX_WAIT as u32;
pub const FUTEX_WAKE: u32 = libc::FUTEX_WAKE as u32;
pub const FUTEX_PRIVATE_FLAG: u32 = libc::FUTEX_PRIVATE_FLAG as u32;
pub const FUTEX_WAIT_PRIVATE: u32 = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
pub const FUTEX_WAKE_PRIVATE: u32 = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
pub const EINTR: i32 = libc::EINTR;
pub const EAGAIN: i32 = libc::EAGAIN;
pub const EFAULT: i32 = libc::EFAULT;
#[inline(always)]
fn last_error() -> Error {
Error(
std::io::Error::last_os_error()
.raw_os_error()
.unwrap_or(libc::EINVAL),
)
}
#[inline(always)]
fn map_c_int(result: libc::c_int) -> Result<libc::c_int> {
if result == -1 {
Err(last_error())
} else {
Ok(result)
}
}
#[inline(always)]
fn map_c_long(result: libc::c_long) -> Result<libc::c_long> {
if result == -1 {
Err(last_error())
} else {
Ok(result)
}
}
#[inline(always)]
fn to_off_t(value: u64) -> Result<libc::off_t> {
value.try_into().map_err(|_| Error(libc::EINVAL))
}
#[inline(always)]
pub fn getpid() -> Result<u32> {
let pid = unsafe { libc::getpid() };
if pid == -1 {
Err(last_error())
} else {
Ok(pid as u32)
}
}
#[inline(always)]
pub fn gettid() -> Result<u32> {
unsafe { map_c_long(libc::syscall(libc::SYS_gettid)).map(|value| value as u32) }
}
#[inline(always)]
pub fn close(fd: i32) -> Result<()> {
unsafe { map_c_int(libc::close(fd)).map(|_| ()) }
}
#[inline(always)]
pub fn memfd_create(name: *const u8, flags: u32) -> Result<i32> {
unsafe {
map_c_long(libc::syscall(
libc::SYS_memfd_create,
name.cast::<libc::c_char>(),
flags as libc::c_uint,
))
.map(|fd| fd as i32)
}
}
#[inline(always)]
pub fn ftruncate(fd: i32, len: u64) -> Result<()> {
let len = to_off_t(len)?;
unsafe { map_c_int(libc::ftruncate(fd, len)).map(|_| ()) }
}
#[inline(always)]
pub fn fallocate(fd: i32, mode: u32, offset: u64, len: u64) -> Result<()> {
let offset = to_off_t(offset)?;
let len = to_off_t(len)?;
unsafe { map_c_int(libc::fallocate(fd, mode as libc::c_int, offset, len)).map(|_| ()) }
}
#[inline(always)]
pub fn getrandom(buf: &mut [u8], flags: u32) -> Result<usize> {
let result =
unsafe { libc::getrandom(buf.as_mut_ptr().cast(), buf.len(), flags as libc::c_uint) };
if result == -1 {
Err(last_error())
} else {
Ok(result as usize)
}
}
#[inline(always)]
/// # Safety
///
/// `uaddr` must be valid for the kernel to read as a futex word for the duration of the call.
pub unsafe fn futex_wait(uaddr: *const u32, op: u32, expected: u32) -> Result<()> {
unsafe {
map_c_long(libc::syscall(
libc::SYS_futex,
uaddr,
op as libc::c_int,
expected,
null_mut::<libc::timespec>(),
0,
0,
))
.map(|_| ())
}
}
#[inline(always)]
/// # Safety
///
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
pub unsafe fn futex_wake(uaddr: *const u32, op: u32, count: u32) -> Result<u32> {
unsafe {
map_c_long(libc::syscall(
libc::SYS_futex,
uaddr,
op as libc::c_int,
count,
null_mut::<libc::timespec>(),
0,
0,
))
.map(|woken| woken as u32)
}
}
#[inline(always)]
/// # Safety
///
/// The caller must uphold the platform `mmap(2)` contract for the provided arguments and manage
/// any returned mapping according to Rust aliasing and lifetime rules.
pub unsafe fn mmap(
addr: *mut u8,
len: usize,
prot: u32,
flags: u32,
fd: i32,
offset: u64,
) -> Result<*mut u8> {
let offset = to_off_t(offset)?;
let result = unsafe {
libc::mmap(
addr.cast(),
len,
prot as libc::c_int,
flags as libc::c_int,
fd,
offset,
)
};
if result == libc::MAP_FAILED {
Err(last_error())
} else {
Ok(result.cast())
}
}
#[inline(always)]
/// # Safety
///
/// The caller must later unmap the returned memory exactly once.
pub unsafe fn map_anonymous(len: usize, prot: u32) -> Result<*mut u8> {
unsafe { mmap(null_mut(), len, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) }
}
#[inline(always)]
/// # Safety
///
/// `addr..addr+len` must refer to a valid mapped region.
pub unsafe fn mprotect(addr: *mut u8, len: usize, prot: u32) -> Result<()> {
unsafe { map_c_int(libc::mprotect(addr.cast(), len, prot as libc::c_int)).map(|_| ()) }
}
#[inline(always)]
/// # Safety
///
/// `addr..addr+len` must refer to a valid mapping that can be unmapped exactly once.
pub unsafe fn munmap(addr: *mut u8, len: usize) -> Result<()> {
unsafe { map_c_int(libc::munmap(addr.cast(), len)).map(|_| ()) }
}
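// Minimal sketch (not part of the original commit): the anonymous-mapping helpers round
// trip on the Linux targets this module is written for; 4096 bytes is an arbitrary length
// that the kernel rounds up to whole pages.
#[cfg(test)]
mod raw_sys_sketch_tests {
    use super::{PROT_READ, PROT_WRITE, map_anonymous, munmap};

    #[test]
    fn anonymous_mapping_round_trip() {
        unsafe {
            let ptr = map_anonymous(4096, PROT_READ | PROT_WRITE).expect("mmap");
            ptr.write(0xAB);
            assert_eq!(ptr.read(), 0xAB);
            munmap(ptr, 4096).expect("munmap");
        }
    }
}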

View File

@@ -0,0 +1,96 @@
use super::platform;
use super::raw_sys;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Mwc64 {
x: u64,
c: u64,
t: u64,
value: u64,
index: u8,
}
impl Mwc64 {
#[inline(always)]
pub const fn new(seed1: u64, seed2: u64) -> Self {
Self {
x: (seed1 << 32).wrapping_add(seed2),
c: 123_456_123_456_123_456,
t: 0,
value: 0,
index: 2,
}
}
#[inline(always)]
pub fn from_os_seed() -> raw_sys::Result<Self> {
let mut buf = [0u8; 16];
let mut filled = 0usize;
while filled < buf.len() {
let read = platform::getrandom(&mut buf[filled..], 0)?;
if read == 0 {
return Err(raw_sys::Error(5)); // zero-byte read from getrandom: report EIO (errno 5)
}
filled += read;
}
let seed1 = u64::from_ne_bytes(buf[0..8].try_into().unwrap());
let seed2 = u64::from_ne_bytes(buf[8..16].try_into().unwrap());
Ok(Self::new(seed1.max(1), seed2.max(1)))
}
#[inline(always)]
fn next_block(&mut self) -> u64 {
self.t = (self.x << 58).wrapping_add(self.c);
self.c = self.x >> 6;
self.x = self.x.wrapping_add(self.t);
self.c = self.c.wrapping_add((self.x < self.t) as u64);
self.x
}
#[inline(always)]
pub fn next_u32(&mut self) -> u32 {
if self.index == 2 {
self.value = self.next_block();
self.index = 0;
}
let shift = (self.index as u32) * 32;
let value = (self.value >> shift) as u32;
self.index += 1;
value
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Mwc {
inner: Mwc64,
}
impl Mwc {
#[inline(always)]
pub const fn new(seed1: u64, seed2: u64) -> Self {
Self {
inner: Mwc64::new(seed1, seed2),
}
}
#[inline(always)]
pub fn from_os_seed() -> raw_sys::Result<Self> {
Ok(Self {
inner: Mwc64::from_os_seed()?,
})
}
#[inline(always)]
pub fn next_u32(&mut self) -> u32 {
self.inner.next_u32()
}
#[inline(always)]
pub fn in_range(&mut self, min: usize, max: usize) -> usize {
debug_assert!(min <= max);
let range = 1 + max - min;
min + ((((self.next_u32() as u64) * (range as u64)) >> 32) as usize)
}
}
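// Minimal sketch (not part of the original commit): `in_range` is inclusive on both ends,
// which is what the shuffle vector relies on when it picks swap indices.
#[cfg(test)]
mod rng_sketch_tests {
    use super::Mwc;

    #[test]
    fn in_range_is_inclusive() {
        let mut rng = Mwc::new(0x1234_5678_9abc_def0, 0x0fed_cba9_8765_4321);
        for _ in 0..1_000 {
            let value = rng.in_range(3, 7);
            assert!((3..=7).contains(&value));
        }
    }
}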

View File

@@ -0,0 +1,204 @@
use super::bitmap::RelaxedBitmap;
use super::constants::{
MAX_OBJECT_SLOTS_PER_SPAN, MAX_SHUFFLE_VECTOR_LENGTH, MIN_OBJECT_SIZE,
MIN_SHUFFLE_VECTOR_LENGTH,
};
use super::page::page_size;
use super::rng::Mwc;
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct ShuffleEntry {
pub miniheap_offset: u16,
pub slot_index: u16,
}
impl ShuffleEntry {
pub const EMPTY: Self = Self {
miniheap_offset: u16::MAX,
slot_index: u16::MAX,
};
#[inline(always)]
pub const fn new(miniheap_offset: u16, slot_index: u16) -> Self {
Self {
miniheap_offset,
slot_index,
}
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ShuffleVector {
entries: [ShuffleEntry; MAX_SHUFFLE_VECTOR_LENGTH],
max_count: u16,
off: u16,
prng: Mwc,
}
impl ShuffleVector {
#[inline]
pub fn for_object_size(object_size: usize, seed1: u64, seed2: u64) -> Self {
Self::with_capacity(Self::capacity_for_object_size(object_size), seed1, seed2)
}
#[inline]
pub fn with_capacity(max_count: usize, seed1: u64, seed2: u64) -> Self {
assert!(max_count <= MAX_SHUFFLE_VECTOR_LENGTH);
Self {
entries: [ShuffleEntry::EMPTY; MAX_SHUFFLE_VECTOR_LENGTH],
max_count: max_count as u16,
off: max_count as u16,
prng: Mwc::new(seed1.max(1), seed2.max(1)),
}
}
#[inline(always)]
pub fn capacity_for_object_size(object_size: usize) -> usize {
let size = if object_size < MIN_OBJECT_SIZE {
MIN_OBJECT_SIZE
} else {
object_size
};
let per_page = page_size() / size;
let with_min = if per_page < MIN_SHUFFLE_VECTOR_LENGTH {
MIN_SHUFFLE_VECTOR_LENGTH
} else {
per_page
};
if with_min > MAX_OBJECT_SLOTS_PER_SPAN {
MAX_OBJECT_SLOTS_PER_SPAN
} else {
with_min
}
}
#[inline(always)]
pub const fn max_count(&self) -> usize {
self.max_count as usize
}
#[inline(always)]
pub const fn len(&self) -> usize {
self.max_count() - self.off as usize
}
#[inline(always)]
pub const fn is_empty(&self) -> bool {
self.off == self.max_count
}
#[inline(always)]
pub const fn is_full(&self) -> bool {
self.off == 0
}
#[inline(always)]
pub const fn is_exhausted(&self) -> bool {
self.off >= self.max_count
}
#[inline(always)]
pub fn clear(&mut self) {
self.off = self.max_count;
}
#[inline(always)]
pub fn active_entries(&self) -> &[ShuffleEntry] {
&self.entries[self.off as usize..self.max_count as usize]
}
#[inline]
pub fn count_entries_for_offset(&self, miniheap_offset: u16) -> usize {
self.active_entries()
.iter()
.filter(|entry| entry.miniheap_offset == miniheap_offset)
.count()
}
#[inline]
pub fn push(&mut self, entry: ShuffleEntry) {
assert!(self.off > 0);
self.off -= 1;
let inserted = self.off as usize;
self.entries[inserted] = entry;
let swap_index = self.prng.in_range(inserted, self.max_count() - 1);
self.entries.swap(inserted, swap_index);
}
#[inline]
pub fn pop(&mut self) -> Option<ShuffleEntry> {
if self.is_exhausted() {
return None;
}
let idx = self.off as usize;
let value = self.entries[idx];
self.off += 1;
Some(value)
}
#[inline]
pub fn refill_from_bitmap(
&mut self,
miniheap_offset: u16,
bitmap: &mut RelaxedBitmap,
) -> usize {
let mut free_bits = *bitmap;
free_bits.invert_masked();
bitmap.set_all();
let mut added = 0usize;
for slot in free_bits.iter_set_bits() {
if self.is_full() {
let _ = bitmap.unset(slot);
continue;
}
self.off -= 1;
self.entries[self.off as usize] = ShuffleEntry::new(miniheap_offset, slot as u16);
added += 1;
}
if added > 1 {
self.shuffle_active();
}
added
}
#[inline]
pub fn refill_from_heap(
&mut self,
miniheap_offset: u16,
heap: &super::miniheap::MiniHeap,
) -> usize {
let free_bits = heap.bitmap().take_free_bits();
let mut added = 0usize;
for slot in free_bits.iter_set_bits() {
if self.is_full() {
let _ = heap.free_offset(slot);
continue;
}
self.off -= 1;
self.entries[self.off as usize] = ShuffleEntry::new(miniheap_offset, slot as u16);
added += 1;
}
if added > 1 {
self.shuffle_active();
}
added
}
fn shuffle_active(&mut self) {
let start = self.off as usize;
let end = self.max_count();
let mut i = start;
while i < end {
let swap_index = self.prng.in_range(i, end - 1);
self.entries.swap(i, swap_index);
i += 1;
}
}
}
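// Minimal sketch (not part of the original commit): pushes two entries for the same
// miniheap and drains them; the pop order is randomized, but nothing is lost or duplicated.
#[cfg(test)]
mod shuffle_sketch_tests {
    use super::{ShuffleEntry, ShuffleVector};

    #[test]
    fn push_pop_preserves_entries() {
        let mut vector = ShuffleVector::with_capacity(4, 1, 2);
        vector.push(ShuffleEntry::new(7, 0));
        vector.push(ShuffleEntry::new(7, 1));
        assert_eq!(vector.len(), 2);
        let first = vector.pop().expect("two entries were pushed");
        let second = vector.pop().expect("two entries were pushed");
        assert_eq!(first.miniheap_offset, 7);
        assert_eq!(second.miniheap_offset, 7);
        assert_ne!(first.slot_index, second.slot_index);
        assert!(vector.pop().is_none());
    }
}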

View File

@@ -0,0 +1,73 @@
use super::constants::MAX_SMALL_ALLOCATION;
pub const NUM_SIZE_CLASSES: usize = 25;
pub const CLASS_TO_SIZE: [usize; NUM_SIZE_CLASSES] = [
16, 16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640, 768, 896,
1024, 2048, 4096, 8192, 16384,
];
#[inline(always)]
pub const fn byte_size_for_class(class: u8) -> usize {
CLASS_TO_SIZE[class as usize]
}
#[inline(always)]
pub const fn size_class_for(size: usize) -> Option<u8> {
if size <= 16 {
Some(1)
} else if size <= 32 {
Some(2)
} else if size <= 48 {
Some(3)
} else if size <= 64 {
Some(4)
} else if size <= 80 {
Some(5)
} else if size <= 96 {
Some(6)
} else if size <= 112 {
Some(7)
} else if size <= 128 {
Some(8)
} else if size <= 160 {
Some(9)
} else if size <= 192 {
Some(10)
} else if size <= 224 {
Some(11)
} else if size <= 256 {
Some(12)
} else if size <= 320 {
Some(13)
} else if size <= 384 {
Some(14)
} else if size <= 448 {
Some(15)
} else if size <= 512 {
Some(16)
} else if size <= 640 {
Some(17)
} else if size <= 768 {
Some(18)
} else if size <= 896 {
Some(19)
} else if size <= 1024 {
Some(20)
} else if size <= 2048 {
Some(21)
} else if size <= 4096 {
Some(22)
} else if size <= 8192 {
Some(23)
} else if size <= 16384 {
Some(24)
} else {
None
}
}
#[inline(always)]
pub const fn is_small_allocation(size: usize) -> bool {
size <= MAX_SMALL_ALLOCATION
}
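// Minimal sketch (not part of the original commit): a request rounds up to the smallest
// class that holds it (a 100-byte request is served from the 112-byte class), and each
// class size maps back to its own class.
#[cfg(test)]
mod size_map_sketch_tests {
    use super::{CLASS_TO_SIZE, byte_size_for_class, size_class_for};

    #[test]
    fn sizes_round_up_to_their_class() {
        assert_eq!(size_class_for(100), Some(7));
        assert_eq!(byte_size_for_class(7), 112);
        assert_eq!(size_class_for(16_384), Some(24));
        assert_eq!(size_class_for(16_385), None);
        for (class, &bytes) in CLASS_TO_SIZE.iter().enumerate().skip(1) {
            assert_eq!(size_class_for(bytes), Some(class as u8));
        }
    }
}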

View File

@@ -0,0 +1,45 @@
pub const SPAN_CLASS_COUNT: u32 = 256;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span {
pub offset: u32,
pub length: u32,
}
impl Span {
#[inline(always)]
pub const fn new(offset: u32, length: u32) -> Self {
Self { offset, length }
}
#[inline(always)]
pub const fn empty(self) -> bool {
self.length == 0
}
#[inline(always)]
pub fn split_after(&mut self, count: u32) -> Self {
assert!(count <= self.length);
let rest = Self {
offset: self.offset + count,
length: self.length - count,
};
self.length = count;
rest
}
#[inline(always)]
pub const fn span_class(self) -> u32 {
let length = if self.length > SPAN_CLASS_COUNT {
SPAN_CLASS_COUNT
} else {
self.length
};
length - 1
}
#[inline(always)]
pub fn byte_length_for_page_size(self, page_size: usize) -> usize {
self.length as usize * page_size
}
}
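// Minimal sketch (not part of the original commit): split_after keeps the prefix in place
// and returns the remainder, measured in pages.
#[cfg(test)]
mod span_sketch_tests {
    use super::Span;

    #[test]
    fn split_after_keeps_the_prefix() {
        let mut span = Span::new(10, 8);
        let rest = span.split_after(3);
        assert_eq!(span, Span::new(10, 3));
        assert_eq!(rest, Span::new(13, 5));
        assert_eq!(rest.byte_length_for_page_size(4096), 5 * 4096);
    }
}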

View File

@@ -0,0 +1,267 @@
use core::sync::atomic::{AtomicU64, Ordering};
use super::page::page_size;
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CompactionEstimate {
pub candidate_heaps: u32,
pub candidate_pages: u32,
pub candidate_free_bytes: usize,
pub best_case_meshes: u32,
pub best_case_reclaimable_pages: u32,
pub best_case_reclaimable_bytes: usize,
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct MeshStats {
pub arena_size: usize,
pub reserved_bytes: usize,
pub reusable_span_count: u32,
pub reusable_span_bytes: usize,
pub live_miniheaps: u32,
pub live_small_heaps: u32,
pub partial_small_heaps: u32,
pub full_small_heaps: u32,
pub meshed_small_heaps: u32,
pub reusable_small_heaps: u32,
pub live_large_allocations: u32,
pub live_small_bytes: usize,
pub live_large_bytes: usize,
pub retained_small_span_bytes: usize,
pub retained_large_span_bytes: usize,
pub virtual_small_span_bytes: usize,
pub small_allocations: u64,
pub small_deallocations: u64,
pub large_allocations: u64,
pub large_deallocations: u64,
pub compact_calls: u64,
pub meshes_performed: u64,
pub meshed_pages: u64,
pub meshed_bytes: u64,
pub compaction: CompactionEstimate,
}
impl MeshStats {
#[inline(always)]
pub const fn live_bytes(&self) -> usize {
self.live_small_bytes + self.live_large_bytes
}
#[inline(always)]
pub const fn retained_bytes(&self) -> usize {
self.retained_small_span_bytes + self.retained_large_span_bytes
}
#[inline(always)]
pub const fn small_fragmentation_bytes(&self) -> usize {
self.retained_small_span_bytes
.saturating_sub(self.live_small_bytes)
}
#[inline(always)]
pub const fn mesh_alias_bytes(&self) -> usize {
self.virtual_small_span_bytes
.saturating_sub(self.retained_small_span_bytes)
}
pub fn compaction_advice(&self) -> CompactionAdvice {
let fragmented = self.small_fragmentation_bytes();
let retained = self.retained_small_span_bytes;
let fragmentation_percent = fragmented
.saturating_mul(100)
.checked_div(retained)
.unwrap_or(0)
.min(100) as u8;
let reclaimable = self.compaction.best_case_reclaimable_bytes;
let page = page_size();
let recommendation = if self.compaction.best_case_meshes == 0 || reclaimable < page {
CompactionRecommendation::Idle
} else if reclaimable >= page * 4
&& (fragmentation_percent >= 20 || reclaimable.saturating_mul(4) >= retained.max(page))
{
CompactionRecommendation::Compact
} else if reclaimable >= page
&& (fragmentation_percent >= 10 || self.compaction.candidate_heaps >= 2)
{
CompactionRecommendation::Consider
} else {
CompactionRecommendation::Idle
};
CompactionAdvice {
recommendation,
fragmentation_bytes: fragmented,
fragmentation_percent,
candidate_heaps: self.compaction.candidate_heaps,
candidate_free_bytes: self.compaction.candidate_free_bytes,
best_case_meshes: self.compaction.best_case_meshes,
best_case_reclaimable_pages: self.compaction.best_case_reclaimable_pages,
best_case_reclaimable_bytes: reclaimable,
}
}
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CompactionAdvice {
pub recommendation: CompactionRecommendation,
pub fragmentation_bytes: usize,
pub fragmentation_percent: u8,
pub candidate_heaps: u32,
pub candidate_free_bytes: usize,
pub best_case_meshes: u32,
pub best_case_reclaimable_pages: u32,
pub best_case_reclaimable_bytes: usize,
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CompactionRecommendation {
#[default]
Idle,
Consider,
Compact,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RuntimeCompactionPolicy {
pub minimum_recommendation: CompactionRecommendation,
pub min_fragmentation_bytes: usize,
pub min_reclaimable_bytes: usize,
pub min_candidate_heaps: u32,
}
impl RuntimeCompactionPolicy {
pub fn should_compact(&self, advice: &CompactionAdvice) -> bool {
recommendation_rank(advice.recommendation)
>= recommendation_rank(self.minimum_recommendation)
&& advice.fragmentation_bytes >= self.min_fragmentation_bytes
&& advice.best_case_reclaimable_bytes >= self.min_reclaimable_bytes
&& advice.candidate_heaps >= self.min_candidate_heaps
}
}
impl Default for RuntimeCompactionPolicy {
fn default() -> Self {
let page = page_size();
Self {
minimum_recommendation: CompactionRecommendation::Consider,
min_fragmentation_bytes: page,
min_reclaimable_bytes: page,
min_candidate_heaps: 2,
}
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuntimeCompactionResult {
Compacted {
meshes: usize,
advice: CompactionAdvice,
},
Skipped {
reason: CompactionSkipReason,
advice: Option<CompactionAdvice>,
},
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompactionSkipReason {
AllocatorUnavailable,
ThreadUnavailable,
NotAtSafepoint,
ThreadsActive,
Policy,
}
#[inline(always)]
const fn recommendation_rank(recommendation: CompactionRecommendation) -> u8 {
match recommendation {
CompactionRecommendation::Idle => 0,
CompactionRecommendation::Consider => 1,
CompactionRecommendation::Compact => 2,
}
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) struct CounterSnapshot {
pub small_allocations: u64,
pub small_deallocations: u64,
pub large_allocations: u64,
pub large_deallocations: u64,
pub compact_calls: u64,
pub meshes_performed: u64,
pub meshed_pages: u64,
pub meshed_bytes: u64,
}
#[derive(Debug)]
pub(crate) struct StatsState {
small_allocations: AtomicU64,
small_deallocations: AtomicU64,
large_allocations: AtomicU64,
large_deallocations: AtomicU64,
compact_calls: AtomicU64,
meshes_performed: AtomicU64,
meshed_pages: AtomicU64,
meshed_bytes: AtomicU64,
}
impl StatsState {
pub const fn new() -> Self {
Self {
small_allocations: AtomicU64::new(0),
small_deallocations: AtomicU64::new(0),
large_allocations: AtomicU64::new(0),
large_deallocations: AtomicU64::new(0),
compact_calls: AtomicU64::new(0),
meshes_performed: AtomicU64::new(0),
meshed_pages: AtomicU64::new(0),
meshed_bytes: AtomicU64::new(0),
}
}
#[inline(always)]
pub fn record_small_allocation(&self) {
self.small_allocations.fetch_add(1, Ordering::Relaxed);
}
#[inline(always)]
pub fn record_small_deallocation(&self) {
self.small_deallocations.fetch_add(1, Ordering::Relaxed);
}
#[inline(always)]
pub fn record_large_allocation(&self) {
self.large_allocations.fetch_add(1, Ordering::Relaxed);
}
#[inline(always)]
pub fn record_large_deallocation(&self) {
self.large_deallocations.fetch_add(1, Ordering::Relaxed);
}
#[inline(always)]
pub fn record_compact_call(&self) {
self.compact_calls.fetch_add(1, Ordering::Relaxed);
}
#[inline(always)]
pub fn record_mesh(&self, pages: u32, bytes: usize) {
self.meshes_performed.fetch_add(1, Ordering::Relaxed);
self.meshed_pages.fetch_add(pages as u64, Ordering::Relaxed);
self.meshed_bytes.fetch_add(bytes as u64, Ordering::Relaxed);
}
pub fn snapshot(&self) -> CounterSnapshot {
CounterSnapshot {
small_allocations: self.small_allocations.load(Ordering::Relaxed),
small_deallocations: self.small_deallocations.load(Ordering::Relaxed),
large_allocations: self.large_allocations.load(Ordering::Relaxed),
large_deallocations: self.large_deallocations.load(Ordering::Relaxed),
compact_calls: self.compact_calls.load(Ordering::Relaxed),
meshes_performed: self.meshes_performed.load(Ordering::Relaxed),
meshed_pages: self.meshed_pages.load(Ordering::Relaxed),
meshed_bytes: self.meshed_bytes.load(Ordering::Relaxed),
}
}
}
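
#[cfg(test)]
mod stats_usage_sketch {
    // Illustrative sketches: an all-zero `MeshStats` yields `Idle` advice that the
    // default policy declines to act on, and `StatsState` counters are recorded
    // with relaxed atomics and read back as a point-in-time snapshot. The values
    // below are arbitrary.
    use super::{CompactionRecommendation, MeshStats, RuntimeCompactionPolicy, StatsState};

    #[test]
    fn empty_stats_do_not_trigger_compaction() {
        let stats = MeshStats::default();
        let advice = stats.compaction_advice();
        assert_eq!(advice.recommendation, CompactionRecommendation::Idle);
        assert!(!RuntimeCompactionPolicy::default().should_compact(&advice));
    }

    #[test]
    fn snapshot_reflects_recorded_events() {
        let state = StatsState::new();
        state.record_small_allocation();
        state.record_mesh(3, 3 * 4096);
        let snapshot = state.snapshot();
        assert_eq!(snapshot.small_allocations, 1);
        assert_eq!(snapshot.meshes_performed, 1);
        assert_eq!(snapshot.meshed_pages, 3);
        assert_eq!(snapshot.meshed_bytes, 3 * 4096);
    }
}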

View File

@@ -0,0 +1,137 @@
use core::sync::atomic::{AtomicU32, Ordering};
use super::platform;
use super::raw_sys;
const UNLOCKED: u32 = 0;
const LOCKED: u32 = 1;
const CONTENDED: u32 = 2;
#[derive(Debug)]
pub struct FutexMutex {
state: AtomicU32,
}
impl FutexMutex {
pub const fn new() -> Self {
Self {
state: AtomicU32::new(UNLOCKED),
}
}
pub fn lock(&self) -> FutexMutexGuard<'_> {
if self
.state
.compare_exchange(UNLOCKED, LOCKED, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
self.lock_contended();
}
FutexMutexGuard { mutex: self }
}
    fn lock_contended(&self) {
        loop {
            // Advertise contention; if the lock happened to be free we now own it.
            if self.state.swap(CONTENDED, Ordering::Acquire) == UNLOCKED {
                return;
            }
            // Sleep until the state changes; EAGAIN and EINTR are spurious wakeups
            // and simply retry the loop.
            match unsafe {
                platform::futex_wait(self.state_ptr(), raw_sys::FUTEX_WAIT_PRIVATE, CONTENDED)
            } {
                Ok(()) => {}
                Err(error) if matches!(error.errno(), raw_sys::EAGAIN | raw_sys::EINTR) => {}
                Err(_) => {}
            }
        }
    }
    fn unlock(&self) {
        // LOCKED -> UNLOCKED needs no wakeup; if the subtraction did not start from
        // LOCKED the state was CONTENDED, so reset to UNLOCKED and wake one waiter.
        if self.state.fetch_sub(1, Ordering::Release) != LOCKED {
            self.state.store(UNLOCKED, Ordering::Release);
            let _ =
                unsafe { platform::futex_wake(self.state_ptr(), raw_sys::FUTEX_WAKE_PRIVATE, 1) };
        }
    }
#[inline(always)]
fn state_ptr(&self) -> *const u32 {
(&self.state as *const AtomicU32).cast::<u32>()
}
}
impl Default for FutexMutex {
fn default() -> Self {
Self::new()
}
}
pub struct FutexMutexGuard<'a> {
mutex: &'a FutexMutex,
}
impl Drop for FutexMutexGuard<'_> {
fn drop(&mut self) {
self.mutex.unlock();
}
}
pub(crate) fn futex_wait_for_value(state: &AtomicU32, expected: u32) {
match unsafe {
platform::futex_wait(
(state as *const AtomicU32).cast::<u32>(),
raw_sys::FUTEX_WAIT_PRIVATE,
expected,
)
} {
Ok(()) => {}
Err(error) if matches!(error.errno(), raw_sys::EAGAIN | raw_sys::EINTR) => {}
Err(_) => {}
}
}
pub(crate) fn futex_wake_all(state: &AtomicU32) {
let _ = unsafe {
platform::futex_wake(
(state as *const AtomicU32).cast::<u32>(),
raw_sys::FUTEX_WAKE_PRIVATE,
i32::MAX as u32,
)
};
}
#[cfg(test)]
mod tests {
use core::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use std::vec::Vec;
use super::FutexMutex;
#[test]
fn futex_mutex_serializes_multiple_threads() {
let mutex = Arc::new(FutexMutex::new());
let counter = Arc::new(AtomicUsize::new(0));
let mut threads = Vec::new();
for _ in 0..4 {
let mutex = Arc::clone(&mutex);
let counter = Arc::clone(&counter);
threads.push(thread::spawn(move || {
for _ in 0..5000 {
let _guard = mutex.lock();
counter.fetch_add(1, Ordering::Relaxed);
}
}));
}
for thread in threads {
thread.join().unwrap();
}
assert_eq!(counter.load(Ordering::Acquire), 20_000);
}
}

View File

@@ -0,0 +1,124 @@
use core::array;
use core::ptr::null;
use super::constants::{MAX_ATTACHED_MINIHEAPS_PER_CLASS, MIN_OBJECT_SIZE, NUM_SIZE_CLASSES};
use super::miniheap::{MiniHeap, MiniHeapId};
use super::platform;
use super::raw_sys;
use super::rng::Mwc;
use super::shuffle::ShuffleVector;
use super::size_map::byte_size_for_class;
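// Design note: each size class keeps a ShuffleVector for randomized slot selection
// plus a small fixed array of attached miniheaps (ids and raw pointers kept in
// lock-step); clear_attached resets both when the class detaches from its heaps.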
#[derive(Clone, Copy, Debug)]
pub(crate) struct ClassState {
pub shuffle: ShuffleVector,
pub attached_ids: [MiniHeapId; MAX_ATTACHED_MINIHEAPS_PER_CLASS],
pub attached_heaps: [*const MiniHeap; MAX_ATTACHED_MINIHEAPS_PER_CLASS],
pub attached_len: u8,
pub attached_cursor: u8,
}
impl ClassState {
fn new(object_size: usize, seed1: u64, seed2: u64) -> Self {
Self {
shuffle: ShuffleVector::for_object_size(object_size, seed1, seed2),
attached_ids: [MiniHeapId::new(0); MAX_ATTACHED_MINIHEAPS_PER_CLASS],
attached_heaps: [null(); MAX_ATTACHED_MINIHEAPS_PER_CLASS],
attached_len: 0,
attached_cursor: 0,
}
}
#[inline(always)]
pub fn clear_attached(&mut self) {
self.attached_len = 0;
self.attached_cursor = 0;
self.shuffle.clear();
let mut index = 0usize;
while index < MAX_ATTACHED_MINIHEAPS_PER_CLASS {
self.attached_ids[index] = MiniHeapId::new(0);
self.attached_heaps[index] = null();
index += 1;
}
}
#[inline(always)]
pub fn attached_full(&self) -> bool {
self.attached_len as usize == MAX_ATTACHED_MINIHEAPS_PER_CLASS
}
#[inline(always)]
pub fn push_attached(&mut self, id: MiniHeapId, heap: *const MiniHeap) -> Option<u8> {
if self.attached_full() {
return None;
}
let index = self.attached_len as usize;
self.attached_ids[index] = id;
self.attached_heaps[index] = heap;
self.attached_len += 1;
Some(index as u8)
}
#[inline(always)]
pub fn find_attached(&self, id: MiniHeapId) -> Option<u8> {
let len = self.attached_len as usize;
let mut i = 0usize;
while i < len {
if self.attached_ids[i] == id {
return Some(i as u8);
}
i += 1;
}
None
}
#[inline(always)]
pub fn heap_at(&self, index: usize) -> Option<&MiniHeap> {
if index >= self.attached_len as usize {
return None;
}
let heap = self.attached_heaps[index];
if heap.is_null() {
return None;
}
unsafe { Some(&*heap) }
}
}
#[derive(Debug)]
pub struct ThreadLocalHeap {
thread_id: u32,
classes: [ClassState; NUM_SIZE_CLASSES],
}
impl ThreadLocalHeap {
pub fn new() -> raw_sys::Result<Self> {
let thread_id = platform::gettid()?;
let mut seed_rng = Mwc::from_os_seed()?;
let classes = array::from_fn(|class| {
let object_size = byte_size_for_class(class as u8).max(MIN_OBJECT_SIZE);
let seed1 = seed_rng.next_u32() as u64 + 1;
let seed2 = seed_rng.next_u32() as u64 + 1;
ClassState::new(object_size, seed1, seed2)
});
Ok(Self { thread_id, classes })
}
#[inline(always)]
pub const fn thread_id(&self) -> u32 {
self.thread_id
}
#[inline(always)]
pub(crate) fn class(&self, class: u8) -> &ClassState {
&self.classes[class as usize]
}
#[inline(always)]
pub(crate) fn class_mut(&mut self, class: u8) -> &mut ClassState {
&mut self.classes[class as usize]
}
}

View File

@@ -0,0 +1,4 @@
pub mod mesh_alloc;
pub mod reactor;
pub mod runtime;
pub(crate) mod uring;

View File

@@ -0,0 +1,340 @@
use std::cell::Cell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::io;
use std::os::fd::RawFd;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use super::uring::{IORING_OP_ASYNC_CANCEL, IoUring, IoUringCqe, IoUringSqe};
const WAKE_TARGET_TOKEN: u64 = 1;
const TOKEN_KIND_SHIFT: u64 = 56;
const TOKEN_KIND_MASK: u64 = 0xff << TOKEN_KIND_SHIFT;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum CompletionKind {
Timer = 1,
TimerRemove = 2,
NotifySend = 3,
Operation = 4,
OperationCancel = 5,
}
type CompletionHandler = Box<dyn FnOnce(IoUringCqe) + Send + 'static>;
struct NotifierInner {
ring_fd: RawFd,
closed: AtomicBool,
}
impl NotifierInner {
fn notify(&self) -> io::Result<()> {
if self.closed.load(Ordering::Acquire) {
return Err(io::Error::new(
io::ErrorKind::BrokenPipe,
"target runtime ring is closed",
));
}
IoUring::with_submitter(|ring| {
ring.submit_msg_ring(
self.ring_fd,
WAKE_TARGET_TOKEN,
1,
make_token(CompletionKind::NotifySend, 0),
)
})
}
}
#[derive(Clone)]
pub struct ThreadNotifier {
inner: Arc<NotifierInner>,
}
impl ThreadNotifier {
pub fn notify(&self) -> io::Result<()> {
self.inner.notify()
}
}
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct ReadyEvents {
pub timer: bool,
pub wake: bool,
}
pub struct Reactor {
ring: IoUring,
notifier: Arc<NotifierInner>,
next_token: Cell<u64>,
active_timer_token: Cell<Option<u64>>,
pending_wakes: Cell<u64>,
pending_timers: Cell<u64>,
completions: RefCell<BTreeMap<u64, CompletionHandler>>,
}
pub fn create() -> io::Result<(Reactor, ThreadNotifier)> {
create_reactor()
}
pub fn create_reactor() -> io::Result<(Reactor, ThreadNotifier)> {
let ring = IoUring::new(64)?;
let notifier = Arc::new(NotifierInner {
ring_fd: ring.ring_fd(),
closed: AtomicBool::new(false),
});
Ok((
Reactor {
ring,
notifier: Arc::clone(&notifier),
next_token: Cell::new(1),
active_timer_token: Cell::new(None),
pending_wakes: Cell::new(0),
pending_timers: Cell::new(0),
completions: RefCell::new(BTreeMap::new()),
},
ThreadNotifier { inner: notifier },
))
}
impl Reactor {
pub(crate) fn bind_current_thread(&self) {
self.ring.bind_current_thread();
}
pub(crate) fn unbind_current_thread(&self) {
self.ring.unbind_current_thread();
}
pub fn poll(&self) -> io::Result<Option<ReadyEvents>> {
let mut ready = ReadyEvents::default();
let saw_any = self
.ring
.drain_completions(|cqe| self.process_cqe(cqe, &mut ready));
if saw_any { Ok(Some(ready)) } else { Ok(None) }
}
pub fn wait(&self) -> io::Result<()> {
self.ring.wait_for_cqe()
}
pub fn rearm_timer(&self, deadline: Option<Duration>) -> io::Result<()> {
match (self.active_timer_token.get(), deadline) {
(Some(active), Some(deadline)) => {
self.ring.submit_timeout_update(active, deadline)?;
}
(Some(active), None) => {
self.active_timer_token.set(None);
self.ring
.submit_timeout_remove(active, self.next_token(CompletionKind::TimerRemove))?;
}
(None, Some(deadline)) => {
let token = self.next_token(CompletionKind::Timer);
self.active_timer_token.set(Some(token));
self.ring.submit_timeout(token, deadline)?;
}
(None, None) => {}
}
Ok(())
}
pub(crate) fn submit_operation(
&self,
fill: impl FnOnce(&mut IoUringSqe),
on_complete: impl FnOnce(IoUringCqe) + Send + 'static,
) -> io::Result<u64> {
let token = self.next_token(CompletionKind::Operation);
self.completions
.borrow_mut()
.insert(token, Box::new(on_complete));
if let Err(error) = self.ring.submit_with_token(token, fill) {
let _ = self.completions.borrow_mut().remove(&token);
return Err(error);
}
Ok(token)
}
pub(crate) fn cancel_operation(&self, token: u64) -> io::Result<()> {
self.ring
.submit_with_token(self.next_token(CompletionKind::OperationCancel), |sqe| {
sqe.opcode = IORING_OP_ASYNC_CANCEL;
sqe.fd = -1;
sqe.addr = token;
})
}
pub fn drain_wake(&self) -> io::Result<u64> {
let wakes = self.pending_wakes.replace(0);
if wakes == 0 {
Err(io::Error::new(
io::ErrorKind::WouldBlock,
"no wake completions are pending",
))
} else {
Ok(wakes)
}
}
pub fn drain_timer(&self) -> io::Result<u64> {
let timers = self.pending_timers.replace(0);
if timers == 0 {
Err(io::Error::new(
io::ErrorKind::WouldBlock,
"no timer completions are pending",
))
} else {
Ok(timers)
}
}
fn process_cqe(&self, cqe: IoUringCqe, ready: &mut ReadyEvents) {
if cqe.user_data == WAKE_TARGET_TOKEN {
ready.wake = true;
let wakes = cqe.res.max(1) as u64;
self.pending_wakes
.set(self.pending_wakes.get().saturating_add(wakes));
return;
}
match decode_token_kind(cqe.user_data) {
Some(CompletionKind::Timer) => {
if self.active_timer_token.get() == Some(cqe.user_data) {
self.active_timer_token.set(None);
}
if cqe.res == -libc::ETIME {
ready.timer = true;
self.pending_timers
.set(self.pending_timers.get().saturating_add(1));
}
}
Some(CompletionKind::Operation) => {
if let Some(callback) = self.completions.borrow_mut().remove(&cqe.user_data) {
callback(cqe);
}
}
Some(CompletionKind::TimerRemove)
| Some(CompletionKind::NotifySend)
| Some(CompletionKind::OperationCancel)
| None => {}
}
}
fn next_token(&self, kind: CompletionKind) -> u64 {
let seq = self.next_token.get();
self.next_token.set(seq.wrapping_add(1));
make_token(kind, seq)
}
}
impl Drop for Reactor {
fn drop(&mut self) {
self.notifier.closed.store(true, Ordering::Release);
}
}
pub fn monotonic_now() -> io::Result<Duration> {
let mut now = std::mem::MaybeUninit::<libc::timespec>::uninit();
let result = unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, now.as_mut_ptr()) };
if result == -1 {
return Err(io::Error::last_os_error());
}
let now = unsafe { now.assume_init() };
Ok(Duration::new(now.tv_sec as u64, now.tv_nsec as u32))
}
fn make_token(kind: CompletionKind, seq: u64) -> u64 {
((kind as u64) << TOKEN_KIND_SHIFT) | (seq & !TOKEN_KIND_MASK)
}
fn decode_token_kind(token: u64) -> Option<CompletionKind> {
match ((token & TOKEN_KIND_MASK) >> TOKEN_KIND_SHIFT) as u8 {
1 => Some(CompletionKind::Timer),
2 => Some(CompletionKind::TimerRemove),
3 => Some(CompletionKind::NotifySend),
4 => Some(CompletionKind::Operation),
5 => Some(CompletionKind::OperationCancel),
_ => None,
}
}
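// Token layout sketch: the top byte of `user_data` carries the CompletionKind and
// the low 56 bits carry a wrapping sequence number, so for example
// `make_token(CompletionKind::Timer, 7)` is `(1 << 56) | 7` and
// `decode_token_kind` recovers `CompletionKind::Timer` from it.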
#[cfg(test)]
mod tests {
use super::{create_reactor, monotonic_now};
use std::thread;
use std::time::Duration;
#[test]
fn notifier_wakes_target_ring() {
let (sender, _) = create_reactor().expect("sender reactor should initialize");
sender.bind_current_thread();
let (target, notifier) = create_reactor().expect("target reactor should initialize");
notifier.notify().expect("notify should succeed");
let ready = loop {
if let Some(ready) = target.poll().expect("poll should succeed") {
break ready;
}
thread::sleep(Duration::from_millis(1));
};
assert!(ready.wake);
assert!(!ready.timer);
assert_eq!(target.drain_wake().expect("wake drain should succeed"), 1);
sender.unbind_current_thread();
}
#[test]
fn notifier_wakes_target_ring_from_plain_thread() {
let (target, notifier) = create_reactor().expect("target reactor should initialize");
thread::spawn(move || {
notifier.notify().expect("notify should succeed");
})
.join()
.expect("notifier thread should exit cleanly");
let ready = loop {
if let Some(ready) = target.poll().expect("poll should succeed") {
break ready;
}
thread::sleep(Duration::from_millis(1));
};
assert!(ready.wake);
assert!(!ready.timer);
assert_eq!(target.drain_wake().expect("wake drain should succeed"), 1);
}
#[test]
fn timeout_reports_deadlines() {
let (reactor, _notifier) = create_reactor().expect("reactor should initialize");
let deadline = monotonic_now().expect("clock should work") + Duration::from_millis(20);
reactor
.rearm_timer(Some(deadline))
.expect("timer should arm");
let ready = loop {
if let Some(ready) = reactor.poll().expect("poll should succeed") {
break ready;
}
thread::sleep(Duration::from_millis(5));
};
assert!(ready.timer);
assert!(!ready.wake);
assert_eq!(
reactor.drain_timer().expect("timer drain should succeed"),
1
);
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,478 @@
use std::cell::Cell;
use std::io;
use std::os::fd::RawFd;
use std::ptr;
use std::sync::atomic::{Ordering, compiler_fence};
use std::sync::{Mutex, OnceLock};
use std::time::Duration;
const IORING_OFF_SQ_RING: libc::off_t = 0;
const IORING_OFF_CQ_RING: libc::off_t = 0x0800_0000;
const IORING_OFF_SQES: libc::off_t = 0x1000_0000;
const IORING_ENTER_GETEVENTS: u32 = 1 << 0;
const IORING_SETUP_CLAMP: u32 = 1 << 4;
const IORING_FEAT_SINGLE_MMAP: u32 = 1 << 0;
pub(crate) const IORING_OP_FSYNC: u8 = 3;
pub(crate) const IORING_OP_TIMEOUT: u8 = 11;
pub(crate) const IORING_OP_TIMEOUT_REMOVE: u8 = 12;
pub(crate) const IORING_OP_ACCEPT: u8 = 13;
pub(crate) const IORING_OP_ASYNC_CANCEL: u8 = 14;
pub(crate) const IORING_OP_CONNECT: u8 = 16;
pub(crate) const IORING_OP_OPENAT: u8 = 18;
pub(crate) const IORING_OP_CLOSE: u8 = 19;
pub(crate) const IORING_OP_STATX: u8 = 21;
pub(crate) const IORING_OP_READ: u8 = 22;
pub(crate) const IORING_OP_WRITE: u8 = 23;
pub(crate) const IORING_OP_SEND: u8 = 26;
pub(crate) const IORING_OP_RECV: u8 = 27;
pub(crate) const IORING_OP_SHUTDOWN: u8 = 34;
pub(crate) const IORING_OP_RENAMEAT: u8 = 35;
pub(crate) const IORING_OP_UNLINKAT: u8 = 36;
pub(crate) const IORING_OP_MKDIRAT: u8 = 37;
pub(crate) const IORING_OP_MSG_RING: u8 = 40;
pub(crate) const IORING_OP_SOCKET: u8 = 45;
pub(crate) const IORING_OP_FTRUNCATE: u8 = 55;
pub(crate) const IORING_OP_BIND: u8 = 56;
pub(crate) const IORING_OP_LISTEN: u8 = 57;
const IORING_MSG_DATA: u64 = 0;
pub(crate) const IORING_FSYNC_DATASYNC: u32 = 1 << 0;
pub(crate) const IORING_TIMEOUT_ABS: u32 = 1 << 0;
pub(crate) const IORING_TIMEOUT_UPDATE: u32 = 1 << 1;
pub(crate) const IOSQE_CQE_SKIP_SUCCESS: u8 = 1 << 6;
thread_local! {
static CURRENT_SUBMITTER: Cell<*const IoUring> = const { Cell::new(ptr::null()) };
}
static GLOBAL_SUBMITTER: OnceLock<Mutex<Option<IoUring>>> = OnceLock::new();
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoSqringOffsets {
head: u32,
tail: u32,
ring_mask: u32,
ring_entries: u32,
flags: u32,
dropped: u32,
array: u32,
resv1: u32,
user_addr: u64,
}
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoCqringOffsets {
head: u32,
tail: u32,
ring_mask: u32,
ring_entries: u32,
overflow: u32,
cqes: u32,
flags: u32,
resv1: u32,
user_addr: u64,
}
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoUringParams {
sq_entries: u32,
cq_entries: u32,
flags: u32,
sq_thread_cpu: u32,
sq_thread_idle: u32,
features: u32,
wq_fd: u32,
resv: [u32; 3],
sq_off: IoSqringOffsets,
cq_off: IoCqringOffsets,
}
#[repr(C)]
#[derive(Default, Clone, Copy)]
pub(crate) struct IoUringSqe {
pub(crate) opcode: u8,
pub(crate) flags: u8,
pub(crate) ioprio: u16,
pub(crate) fd: i32,
pub(crate) off: u64,
pub(crate) addr: u64,
pub(crate) len: u32,
pub(crate) op_flags: u32,
pub(crate) user_data: u64,
pub(crate) buf_index: u16,
pub(crate) personality: u16,
pub(crate) file_index: i32,
pub(crate) pad2: [u64; 2],
}
#[repr(C)]
#[derive(Default, Clone, Copy)]
pub(crate) struct IoUringCqe {
pub(crate) user_data: u64,
pub(crate) res: i32,
pub(crate) flags: u32,
}
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct KernelTimespec {
tv_sec: i64,
tv_nsec: i64,
}
pub(crate) struct IoUring {
ring_fd: RawFd,
sq_ring_ptr: *mut u8,
cq_ring_ptr: *mut u8,
sqes_ptr: *mut IoUringSqe,
sq_ring_size: usize,
cq_ring_size: usize,
sqes_size: usize,
single_mmap: bool,
sq_head: *mut u32,
sq_tail: *mut u32,
sq_ring_mask: *mut u32,
sq_ring_entries: *mut u32,
sq_array: *mut u32,
cq_head: *mut u32,
cq_tail: *mut u32,
cq_ring_mask: *mut u32,
cqes: *mut IoUringCqe,
}
impl IoUring {
pub(crate) fn new(entries: u32) -> io::Result<Self> {
let mut params = IoUringParams {
flags: IORING_SETUP_CLAMP,
..IoUringParams::default()
};
let ring_fd = cvt_long(unsafe {
libc::syscall(
libc::SYS_io_uring_setup,
entries as libc::c_uint,
&mut params as *mut IoUringParams,
)
})? as RawFd;
let sq_ring_size =
params.sq_off.array as usize + params.sq_entries as usize * std::mem::size_of::<u32>();
let cq_ring_size = params.cq_off.cqes as usize
+ params.cq_entries as usize * std::mem::size_of::<IoUringCqe>();
let single_mmap = params.features & IORING_FEAT_SINGLE_MMAP != 0;
let sq_ring_ptr = mmap_ring(
if single_mmap {
sq_ring_size.max(cq_ring_size)
} else {
sq_ring_size
},
ring_fd,
IORING_OFF_SQ_RING,
)?;
let cq_ring_ptr = if single_mmap {
sq_ring_ptr
} else {
mmap_ring(cq_ring_size, ring_fd, IORING_OFF_CQ_RING)?
};
let sqes_size = params.sq_entries as usize * std::mem::size_of::<IoUringSqe>();
let sqes_ptr = mmap_ring(sqes_size, ring_fd, IORING_OFF_SQES)? as *mut IoUringSqe;
Ok(Self {
ring_fd,
sq_ring_ptr,
cq_ring_ptr,
sqes_ptr,
sq_ring_size,
cq_ring_size,
sqes_size,
single_mmap,
sq_head: offset_ptr(sq_ring_ptr, params.sq_off.head),
sq_tail: offset_ptr(sq_ring_ptr, params.sq_off.tail),
sq_ring_mask: offset_ptr(sq_ring_ptr, params.sq_off.ring_mask),
sq_ring_entries: offset_ptr(sq_ring_ptr, params.sq_off.ring_entries),
sq_array: offset_ptr(sq_ring_ptr, params.sq_off.array),
cq_head: offset_ptr(cq_ring_ptr, params.cq_off.head),
cq_tail: offset_ptr(cq_ring_ptr, params.cq_off.tail),
cq_ring_mask: offset_ptr(cq_ring_ptr, params.cq_off.ring_mask),
cqes: offset_ptr(cq_ring_ptr, params.cq_off.cqes),
})
}
pub(crate) fn ring_fd(&self) -> RawFd {
self.ring_fd
}
pub(crate) fn bind_current_thread(&self) {
CURRENT_SUBMITTER.with(|submitter| submitter.set(self as *const Self));
}
pub(crate) fn unbind_current_thread(&self) {
CURRENT_SUBMITTER.with(|submitter| {
if ptr::eq(submitter.get(), self) {
submitter.set(ptr::null());
}
});
}
pub(crate) fn with_submitter<T>(f: impl FnOnce(&IoUring) -> io::Result<T>) -> io::Result<T> {
CURRENT_SUBMITTER.with(|submitter| {
let ptr = submitter.get();
if !ptr.is_null() {
let ring = unsafe { &*ptr };
return f(ring);
}
let mut ring = global_submitter()
.lock()
.expect("global io_uring submitter should not be poisoned");
if ring.is_none() {
*ring = Some(IoUring::new(64)?);
}
f(ring
.as_ref()
.expect("global submitter ring should initialize"))
})
}
pub(crate) fn submit_timeout(&self, token: u64, deadline: Duration) -> io::Result<()> {
let timespec = duration_to_kernel_timespec(deadline);
self.push_sqe(|sqe| {
sqe.opcode = IORING_OP_TIMEOUT;
sqe.fd = -1;
sqe.off = 0;
sqe.user_data = token;
sqe.addr = (&timespec as *const KernelTimespec) as u64;
sqe.len = 1;
sqe.op_flags = IORING_TIMEOUT_ABS;
})?;
self.submit_pending().map(|_| ())
}
pub(crate) fn submit_timeout_remove(
&self,
token_to_remove: u64,
completion: u64,
) -> io::Result<()> {
self.push_sqe(|sqe| {
sqe.opcode = IORING_OP_TIMEOUT_REMOVE;
sqe.fd = -1;
sqe.flags = IOSQE_CQE_SKIP_SUCCESS;
sqe.user_data = completion;
sqe.addr = token_to_remove;
})?;
self.submit_pending().map(|_| ())
}
pub(crate) fn submit_timeout_update(
&self,
token_to_update: u64,
deadline: Duration,
) -> io::Result<()> {
let timespec = duration_to_kernel_timespec(deadline);
self.push_sqe(|sqe| {
sqe.opcode = IORING_OP_TIMEOUT_REMOVE;
sqe.fd = -1;
sqe.off = (&timespec as *const KernelTimespec) as u64;
sqe.addr = token_to_update;
sqe.op_flags = IORING_TIMEOUT_UPDATE | IORING_TIMEOUT_ABS;
})?;
self.submit_pending().map(|_| ())
}
pub(crate) fn submit_msg_ring(
&self,
target_ring_fd: RawFd,
target_user_data: u64,
value: u32,
completion: u64,
) -> io::Result<()> {
self.push_sqe(|sqe| {
sqe.opcode = IORING_OP_MSG_RING;
sqe.flags = IOSQE_CQE_SKIP_SUCCESS;
sqe.fd = target_ring_fd;
sqe.off = target_user_data;
sqe.addr = IORING_MSG_DATA;
sqe.len = value;
sqe.user_data = completion;
})?;
self.submit_pending().map(|_| ())
}
pub(crate) fn submit_with_token(
&self,
token: u64,
fill: impl FnOnce(&mut IoUringSqe),
) -> io::Result<()> {
self.push_sqe(|sqe| {
fill(sqe);
sqe.user_data = token;
})?;
self.submit_pending().map(|_| ())
}
pub(crate) fn drain_completions(&self, mut f: impl FnMut(IoUringCqe)) -> bool {
let mut head = load_u32(self.cq_head);
let tail = load_u32(self.cq_tail);
if head == tail {
return false;
}
let mask = load_u32(self.cq_ring_mask);
while head != tail {
let index = (head & mask) as usize;
let cqe = unsafe { ptr::read_volatile(self.cqes.add(index)) };
f(cqe);
head = head.wrapping_add(1);
}
store_u32(self.cq_head, head);
true
}
pub(crate) fn wait_for_cqe(&self) -> io::Result<()> {
loop {
match self.enter(0, 1, IORING_ENTER_GETEVENTS) {
Ok(_) => return Ok(()),
Err(error) if error.kind() == io::ErrorKind::Interrupted => continue,
Err(error) => return Err(error),
}
}
}
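    // Submission flow sketch: push_sqe claims the next SQ slot, fills it, writes the
    // slot index into the SQ index array, and publishes the new tail behind a release
    // fence; submit_pending then calls io_uring_enter(2) with the number of queued
    // entries. Completions are drained from the CQ ring by drain_completions, which
    // advances cq_head only after copying the entries out.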
fn push_sqe(&self, fill: impl FnOnce(&mut IoUringSqe)) -> io::Result<()> {
let head = load_u32(self.sq_head);
let tail = load_u32(self.sq_tail);
let entries = load_u32(self.sq_ring_entries);
if tail.wrapping_sub(head) >= entries {
self.submit_pending()?;
let head = load_u32(self.sq_head);
let tail = load_u32(self.sq_tail);
if tail.wrapping_sub(head) >= entries {
return Err(io::Error::new(
io::ErrorKind::WouldBlock,
"io_uring submission queue is full",
));
}
}
let tail = load_u32(self.sq_tail);
let mask = load_u32(self.sq_ring_mask);
let index = (tail & mask) as usize;
let sqe = unsafe { &mut *self.sqes_ptr.add(index) };
*sqe = IoUringSqe::default();
fill(sqe);
unsafe {
ptr::write_volatile(self.sq_array.add(index), index as u32);
}
compiler_fence(Ordering::Release);
store_u32(self.sq_tail, tail.wrapping_add(1));
Ok(())
}
fn submit_pending(&self) -> io::Result<u32> {
let head = load_u32(self.sq_head);
let tail = load_u32(self.sq_tail);
let to_submit = tail.wrapping_sub(head);
if to_submit == 0 {
return Ok(0);
}
self.enter(to_submit, 0, 0)
}
fn enter(&self, to_submit: u32, min_complete: u32, flags: u32) -> io::Result<u32> {
cvt_long(unsafe {
libc::syscall(
libc::SYS_io_uring_enter,
self.ring_fd,
to_submit as libc::c_uint,
min_complete as libc::c_uint,
flags as libc::c_uint,
ptr::null::<libc::c_void>(),
0usize,
)
})
.map(|value| value as u32)
}
}
impl Drop for IoUring {
fn drop(&mut self) {
unsafe {
libc::munmap(self.sqes_ptr.cast(), self.sqes_size);
if self.single_mmap {
libc::munmap(
self.sq_ring_ptr.cast(),
self.sq_ring_size.max(self.cq_ring_size),
);
} else {
libc::munmap(self.sq_ring_ptr.cast(), self.sq_ring_size);
libc::munmap(self.cq_ring_ptr.cast(), self.cq_ring_size);
}
libc::close(self.ring_fd);
}
}
}
unsafe impl Send for IoUring {}
fn offset_ptr<T>(base: *mut u8, offset: u32) -> *mut T {
unsafe { base.add(offset as usize).cast::<T>() }
}
fn mmap_ring(length: usize, fd: RawFd, offset: libc::off_t) -> io::Result<*mut u8> {
let ptr = unsafe {
libc::mmap(
ptr::null_mut(),
length,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_POPULATE,
fd,
offset,
)
};
if ptr == libc::MAP_FAILED {
Err(io::Error::last_os_error())
} else {
Ok(ptr.cast())
}
}
fn load_u32(ptr: *const u32) -> u32 {
let value = unsafe { ptr::read_volatile(ptr) };
compiler_fence(Ordering::Acquire);
value
}
fn store_u32(ptr: *mut u32, value: u32) {
compiler_fence(Ordering::Release);
unsafe {
ptr::write_volatile(ptr, value);
}
}
fn cvt_long(result: libc::c_long) -> io::Result<libc::c_long> {
if result == -1 {
Err(io::Error::last_os_error())
} else {
Ok(result)
}
}
fn global_submitter() -> &'static Mutex<Option<IoUring>> {
GLOBAL_SUBMITTER.get_or_init(|| Mutex::new(None))
}
fn duration_to_kernel_timespec(duration: Duration) -> KernelTimespec {
KernelTimespec {
tv_sec: duration.as_secs() as i64,
tv_nsec: duration.subsec_nanos() as i64,
}
}

View File

@@ -0,0 +1,2 @@
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub mod linux_x86_64;

View File

@@ -0,0 +1,10 @@
//! Linux channel wake helpers.
use crate::op::completion::{CompletionFuture, CompletionHandle, completion};
use crate::platform::linux_x86_64::runtime::try_current_thread_handle;
pub(crate) fn runtime_waiter<T: Send + 'static>() -> (CompletionFuture<T>, CompletionHandle<T>) {
let owner = try_current_thread_handle()
.expect("async channel operations must be polled on a runtime thread");
completion(owner)
}

View File

@@ -0,0 +1,586 @@
//! Linux filesystem backend.
use std::collections::VecDeque;
use std::ffi::CString;
use std::future::poll_fn;
use std::io;
use std::mem::MaybeUninit;
use std::os::fd::{FromRawFd, OwnedFd, RawFd};
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll, Waker};
use std::thread;
use crate::op::completion::completion_for_current_thread;
use crate::op::fs::{FileType, FsOp, MetadataTarget, OpenOptions, RawDirEntry, RawMetadata};
use crate::platform::linux_x86_64::runtime::{
ThreadHandle, current_thread_handle, with_current_reactor,
};
use crate::platform::linux_x86_64::uring::{
IORING_FSYNC_DATASYNC, IORING_OP_CLOSE, IORING_OP_FSYNC, IORING_OP_FTRUNCATE,
IORING_OP_MKDIRAT, IORING_OP_OPENAT, IORING_OP_READ, IORING_OP_RENAMEAT, IORING_OP_STATX,
IORING_OP_UNLINKAT, IORING_OP_WRITE, IoUringCqe,
};
const STATX_BASIC_MASK: u32 =
libc::STATX_TYPE | libc::STATX_MODE | libc::STATX_SIZE | libc::STATX_NLINK;
const FILE_CURSOR: u64 = u64::MAX;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExecutionPath {
IoUring,
Offload,
}
pub fn execution_path(op: &FsOp) -> ExecutionPath {
match op {
FsOp::ReadDir { .. } | FsOp::Duplicate { .. } => ExecutionPath::Offload,
FsOp::Open { .. }
| FsOp::Read { .. }
| FsOp::Write { .. }
| FsOp::Metadata { .. }
| FsOp::SetLen { .. }
| FsOp::SyncAll { .. }
| FsOp::SyncData { .. }
| FsOp::CreateDir { .. }
| FsOp::RemoveFile { .. }
| FsOp::RemoveDir { .. }
| FsOp::Rename { .. }
| FsOp::Close { .. } => ExecutionPath::IoUring,
}
}
pub async fn open(op: FsOp) -> io::Result<OwnedFd> {
let FsOp::Open { path, options } = op else {
unreachable!("open backend called with non-open op");
};
let path = path_to_c_string(&path)?;
let path_ptr = path.as_ptr();
let (flags, mode) = open_flags(&options)?;
submit_uring::<OwnedFd, _>(
move |sqe| {
sqe.opcode = IORING_OP_OPENAT;
sqe.fd = libc::AT_FDCWD;
sqe.addr = path_ptr as u64;
sqe.len = mode;
sqe.op_flags = flags as u32;
},
move |cqe| {
let _path = path;
cqe_to_result(cqe).map(|fd| unsafe { OwnedFd::from_raw_fd(fd as RawFd) })
},
)
.await
}
pub async fn read(op: FsOp) -> io::Result<Vec<u8>> {
let FsOp::Read { fd, offset, len } = op else {
unreachable!("read backend called with non-read op");
};
let mut buffer = vec![0; len];
let buffer_ptr = buffer.as_mut_ptr();
let buffer_len = buffer.len();
submit_uring::<Vec<u8>, _>(
move |sqe| {
sqe.opcode = IORING_OP_READ;
sqe.fd = fd;
sqe.addr = buffer_ptr as u64;
sqe.len = buffer_len as u32;
sqe.off = offset.unwrap_or(FILE_CURSOR);
},
move |cqe| {
let read = cqe_to_result(cqe)? as usize;
buffer.truncate(read);
Ok(buffer)
},
)
.await
}
pub async fn write(op: FsOp) -> io::Result<usize> {
let FsOp::Write { fd, offset, data } = op else {
unreachable!("write backend called with non-write op");
};
let data_ptr = data.as_ptr();
let data_len = data.len();
submit_uring::<usize, _>(
move |sqe| {
sqe.opcode = IORING_OP_WRITE;
sqe.fd = fd;
sqe.addr = data_ptr as u64;
sqe.len = data_len as u32;
sqe.off = offset.unwrap_or(FILE_CURSOR);
},
move |cqe| {
let _data = data;
cqe_to_result(cqe).map(|written| written as usize)
},
)
.await
}
pub async fn metadata(op: FsOp) -> io::Result<RawMetadata> {
let FsOp::Metadata {
target,
follow_symlinks,
} = op
else {
unreachable!("metadata backend called with non-metadata op");
};
let mut statx = Box::new(MaybeUninit::<libc::statx>::zeroed());
let statx_ptr = statx.as_mut_ptr();
let (fd, path, flags) = match target {
MetadataTarget::Path(path) => (
libc::AT_FDCWD,
path_to_c_string(&path)?,
metadata_flags(follow_symlinks),
),
MetadataTarget::File(fd) => (
fd,
CString::new(Vec::<u8>::new()).expect("empty statx path should be valid"),
libc::AT_EMPTY_PATH,
),
};
let path_ptr = path.as_ptr();
submit_uring::<RawMetadata, _>(
move |sqe| {
sqe.opcode = IORING_OP_STATX;
sqe.fd = fd;
sqe.addr = path_ptr as u64;
sqe.len = STATX_BASIC_MASK;
sqe.off = statx_ptr as u64;
sqe.op_flags = flags as u32;
},
move |cqe| {
let _path = path;
cqe_to_result(cqe)?;
let statx = unsafe { statx.assume_init() };
Ok(raw_metadata_from_statx(&statx))
},
)
.await
}
pub async fn sync_all(op: FsOp) -> io::Result<()> {
let FsOp::SyncAll { fd } = op else {
unreachable!("sync_all backend called with non-sync_all op");
};
submit_sync(fd, 0).await
}
pub async fn sync_data(op: FsOp) -> io::Result<()> {
let FsOp::SyncData { fd } = op else {
unreachable!("sync_data backend called with non-sync_data op");
};
submit_sync(fd, IORING_FSYNC_DATASYNC).await
}
pub async fn set_len(op: FsOp) -> io::Result<()> {
let FsOp::SetLen { fd, len } = op else {
unreachable!("set_len backend called with non-set_len op");
};
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_FTRUNCATE;
sqe.fd = fd;
sqe.off = len;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
}
pub async fn try_clone(op: FsOp) -> io::Result<OwnedFd> {
let FsOp::Duplicate { fd } = op else {
unreachable!("try_clone backend called with non-duplicate op");
};
offload(move || {
let duplicated = cvt(unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 0) })?;
Ok(unsafe { OwnedFd::from_raw_fd(duplicated) })
})
.await
}
pub async fn create_dir(op: FsOp) -> io::Result<()> {
let FsOp::CreateDir {
path,
recursive: _,
mode,
} = op
else {
unreachable!("create_dir backend called with non-create_dir op");
};
let path = path_to_c_string(&path)?;
let path_ptr = path.as_ptr();
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_MKDIRAT;
sqe.fd = libc::AT_FDCWD;
sqe.addr = path_ptr as u64;
sqe.len = mode;
},
move |cqe| {
let _path = path;
cqe_to_result(cqe).map(|_| ())
},
)
.await
}
pub async fn remove_file(op: FsOp) -> io::Result<()> {
let FsOp::RemoveFile { path } = op else {
unreachable!("remove_file backend called with non-remove_file op");
};
submit_unlink(path, 0).await
}
pub async fn remove_dir(op: FsOp) -> io::Result<()> {
let FsOp::RemoveDir { path } = op else {
unreachable!("remove_dir backend called with non-remove_dir op");
};
submit_unlink(path, libc::AT_REMOVEDIR).await
}
pub async fn rename(op: FsOp) -> io::Result<()> {
let FsOp::Rename { from, to } = op else {
unreachable!("rename backend called with non-rename op");
};
let from = path_to_c_string(&from)?;
let to = path_to_c_string(&to)?;
let from_ptr = from.as_ptr();
let to_ptr = to.as_ptr();
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_RENAMEAT;
sqe.fd = libc::AT_FDCWD;
sqe.addr = from_ptr as u64;
sqe.len = libc::AT_FDCWD as u32;
sqe.off = to_ptr as u64;
sqe.op_flags = 0;
},
move |cqe| {
let _from = from;
let _to = to;
cqe_to_result(cqe).map(|_| ())
},
)
.await
}
pub async fn close(op: FsOp) -> io::Result<()> {
let FsOp::Close { fd } = op else {
unreachable!("close backend called with non-close op");
};
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_CLOSE;
sqe.fd = fd;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
}
pub fn read_dir(op: FsOp) -> io::Result<ReadDirStream> {
let FsOp::ReadDir { path } = op else {
unreachable!("read_dir backend called with non-read_dir op");
};
ReadDirStream::new(path)
}
pub struct ReadDirStream {
state: Arc<ReadDirState>,
}
impl ReadDirStream {
fn new(path: PathBuf) -> io::Result<Self> {
let state = Arc::new(ReadDirState::new(current_thread_handle()));
let producer = Arc::clone(&state);
thread::Builder::new()
.name("ruin-runtime-read-dir".into())
.spawn(move || produce_dir_entries(path, producer))
.map_err(io::Error::other)?;
Ok(Self { state })
}
pub async fn next_entry(&mut self) -> io::Result<Option<RawDirEntry>> {
poll_fn(|cx| self.state.poll_next(cx)).await
}
}
struct ReadDirState {
owner: ThreadHandle,
queue: Mutex<VecDeque<io::Result<RawDirEntry>>>,
done: AtomicBool,
pending: AtomicBool,
wake_queued: AtomicBool,
waker: Mutex<Option<Waker>>,
}
impl ReadDirState {
fn new(owner: ThreadHandle) -> Self {
owner.begin_async_operation();
Self {
owner,
queue: Mutex::new(VecDeque::new()),
done: AtomicBool::new(false),
pending: AtomicBool::new(true),
wake_queued: AtomicBool::new(false),
waker: Mutex::new(None),
}
}
fn push(self: &Arc<Self>, entry: io::Result<RawDirEntry>) {
self.queue.lock().unwrap().push_back(entry);
self.notify();
}
fn finish(self: &Arc<Self>) {
self.done.store(true, Ordering::Release);
self.release_pending();
self.notify();
}
fn release_pending(&self) {
if self.pending.swap(false, Ordering::AcqRel) {
self.owner.finish_async_operation();
}
}
    fn notify(self: &Arc<Self>) {
        // Coalesce wakeups: only one microtask is in flight at a time. If the owner
        // thread cannot accept the microtask, clear the flag so a later push can retry.
        if self.wake_queued.swap(true, Ordering::AcqRel) {
            return;
        }
        let state = Arc::clone(self);
        if !self.owner.queue_microtask(move || {
            state.wake_queued.store(false, Ordering::Release);
            if let Some(waker) = state.waker.lock().unwrap().take() {
                waker.wake();
            }
        }) {
            self.wake_queued.store(false, Ordering::Release);
        }
    }
    fn poll_next(&self, cx: &mut Context<'_>) -> Poll<io::Result<Option<RawDirEntry>>> {
        if let Some(entry) = self.queue.lock().unwrap().pop_front() {
            return Poll::Ready(entry.map(Some));
        }
        if self.done.load(Ordering::Acquire) {
            return Poll::Ready(Ok(None));
        }
        // Register the waker, then re-check the queue and the done flag to close the
        // race with a concurrent push() or finish() from the producer thread.
        *self.waker.lock().unwrap() = Some(cx.waker().clone());
        if let Some(entry) = self.queue.lock().unwrap().pop_front() {
            let _ = self.waker.lock().unwrap().take();
            return Poll::Ready(entry.map(Some));
        }
        if self.done.load(Ordering::Acquire) {
            let _ = self.waker.lock().unwrap().take();
            return Poll::Ready(Ok(None));
        }
        Poll::Pending
    }
}
impl Drop for ReadDirStream {
fn drop(&mut self) {
self.state.release_pending();
}
}
fn produce_dir_entries(path: PathBuf, state: Arc<ReadDirState>) {
match std::fs::read_dir(path) {
Ok(entries) => {
for entry in entries {
match entry {
Ok(entry) => {
let file_name = entry.file_name();
state.push(Ok(RawDirEntry {
path: entry.path(),
file_name,
}));
}
Err(error) => state.push(Err(error)),
}
}
}
Err(error) => state.push(Err(error)),
}
state.finish();
}
async fn submit_sync(fd: RawFd, flags: u32) -> io::Result<()> {
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_FSYNC;
sqe.fd = fd;
sqe.op_flags = flags;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
}
async fn submit_unlink(path: PathBuf, flags: i32) -> io::Result<()> {
let path = path_to_c_string(&path)?;
let path_ptr = path.as_ptr();
submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_UNLINKAT;
sqe.fd = libc::AT_FDCWD;
sqe.addr = path_ptr as u64;
sqe.op_flags = flags as u32;
},
move |cqe| {
let _path = path;
cqe_to_result(cqe).map(|_| ())
},
)
.await
}
async fn submit_uring<T: Send + 'static, M>(
fill: impl FnOnce(&mut crate::platform::linux_x86_64::uring::IoUringSqe),
map: M,
) -> io::Result<T>
where
M: FnOnce(IoUringCqe) -> io::Result<T> + Send + 'static,
{
let (future, handle) = completion_for_current_thread::<io::Result<T>>();
let callback_handle = handle.clone();
let token = with_current_reactor(|reactor| {
reactor.submit_operation(fill, move |cqe| {
callback_handle.complete(map(cqe));
})
})?;
handle.set_cancel(move || {
let _ = with_current_reactor(|reactor| reactor.cancel_operation(token));
});
future.await
}
async fn offload<T: Send + 'static>(
task: impl FnOnce() -> io::Result<T> + Send + 'static,
) -> io::Result<T> {
let (future, handle) = completion_for_current_thread::<io::Result<T>>();
thread::Builder::new()
.name("ruin-runtime-fs-offload".into())
.spawn(move || handle.complete(task()))
.map_err(io::Error::other)?;
future.await
}
fn path_to_c_string(path: &Path) -> io::Result<CString> {
CString::new(path.as_os_str().as_bytes()).map_err(|_| {
io::Error::new(
io::ErrorKind::InvalidInput,
"paths containing NUL bytes are not supported",
)
})
}
fn open_flags(options: &OpenOptions) -> io::Result<(i32, u32)> {
if !options.read && !options.write && !options.append {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"OpenOptions requires read, write, or append access",
));
}
let mut flags = if options.read {
if options.write || options.append {
libc::O_RDWR
} else {
libc::O_RDONLY
}
} else {
libc::O_WRONLY
};
if options.append {
flags |= libc::O_APPEND;
}
if options.truncate {
flags |= libc::O_TRUNC;
}
if options.create_new {
flags |= libc::O_CREAT | libc::O_EXCL;
} else if options.create {
flags |= libc::O_CREAT;
}
Ok((flags | libc::O_CLOEXEC, 0o666))
}
fn metadata_flags(follow_symlinks: bool) -> i32 {
let mut flags = libc::AT_NO_AUTOMOUNT;
if !follow_symlinks {
flags |= libc::AT_SYMLINK_NOFOLLOW;
}
flags
}
fn raw_metadata_from_statx(statx: &libc::statx) -> RawMetadata {
RawMetadata {
file_type: file_type_from_mode(statx.stx_mode),
mode: statx.stx_mode,
len: statx.stx_size,
}
}
fn file_type_from_mode(mode: u16) -> FileType {
match mode & libc::S_IFMT as u16 {
value if value == libc::S_IFREG as u16 => FileType::File,
value if value == libc::S_IFDIR as u16 => FileType::Directory,
value if value == libc::S_IFLNK as u16 => FileType::Symlink,
value if value == libc::S_IFBLK as u16 => FileType::BlockDevice,
value if value == libc::S_IFCHR as u16 => FileType::CharacterDevice,
value if value == libc::S_IFIFO as u16 => FileType::Fifo,
value if value == libc::S_IFSOCK as u16 => FileType::Socket,
_ => FileType::Unknown,
}
}
fn cqe_to_result(cqe: IoUringCqe) -> io::Result<i32> {
if cqe.res < 0 {
Err(io::Error::from_raw_os_error(-cqe.res))
} else {
Ok(cqe.res)
}
}
fn cvt(value: libc::c_int) -> io::Result<libc::c_int> {
if value == -1 {
Err(io::Error::last_os_error())
} else {
Ok(value)
}
}
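
// Usage sketch (hypothetical path and options, assumes the caller runs on a runtime
// thread with a bound reactor, as completion_for_current_thread expects):
//
//     let fd = open(FsOp::Open { path: "/tmp/example".into(), options }).await?;
//     let bytes = read(FsOp::Read { fd: fd.as_raw_fd(), offset: Some(0), len: 4096 }).await?;
//     let written = write(FsOp::Write { fd: fd.as_raw_fd(), offset: None, data: bytes }).await?;
//     close(FsOp::Close { fd: fd.into_raw_fd() }).await?;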

View File

@@ -0,0 +1,5 @@
//! Linux backend modules.
pub mod channel;
pub mod fs;
pub mod net;

View File

@@ -0,0 +1,974 @@
//! Linux networking backend.
use std::ffi::c_void;
use std::future::Future;
use std::io;
use std::mem::MaybeUninit;
use std::net::{
Ipv4Addr, Ipv6Addr, Shutdown, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs,
};
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
use std::pin::Pin;
use std::thread;
use std::time::Duration;
use crate::op::completion::completion_for_current_thread;
use crate::op::net::{AcceptedSocket, NetOp, ReceivedDatagram};
use crate::platform::linux_x86_64::runtime::with_current_reactor;
use crate::platform::linux_x86_64::uring::{
IORING_OP_ACCEPT, IORING_OP_BIND, IORING_OP_CLOSE, IORING_OP_CONNECT, IORING_OP_LISTEN,
IORING_OP_RECV, IORING_OP_SEND, IORING_OP_SHUTDOWN, IORING_OP_SOCKET, IoUringCqe, IoUringSqe,
};
const DEFAULT_LISTENER_BACKLOG: i32 = 1024;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExecutionPath {
IoUring,
Offload,
}
pub fn execution_path(op: &NetOp) -> ExecutionPath {
match op {
NetOp::Socket { .. }
| NetOp::Connect { .. }
| NetOp::Bind { .. }
| NetOp::Listen { .. }
| NetOp::Accept { .. }
| NetOp::Send { .. }
| NetOp::Recv { .. }
| NetOp::Shutdown { .. }
| NetOp::Close { .. } => ExecutionPath::IoUring,
NetOp::SendTo { .. } | NetOp::RecvFrom { .. } => ExecutionPath::Offload,
}
}
pub async fn resolve_addrs<A>(addr: A) -> io::Result<Vec<SocketAddr>>
where
A: ToSocketAddrs + Send + 'static,
{
offload(move || {
let addrs = addr.to_socket_addrs()?.collect::<Vec<_>>();
if addrs.is_empty() {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
"address resolved to no socket addresses",
))
} else {
Ok(addrs)
}
})
.await
}
pub async fn socket(op: NetOp) -> io::Result<OwnedFd> {
let NetOp::Socket {
domain,
socket_type,
protocol,
flags,
} = op
else {
unreachable!("socket backend called with non-socket op");
};
match submit_uring::<OwnedFd, _>(
move |sqe| {
sqe.opcode = IORING_OP_SOCKET;
sqe.fd = domain;
sqe.off = socket_type as u64;
sqe.len = protocol as u32;
sqe.op_flags = flags;
},
move |cqe| cqe_to_result(cqe).map(|fd| unsafe { OwnedFd::from_raw_fd(fd as RawFd) }),
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || socket_sync(domain, socket_type, protocol, flags)).await
}
result => result,
}
}
pub async fn connect(op: NetOp) -> io::Result<()> {
let NetOp::Connect { fd, addr } = op else {
unreachable!("connect backend called with non-connect op");
};
let raw_addr = RawSocketAddr::from_socket_addr(addr);
let fallback_addr = raw_addr;
let addr_ptr = raw_addr.as_ptr();
let addr_len = raw_addr.len();
match submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_CONNECT;
sqe.fd = fd;
sqe.addr = addr_ptr as u64;
sqe.off = addr_len as u64;
},
move |cqe| {
let _raw_addr = raw_addr;
cqe_to_result(cqe).map(|_| ())
},
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || connect_sync(fd, fallback_addr)).await
}
result => result,
}
}
pub async fn bind(op: NetOp) -> io::Result<()> {
let NetOp::Bind { fd, addr } = op else {
unreachable!("bind backend called with non-bind op");
};
let raw_addr = RawSocketAddr::from_socket_addr(addr);
let fallback_addr = raw_addr;
let addr_ptr = raw_addr.as_ptr();
let addr_len = raw_addr.len();
match submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_BIND;
sqe.fd = fd;
sqe.addr = addr_ptr as u64;
sqe.off = addr_len as u64;
},
move |cqe| {
let _raw_addr = raw_addr;
cqe_to_result(cqe).map(|_| ())
},
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || bind_sync(fd, fallback_addr)).await
}
result => result,
}
}
pub async fn listen(op: NetOp) -> io::Result<()> {
let NetOp::Listen { fd, backlog } = op else {
unreachable!("listen backend called with non-listen op");
};
match submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_LISTEN;
sqe.fd = fd;
sqe.len = backlog as u32;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || listen_sync(fd, backlog)).await
}
result => result,
}
}
pub async fn accept(op: NetOp) -> io::Result<AcceptedSocket> {
let NetOp::Accept { fd } = op else {
unreachable!("accept backend called with non-accept op");
};
let mut storage = Box::new(MaybeUninit::<libc::sockaddr_storage>::zeroed());
let mut addr_len = Box::new(std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t);
let storage_ptr = storage.as_mut_ptr();
let addr_len_ptr = addr_len.as_mut() as *mut libc::socklen_t;
match submit_uring::<AcceptedSocket, _>(
move |sqe| {
sqe.opcode = IORING_OP_ACCEPT;
sqe.fd = fd;
sqe.addr = storage_ptr as u64;
sqe.off = addr_len_ptr as u64;
},
move |cqe| {
let accepted_fd = cqe_to_result(cqe)? as RawFd;
let storage = unsafe { storage.assume_init() };
let peer_addr = socket_addr_from_storage(&storage, *addr_len)?;
Ok(AcceptedSocket {
fd: accepted_fd,
peer_addr,
})
},
)
.await
{
Err(error) if should_fallback_to_offload(&error) => offload(move || accept_sync(fd)).await,
result => result,
}
}
pub async fn send(op: NetOp) -> io::Result<usize> {
let NetOp::Send { fd, data, flags } = op else {
unreachable!("send backend called with non-send op");
};
let fallback_data = data.clone();
let data_ptr = data.as_ptr();
let data_len = data.len();
match submit_uring::<usize, _>(
move |sqe| {
sqe.opcode = IORING_OP_SEND;
sqe.fd = fd;
sqe.addr = data_ptr as u64;
sqe.len = data_len as u32;
sqe.op_flags = flags as u32;
},
move |cqe| {
let _data = data;
cqe_to_result(cqe).map(|written| written as usize)
},
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || send_sync(fd, fallback_data, flags)).await
}
result => result,
}
}
pub async fn send_to(op: NetOp) -> io::Result<usize> {
let NetOp::SendTo {
fd,
target,
data,
flags,
} = op
else {
unreachable!("send_to backend called with non-send_to op");
};
let raw_addr = RawSocketAddr::from_socket_addr(target);
offload(move || send_to_sync(fd, data, raw_addr, flags)).await
}
pub async fn recv(op: NetOp) -> io::Result<Vec<u8>> {
let NetOp::Recv { fd, len, flags } = op else {
unreachable!("recv backend called with non-recv op");
};
let mut buffer = vec![0; len];
let buffer_ptr = buffer.as_mut_ptr();
let buffer_len = buffer.len();
match submit_uring::<Vec<u8>, _>(
move |sqe| {
sqe.opcode = IORING_OP_RECV;
sqe.fd = fd;
sqe.addr = buffer_ptr as u64;
sqe.len = buffer_len as u32;
sqe.op_flags = flags as u32;
},
move |cqe| {
let read = cqe_to_result(cqe)? as usize;
buffer.truncate(read);
Ok(buffer)
},
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || recv_sync(fd, len, flags)).await
}
result => result,
}
}
pub async fn recv_from(op: NetOp) -> io::Result<ReceivedDatagram> {
let NetOp::RecvFrom { fd, len, flags } = op else {
unreachable!("recv_from backend called with non-recv_from op");
};
offload(move || recv_from_sync(fd, len, flags)).await
}
pub async fn shutdown(op: NetOp) -> io::Result<()> {
let NetOp::Shutdown { fd, how } = op else {
unreachable!("shutdown backend called with non-shutdown op");
};
let fallback_how = how;
match submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_SHUTDOWN;
sqe.fd = fd;
sqe.len = shutdown_how(how) as u32;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
{
Err(error) if should_fallback_to_offload(&error) => {
offload(move || shutdown_sync(fd, fallback_how)).await
}
result => result,
}
}
pub async fn close(op: NetOp) -> io::Result<()> {
let NetOp::Close { fd } = op else {
unreachable!("close backend called with non-close op");
};
match submit_uring::<(), _>(
move |sqe| {
sqe.opcode = IORING_OP_CLOSE;
sqe.fd = fd;
},
move |cqe| cqe_to_result(cqe).map(|_| ()),
)
.await
{
Err(error) if should_fallback_to_offload(&error) => offload(move || close_sync(fd)).await,
result => result,
}
}
pub async fn connect_stream(addr: SocketAddr) -> io::Result<OwnedFd> {
let socket = socket(NetOp::Socket {
domain: socket_domain(addr),
socket_type: libc::SOCK_STREAM,
protocol: 0,
flags: libc::SOCK_CLOEXEC as u32,
})
.await?;
let connect_result = connect(NetOp::Connect {
fd: socket.as_raw_fd(),
addr,
})
.await;
match connect_result {
Ok(()) => Ok(socket),
Err(error) => Err(error),
}
}
pub async fn bind_listener(addr: SocketAddr, backlog: Option<i32>) -> io::Result<OwnedFd> {
let listener = socket(NetOp::Socket {
domain: socket_domain(addr),
socket_type: libc::SOCK_STREAM,
protocol: 0,
flags: libc::SOCK_CLOEXEC as u32,
})
.await?;
set_reuse_addr(listener.as_raw_fd(), true)?;
bind(NetOp::Bind {
fd: listener.as_raw_fd(),
addr,
})
.await?;
listen(NetOp::Listen {
fd: listener.as_raw_fd(),
backlog: backlog.unwrap_or(DEFAULT_LISTENER_BACKLOG),
})
.await?;
Ok(listener)
}
pub async fn bind_datagram(addr: SocketAddr) -> io::Result<OwnedFd> {
let socket = socket(NetOp::Socket {
domain: socket_domain(addr),
socket_type: libc::SOCK_DGRAM,
protocol: 0,
flags: libc::SOCK_CLOEXEC as u32,
})
.await?;
bind(NetOp::Bind {
fd: socket.as_raw_fd(),
addr,
})
.await?;
Ok(socket)
}
pub async fn duplicate(fd: RawFd) -> io::Result<OwnedFd> {
offload(move || {
let duplicated = cvt(unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 0) })?;
Ok(unsafe { OwnedFd::from_raw_fd(duplicated) })
})
.await
}
pub async fn recv_timeout(
fd: RawFd,
len: usize,
flags: i32,
timeout: Duration,
) -> io::Result<Vec<u8>> {
offload(move || {
wait_socket(fd, libc::POLLIN, timeout)?;
recv_sync(fd, len, flags)
})
.await
}
pub async fn send_timeout(
fd: RawFd,
data: Vec<u8>,
flags: i32,
timeout: Duration,
) -> io::Result<usize> {
offload(move || {
wait_socket(fd, libc::POLLOUT, timeout)?;
send_sync(fd, data, flags)
})
.await
}
pub async fn recv_from_timeout(
fd: RawFd,
len: usize,
flags: i32,
timeout: Duration,
) -> io::Result<ReceivedDatagram> {
offload(move || {
wait_socket(fd, libc::POLLIN, timeout)?;
recv_from_sync(fd, len, flags)
})
.await
}
pub async fn send_to_timeout(
fd: RawFd,
data: Vec<u8>,
target: SocketAddr,
flags: i32,
timeout: Duration,
) -> io::Result<usize> {
offload(move || {
wait_socket(fd, libc::POLLOUT, timeout)?;
send_to_sync(fd, data, RawSocketAddr::from_socket_addr(target), flags)
})
.await
}
pub async fn connect_stream_timeout(addr: SocketAddr, timeout: Duration) -> io::Result<OwnedFd> {
offload(move || connect_stream_timeout_sync(addr, timeout)).await
}
pub fn local_addr(fd: RawFd) -> io::Result<SocketAddr> {
socket_addr_with(libc::getsockname, fd)
}
pub fn peer_addr(fd: RawFd) -> io::Result<SocketAddr> {
socket_addr_with(libc::getpeername, fd)
}
pub fn nodelay(fd: RawFd) -> io::Result<bool> {
let mut value = 0;
let mut len = std::mem::size_of::<libc::c_int>() as libc::socklen_t;
cvt(unsafe {
libc::getsockopt(
fd,
libc::IPPROTO_TCP,
libc::TCP_NODELAY,
&mut value as *mut libc::c_int as *mut c_void,
&mut len,
)
})?;
Ok(value != 0)
}
pub fn broadcast(fd: RawFd) -> io::Result<bool> {
getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_BROADCAST).map(|value| value != 0)
}
pub fn set_broadcast(fd: RawFd, enabled: bool) -> io::Result<()> {
setsockopt_int(fd, libc::SOL_SOCKET, libc::SO_BROADCAST, enabled.into())
}
pub fn ttl(fd: RawFd) -> io::Result<u32> {
match socket_family(fd)? {
libc::AF_INET => {
getsockopt_int(fd, libc::IPPROTO_IP, libc::IP_TTL).map(|value| value as u32)
}
libc::AF_INET6 => getsockopt_int(fd, libc::IPPROTO_IPV6, libc::IPV6_UNICAST_HOPS)
.map(|value| value as u32),
family => Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("unsupported socket family {family} for TTL"),
)),
}
}
pub fn set_ttl(fd: RawFd, ttl: u32) -> io::Result<()> {
let ttl = i32::try_from(ttl)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "TTL exceeds i32 range"))?;
match socket_family(fd)? {
libc::AF_INET => setsockopt_int(fd, libc::IPPROTO_IP, libc::IP_TTL, ttl),
libc::AF_INET6 => setsockopt_int(fd, libc::IPPROTO_IPV6, libc::IPV6_UNICAST_HOPS, ttl),
family => Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("unsupported socket family {family} for TTL"),
)),
}
}
pub fn set_nodelay(fd: RawFd, enabled: bool) -> io::Result<()> {
let value: libc::c_int = enabled.into();
cvt(unsafe {
libc::setsockopt(
fd,
libc::IPPROTO_TCP,
libc::TCP_NODELAY,
&value as *const libc::c_int as *const c_void,
std::mem::size_of_val(&value) as libc::socklen_t,
)
})
.map(|_| ())
}
pub type RecvFuture = Pin<Box<dyn Future<Output = io::Result<Vec<u8>>> + 'static>>;
pub type SendFuture = Pin<Box<dyn Future<Output = io::Result<usize>> + 'static>>;
pub type ShutdownFuture = Pin<Box<dyn Future<Output = io::Result<()>> + 'static>>;
pub fn recv_future(fd: RawFd, len: usize) -> RecvFuture {
Box::pin(recv(NetOp::Recv { fd, len, flags: 0 }))
}
pub fn send_future(fd: RawFd, data: Vec<u8>) -> SendFuture {
Box::pin(send(NetOp::Send { fd, data, flags: 0 }))
}
pub fn shutdown_future(fd: RawFd, how: Shutdown) -> ShutdownFuture {
Box::pin(shutdown(NetOp::Shutdown { fd, how }))
}
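// Usage sketch (hypothetical loopback addresses, assumes a runtime thread):
//
//     let addr: SocketAddr = "127.0.0.1:0".parse().unwrap();
//     let listener = bind_listener(addr, None).await?;
//     let client = connect_stream(local_addr(listener.as_raw_fd())?).await?;
//     let accepted = accept(NetOp::Accept { fd: listener.as_raw_fd() }).await?;
//     let sent = send(NetOp::Send { fd: client.as_raw_fd(), data: b"ping".to_vec(), flags: 0 }).await?;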
async fn submit_uring<T: Send + 'static, M>(
fill: impl FnOnce(&mut IoUringSqe),
map: M,
) -> io::Result<T>
where
M: FnOnce(IoUringCqe) -> io::Result<T> + Send + 'static,
{
let (future, handle) = completion_for_current_thread::<io::Result<T>>();
let callback_handle = handle.clone();
let token = with_current_reactor(|reactor| {
reactor.submit_operation(fill, move |cqe| {
callback_handle.complete(map(cqe));
})
})?;
handle.set_cancel(move || {
let _ = with_current_reactor(|reactor| reactor.cancel_operation(token));
});
future.await
}
async fn offload<T: Send + 'static>(
task: impl FnOnce() -> io::Result<T> + Send + 'static,
) -> io::Result<T> {
let (future, handle) = completion_for_current_thread::<io::Result<T>>();
thread::Builder::new()
.name("ruin-runtime-net-offload".into())
.spawn(move || handle.complete(task()))
.map_err(io::Error::other)?;
future.await
}
fn socket_domain(addr: SocketAddr) -> i32 {
match addr {
SocketAddr::V4(_) => libc::AF_INET,
SocketAddr::V6(_) => libc::AF_INET6,
}
}
fn shutdown_how(how: Shutdown) -> i32 {
match how {
Shutdown::Read => libc::SHUT_RD,
Shutdown::Write => libc::SHUT_WR,
Shutdown::Both => libc::SHUT_RDWR,
}
}
fn socket_addr_with(
op: unsafe extern "C" fn(RawFd, *mut libc::sockaddr, *mut libc::socklen_t) -> libc::c_int,
fd: RawFd,
) -> io::Result<SocketAddr> {
let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
cvt(unsafe { op(fd, storage.as_mut_ptr().cast::<libc::sockaddr>(), &mut len) })?;
let storage = unsafe { storage.assume_init() };
socket_addr_from_storage(&storage, len)
}
fn set_reuse_addr(fd: RawFd, enabled: bool) -> io::Result<()> {
setsockopt_int(fd, libc::SOL_SOCKET, libc::SO_REUSEADDR, enabled.into())
}
fn socket_family(fd: RawFd) -> io::Result<i32> {
let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
cvt(unsafe { libc::getsockname(fd, storage.as_mut_ptr().cast::<libc::sockaddr>(), &mut len) })?;
let storage = unsafe { storage.assume_init() };
Ok(storage.ss_family as i32)
}
fn getsockopt_int(fd: RawFd, level: i32, name: i32) -> io::Result<i32> {
let mut value = 0;
let mut len = std::mem::size_of::<libc::c_int>() as libc::socklen_t;
cvt(unsafe {
libc::getsockopt(
fd,
level,
name,
&mut value as *mut libc::c_int as *mut c_void,
&mut len,
)
})?;
Ok(value)
}
fn setsockopt_int(fd: RawFd, level: i32, name: i32, value: i32) -> io::Result<()> {
cvt(unsafe {
libc::setsockopt(
fd,
level,
name,
&value as *const libc::c_int as *const c_void,
std::mem::size_of_val(&value) as libc::socklen_t,
)
})
.map(|_| ())
}
fn socket_addr_from_storage(
storage: &libc::sockaddr_storage,
len: libc::socklen_t,
) -> io::Result<SocketAddr> {
match storage.ss_family as i32 {
libc::AF_INET => {
if len < std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"short IPv4 socket address from kernel",
));
}
let addr = unsafe { *(storage as *const _ as *const libc::sockaddr_in) };
Ok(SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::from(addr.sin_addr.s_addr.to_ne_bytes()),
u16::from_be(addr.sin_port),
)))
}
libc::AF_INET6 => {
if len < std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"short IPv6 socket address from kernel",
));
}
let addr = unsafe { *(storage as *const _ as *const libc::sockaddr_in6) };
Ok(SocketAddr::V6(SocketAddrV6::new(
Ipv6Addr::from(addr.sin6_addr.s6_addr),
u16::from_be(addr.sin6_port),
addr.sin6_flowinfo,
addr.sin6_scope_id,
)))
}
family => Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("unsupported socket address family {family}"),
)),
}
}
/// A `sockaddr_storage` paired with the number of bytes actually populated,
/// ready to pass to libc socket calls.
#[derive(Clone, Copy)]
struct RawSocketAddr {
storage: libc::sockaddr_storage,
len: libc::socklen_t,
}
impl RawSocketAddr {
fn from_socket_addr(addr: SocketAddr) -> Self {
match addr {
SocketAddr::V4(addr) => {
let sockaddr = libc::sockaddr_in {
sin_family: libc::AF_INET as libc::sa_family_t,
sin_port: addr.port().to_be(),
sin_addr: libc::in_addr {
s_addr: u32::from_ne_bytes(addr.ip().octets()),
},
sin_zero: [0; 8],
};
let mut storage =
unsafe { MaybeUninit::<libc::sockaddr_storage>::zeroed().assume_init() };
unsafe {
std::ptr::write(
&mut storage as *mut libc::sockaddr_storage as *mut libc::sockaddr_in,
sockaddr,
);
}
Self {
storage,
len: std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t,
}
}
SocketAddr::V6(addr) => {
let sockaddr = libc::sockaddr_in6 {
sin6_family: libc::AF_INET6 as libc::sa_family_t,
sin6_port: addr.port().to_be(),
sin6_flowinfo: addr.flowinfo(),
sin6_addr: libc::in6_addr {
s6_addr: addr.ip().octets(),
},
sin6_scope_id: addr.scope_id(),
};
let mut storage =
unsafe { MaybeUninit::<libc::sockaddr_storage>::zeroed().assume_init() };
unsafe {
std::ptr::write(
&mut storage as *mut libc::sockaddr_storage as *mut libc::sockaddr_in6,
sockaddr,
);
}
Self {
storage,
len: std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t,
}
}
}
}
fn as_ptr(&self) -> *const libc::sockaddr {
&self.storage as *const libc::sockaddr_storage as *const libc::sockaddr
}
fn len(&self) -> libc::socklen_t {
self.len
}
}
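// Illustrative round-trip sketch (hypothetical test module): encoding a
// `SocketAddr` with `RawSocketAddr::from_socket_addr` and decoding it back via
// `socket_addr_from_storage` should be lossless for both address families.
#[cfg(test)]
mod raw_addr_roundtrip_sketch {
    use std::net::SocketAddr;

    use super::*;

    #[test]
    fn v4_and_v6_addresses_round_trip() {
        let cases: [SocketAddr; 2] = [
            "127.0.0.1:8080".parse().unwrap(),
            "[::1]:9090".parse().unwrap(),
        ];
        for addr in cases {
            let raw = RawSocketAddr::from_socket_addr(addr);
            let decoded = socket_addr_from_storage(&raw.storage, raw.len())
                .expect("decoding freshly encoded storage should succeed");
            assert_eq!(decoded, addr);
        }
    }
}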
fn cqe_to_result(cqe: IoUringCqe) -> io::Result<i32> {
if cqe.res < 0 {
Err(io::Error::from_raw_os_error(-cqe.res))
} else {
Ok(cqe.res)
}
}
fn cvt(value: libc::c_int) -> io::Result<libc::c_int> {
if value == -1 {
Err(io::Error::last_os_error())
} else {
Ok(value)
}
}
/// Errors indicating the kernel rejected the uring opcode itself, so the
/// operation should be retried on the blocking offload path.
fn should_fallback_to_offload(error: &io::Error) -> bool {
matches!(
error.raw_os_error(),
Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP)
)
}
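// Hypothetical call-site sketch (comments only) for the uring-first,
// offload-second pattern these helpers support; the real dispatch lives in the
// operation implementations elsewhere in this module, and `fill_sqe` /
// `some_blocking_syscall` are placeholder names:
//
//     match submit_uring(fill_sqe, cqe_to_result).await {
//         Err(ref error) if should_fallback_to_offload(error) => {
//             offload(move || some_blocking_syscall()).await
//         }
//         other => other,
//     }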
fn socket_sync(domain: i32, socket_type: i32, protocol: i32, flags: u32) -> io::Result<OwnedFd> {
let fd = cvt(unsafe { libc::socket(domain, socket_type | flags as i32, protocol) })?;
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}
fn connect_sync(fd: RawFd, addr: RawSocketAddr) -> io::Result<()> {
cvt(unsafe { libc::connect(fd, addr.as_ptr(), addr.len()) }).map(|_| ())
}
fn bind_sync(fd: RawFd, addr: RawSocketAddr) -> io::Result<()> {
cvt(unsafe { libc::bind(fd, addr.as_ptr(), addr.len()) }).map(|_| ())
}
fn listen_sync(fd: RawFd, backlog: i32) -> io::Result<()> {
cvt(unsafe { libc::listen(fd, backlog) }).map(|_| ())
}
fn accept_sync(fd: RawFd) -> io::Result<AcceptedSocket> {
let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
let accepted_fd = cvt(unsafe {
libc::accept4(
fd,
storage.as_mut_ptr().cast::<libc::sockaddr>(),
&mut len,
libc::SOCK_CLOEXEC,
)
})?;
let storage = unsafe { storage.assume_init() };
let peer_addr = socket_addr_from_storage(&storage, len)?;
Ok(AcceptedSocket {
fd: accepted_fd,
peer_addr,
})
}
fn send_sync(fd: RawFd, data: Vec<u8>, flags: i32) -> io::Result<usize> {
let written = unsafe { libc::send(fd, data.as_ptr().cast::<c_void>(), data.len(), flags) };
cvt_long(written).map(|written| written as usize)
}
fn send_to_sync(fd: RawFd, data: Vec<u8>, target: RawSocketAddr, flags: i32) -> io::Result<usize> {
let written = unsafe {
libc::sendto(
fd,
data.as_ptr().cast::<c_void>(),
data.len(),
flags,
target.as_ptr(),
target.len(),
)
};
cvt_long(written).map(|written| written as usize)
}
fn recv_sync(fd: RawFd, len: usize, flags: i32) -> io::Result<Vec<u8>> {
let mut buffer = vec![0; len];
let read = unsafe {
libc::recv(
fd,
buffer.as_mut_ptr().cast::<c_void>(),
buffer.len(),
flags,
)
};
let read = cvt_long(read)? as usize;
buffer.truncate(read);
Ok(buffer)
}
fn recv_from_sync(fd: RawFd, len: usize, flags: i32) -> io::Result<ReceivedDatagram> {
let mut buffer = vec![0; len];
let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
let mut addr_len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
let read = unsafe {
libc::recvfrom(
fd,
buffer.as_mut_ptr().cast::<c_void>(),
buffer.len(),
flags,
storage.as_mut_ptr().cast::<libc::sockaddr>(),
&mut addr_len,
)
};
let read = cvt_long(read)? as usize;
buffer.truncate(read);
let storage = unsafe { storage.assume_init() };
let peer_addr = socket_addr_from_storage(&storage, addr_len)?;
Ok(ReceivedDatagram {
data: buffer,
peer_addr,
})
}
fn shutdown_sync(fd: RawFd, how: Shutdown) -> io::Result<()> {
cvt(unsafe { libc::shutdown(fd, shutdown_how(how)) }).map(|_| ())
}
fn close_sync(fd: RawFd) -> io::Result<()> {
cvt(unsafe { libc::close(fd) }).map(|_| ())
}
/// Blocking connect with a deadline: issue a non-blocking connect, wait for
/// writability with poll(2), surface any pending SO_ERROR, then restore
/// blocking mode on success.
fn connect_stream_timeout_sync(addr: SocketAddr, timeout: Duration) -> io::Result<OwnedFd> {
let fd = cvt(unsafe {
libc::socket(
socket_domain(addr),
libc::SOCK_STREAM | libc::SOCK_CLOEXEC | libc::SOCK_NONBLOCK,
0,
)
})?;
let raw_addr = RawSocketAddr::from_socket_addr(addr);
let connect_result = unsafe { libc::connect(fd, raw_addr.as_ptr(), raw_addr.len()) };
if connect_result == 0 {
set_nonblocking(fd, false)?;
return Ok(unsafe { OwnedFd::from_raw_fd(fd) });
}
let error = io::Error::last_os_error();
if error.raw_os_error() != Some(libc::EINPROGRESS) {
let _ = close_sync(fd);
return Err(error);
}
let completion = wait_socket(fd, libc::POLLOUT, timeout)
.and_then(|_| getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_ERROR));
match completion {
Ok(0) => {
set_nonblocking(fd, false)?;
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}
Ok(code) => {
let _ = close_sync(fd);
Err(io::Error::from_raw_os_error(code))
}
Err(error) => {
let _ = close_sync(fd);
Err(error)
}
}
}
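// Hypothetical usage sketch (comments only), from a context returning io::Result:
//
//     let fd = connect_stream_timeout_sync(
//         "127.0.0.1:8080".parse().unwrap(),
//         Duration::from_secs(5),
//     )?;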
fn set_nonblocking(fd: RawFd, enabled: bool) -> io::Result<()> {
let flags = cvt(unsafe { libc::fcntl(fd, libc::F_GETFL) })?;
let new_flags = if enabled {
flags | libc::O_NONBLOCK
} else {
flags & !libc::O_NONBLOCK
};
cvt(unsafe { libc::fcntl(fd, libc::F_SETFL, new_flags) }).map(|_| ())
}
/// Poll a single descriptor for `events`, retrying on EINTR, failing with
/// `TimedOut` after `timeout`, and surfacing a pending SO_ERROR if the kernel
/// reports an error/hang-up condition.
fn wait_socket(fd: RawFd, events: i16, timeout: Duration) -> io::Result<()> {
let timeout_ms = timeout
.as_millis()
.min(i32::MAX as u128)
.try_into()
.unwrap_or(i32::MAX);
loop {
let mut poll_fd = libc::pollfd {
fd,
events,
revents: 0,
};
let result = unsafe { libc::poll(&mut poll_fd, 1, timeout_ms) };
if result == 0 {
return Err(io::Error::new(
io::ErrorKind::TimedOut,
"socket operation timed out",
));
}
if result < 0 {
let error = io::Error::last_os_error();
if error.kind() == io::ErrorKind::Interrupted {
continue;
}
return Err(error);
}
if poll_fd.revents & (libc::POLLERR | libc::POLLHUP | libc::POLLNVAL) != 0 {
let socket_error = getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_ERROR).unwrap_or(0);
if socket_error != 0 {
return Err(io::Error::from_raw_os_error(socket_error));
}
}
return Ok(());
}
}
fn cvt_long(value: libc::ssize_t) -> io::Result<libc::ssize_t> {
if value == -1 {
Err(io::Error::last_os_error())
} else {
Ok(value)
}
}

View File

@@ -0,0 +1,4 @@
//! Platform backend implementations.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
pub mod linux;

175
lib/runtime/src/time.rs Normal file
View File

@@ -0,0 +1,175 @@
//! Runtime time primitives.
use std::cell::{Cell, RefCell};
use std::fmt;
use std::future::{Future, poll_fn};
use std::io;
use std::pin::Pin;
use std::rc::Rc;
use std::task::Waker;
use std::task::{Context, Poll};
use std::time::Duration;
use crate::{clear_timeout, set_timeout};
/// Future returned by [`sleep`]; completes once the runtime timer fires.
pub struct Sleep {
delay: Option<Duration>,
state: Option<Rc<SleepState>>,
handle: Option<crate::TimeoutHandle>,
completed: bool,
}
/// Error returned by [`timeout`] when the deadline elapses before the wrapped
/// future completes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Elapsed;
/// Create a future that completes after `duration`, scheduled through the
/// runtime's timeout machinery.
pub fn sleep(duration: Duration) -> Sleep {
Sleep {
delay: Some(duration),
state: None,
handle: None,
completed: false,
}
}
/// Await `future`, resolving to `Err(Elapsed)` if it does not complete within
/// `duration`.
pub async fn timeout<F>(duration: Duration, future: F) -> Result<F::Output, Elapsed>
where
F: Future,
{
let mut future = std::pin::pin!(future);
let mut sleeper = std::pin::pin!(sleep(duration));
poll_fn(|cx| {
if let Poll::Ready(output) = future.as_mut().poll(cx) {
return Poll::Ready(Ok(output));
}
if let Poll::Ready(()) = sleeper.as_mut().poll(cx) {
return Poll::Ready(Err(Elapsed));
}
Poll::Pending
})
.await
}
/// Build the `io::Error` reported when `action` exceeded its deadline.
pub fn timeout_error(action: &'static str) -> io::Error {
io::Error::new(io::ErrorKind::TimedOut, format!("{action} timed out"))
}
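// Hypothetical combination sketch (comments only): I/O paths can translate an
// `Elapsed` from `timeout` into an `io::Error` with `timeout_error`, e.g.
// inside a function returning `io::Result<Vec<u8>>`:
//
//     let bytes = timeout(Duration::from_secs(1), read_future)
//         .await
//         .map_err(|_| timeout_error("read"))??;
//
// where `read_future` is any future resolving to `io::Result<Vec<u8>>`.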
impl Future for Sleep {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
if self.completed {
return Poll::Ready(());
}
if self.state.is_none() {
let delay = self.delay.take().unwrap_or(Duration::ZERO);
let state = Rc::new(SleepState::default());
let state_for_callback = Rc::clone(&state);
let timeout_handle = set_timeout(delay, move || state_for_callback.complete());
self.state = Some(state);
self.handle = Some(timeout_handle);
}
let state = self
.state
.as_ref()
.expect("sleep state should be initialized");
// Register the waker (unless already done), then check readiness once so a
// callback that has already fired is observed on this poll.
if !state.ready.get() {
*state.waker.borrow_mut() = Some(cx.waker().clone());
}
if state.ready.get() {
self.completed = true;
self.state = None;
self.handle = None;
Poll::Ready(())
} else {
Poll::Pending
}
}
}
impl Drop for Sleep {
fn drop(&mut self) {
if self.completed {
return;
}
if let Some(handle) = self.handle.take() {
clear_timeout(&handle);
}
}
}
#[derive(Default)]
struct SleepState {
ready: Cell<bool>,
waker: RefCell<Option<Waker>>,
}
impl SleepState {
fn complete(&self) {
self.ready.set(true);
if let Some(waker) = self.waker.borrow_mut().take() {
waker.wake();
}
}
}
impl fmt::Display for Elapsed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("deadline elapsed")
}
}
impl std::error::Error for Elapsed {}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use std::time::Duration;
use crate::{queue_future, queue_task, run};
use super::{sleep, timeout};
#[test]
fn sleep_and_timeout_work() {
let log = std::thread::spawn(|| {
let log = Arc::new(Mutex::new(Vec::new()));
let log_for_task = Arc::clone(&log);
queue_task(move || {
let log_for_task = Arc::clone(&log_for_task);
queue_future(async move {
log_for_task.lock().unwrap().push("started");
sleep(Duration::from_millis(5)).await;
log_for_task.lock().unwrap().push("slept");
let result = timeout(Duration::from_millis(5), async {
sleep(Duration::from_millis(20)).await;
42usize
})
.await;
assert!(result.is_err(), "timeout should fire first");
log_for_task.lock().unwrap().push("timed out");
});
});
run();
let log = log.lock().unwrap();
log.clone()
})
.join()
.expect("time test thread should join successfully");
assert_eq!(log.as_slice(), ["started", "slept", "timed out"]);
}
}

View File

@@ -0,0 +1,12 @@
[package]
name = "ruin-runtime-proc-macros"
version = "0.1.0"
edition = "2024"
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1"
quote = "1"
syn = { version = "2", features = ["full"] }

View File

@@ -0,0 +1,128 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::{format_ident, quote};
use syn::{Error, ItemFn, parse_macro_input};
/// Entry attribute for a synchronous `fn main`: the body is queued as a task on
/// the ruin runtime and the runtime is then run to completion.
#[proc_macro_attribute]
pub fn main(attr: TokenStream, item: TokenStream) -> TokenStream {
expand_entry(attr, item, EntryKind::Sync)
}
/// Entry attribute for an `async fn main`: the body is queued as a future on
/// the ruin runtime and the runtime is then run to completion.
#[proc_macro_attribute]
pub fn async_main(attr: TokenStream, item: TokenStream) -> TokenStream {
expand_entry(attr, item, EntryKind::Async)
}
#[derive(Clone, Copy)]
enum EntryKind {
Sync,
Async,
}
fn expand_entry(attr: TokenStream, item: TokenStream, kind: EntryKind) -> TokenStream {
if !proc_macro2::TokenStream::from(attr).is_empty() {
return Error::new(
Span::call_site(),
"ruin runtime entry attributes take no arguments",
)
.to_compile_error()
.into();
}
let function = parse_macro_input!(item as ItemFn);
match validate_entry(&function, kind) {
Ok(()) => generate_entry(function, kind).into(),
Err(error) => error.to_compile_error().into(),
}
}
fn validate_entry(function: &ItemFn, kind: EntryKind) -> syn::Result<()> {
let signature = &function.sig;
if signature.ident != "main" {
return Err(Error::new_spanned(
&signature.ident,
"ruin runtime entry attribute must be attached to a function named `main`",
));
}
if !signature.inputs.is_empty() {
return Err(Error::new_spanned(
&signature.inputs,
"ruin runtime entry functions cannot take parameters",
));
}
if !signature.generics.params.is_empty() || signature.generics.where_clause.is_some() {
return Err(Error::new_spanned(
&signature.generics,
"ruin runtime entry functions cannot be generic",
));
}
if signature.constness.is_some() {
return Err(Error::new_spanned(
signature.fn_token,
"ruin runtime entry functions cannot be const",
));
}
if signature.unsafety.is_some() {
return Err(Error::new_spanned(
signature.fn_token,
"ruin runtime entry functions cannot be unsafe",
));
}
if signature.abi.is_some() {
return Err(Error::new_spanned(
&signature.abi,
"ruin runtime entry functions cannot declare an ABI",
));
}
if signature.variadic.is_some() {
return Err(Error::new_spanned(
&signature.variadic,
"ruin runtime entry functions cannot be variadic",
));
}
match kind {
EntryKind::Sync if signature.asyncness.is_some() => Err(Error::new_spanned(
signature.asyncness,
"#[ruin_runtime::main] expects a non-async `fn main`",
)),
EntryKind::Async if signature.asyncness.is_none() => Err(Error::new_spanned(
signature.fn_token,
"#[ruin_runtime::async_main] expects an `async fn main`",
)),
_ => Ok(()),
}
}
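// Illustrative rejections (comments only): each of the following fails
// `validate_entry` with a targeted compile error instead of expanding:
//
//     #[ruin_runtime::main] fn not_main() {}     // must be named `main`
//     #[ruin_runtime::main] fn main(x: u8) {}    // no parameters allowed
//     #[ruin_runtime::main] async fn main() {}   // `main` expects a non-async fn
//     #[ruin_runtime::async_main] fn main() {}   // `async_main` expects an async fn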
fn generate_entry(mut function: ItemFn, kind: EntryKind) -> proc_macro2::TokenStream {
let original_name = function.sig.ident.clone();
let implementation_name = format_ident!("__ruin_runtime_internal_{}", original_name);
function.sig.ident = implementation_name.clone();
let entry_call = match kind {
EntryKind::Sync => quote! {
::ruin_runtime::queue_task(|| {
let _ = #implementation_name();
});
},
EntryKind::Async => quote! {
let _ = ::ruin_runtime::queue_future(#implementation_name());
},
};
quote! {
#function
fn #original_name() {
#entry_call
::ruin_runtime::run();
}
}
}
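// Expansion sketch (illustrative, formatting approximate): given
//
//     #[ruin_runtime::async_main]
//     async fn main() { /* ... */ }
//
// `generate_entry` renames the user's function and emits roughly
//
//     async fn __ruin_runtime_internal_main() { /* ... */ }
//     fn main() {
//         let _ = ::ruin_runtime::queue_future(__ruin_runtime_internal_main());
//         ::ruin_runtime::run();
//     }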