Restaged repo, allocator and runtime implemented, ioring-backed async fs/net/channel/timer primitives
This commit is contained in:
13
lib/runtime/Cargo.toml
Normal file
13
lib/runtime/Cargo.toml
Normal file
@@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "ruin-runtime"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
hyper = { version = "1.8", default-features = false, features = ["client", "http1"] }
|
||||
libc = "0.2"
|
||||
ruin_runtime_proc_macros = { package = "ruin-runtime-proc-macros", path = "../runtime_proc_macros" }
|
||||
|
||||
[dev-dependencies]
|
||||
bytes = "1"
|
||||
http-body-util = "0.1"
|
||||
81
lib/runtime/examples/async_fs_showcase.rs
Normal file
81
lib/runtime/examples/async_fs_showcase.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
use ruin_runtime::fs::{self, File};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Render raw bytes as a one-line preview string: lossy UTF-8 decode with
/// every embedded newline shown as the two-character escape `\n`.
fn preview(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.split('\n').collect::<Vec<_>>().join("\\n")
}
|
||||
|
||||
/// Demo walking through the runtime's async filesystem API: metadata,
/// sequential reads, positional reads via a cloned handle, whole-file
/// reads, and directory iteration. Paths are resolved relative to this
/// crate's manifest so the example is runnable from any working directory.
#[ruin_runtime::async_main]
async fn main() {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let cargo_toml = manifest_dir.join("Cargo.toml");
    let src_dir = manifest_dir.join("src");

    println!("manifest dir: {}", manifest_dir.display());

    // Stat the file by path (no open handle needed).
    let cargo_meta = fs::metadata(&cargo_toml)
        .await
        .expect("Cargo.toml metadata should load");
    println!(
        "Cargo.toml: {} bytes, file={}, empty={}",
        cargo_meta.len(),
        cargo_meta.is_file(),
        cargo_meta.is_empty()
    );

    // Same metadata, this time through an open handle.
    let mut file = File::open(&cargo_toml)
        .await
        .expect("Cargo.toml should open for reading");
    let file_meta = file
        .metadata()
        .await
        .expect("opened file metadata should load");
    println!("opened file metadata size: {}", file_meta.len());

    // Sequential read advances the handle's cursor; truncate to what was read.
    let mut sequential = vec![0; 96];
    let sequential_read = file
        .read(&mut sequential)
        .await
        .expect("sequential read should succeed");
    sequential.truncate(sequential_read);
    println!(
        "sequential read ({sequential_read} bytes): {}",
        preview(&sequential)
    );

    // A cloned handle can read at an absolute offset without moving the
    // original cursor.
    let cloned = file.try_clone().await.expect("file clone should succeed");
    let mut positioned = [0u8; 48];
    let positioned_read = cloned
        .read_at(0, &mut positioned)
        .await
        .expect("positioned read should succeed");
    println!(
        "positioned read ({positioned_read} bytes): {}",
        preview(&positioned[..positioned_read])
    );

    // Convenience whole-file read.
    let cargo_text = fs::read_to_string(&cargo_toml)
        .await
        .expect("read_to_string should succeed");
    println!("Cargo.toml line count: {}", cargo_text.lines().count());

    // Stream directory entries; read_dir yields them in OS order, so sort
    // by name for stable output.
    let mut dir = fs::read_dir(&src_dir)
        .await
        .expect("src directory should be readable");
    let mut entries = Vec::new();
    while let Some(entry) = dir
        .next_entry()
        .await
        .expect("read_dir stream should succeed")
    {
        let metadata = entry.metadata().await.expect("entry metadata should load");
        let kind = if metadata.is_dir() { "dir" } else { "file" };
        entries.push((entry.file_name().to_string_lossy().into_owned(), kind));
    }
    entries.sort_by(|left, right| left.0.cmp(&right.0));

    println!("src entries:");
    for (name, kind) in entries.iter().take(8) {
        println!(" - {name} ({kind})");
    }
}
|
||||
160
lib/runtime/examples/channel_showcase.rs
Normal file
160
lib/runtime/examples/channel_showcase.rs
Normal file
@@ -0,0 +1,160 @@
|
||||
use ruin_runtime::channel::{mpsc, oneshot};
|
||||
use ruin_runtime::{queue_future, spawn_worker, time::sleep};
|
||||
use std::fmt;
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
static START: OnceLock<Instant> = OnceLock::new();
|
||||
static ACTUAL_ORDER: AtomicUsize = AtomicUsize::new(1);
|
||||
|
||||
/// Logs a formatted message tagged with the sequence number at which this
/// event is *expected* to fire; `log_event_impl` pairs it with the actual
/// observed order so drift is visible in the output.
macro_rules! log_event {
    ($expected:literal, $($arg:tt)*) => {{
        log_event_impl($expected, format_args!($($arg)*));
    }};
}
|
||||
|
||||
/// Prints one ordered log line: the actual global sequence number (taken
/// from `ACTUAL_ORDER`), the caller's expected number, elapsed time since
/// `START`, and a wall-clock timestamp.
///
/// Panics if called before `START` is initialized in `main`.
fn log_event_impl(expected: usize, message: fmt::Arguments<'_>) {
    // fetch_add returns the pre-increment value, so the first event is 1.
    let actual = ACTUAL_ORDER.fetch_add(1, Ordering::SeqCst);
    let elapsed = START
        .get()
        .expect("showcase start time should be initialized")
        .elapsed()
        .as_millis();
    println!(
        "[actual {actual:02} | expected {expected:02} | +{elapsed:04}ms | ts {}] {message}",
        unix_timestamp_millis(),
    );
}
|
||||
|
||||
/// Current wall-clock time formatted as `seconds.millis` since the Unix
/// epoch (millis always zero-padded to three digits).
fn unix_timestamp_millis() -> String {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock should be after the Unix epoch");
    let seconds = since_epoch.as_secs();
    let millis = since_epoch.subsec_millis();
    format!("{seconds}.{millis:03}")
}
|
||||
|
||||
/// Messages the worker thread sends back to main over the unbounded channel.
enum WorkerEvent {
    /// A plain progress line to be printed by the main thread.
    Log(String),
    /// Worker asks main to "present" a frame and waits on the oneshot `ack`
    /// before continuing.
    PresentRequest {
        frame: &'static str,
        ack: oneshot::Sender<&'static str>,
    },
}
|
||||
|
||||
/// Channel showcase: a bounded(1) mpsc carries jobs main→worker (to show
/// backpressure), an unbounded mpsc carries events worker→main, and a
/// oneshot round-trips a single acknowledgement. The `expected` numbers
/// passed to the logger encode the intended interleaving.
#[ruin_runtime::async_main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    START.get_or_init(Instant::now);

    let (job_tx, mut job_rx) = mpsc::channel::<&'static str>(1);
    let (event_tx, mut event_rx) = mpsc::unbounded_channel::<WorkerEvent>();

    let worker = spawn_worker(
        move || {
            queue_future(async move {
                // Drain jobs until the main thread drops its sender.
                while let Some(job) = job_rx.recv().await {
                    event_tx
                        .send(WorkerEvent::Log(format!(
                            "[worker] accepted job `{job}` from main thread"
                        )))
                        .unwrap_or_else(|_| {
                            panic!("worker should be able to report accepted jobs")
                        });

                    // Simulate work; this delay is what creates backpressure
                    // on the capacity-1 job channel.
                    sleep(Duration::from_millis(20)).await;
                    if job == "upload-frame" {
                        // Round-trip a oneshot ack through the main thread.
                        let (ack_tx, mut ack_rx) = oneshot::channel();
                        event_tx
                            .send(WorkerEvent::PresentRequest {
                                frame: job,
                                ack: ack_tx,
                            })
                            .unwrap_or_else(|_| {
                                panic!("worker should be able to request presentation")
                            });
                        let ack = ack_rx
                            .recv()
                            .await
                            .expect("main thread should acknowledge frame");
                        event_tx
                            .send(WorkerEvent::Log(format!(
                                "[worker] got oneshot ack `{ack}` for `{job}`"
                            )))
                            .unwrap_or_else(|_| {
                                panic!("worker should be able to report ack reception")
                            });
                    }
                }

                event_tx
                    .send(WorkerEvent::Log(
                        "[worker] bounded command channel closed; worker is done".into(),
                    ))
                    .unwrap_or_else(|_| panic!("worker should be able to report shutdown"));
            });
        },
        || log_event!(12, "[main] worker exited"),
    );

    // Producer side: the third send must wait until the worker frees
    // capacity, demonstrating bounded-channel backpressure.
    queue_future(async move {
        log_event!(1, "[main] bounded mpsc send: enqueue `prepare-scene`");
        job_tx
            .send("prepare-scene")
            .await
            .expect("prepare-scene should be sent");

        log_event!(
            2,
            "[main] bounded mpsc send: enqueue `upload-frame` (fits once worker drains capacity)"
        );
        job_tx
            .send("upload-frame")
            .await
            .expect("upload-frame should be sent");

        log_event!(
            3,
            "[main] bounded mpsc send: enqueue `flush-stats` (waits for capacity/backpressure)"
        );
        job_tx
            .send("flush-stats")
            .await
            .expect("flush-stats should be sent");

        log_event!(
            5,
            "[main] drop bounded sender to close worker command stream"
        );
        // Dropping the only sender ends the worker's recv loop.
        drop(job_tx);
    });

    // Event loop: runs until the worker drops its event sender.
    let mut event_count = 0usize;
    while let Some(event) = event_rx.recv().await {
        event_count += 1;
        match event {
            WorkerEvent::Log(message) => {
                // Map the nth worker log line to its expected global slot
                // (event 3 is the PresentRequest, handled below).
                let expected = match event_count {
                    1 => 4,
                    2 => 6,
                    4 => 9,
                    5 => 10,
                    6 => 11,
                    _ => 10 + event_count,
                };
                log_event_impl(expected, format_args!("{message}"));
            }
            WorkerEvent::PresentRequest { frame, ack } => {
                log_event!(
                    7,
                    "[main] unbounded mpsc recv: worker requests presentation for `{frame}`"
                );
                ack.send("presented")
                    .expect("main thread should be able to answer oneshot");
                log_event!(8, "[main] oneshot send: acknowledged frame presentation");
            }
        }
    }

    let _ = worker;
    Ok(())
}
|
||||
75
lib/runtime/examples/hyper_http_client.rs
Normal file
75
lib/runtime/examples/hyper_http_client.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use std::io::{Read as _, Write as _};
|
||||
use std::net::TcpListener as StdTcpListener;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::Bytes;
|
||||
use http_body_util::{BodyExt, Empty};
|
||||
use hyper::Request;
|
||||
use ruin_runtime::time::sleep;
|
||||
use ruin_runtime::{clear_interval, queue_future, set_interval};
|
||||
|
||||
/// Spawns a single-shot HTTP/1.1 server on an ephemeral loopback port.
///
/// The background thread accepts exactly one connection, reads one request
/// (up to 1 KiB), and replies with a fixed plain-text response before
/// exiting. Returns the bound address plus the server thread's join handle.
///
/// # Errors
/// Propagates bind/addr failures and thread-spawn failures as `io::Error`.
fn spawn_demo_server() -> std::io::Result<(std::net::SocketAddr, thread::JoinHandle<()>)> {
    let listener = StdTcpListener::bind(("127.0.0.1", 0))?;
    let address = listener.local_addr()?;

    // One-shot request handler; the listener moves into the thread.
    let serve = move || {
        let (mut stream, peer) = listener.accept().expect("demo server should accept");
        let mut request = [0; 1024];
        let read = stream.read(&mut request).expect("demo server should read");
        println!("[server] accepted {peer}, saw {} request bytes", read);

        // content-length matches the 24-byte body exactly.
        let response = "HTTP/1.1 200 OK\r\n\
            content-type: text/plain; charset=utf-8\r\n\
            content-length: 24\r\n\
            connection: close\r\n\
            \r\n\
            hello from ruin runtime!";
        stream
            .write_all(response.as_bytes())
            .expect("demo server should reply");
    };

    let handle = thread::Builder::new()
        .name("hyper-demo-server".into())
        .spawn(serve)
        .map_err(std::io::Error::other)?;
    Ok((address, handle))
}
|
||||
|
||||
/// Drives a hyper 1.x HTTP/1 client over the runtime's own `TcpStream`,
/// against the local single-shot demo server. Also demonstrates
/// `set_interval`/`clear_interval` and `sleep` while waiting.
#[ruin_runtime::async_main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let (address, server) = spawn_demo_server()?;

    let stream = ruin_runtime::net::TcpStream::connect(address).await?;
    let (mut sender, connection) = hyper::client::conn::http1::handshake(stream).await?;
    // hyper requires the connection future to be polled for the request to
    // make progress; park it on the runtime.
    queue_future(async move {
        if let Err(error) = connection.await {
            eprintln!("[runtime] hyper connection ended with error: {error}");
        }
    });

    println!("Sleeping a moment to let the server start...");
    let interval = set_interval(Duration::from_millis(400), || println!("..."));
    sleep(Duration::from_secs(2)).await;
    clear_interval(&interval);
    println!("Let's go!");

    let request = Request::builder()
        .method("GET")
        .uri(format!("http://{address}/demo"))
        .header("host", address.to_string())
        .body(Empty::<Bytes>::new())?;
    let response = sender.send_request(request).await?;
    let status = response.status();
    // Collect the streaming body into a single contiguous buffer.
    let body = response.into_body().collect().await?.to_bytes();

    println!(
        "[client] status={status}, body={}",
        String::from_utf8_lossy(&body)
    );

    // The demo server exits after one request; join its thread.
    server
        .join()
        .expect("demo server thread should exit cleanly");
    Ok(())
}
|
||||
228
lib/runtime/examples/runtime_loop_showcase.rs
Normal file
228
lib/runtime/examples/runtime_loop_showcase.rs
Normal file
@@ -0,0 +1,228 @@
|
||||
use ruin_runtime::{
|
||||
IntervalHandle, ThreadHandle, clear_interval, current_thread_handle, queue_future,
|
||||
queue_microtask, queue_task, set_interval, set_timeout, spawn_worker, yield_now,
|
||||
};
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::fmt;
|
||||
use std::rc::Rc;
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
static START: OnceLock<Instant> = OnceLock::new();
|
||||
static ACTUAL_ORDER: AtomicUsize = AtomicUsize::new(1);
|
||||
|
||||
/// Logs a formatted message tagged with the sequence number at which this
/// event is *expected* to fire; `log_event_impl` pairs it with the actual
/// observed order so drift is visible in the output.
macro_rules! log_event {
    ($expected:literal, $($arg:tt)*) => {{
        log_event_impl($expected, format_args!($($arg)*));
    }};
}
|
||||
|
||||
/// Prints one ordered log line: the actual global sequence number (taken
/// from `ACTUAL_ORDER`), the caller's expected number, elapsed time since
/// `START`, and a wall-clock timestamp.
///
/// Panics if called before `START` is initialized in `main`.
fn log_event_impl(expected: usize, message: fmt::Arguments<'_>) {
    // fetch_add returns the pre-increment value, so the first event is 1.
    let actual = ACTUAL_ORDER.fetch_add(1, Ordering::SeqCst);
    let elapsed = START
        .get()
        .expect("showcase start time should be initialized")
        .elapsed()
        .as_millis();
    println!(
        "[actual {actual:02} | expected {expected:02} | +{elapsed:04}ms | ts {}] {message}",
        unix_timestamp_millis(),
    );
}
|
||||
|
||||
/// Current wall-clock time formatted as `seconds.millis` since the Unix
/// epoch (millis always zero-padded to three digits).
fn unix_timestamp_millis() -> String {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock should be after the Unix epoch");
    let seconds = since_epoch.as_secs();
    let millis = since_epoch.subsec_millis();
    format!("{seconds}.{millis:03}")
}
|
||||
|
||||
fn queue_log(handle: &ThreadHandle, expected: usize, message: impl Into<String>) {
|
||||
let message = message.into();
|
||||
let queued = handle.queue_task(move || {
|
||||
log_event_impl(expected, format_args!("{message}"));
|
||||
});
|
||||
assert!(queued, "main thread should accept log task {expected}");
|
||||
}
|
||||
|
||||
fn queue_log_microtask(handle: &ThreadHandle, expected: usize, message: impl Into<String>) {
|
||||
let message = message.into();
|
||||
let queued = handle.queue_microtask(move || {
|
||||
log_event_impl(expected, format_args!("{message}"));
|
||||
});
|
||||
assert!(queued, "main thread should accept log microtask {expected}");
|
||||
}
|
||||
|
||||
/// Event-loop showcase: exercises microtasks, tasks, futures, timeouts,
/// intervals, and cross-thread task queuing between main and one worker.
/// Each `log_event!`/`queue_log` carries the slot the event is expected to
/// occupy in the global ordering.
#[ruin_runtime::main]
fn main() {
    START.get_or_init(Instant::now);

    queue_microtask(|| log_event!(1, "[main] boot microtask: prime UI state"));

    // Futures interleave with microtasks at yield points: 2 runs, the
    // microtask below (3) runs, then the future resumes as 4.
    queue_future(async {
        log_event!(2, "[main] future: fetch scene metadata");
        yield_now().await;
        log_event!(4, "[main] future: scene metadata cached");
    });

    queue_microtask(|| {
        log_event!(3, "[main] microtask queued immediately");
    });

    let main_handle = current_thread_handle();
    queue_task(move || {
        log_event!(
            5,
            "[main] boot task: paint first frame and start background worker"
        );

        // Interval handle is stored in a shared slot so the callback can
        // cancel itself after two ticks.
        let dashboard_interval = Rc::new(RefCell::new(None::<IntervalHandle>));
        let dashboard_ticks = Rc::new(Cell::new(0usize));
        {
            let slot = Rc::clone(&dashboard_interval);
            let ticks = Rc::clone(&dashboard_ticks);
            set_dashboard_interval(slot, ticks);
        }

        set_timeout(Duration::from_millis(30), || {
            log_event!(11, "[main] timeout: network snapshot ready");
        });

        let main_for_worker = main_handle.clone();
        let worker = spawn_worker(
            move || {
                // Everything below runs on the worker thread; log lines are
                // forwarded back to main via its ThreadHandle.
                queue_log(
                    &main_for_worker,
                    6,
                    "[worker->main] startup task: prepare upload queue",
                );

                {
                    let main_for_microtask = main_for_worker.clone();
                    queue_microtask(move || {
                        queue_log(
                            &main_for_microtask,
                            7,
                            "[worker->main] microtask: inspect staging buffers",
                        );
                    });
                }

                {
                    let main_for_future = main_for_worker.clone();
                    queue_future(async move {
                        queue_log(
                            &main_for_future,
                            8,
                            "[worker->main] future: compile shader variants",
                        );
                        yield_now().await;
                        queue_log(
                            &main_for_future,
                            9,
                            "[worker->main] future: shader cache is warm",
                        );
                    });
                }

                {
                    let main_for_task = main_for_worker.clone();
                    queue_task(move || {
                        queue_log(
                            &main_for_task,
                            10,
                            "[worker->main] task: upload static geometry",
                        );
                    });
                }

                // Worker-side interval: self-cancels after the second tick.
                let sample_interval = Rc::new(RefCell::new(None::<IntervalHandle>));
                let sample_count = Rc::new(Cell::new(0usize));
                {
                    let slot = Rc::clone(&sample_interval);
                    let count = Rc::clone(&sample_count);
                    let main_for_samples = main_for_worker.clone();
                    let handle = set_interval(Duration::from_millis(40), move || {
                        let next = count.get() + 1;
                        count.set(next);
                        queue_log(
                            &main_for_samples,
                            if next == 1 { 12 } else { 17 },
                            format!("[worker->main] interval: sample batch {next} ready"),
                        );
                        if next == 2 {
                            let interval =
                                slot.borrow_mut().take().expect("interval should exist");
                            clear_interval(&interval);
                            queue_log(&main_for_samples, 18, "[worker->main] interval stopped");
                        }
                    });
                    *sample_interval.borrow_mut() = Some(handle);
                }

                {
                    let main_for_flush = main_for_worker.clone();
                    set_timeout(Duration::from_millis(110), move || {
                        queue_log_microtask(
                            &main_for_flush,
                            20,
                            "[worker->main] timeout: flushed final upload batch",
                        );
                    });
                }
            },
            || log_event!(21, "[main] worker exited"),
        );

        // Main can queue work onto the worker's loop through its handle.
        set_timeout(Duration::from_millis(70), move || {
            let queued = worker.queue_task({
                let main_from_remote_task = main_handle.clone();
                move || {
                    queue_log(
                        &main_from_remote_task,
                        15,
                        "[worker->main] remote task: upload late texture atlas",
                    );

                    let main_from_remote_microtask = main_from_remote_task.clone();
                    queue_microtask(move || {
                        queue_log(
                            &main_from_remote_microtask,
                            16,
                            "[worker->main] remote microtask: retire staging pages",
                        );
                    });
                }
            });

            log_event!(
                14,
                "[main] timeout: queue late texture upload on worker (queued={queued})"
            );
        });

        set_timeout(Duration::from_millis(140), || {
            log_event!(22, "[main] final timeout: commit frame statistics");
        });
    });
}
|
||||
|
||||
/// Starts the 50ms "dashboard" interval and parks its handle in `slot` so
/// the callback can cancel itself: tick 1 logs and returns, tick 2 takes
/// the handle out of the slot, clears the interval, and logs the stop.
fn set_dashboard_interval(slot: Rc<RefCell<Option<IntervalHandle>>>, ticks: Rc<Cell<usize>>) {
    // Second Rc to the slot moves into the callback; the original is used
    // below to store the handle.
    let slot_for_callback = Rc::clone(&slot);
    let handle = set_interval(Duration::from_millis(50), move || {
        let next = ticks.get() + 1;
        ticks.set(next);
        if next == 1 {
            log_event!(13, "[main] interval: dashboard tick 1");
            return;
        }

        // Tick 2: cancel ourselves via the shared slot.
        let interval = slot_for_callback
            .borrow_mut()
            .take()
            .expect("interval should exist");
        clear_interval(&interval);
        log_event!(19, "[main] interval: dashboard tick 2 and stop");
    });
    *slot.borrow_mut() = Some(handle);
}
|
||||
4
lib/runtime/src/channel/mod.rs
Normal file
4
lib/runtime/src/channel/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
//! Async channels for inter-thread communication.
|
||||
|
||||
pub mod mpsc;
|
||||
pub mod oneshot;
|
||||
575
lib/runtime/src/channel/mpsc.rs
Normal file
575
lib/runtime/src/channel/mpsc.rs
Normal file
@@ -0,0 +1,575 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::future::poll_fn;
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use crate::op::completion::{CompletionFuture, CompletionHandle};
|
||||
use crate::sys::linux::channel::runtime_waiter;
|
||||
|
||||
pub fn channel<T: Send + 'static>(capacity: usize) -> (Sender<T>, Receiver<T>) {
|
||||
assert!(capacity > 0, "bounded channels require capacity > 0");
|
||||
let shared = Arc::new(Mutex::new(State::new(Some(capacity))));
|
||||
(
|
||||
Sender {
|
||||
shared: Arc::clone(&shared),
|
||||
},
|
||||
Receiver { shared },
|
||||
)
|
||||
}
|
||||
|
||||
pub fn unbounded_channel<T: Send + 'static>() -> (UnboundedSender<T>, Receiver<T>) {
|
||||
let shared = Arc::new(Mutex::new(State::new(None)));
|
||||
(
|
||||
UnboundedSender {
|
||||
shared: Arc::clone(&shared),
|
||||
},
|
||||
Receiver { shared },
|
||||
)
|
||||
}
|
||||
|
||||
/// Sending half of a bounded channel; cloneable, `send` awaits capacity.
pub struct Sender<T: Send + 'static> {
    shared: Arc<Mutex<State<T>>>,
}

/// Sending half of an unbounded channel; cloneable, `send` never waits.
pub struct UnboundedSender<T: Send + 'static> {
    shared: Arc<Mutex<State<T>>>,
}

/// Receiving half shared by both channel flavors; at most one `recv` may
/// be in flight at a time (see `poll_recv`).
pub struct Receiver<T: Send + 'static> {
    shared: Arc<Mutex<State<T>>>,
}

/// Mutex-protected channel state shared by all halves.
struct State<T: Send + 'static> {
    // Buffered values, FIFO.
    queue: VecDeque<T>,
    // None = unbounded; Some(n) = at most n queued values.
    capacity: Option<usize>,
    // Live Sender/UnboundedSender clones; 0 means the channel is drained-then-done.
    sender_count: usize,
    receiver_closed: bool,
    // The single parked receive operation, if any.
    recv_waiter: Option<CompletionHandle<Option<T>>>,
    // Senders parked on a full bounded queue, FIFO.
    send_waiters: VecDeque<SendWaiter<T>>,
    // Monotonic (wrapping) id source used to cancel individual send waiters.
    next_waiter_id: usize,
}

/// One parked `send`: the value it wants to enqueue plus the completion
/// handle to resolve when capacity frees up (or the receiver closes).
struct SendWaiter<T: Send + 'static> {
    id: usize,
    value: T,
    handle: CompletionHandle<Result<(), SendError<T>>>,
}

/// Returned when the receiving half is gone; carries the unsent value back.
#[derive(Debug, Eq, PartialEq)]
pub struct SendError<T>(pub T);

/// Non-blocking send failure; both variants return the value.
#[derive(Debug, Eq, PartialEq)]
pub enum TrySendError<T> {
    Full(T),
    Closed(T),
}

/// Non-blocking receive failure.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TryRecvError {
    Empty,
    Disconnected,
}
|
||||
|
||||
impl<T: Send + 'static> State<T> {
    /// Fresh state with one live sender; `capacity: None` means unbounded.
    fn new(capacity: Option<usize>) -> Self {
        Self {
            queue: VecDeque::new(),
            capacity,
            sender_count: 1,
            receiver_closed: false,
            recv_waiter: None,
            send_waiters: VecDeque::new(),
            next_waiter_id: 1,
        }
    }

    /// Attempts an immediate send: hand directly to a parked receiver if
    /// one exists, otherwise enqueue if capacity allows.
    fn try_send_now(&mut self, value: T) -> Result<(), TrySendError<T>> {
        if self.receiver_closed {
            return Err(TrySendError::Closed(value));
        }

        // A parked receiver implies the queue is empty — deliver directly.
        if let Some(waiter) = self.recv_waiter.take() {
            waiter.complete(Some(value));
            return Ok(());
        }

        if self
            .capacity
            .is_some_and(|capacity| self.queue.len() >= capacity)
        {
            return Err(TrySendError::Full(value));
        }

        self.queue.push_back(value);
        Ok(())
    }

    /// Parks a blocked send at the back of the waiter queue; returns the
    /// id used to cancel it later.
    fn enqueue_send_waiter(
        &mut self,
        value: T,
        handle: CompletionHandle<Result<(), SendError<T>>>,
    ) -> usize {
        let id = self.next_waiter_id;
        self.next_waiter_id = self.next_waiter_id.wrapping_add(1);
        self.send_waiters
            .push_back(SendWaiter { id, value, handle });
        id
    }

    /// Removes a parked send by id (used on cancellation); false if it was
    /// already completed or removed.
    fn remove_send_waiter(&mut self, waiter_id: usize) -> bool {
        let Some(index) = self
            .send_waiters
            .iter()
            .position(|waiter| waiter.id == waiter_id)
        else {
            return false;
        };
        self.send_waiters.remove(index);
        true
    }

    /// After a dequeue frees capacity: promote parked senders in FIFO
    /// order, then resolve a parked receiver with `None` if the channel is
    /// both empty and sender-less.
    fn pump_senders(&mut self) {
        loop {
            if self.receiver_closed {
                self.fail_pending_senders();
                break;
            }

            let has_capacity = self
                .capacity
                .is_none_or(|capacity| self.queue.len() < capacity);
            if !has_capacity {
                break;
            }

            let Some(waiter) = self.send_waiters.pop_front() else {
                break;
            };

            // Prefer direct hand-off to a parked receiver over queueing.
            if let Some(receiver) = self.recv_waiter.take() {
                receiver.complete(Some(waiter.value));
            } else {
                self.queue.push_back(waiter.value);
            }
            waiter.handle.complete(Ok(()));
        }

        if self.queue.is_empty()
            && self.sender_count == 0
            && let Some(waiter) = self.recv_waiter.take()
        {
            waiter.complete(None);
        }
    }

    /// Fails every parked send with `SendError`, returning each value.
    fn fail_pending_senders(&mut self) {
        while let Some(waiter) = self.send_waiters.pop_front() {
            waiter.handle.complete(Err(SendError(waiter.value)));
        }
    }

    /// Marks the receiver closed, fails parked senders, and (if nothing is
    /// left to drain) resolves a parked receive with `None`.
    fn close_receiver(&mut self) {
        self.receiver_closed = true;
        self.fail_pending_senders();
        if self.queue.is_empty()
            && let Some(waiter) = self.recv_waiter.take()
        {
            waiter.complete(None);
        }
    }

    /// Accounts for one dropped sender clone; the last drop wakes a parked
    /// receiver with `None` once the queue is drained.
    fn drop_sender(&mut self) {
        self.sender_count = self.sender_count.saturating_sub(1);
        if self.sender_count == 0
            && self.queue.is_empty()
            && let Some(waiter) = self.recv_waiter.take()
        {
            waiter.complete(None);
        }
    }
}
|
||||
|
||||
impl<T: Send + 'static> Clone for Sender<T> {
|
||||
fn clone(&self) -> Self {
|
||||
self.shared
|
||||
.lock()
|
||||
.expect("mpsc state should not be poisoned")
|
||||
.sender_count += 1;
|
||||
Self {
|
||||
shared: Arc::clone(&self.shared),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send + 'static> Clone for UnboundedSender<T> {
|
||||
fn clone(&self) -> Self {
|
||||
self.shared
|
||||
.lock()
|
||||
.expect("mpsc state should not be poisoned")
|
||||
.sender_count += 1;
|
||||
Self {
|
||||
shared: Arc::clone(&self.shared),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send + 'static> Sender<T> {
    /// Sends `value`, waiting for capacity if the bounded queue is full.
    ///
    /// # Errors
    /// Returns `SendError(value)` if the receiver has closed.
    pub async fn send(&self, value: T) -> Result<(), SendError<T>> {
        // value_slot and wait carry state across poll_send invocations.
        let mut value = Some(value);
        let mut wait = None;
        poll_fn(|cx| self.poll_send(cx, &mut value, &mut wait)).await
    }

    /// Non-blocking send; `Full`/`Closed` errors return the value.
    pub fn try_send(&self, value: T) -> Result<(), TrySendError<T>> {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .try_send_now(value)
    }

    /// True once the receiving half has been closed or dropped.
    pub fn is_closed(&self) -> bool {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .receiver_closed
    }

    /// Poll body for `send`. First poll attempts an immediate send; on a
    /// full queue it registers a `SendWaiter` (or pre-resolves the handle
    /// when the retry under the lock succeeds/fails) and thereafter polls
    /// the resulting `CompletionFuture` stored in `wait`.
    fn poll_send(
        &self,
        cx: &mut Context<'_>,
        value_slot: &mut Option<T>,
        wait: &mut Option<CompletionFuture<Result<(), SendError<T>>>>,
    ) -> Poll<Result<(), SendError<T>>> {
        if let Some(future) = wait.as_mut() {
            // Waiting phase: just forward to the completion future.
            match Pin::new(future).poll(cx) {
                Poll::Ready(result) => {
                    wait.take();
                    Poll::Ready(result)
                }
                Poll::Pending => Poll::Pending,
            }
        } else {
            let mut state = self
                .shared
                .lock()
                .expect("mpsc state should not be poisoned");
            match state.try_send_now(value_slot.take().expect("send value should be present")) {
                Ok(()) => Poll::Ready(Ok(())),
                Err(TrySendError::Closed(value)) => Poll::Ready(Err(SendError(value))),
                Err(TrySendError::Full(returned)) => {
                    // Release the lock before allocating the waiter pair.
                    drop(state);
                    let (future, handle) = runtime_waiter::<Result<(), SendError<T>>>();
                    let state_shared = Arc::clone(&self.shared);
                    // Retry under the lock: capacity may have appeared (or
                    // the receiver may have closed) between the two locks.
                    let registration = {
                        let mut state = state_shared
                            .lock()
                            .expect("mpsc state should not be poisoned");
                        match state.try_send_now(returned) {
                            Ok(()) => Ok(None),
                            Err(TrySendError::Closed(value)) => Err(SendError(value)),
                            Err(TrySendError::Full(value)) => {
                                Ok(Some(state.enqueue_send_waiter(value, handle.clone())))
                            }
                        }
                    };
                    match registration {
                        // Retry succeeded: resolve the handle now and let the
                        // recursive poll pick up the ready future.
                        Ok(None) => {
                            handle.complete(Ok(()));
                            *wait = Some(future);
                            self.poll_send(cx, value_slot, wait)
                        }
                        // Receiver closed during the retry: surface the error
                        // through the same future path.
                        Err(error) => {
                            handle.complete(Err(error));
                            *wait = Some(future);
                            self.poll_send(cx, value_slot, wait)
                        }
                        // Parked: arm cancellation to unregister the waiter
                        // if the send future is dropped before completing.
                        Ok(Some(waiter_id)) => {
                            let cancel_shared = Arc::clone(&self.shared);
                            let cancel_handle = handle.clone();
                            handle.set_cancel(move || {
                                let mut state = cancel_shared
                                    .lock()
                                    .expect("mpsc state should not be poisoned");
                                let _ = state.remove_send_waiter(waiter_id);
                                drop(state);
                                cancel_handle.finish(None);
                            });
                            *wait = Some(future);
                            self.poll_send(cx, value_slot, wait)
                        }
                    }
                }
            }
        }
    }
}
|
||||
|
||||
impl<T: Send + 'static> UnboundedSender<T> {
|
||||
pub fn send(&self, value: T) -> Result<(), SendError<T>> {
|
||||
self.shared
|
||||
.lock()
|
||||
.expect("mpsc state should not be poisoned")
|
||||
.try_send_now(value)
|
||||
.map_err(|error| match error {
|
||||
TrySendError::Full(value) | TrySendError::Closed(value) => SendError(value),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_closed(&self) -> bool {
|
||||
self.shared
|
||||
.lock()
|
||||
.expect("mpsc state should not be poisoned")
|
||||
.receiver_closed
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send + 'static> Receiver<T> {
    /// Awaits the next value; resolves `None` once all senders are gone
    /// and the queue is drained (or the receiver was closed).
    pub async fn recv(&mut self) -> Option<T> {
        let mut wait = None;
        poll_fn(|cx| self.poll_recv(cx, &mut wait)).await
    }

    /// Non-blocking receive; dequeuing also promotes parked senders.
    pub fn try_recv(&mut self) -> Result<T, TryRecvError> {
        let mut state = self
            .shared
            .lock()
            .expect("mpsc state should not be poisoned");
        if let Some(value) = state.queue.pop_front() {
            // Freed one slot — let a blocked sender move forward.
            state.pump_senders();
            Ok(value)
        } else if state.sender_count == 0 || state.receiver_closed {
            Err(TryRecvError::Disconnected)
        } else {
            Err(TryRecvError::Empty)
        }
    }

    /// Closes the channel from the receiving side; parked and future sends
    /// fail with `SendError`.
    pub fn close(&mut self) {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .close_receiver();
    }

    /// True when closed or when every sender has been dropped.
    pub fn is_closed(&self) -> bool {
        let state = self
            .shared
            .lock()
            .expect("mpsc state should not be poisoned");
        state.receiver_closed || state.sender_count == 0
    }

    /// Poll body for `recv`. First poll installs a completion pair, arms
    /// its cancellation (so dropping the recv future unregisters the
    /// waiter), and resolves it immediately when a value or a terminal
    /// state is already available; later polls forward to the future.
    fn poll_recv(
        &mut self,
        cx: &mut Context<'_>,
        wait: &mut Option<CompletionFuture<Option<T>>>,
    ) -> Poll<Option<T>> {
        if let Some(future) = wait.as_mut() {
            match Pin::new(future).poll(cx) {
                Poll::Ready(result) => {
                    wait.take();
                    Poll::Ready(result)
                }
                Poll::Pending => Poll::Pending,
            }
        } else {
            let (future, handle) = runtime_waiter::<Option<T>>();
            // Cancellation drops the parked waiter so senders never hand a
            // value to an abandoned receive operation.
            let cancel_shared = Arc::clone(&self.shared);
            let cancel_handle = handle.clone();
            handle.set_cancel(move || {
                let mut state = cancel_shared
                    .lock()
                    .expect("mpsc state should not be poisoned");
                let _ = state.recv_waiter.take();
                drop(state);
                cancel_handle.finish(None);
            });

            {
                let mut state = self
                    .shared
                    .lock()
                    .expect("mpsc state should not be poisoned");
                if let Some(value) = state.queue.pop_front() {
                    state.pump_senders();
                    handle.complete(Some(value));
                } else if state.receiver_closed || state.sender_count == 0 {
                    handle.complete(None);
                } else {
                    // &mut self guarantees a single recv at a time; a stale
                    // waiter here would mean a lost wakeup.
                    assert!(
                        state.recv_waiter.is_none(),
                        "only one mpsc receive operation may wait at a time"
                    );
                    state.recv_waiter = Some(handle.clone());
                }
            }

            // Recurse once so an already-completed handle resolves now.
            *wait = Some(future);
            self.poll_recv(cx, wait)
        }
    }
}
|
||||
|
||||
// Dropping a sender clone decrements the shared count; the last one wakes
// a parked receiver with `None` once the queue drains.
impl<T: Send + 'static> Drop for Sender<T> {
    fn drop(&mut self) {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .drop_sender();
    }
}

impl<T: Send + 'static> Drop for UnboundedSender<T> {
    fn drop(&mut self) {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .drop_sender();
    }
}

// Dropping the receiver closes the channel: parked sends fail immediately
// and subsequent sends return `SendError`.
impl<T: Send + 'static> Drop for Receiver<T> {
    fn drop(&mut self) {
        self.shared
            .lock()
            .expect("mpsc state should not be poisoned")
            .close_receiver();
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    use crate::time::sleep;
    use crate::{queue_future, queue_task, run, spawn_worker};

    use super::{TryRecvError, TrySendError, channel, unbounded_channel};

    /// A capacity-1 channel must block the second `send` until the receiver
    /// drains the first message. Event ordering is captured in a shared log
    /// and checked after the runtime drains.
    #[test]
    fn bounded_channel_applies_backpressure() {
        let log = Arc::new(Mutex::new(Vec::<String>::new()));
        let log_for_task = Arc::clone(&log);

        queue_task(move || {
            let (sender, mut receiver) = channel(1);
            let log_for_sender = Arc::clone(&log_for_task);
            let log_for_receiver = Arc::clone(&log_for_task);

            // Producer: two sends; the second cannot complete while the
            // single capacity slot is occupied.
            queue_future(async move {
                sender
                    .send("first")
                    .await
                    .expect("first send should succeed");
                log_for_sender
                    .lock()
                    .unwrap()
                    .push("sent first".to_string());
                sender
                    .send("second")
                    .await
                    .expect("second send should succeed");
                log_for_sender
                    .lock()
                    .unwrap()
                    .push("sent second".to_string());
            });

            // Consumer: delayed so the producer definitely fills the slot
            // first, then drains both messages.
            queue_future(async move {
                sleep(Duration::from_millis(5)).await;
                let first = receiver.recv().await.expect("first recv should succeed");
                log_for_receiver
                    .lock()
                    .unwrap()
                    .push(format!("received {first}"));
                let second = receiver.recv().await.expect("second recv should succeed");
                log_for_receiver
                    .lock()
                    .unwrap()
                    .push(format!("received {second}"));
            });
        });
        run();

        // Recover the relative order of the four events from the log.
        let log = log.lock().unwrap();
        let sent_first = log.iter().position(|entry| entry == "sent first").unwrap();
        let received_first = log
            .iter()
            .position(|entry| entry == "received first")
            .unwrap();
        let sent_second = log.iter().position(|entry| entry == "sent second").unwrap();
        let received_second = log
            .iter()
            .position(|entry| entry == "received second")
            .unwrap();

        assert!(
            sent_first < received_first,
            "first send should happen before first recv"
        );
        assert!(
            received_first < sent_second,
            "second send should not complete before capacity is freed"
        );
        assert!(
            received_first < received_second,
            "receiver should observe messages in FIFO order"
        );
    }

    /// Messages sent from a worker thread must arrive on the runtime thread,
    /// and the receive loop must terminate once every sender is dropped.
    #[test]
    fn unbounded_channel_moves_messages_across_worker_threads() {
        let log = Arc::new(Mutex::new(Vec::new()));
        let log_for_task = Arc::clone(&log);

        queue_task(move || {
            let (sender, mut receiver) = unbounded_channel::<String>();
            let worker_sender = sender.clone();
            let log_for_receiver = Arc::clone(&log_for_task);

            let _worker = spawn_worker(
                move || {
                    queue_task(move || {
                        worker_sender
                            .send("worker boot".into())
                            .expect("worker boot send should succeed");
                        worker_sender
                            .send("worker done".into())
                            .expect("worker done send should succeed");
                    });
                },
                || {},
            );
            // Drop the local clone so only the worker's sender keeps the
            // channel open; the recv loop ends when the worker finishes.
            drop(sender);

            queue_future(async move {
                while let Some(message) = receiver.recv().await {
                    log_for_receiver.lock().unwrap().push(message);
                }
            });
        });
        run();

        assert_eq!(
            log.lock().unwrap().as_slice(),
            ["worker boot", "worker done"]
        );
    }

    /// Synchronous API surface: `try_send` on a full channel, `try_recv` on
    /// an empty one, and the error transitions after `close`.
    #[test]
    fn try_send_try_recv_and_close_semantics_work() {
        let (sender, mut receiver) = channel(1);
        sender
            .try_send(1usize)
            .expect("initial send should succeed");
        assert_eq!(sender.try_send(2usize), Err(TrySendError::Full(2)));
        assert_eq!(receiver.try_recv(), Ok(1));
        assert_eq!(receiver.try_recv(), Err(TryRecvError::Empty));
        receiver.close();
        assert!(sender.is_closed(), "sender should observe closed receiver");
        assert_eq!(sender.try_send(3usize), Err(TrySendError::Closed(3)));
        assert_eq!(receiver.try_recv(), Err(TryRecvError::Disconnected));
    }
}
|
||||
281
lib/runtime/src/channel/oneshot.rs
Normal file
281
lib/runtime/src/channel/oneshot.rs
Normal file
@@ -0,0 +1,281 @@
|
||||
use std::future::poll_fn;
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use crate::op::completion::{CompletionFuture, CompletionHandle};
|
||||
use crate::sys::linux::channel::runtime_waiter;
|
||||
|
||||
pub fn channel<T: Send + 'static>() -> (Sender<T>, Receiver<T>) {
|
||||
let shared = Arc::new(Mutex::new(State {
|
||||
value: None,
|
||||
sender_alive: true,
|
||||
receiver_closed: false,
|
||||
waiter: None,
|
||||
}));
|
||||
(
|
||||
Sender {
|
||||
shared: Some(Arc::clone(&shared)),
|
||||
},
|
||||
Receiver {
|
||||
shared,
|
||||
consumed: false,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Sending half of a oneshot channel.
pub struct Sender<T: Send + 'static> {
    // `None` once `send` has consumed the channel (it takes the Arc), which
    // also lets `Drop` skip its disconnect work after a successful send.
    shared: Option<Arc<Mutex<State<T>>>>,
}
|
||||
|
||||
/// Receiving half of a oneshot channel.
pub struct Receiver<T: Send + 'static> {
    shared: Arc<Mutex<State<T>>>,
    // Set once the value (or a terminal error) has been observed; subsequent
    // receive attempts report the channel as closed.
    consumed: bool,
}
|
||||
|
||||
/// Mutex-guarded state shared between the oneshot sender and receiver.
struct State<T: Send + 'static> {
    // Value parked by `send` when no receive operation was waiting.
    value: Option<T>,
    // Cleared when the sender sends or is dropped.
    sender_alive: bool,
    // Set when the receiver calls `close` or is dropped.
    receiver_closed: bool,
    // Completion handle for the single in-flight `recv`, if any; `poll_recv`
    // asserts at most one receive operation waits at a time.
    waiter: Option<CompletionHandle<Result<T, RecvError>>>,
}
|
||||
|
||||
/// Error returned by [`Sender::send`]; hands the unsent value back to the caller.
#[derive(Debug, Eq, PartialEq)]
pub struct SendError<T>(pub T);

/// Error returned by [`Receiver::recv`] when no value will ever arrive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RecvError;

/// Error returned by [`Receiver::try_recv`].
#[derive(Debug, Eq, PartialEq)]
pub enum TryRecvError {
    /// No value yet, but the sender is still alive.
    Empty,
    /// The channel can no longer produce a value.
    Closed,
}
|
||||
|
||||
impl<T: Send + 'static> Sender<T> {
|
||||
pub fn send(mut self, value: T) -> Result<(), SendError<T>> {
|
||||
let Some(shared) = self.shared.take() else {
|
||||
return Err(SendError(value));
|
||||
};
|
||||
|
||||
let waiter = {
|
||||
let mut state = shared.lock().expect("oneshot state should not be poisoned");
|
||||
state.sender_alive = false;
|
||||
if state.receiver_closed {
|
||||
return Err(SendError(value));
|
||||
}
|
||||
|
||||
state.waiter.take()
|
||||
};
|
||||
|
||||
if let Some(waiter) = waiter {
|
||||
waiter.complete(Ok(value));
|
||||
} else {
|
||||
shared
|
||||
.lock()
|
||||
.expect("oneshot state should not be poisoned")
|
||||
.value = Some(value);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_closed(&self) -> bool {
|
||||
self.shared.as_ref().is_none_or(|shared| {
|
||||
shared
|
||||
.lock()
|
||||
.expect("oneshot state should not be poisoned")
|
||||
.receiver_closed
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send + 'static> Receiver<T> {
    /// Waits for the value from the sender.
    ///
    /// Resolves to `Err(RecvError)` if the sender is dropped without sending
    /// or the value was already consumed.
    pub async fn recv(&mut self) -> Result<T, RecvError> {
        // `wait` keeps the in-flight completion future alive across polls.
        let mut wait = None;
        poll_fn(|cx| self.poll_recv(cx, &mut wait)).await
    }

    /// Non-blocking receive: returns the value if one is parked, `Empty`
    /// while the sender is still alive, or `Closed` once no value can arrive.
    pub fn try_recv(&mut self) -> Result<T, TryRecvError> {
        if self.consumed {
            return Err(TryRecvError::Closed);
        }

        let mut state = self
            .shared
            .lock()
            .expect("oneshot state should not be poisoned");
        if let Some(value) = state.value.take() {
            self.consumed = true;
            return Ok(value);
        }

        // No value: terminal if either side has disconnected.
        if state.receiver_closed || !state.sender_alive {
            self.consumed = true;
            Err(TryRecvError::Closed)
        } else {
            Err(TryRecvError::Empty)
        }
    }

    /// Closes the receiving half so subsequent sends fail. A value already
    /// parked by `send` stays retrievable via `try_recv`.
    pub fn close(&mut self) {
        let mut state = self
            .shared
            .lock()
            .expect("oneshot state should not be poisoned");
        state.receiver_closed = true;
    }

    /// True once the channel can no longer carry a value: the receiver
    /// closed or the sender has already sent/dropped.
    pub fn is_closed(&self) -> bool {
        let state = self
            .shared
            .lock()
            .expect("oneshot state should not be poisoned");
        state.receiver_closed || !state.sender_alive
    }

    /// Core polling logic used by `recv`.
    ///
    /// First poll registers a completion waiter (or completes immediately if
    /// a value/terminal state is already present), then recurses once to
    /// poll the freshly created future; later polls just drive that future.
    fn poll_recv(
        &mut self,
        cx: &mut Context<'_>,
        wait: &mut Option<CompletionFuture<Result<T, RecvError>>>,
    ) -> Poll<Result<T, RecvError>> {
        if self.consumed {
            return Poll::Ready(Err(RecvError));
        }

        if let Some(future) = wait.as_mut() {
            match Pin::new(future).poll(cx) {
                Poll::Ready(result) => {
                    // One-shot: once a result is produced the receiver is spent.
                    wait.take();
                    self.consumed = true;
                    Poll::Ready(result)
                }
                Poll::Pending => Poll::Pending,
            }
        } else {
            let (future, handle) = runtime_waiter::<Result<T, RecvError>>();
            let cancel_shared = Arc::clone(&self.shared);
            let cancel_handle = handle.clone();
            // Cancellation (dropped recv future): unhook the waiter so a
            // later `send` does not try to complete a dead handle, then
            // finish the handle with no result. The state lock is dropped
            // before `finish` to avoid re-entering it from the finish path.
            handle.set_cancel(move || {
                let mut state = cancel_shared
                    .lock()
                    .expect("oneshot state should not be poisoned");
                let _ = state.waiter.take();
                drop(state);
                cancel_handle.finish(None);
            });

            let mut immediate = None;
            {
                let mut state = self
                    .shared
                    .lock()
                    .expect("oneshot state should not be poisoned");
                if let Some(value) = state.value.take() {
                    immediate = Some(Ok(value));
                } else if state.receiver_closed || !state.sender_alive {
                    immediate = Some(Err(RecvError));
                } else {
                    assert!(
                        state.waiter.is_none(),
                        "only one oneshot receive operation may wait at a time"
                    );
                    state.waiter = Some(handle.clone());
                }
            }

            // Complete outside the lock so the wakeup path cannot deadlock
            // against the state mutex.
            if let Some(result) = immediate {
                handle.complete(result);
            }

            *wait = Some(future);
            self.poll_recv(cx, wait)
        }
    }
}
|
||||
|
||||
impl<T: Send + 'static> Drop for Sender<T> {
    /// Dropping an unused sender fails any pending receive with `RecvError`.
    fn drop(&mut self) {
        // `shared` is `None` when `send` already consumed the channel.
        let Some(shared) = self.shared.take() else {
            return;
        };

        let waiter = {
            let mut state = shared.lock().expect("oneshot state should not be poisoned");
            if !state.sender_alive {
                return;
            }

            state.sender_alive = false;
            // Only fail the waiter when no value was delivered; if a value
            // is parked the receiver can still pick it up.
            if state.value.is_none() {
                state.waiter.take()
            } else {
                None
            }
        };

        // Complete outside the lock so the wakeup path cannot re-enter the
        // state mutex.
        if let Some(waiter) = waiter {
            waiter.complete(Err(RecvError));
        }
    }
}
|
||||
|
||||
impl<T: Send + 'static> Drop for Receiver<T> {
|
||||
fn drop(&mut self) {
|
||||
let mut state = self
|
||||
.shared
|
||||
.lock()
|
||||
.expect("oneshot state should not be poisoned");
|
||||
state.receiver_closed = true;
|
||||
let _ = state.waiter.take();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};

    use crate::{queue_future, queue_task, run, spawn_worker};

    use super::{TryRecvError, channel};

    /// A value sent from a worker thread must be observed by a receive
    /// running on the runtime thread.
    #[test]
    fn oneshot_cross_thread_round_trip() {
        let result = Arc::new(Mutex::new(None::<usize>));
        let result_for_task = Arc::clone(&result);

        queue_task(move || {
            let (sender, mut receiver) = channel();
            let result_for_task = Arc::clone(&result_for_task);

            // The worker performs the send off the runtime thread.
            let _worker = spawn_worker(
                move || {
                    queue_task(move || {
                        sender.send(42usize).expect("oneshot send should succeed");
                    });
                },
                || {},
            );

            queue_future(async move {
                let value = receiver.recv().await.expect("oneshot recv should succeed");
                *result_for_task.lock().unwrap() = Some(value);
            });
        });
        run();

        assert_eq!(*result.lock().unwrap(), Some(42));
    }

    /// Synchronous API: `try_recv` on an empty channel, then the error
    /// transitions after `close` — sends fail and `try_recv` reports Closed.
    #[test]
    fn oneshot_try_recv_and_close() {
        let (sender, mut receiver) = channel::<usize>();
        assert_eq!(receiver.try_recv(), Err(TryRecvError::Empty));
        receiver.close();
        assert!(
            sender.send(7).is_err(),
            "closed receiver should reject send"
        );
        assert_eq!(receiver.try_recv(), Err(TryRecvError::Closed));
    }
}
|
||||
552
lib/runtime/src/fs.rs
Normal file
552
lib/runtime/src/fs.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
//! Portable async filesystem API.
|
||||
//!
|
||||
//! Cancellation semantics:
|
||||
//! - Dropping an I/O future cancels interest in the result.
|
||||
//! - The runtime issues best-effort kernel cancellation where supported.
|
||||
//! - The underlying OS operation may still complete after the future is dropped.
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::io;
|
||||
use std::os::fd::{AsRawFd, OwnedFd};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::op::fs::{
|
||||
FileType as RawFileType, FsOp, MetadataTarget, OpenOptions as OpOpenOptions,
|
||||
RawDirEntry as OpDirEntry, RawMetadata,
|
||||
};
|
||||
use crate::sys::linux::fs as sys_fs;
|
||||
|
||||
// NOTE(review): `FileInner` is Arc-wrapped below, but no clone of the Arc is
// visible in this file — confirm whether in-flight op futures share it.
struct FileInner {
    fd: OwnedFd,
}

/// Async file handle backed by the runtime's filesystem operations.
pub struct File {
    inner: Arc<FileInner>,
}

/// Builder for configuring how a [`File`] is opened.
pub struct OpenOptions {
    inner: OpOpenOptions,
}

/// File metadata as reported by the platform layer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Metadata {
    inner: RawMetadata,
}

/// Async stream of directory entries, produced by [`read_dir`].
pub struct ReadDir {
    inner: sys_fs::ReadDirStream,
}

/// A single entry yielded by [`ReadDir`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DirEntry {
    inner: OpDirEntry,
}
|
||||
|
||||
impl File {
    /// Opens an existing file read-only.
    pub async fn open(path: impl AsRef<Path>) -> io::Result<Self> {
        OpenOptions::new().read(true).open(path).await
    }

    /// Creates (or truncates) a file and opens it for writing.
    pub async fn create(path: impl AsRef<Path>) -> io::Result<Self> {
        OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)
            .await
    }

    /// Reads at the file's current position; returns the number of bytes
    /// read (0 at end of file).
    pub async fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.read_impl(None, buf).await
    }

    /// Reads until `buf` is completely filled.
    ///
    /// # Errors
    /// `UnexpectedEof` if the file ends before the buffer is full.
    pub async fn read_exact(&mut self, mut buf: &mut [u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let read = self.read(buf).await?;
            if read == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            // Advance past the bytes already read.
            buf = &mut buf[read..];
        }
        Ok(())
    }

    /// Writes at the file's current position; returns the number of bytes
    /// accepted.
    pub async fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.write_impl(None, buf).await
    }

    /// Writes the entire buffer.
    ///
    /// # Errors
    /// `WriteZero` if a write makes no progress.
    pub async fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let written = self.write(buf).await?;
            if written == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            buf = &buf[written..];
        }
        Ok(())
    }

    /// No-op: writes go straight to the kernel, so there is no user-space
    /// buffer to flush. Use `sync_all`/`sync_data` for durability.
    pub async fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }

    /// Flushes file data and metadata to the storage device.
    pub async fn sync_all(&self) -> io::Result<()> {
        sys_fs::sync_all(FsOp::SyncAll { fd: self.raw_fd() }).await
    }

    /// Flushes file data (not necessarily all metadata) to the storage device.
    pub async fn sync_data(&self) -> io::Result<()> {
        sys_fs::sync_data(FsOp::SyncData { fd: self.raw_fd() }).await
    }

    /// Positioned read at `offset`; does not move the file cursor.
    pub async fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<usize> {
        self.read_impl(Some(offset), buf).await
    }

    /// Positioned read that fills `buf` completely, advancing `offset`
    /// locally between retries.
    pub async fn read_exact_at(&self, mut offset: u64, mut buf: &mut [u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let read = self.read_at(offset, buf).await?;
            if read == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            offset = offset.saturating_add(read as u64);
            buf = &mut buf[read..];
        }
        Ok(())
    }

    /// Positioned write at `offset`; does not move the file cursor.
    pub async fn write_at(&self, offset: u64, buf: &[u8]) -> io::Result<usize> {
        self.write_impl(Some(offset), buf).await
    }

    /// Positioned write of the entire buffer, advancing `offset` locally
    /// between retries.
    pub async fn write_all_at(&self, mut offset: u64, mut buf: &[u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let written = self.write_at(offset, buf).await?;
            if written == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            offset = offset.saturating_add(written as u64);
            buf = &buf[written..];
        }
        Ok(())
    }

    /// Metadata for this open file (fstat-style, by descriptor).
    pub async fn metadata(&self) -> io::Result<Metadata> {
        sys_fs::metadata(FsOp::Metadata {
            target: MetadataTarget::File(self.raw_fd()),
            follow_symlinks: true,
        })
        .await
        .map(Metadata::from_raw)
    }

    /// Truncates or extends the file to exactly `len` bytes.
    pub async fn set_len(&self, len: u64) -> io::Result<()> {
        sys_fs::set_len(FsOp::SetLen {
            fd: self.raw_fd(),
            len,
        })
        .await
    }

    /// Duplicates the underlying descriptor into an independent `File`.
    pub async fn try_clone(&self) -> io::Result<Self> {
        sys_fs::try_clone(FsOp::Duplicate { fd: self.raw_fd() })
            .await
            .map(File::from_owned_fd)
    }

    // Wraps a freshly acquired descriptor in a `File`.
    fn from_owned_fd(fd: OwnedFd) -> Self {
        Self {
            inner: Arc::new(FileInner { fd }),
        }
    }

    // Raw descriptor handed to the op layer; ownership stays with `inner`.
    fn raw_fd(&self) -> i32 {
        self.inner.fd.as_raw_fd()
    }

    // Shared read path: `offset == None` reads at the cursor, `Some` is
    // positioned. The op layer returns an owned buffer which is copied into
    // the caller's slice.
    async fn read_impl(&self, offset: Option<u64>, buf: &mut [u8]) -> io::Result<usize> {
        let data = sys_fs::read(FsOp::Read {
            fd: self.raw_fd(),
            offset,
            len: buf.len(),
        })
        .await?;

        let read = data.len();
        buf[..read].copy_from_slice(&data);
        Ok(read)
    }

    // Shared write path; the data is copied into an owned buffer so the op
    // can outlive the caller's borrow.
    async fn write_impl(&self, offset: Option<u64>, buf: &[u8]) -> io::Result<usize> {
        sys_fs::write(FsOp::Write {
            fd: self.raw_fd(),
            offset,
            data: buf.to_vec(),
        })
        .await
    }
}
|
||||
|
||||
impl OpenOptions {
    /// Starts a new set of options with every flag disabled.
    pub fn new() -> Self {
        Self {
            inner: OpOpenOptions::default(),
        }
    }

    /// Open for reading.
    pub fn read(&mut self, value: bool) -> &mut Self {
        self.inner.read = value;
        self
    }

    /// Open for writing.
    pub fn write(&mut self, value: bool) -> &mut Self {
        self.inner.write = value;
        self
    }

    /// Append on write instead of overwriting.
    pub fn append(&mut self, value: bool) -> &mut Self {
        self.inner.append = value;
        self
    }

    /// Truncate an existing file to zero length on open.
    pub fn truncate(&mut self, value: bool) -> &mut Self {
        self.inner.truncate = value;
        self
    }

    /// Create the file if it does not exist.
    pub fn create(&mut self, value: bool) -> &mut Self {
        self.inner.create = value;
        self
    }

    /// Create the file, failing if it already exists.
    pub fn create_new(&mut self, value: bool) -> &mut Self {
        self.inner.create_new = value;
        self
    }

    /// Opens the file at `path` with the configured options.
    pub async fn open(&self, path: impl AsRef<Path>) -> io::Result<File> {
        sys_fs::open(FsOp::Open {
            path: path.as_ref().to_path_buf(),
            options: self.inner.clone(),
        })
        .await
        .map(File::from_owned_fd)
    }
}
|
||||
|
||||
impl Default for OpenOptions {
    /// Same as [`OpenOptions::new`]: all flags disabled.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl Metadata {
|
||||
fn from_raw(inner: RawMetadata) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u64 {
|
||||
self.inner.len
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn is_file(&self) -> bool {
|
||||
self.inner.file_type == RawFileType::File
|
||||
}
|
||||
|
||||
pub fn is_dir(&self) -> bool {
|
||||
self.inner.file_type == RawFileType::Directory
|
||||
}
|
||||
|
||||
pub fn is_symlink(&self) -> bool {
|
||||
self.inner.file_type == RawFileType::Symlink
|
||||
}
|
||||
|
||||
pub fn mode(&self) -> u16 {
|
||||
self.inner.mode
|
||||
}
|
||||
}
|
||||
|
||||
impl ReadDir {
    /// Returns the next directory entry, or `Ok(None)` when the stream is
    /// exhausted.
    pub async fn next_entry(&mut self) -> io::Result<Option<DirEntry>> {
        self.inner
            .next_entry()
            .await
            .map(|entry| entry.map(|inner| DirEntry { inner }))
    }
}
|
||||
|
||||
impl DirEntry {
    /// Full path of this entry (owned copy).
    pub fn path(&self) -> PathBuf {
        self.inner.path.clone()
    }

    /// Bare file name of this entry, without the parent directory.
    pub fn file_name(&self) -> &OsStr {
        self.inner.file_name.as_os_str()
    }

    /// Metadata for this entry, looked up by path (not by descriptor).
    pub async fn metadata(&self) -> io::Result<Metadata> {
        metadata(self.path()).await
    }
}
|
||||
|
||||
pub async fn read(path: impl AsRef<Path>) -> io::Result<Vec<u8>> {
|
||||
let mut file = File::open(path.as_ref()).await?;
|
||||
let mut output = Vec::new();
|
||||
let mut chunk = vec![0; 8192];
|
||||
|
||||
loop {
|
||||
let read = file.read(&mut chunk).await?;
|
||||
if read == 0 {
|
||||
return Ok(output);
|
||||
}
|
||||
output.extend_from_slice(&chunk[..read]);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_to_string(path: impl AsRef<Path>) -> io::Result<String> {
|
||||
let bytes = read(path).await?;
|
||||
String::from_utf8(bytes).map_err(|error| io::Error::new(io::ErrorKind::InvalidData, error))
|
||||
}
|
||||
|
||||
pub async fn write(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> io::Result<()> {
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(path)
|
||||
.await?;
|
||||
file.write_all(data.as_ref()).await
|
||||
}
|
||||
|
||||
pub async fn metadata(path: impl AsRef<Path>) -> io::Result<Metadata> {
|
||||
sys_fs::metadata(FsOp::Metadata {
|
||||
target: MetadataTarget::Path(path.as_ref().to_path_buf()),
|
||||
follow_symlinks: true,
|
||||
})
|
||||
.await
|
||||
.map(Metadata::from_raw)
|
||||
}
|
||||
|
||||
pub async fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
sys_fs::create_dir(FsOp::CreateDir {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
recursive: false,
|
||||
mode: 0o777,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
let path = path.as_ref();
|
||||
let mut current = PathBuf::new();
|
||||
|
||||
for component in path.components() {
|
||||
current.push(component.as_os_str());
|
||||
if current.as_os_str().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
match create_dir(¤t).await {
|
||||
Ok(()) => {}
|
||||
Err(error) if error.kind() == io::ErrorKind::AlreadyExists => {}
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn remove_file(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
sys_fs::remove_file(FsOp::RemoveFile {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn remove_dir(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
sys_fs::remove_dir(FsOp::RemoveDir {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn rename(from: impl AsRef<Path>, to: impl AsRef<Path>) -> io::Result<()> {
|
||||
sys_fs::rename(FsOp::Rename {
|
||||
from: from.as_ref().to_path_buf(),
|
||||
to: to.as_ref().to_path_buf(),
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn read_dir(path: impl AsRef<Path>) -> io::Result<ReadDir> {
|
||||
sys_fs::read_dir(FsOp::ReadDir {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
})
|
||||
.map(|inner| ReadDir { inner })
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{
        OpenOptions, create_dir_all, metadata, read, read_dir, read_to_string, remove_dir,
        remove_file, rename, write,
    };
    use crate::queue_future;
    use crate::{queue_task, run};
    use std::collections::BTreeSet;
    use std::ffi::OsString;
    use std::path::PathBuf;
    use std::process;
    use std::sync::Arc;
    use std::sync::Mutex;
    use std::sync::OnceLock;
    use std::time::{SystemTime, UNIX_EPOCH};

    // Serializes the tests in this module, since each drives the shared
    // runtime with `run()`.
    fn test_lock() -> &'static Mutex<()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
    }

    // Collision-resistant temp path: label + PID + nanosecond timestamp.
    fn unique_path(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time should be after epoch")
            .as_nanos();
        std::env::temp_dir().join(format!("ruin-runtime-{label}-{}-{nanos}", process::id()))
    }

    /// End-to-end exercise of the fs API: create dirs, write, positioned
    /// read/write, sync, metadata, clone, truncate, rename, read back, and
    /// clean up.
    #[test]
    fn async_fs_round_trip() {
        let _guard = test_lock().lock().unwrap();
        let root = unique_path("fs-round-trip");
        let nested = root.join("nested");
        let file_path = nested.join("hello.txt");
        let renamed_path = nested.join("renamed.txt");
        let output = Arc::new(Mutex::new(None::<String>));

        {
            let output = Arc::clone(&output);
            queue_task(move || {
                queue_future(async move {
                    create_dir_all(&nested)
                        .await
                        .expect("dir creation should succeed");
                    write(&file_path, b"hello world")
                        .await
                        .expect("initial write should succeed");

                    let file = OpenOptions::new()
                        .read(true)
                        .write(true)
                        .open(&file_path)
                        .await
                        .expect("open should succeed");
                    // Overwrite "world" with "runtime" at offset 6.
                    file.write_at(6, b"runtime")
                        .await
                        .expect("positioned write should succeed");
                    file.sync_all().await.expect("sync should succeed");

                    let mut prefix = [0u8; 5];
                    file.read_exact_at(0, &mut prefix)
                        .await
                        .expect("positioned read should succeed");
                    assert_eq!(&prefix, b"hello");

                    let meta = file.metadata().await.expect("metadata should succeed");
                    assert!(meta.is_file());
                    assert!(meta.len() >= 13);

                    // A cloned handle must operate on the same file.
                    let cloned = file.try_clone().await.expect("clone should succeed");
                    cloned.set_len(13).await.expect("truncate should succeed");

                    rename(&file_path, &renamed_path)
                        .await
                        .expect("rename should succeed");
                    let text = read_to_string(&renamed_path)
                        .await
                        .expect("read_to_string should succeed");
                    assert_eq!(text, "hello runtime");

                    let bytes = read(&renamed_path).await.expect("read should succeed");
                    assert_eq!(bytes, b"hello runtime");

                    let path_meta = metadata(&renamed_path)
                        .await
                        .expect("path metadata should work");
                    assert!(path_meta.is_file());

                    *output.lock().unwrap() = Some(text);

                    remove_file(&renamed_path)
                        .await
                        .expect("remove_file should succeed");
                    remove_dir(&nested)
                        .await
                        .expect("remove nested dir should succeed");
                    remove_dir(&root)
                        .await
                        .expect("remove root dir should succeed");
                });
            });
        }

        run();

        assert_eq!(output.lock().unwrap().as_deref(), Some("hello runtime"));
    }

    /// `read_dir` must yield every entry created in a fresh directory.
    #[test]
    fn async_read_dir_streams_entries() {
        let _guard = test_lock().lock().unwrap();
        let root = unique_path("fs-read-dir");
        let one = root.join("one.txt");
        let two = root.join("two.txt");
        let seen: Arc<Mutex<BTreeSet<OsString>>> = Arc::new(Mutex::new(BTreeSet::new()));

        {
            let seen = Arc::clone(&seen);
            queue_task(move || {
                queue_future(async move {
                    create_dir_all(&root)
                        .await
                        .expect("dir creation should succeed");
                    write(&one, b"1").await.expect("write one should succeed");
                    write(&two, b"2").await.expect("write two should succeed");

                    let mut dir = read_dir(&root).await.expect("read_dir should succeed");
                    while let Some(entry) = dir.next_entry().await.expect("stream should succeed") {
                        seen.lock()
                            .unwrap()
                            .insert(entry.file_name().to_os_string());
                    }

                    remove_file(&one).await.expect("remove one should succeed");
                    remove_file(&two).await.expect("remove two should succeed");
                    remove_dir(&root).await.expect("remove root should succeed");
                });
            });
        }

        run();

        let seen = seen.lock().unwrap();
        assert!(seen.contains(&OsString::from("one.txt")));
        assert!(seen.contains(&OsString::from("two.txt")));
    }
}
|
||||
78
lib/runtime/src/lib.rs
Normal file
78
lib/runtime/src/lib.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
//! RUIN runtime foundations.
|
||||
//!
|
||||
//! This crate provides a Linux x86_64 runtime substrate: the mesh allocator, the reactor, and a
|
||||
//! single-threaded runtime loop with worker-thread task forwarding.
|
||||
|
||||
#![feature(thread_local)]
|
||||
|
||||
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
|
||||
compile_error!("ruin-runtime currently supports only Linux x86_64.");
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
pub mod channel;
|
||||
pub mod fs;
|
||||
pub mod net;
|
||||
pub mod op;
|
||||
pub mod platform;
|
||||
pub mod sys;
|
||||
pub mod time;
|
||||
|
||||
pub use ruin_runtime_proc_macros::{async_main, main};
|
||||
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub use platform::linux_x86_64::mesh_alloc::{
|
||||
ActiveMeshGuard, Arena, AtomicBitmap, BitIter, CLASS_TO_SIZE, CompactionAdvice,
|
||||
CompactionEstimate, CompactionRecommendation, CompactionSkipReason,
|
||||
DEFAULT_GLOBAL_MINIHEAP_CAPACITY, FutexMutex, GlobalMeshAllocator, MeshAllocator, MeshStats,
|
||||
MiniHeap, MiniHeapFlags, MiniHeapId, Mwc, Mwc64, NUM_SIZE_CLASSES, PageConfig, PlatformHooks,
|
||||
PlatformInstallError, RelaxedBitmap, RuntimeCompactionPolicy, RuntimeCompactionResult,
|
||||
ShuffleEntry, ShuffleVector, Span, ThreadLocalHeap, byte_size_for_class,
|
||||
ensure_fault_mediation_installed, install_platform_hooks, ok_to_proceed, page_count,
|
||||
page_shift, page_size, retry_on_efault, retry_on_efault_ptrs, round_up_to_page,
|
||||
runtime_slots_per_span, size_class_for,
|
||||
};
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub use platform::linux_x86_64::mesh_alloc::{FreelistId, bitmaps_meshable};
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub use platform::linux_x86_64::reactor::{
|
||||
Reactor, ReadyEvents, ThreadNotifier, create, create_reactor, monotonic_now,
|
||||
};
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub use platform::linux_x86_64::runtime::{
|
||||
IntervalHandle, JoinHandle, ThreadHandle, TimeoutHandle, WorkerHandle, clear_interval,
|
||||
clear_timeout, current_thread_handle, queue_future, queue_microtask, queue_task, run,
|
||||
set_interval, set_timeout, spawn_worker, yield_now,
|
||||
};
|
||||
|
||||
/// Convenience constructor for a [`GlobalMeshAllocator`] with the default
/// configuration. `const`, so it can initialize a `static`.
pub const fn default_global_allocator() -> GlobalMeshAllocator {
    GlobalMeshAllocator::with_default_config()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{MeshAllocator, page_size};

    /// Smoke test: one small and one multi-page allocation can be obtained,
    /// fully written, and returned.
    #[test]
    fn mesh_allocator_smoke_test() {
        let mut allocator =
            MeshAllocator::new(page_size() * 1024, 256).expect("allocator should initialize");

        let small = allocator
            .allocate(64)
            .expect("small allocation should succeed");
        // SAFETY: `small` was just returned by `allocate(64)`, so it points
        // to at least 64 writable bytes owned by the allocator.
        unsafe {
            small.write_bytes(0xAB, 64);
        }
        allocator.deallocate(small);

        let large_size = page_size() * 2;
        let large = allocator
            .allocate(large_size)
            .expect("large allocation should succeed");
        // SAFETY: `large` was just returned by `allocate(large_size)`, so it
        // points to at least `large_size` writable bytes.
        unsafe {
            large.write_bytes(0xCD, large_size);
        }
        allocator.deallocate(large);
    }
}
|
||||
963
lib/runtime/src/net.rs
Normal file
963
lib/runtime/src/net.rs
Normal file
@@ -0,0 +1,963 @@
|
||||
//! Portable async networking API.
|
||||
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::net::{Shutdown, SocketAddr, ToSocketAddrs};
|
||||
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
|
||||
use hyper::rt::{Read as HyperRead, ReadBufCursor, Write as HyperWrite};
|
||||
|
||||
use crate::op::net::NetOp;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TcpStreamInner {
|
||||
fd: OwnedFd,
|
||||
timeouts: Mutex<SocketTimeouts>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TcpListenerInner {
|
||||
fd: OwnedFd,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct UdpSocketInner {
|
||||
fd: OwnedFd,
|
||||
timeouts: Mutex<SocketTimeouts>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct SocketTimeouts {
|
||||
read: Option<Duration>,
|
||||
write: Option<Duration>,
|
||||
}
|
||||
|
||||
// In-flight backend operations held across polls by the hyper `Read`/`Write`
// adapters below. Boxed+pinned because the concrete backend future types are
// not nameable here.
type PendingRead = Pin<Box<dyn Future<Output = io::Result<Vec<u8>>> + 'static>>;
type PendingWrite = Pin<Box<dyn Future<Output = io::Result<usize>> + 'static>>;
type PendingShutdown = Pin<Box<dyn Future<Output = io::Result<()>> + 'static>>;
|
||||
|
||||
/// Async TCP connection. Clone-free by design; `try_clone` duplicates the fd.
/// The `pending_*` slots keep a backend future alive between `poll_*` calls
/// when the stream is driven through hyper's `Read`/`Write` traits.
pub struct TcpStream {
    inner: Arc<TcpStreamInner>,
    pending_read: Option<PendingRead>,
    pending_write: Option<PendingWrite>,
    pending_shutdown: Option<PendingShutdown>,
}
|
||||
|
||||
/// Async TCP listener. Cheap to clone: all clones share the same fd.
#[derive(Clone, Debug)]
pub struct TcpListener {
    inner: Arc<TcpListenerInner>,
}
|
||||
|
||||
/// Async UDP socket with optional soft read/write timeouts.
#[derive(Debug)]
pub struct UdpSocket {
    inner: Arc<UdpSocketInner>,
}
|
||||
|
||||
impl TcpStream {
    /// Resolves `addr` and tries each resolved endpoint in order, returning
    /// the first successful connection.
    ///
    /// # Errors
    /// Returns the last connect error, or `AddrNotAvailable` when resolution
    /// yields no endpoints at all.
    pub async fn connect<A>(addr: A) -> io::Result<Self>
    where
        A: ToSocketAddrs + Send + 'static,
    {
        let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
        let mut last_error = None;
        for addr in addrs {
            match crate::sys::linux::net::connect_stream(addr).await {
                Ok(fd) => return Ok(Self::from_owned_fd(fd)),
                Err(error) => last_error = Some(error),
            }
        }

        Err(last_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::AddrNotAvailable,
                "address resolution returned no usable TCP endpoints",
            )
        }))
    }

    /// Connects to a single, already-resolved address with an upper bound on
    /// how long the connect may take. Zero timeouts are rejected up front.
    pub async fn connect_timeout(addr: &SocketAddr, timeout: Duration) -> io::Result<Self> {
        validate_timeout(timeout)?;
        crate::sys::linux::net::connect_stream_timeout(*addr, timeout)
            .await
            .map(Self::from_owned_fd)
    }

    /// Reads up to `buf.len()` bytes; returns the number of bytes read
    /// (0 means the peer closed the connection). Honors the configured
    /// read timeout, if any.
    pub async fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // The backend hands back an owned buffer sized at most `buf.len()`,
        // which is then copied into the caller's slice.
        let data = match self.read_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::recv_timeout(self.raw_fd(), buf.len(), 0, timeout).await?
            }
            None => {
                crate::sys::linux::net::recv(NetOp::Recv {
                    fd: self.raw_fd(),
                    len: buf.len(),
                    flags: 0,
                })
                .await?
            }
        };
        let read = data.len();
        buf[..read].copy_from_slice(&data);
        Ok(read)
    }

    /// Reads until `buf` is completely filled.
    ///
    /// # Errors
    /// `UnexpectedEof` if the peer closes before `buf` is full.
    pub async fn read_exact(&mut self, mut buf: &mut [u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let read = self.read(buf).await?;
            if read == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            buf = &mut buf[read..];
        }
        Ok(())
    }

    /// Writes up to `buf.len()` bytes, returning how many were accepted.
    /// Honors the configured write timeout, if any.
    pub async fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        match self.write_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::send_timeout(self.raw_fd(), buf.to_vec(), 0, timeout).await
            }
            None => {
                crate::sys::linux::net::send(NetOp::Send {
                    fd: self.raw_fd(),
                    data: buf.to_vec(),
                    flags: 0,
                })
                .await
            }
        }
    }

    /// Writes the entire buffer.
    ///
    /// # Errors
    /// `WriteZero` if a write makes no progress.
    pub async fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> {
        while !buf.is_empty() {
            let written = self.write(buf).await?;
            if written == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            buf = &buf[written..];
        }
        Ok(())
    }

    /// Shuts down the read half, write half, or both, like
    /// `std::net::TcpStream::shutdown`.
    pub async fn shutdown(&self, how: Shutdown) -> io::Result<()> {
        crate::sys::linux::net::shutdown(NetOp::Shutdown {
            fd: self.raw_fd(),
            how,
        })
        .await
    }

    /// Duplicates the underlying fd into an independent `TcpStream`.
    /// NOTE: the clone starts with default (no) timeouts; timeouts are
    /// stored runtime-side, not on the fd.
    pub async fn try_clone(&self) -> io::Result<Self> {
        crate::sys::linux::net::duplicate(self.raw_fd())
            .await
            .map(Self::from_owned_fd)
    }

    /// Local address of this socket.
    pub fn local_addr(&self) -> io::Result<SocketAddr> {
        crate::sys::linux::net::local_addr(self.raw_fd())
    }

    /// Remote peer address of this socket.
    pub fn peer_addr(&self) -> io::Result<SocketAddr> {
        crate::sys::linux::net::peer_addr(self.raw_fd())
    }

    /// Current TCP_NODELAY setting.
    pub fn nodelay(&self) -> io::Result<bool> {
        crate::sys::linux::net::nodelay(self.raw_fd())
    }

    /// Enables or disables TCP_NODELAY (Nagle's algorithm off/on).
    pub fn set_nodelay(&self, enabled: bool) -> io::Result<()> {
        crate::sys::linux::net::set_nodelay(self.raw_fd(), enabled)
    }

    /// Current IP TTL for outgoing packets.
    pub fn ttl(&self) -> io::Result<u32> {
        crate::sys::linux::net::ttl(self.raw_fd())
    }

    /// Sets the IP TTL for outgoing packets.
    pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
        crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
    }

    /// Configured read timeout (`None` = block indefinitely).
    pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
        Ok(self.read_timeout_value())
    }

    /// Sets the read timeout; rejects `Some(Duration::ZERO)` like std.
    pub fn set_read_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
        validate_optional_timeout(timeout)?;
        self.inner.timeouts.lock().unwrap().read = timeout;
        Ok(())
    }

    /// Configured write timeout (`None` = block indefinitely).
    pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
        Ok(self.write_timeout_value())
    }

    /// Sets the write timeout; rejects `Some(Duration::ZERO)` like std.
    pub fn set_write_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
        validate_optional_timeout(timeout)?;
        self.inner.timeouts.lock().unwrap().write = timeout;
        Ok(())
    }

    // Wraps an already-connected fd with default timeouts and no
    // pending hyper-adapter operations.
    fn from_owned_fd(fd: OwnedFd) -> Self {
        Self {
            inner: Arc::new(TcpStreamInner {
                fd,
                timeouts: Mutex::new(SocketTimeouts::default()),
            }),
            pending_read: None,
            pending_write: None,
            pending_shutdown: None,
        }
    }

    // Raw fd handed to the backend; ownership stays with `inner`.
    fn raw_fd(&self) -> RawFd {
        self.inner.fd.as_raw_fd()
    }

    fn read_timeout_value(&self) -> Option<Duration> {
        self.inner.timeouts.lock().unwrap().read
    }

    fn write_timeout_value(&self) -> Option<Duration> {
        self.inner.timeouts.lock().unwrap().write
    }
}
|
||||
|
||||
impl TcpListener {
    /// Resolves `addr` and binds a listener on the first endpoint that works.
    ///
    /// # Errors
    /// Returns the last bind error, or `AddrNotAvailable` when resolution
    /// yields no endpoints.
    pub async fn bind<A>(addr: A) -> io::Result<Self>
    where
        A: ToSocketAddrs + Send + 'static,
    {
        let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
        let mut last_error = None;
        for addr in addrs {
            // `None` backlog — the backend picks its default backlog size.
            match crate::sys::linux::net::bind_listener(addr, None).await {
                Ok(fd) => return Ok(Self::from_owned_fd(fd)),
                Err(error) => last_error = Some(error),
            }
        }

        Err(last_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::AddrNotAvailable,
                "address resolution returned no usable listener endpoints",
            )
        }))
    }

    /// Waits for an incoming connection; returns the accepted stream and the
    /// peer's address.
    pub async fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> {
        let accepted = crate::sys::linux::net::accept(NetOp::Accept { fd: self.raw_fd() }).await?;

        // SAFETY: `accepted.fd` is a freshly accepted descriptor owned by no
        // one else; wrapping it in OwnedFd takes sole ownership.
        let stream = TcpStream::from_owned_fd(unsafe { OwnedFd::from_raw_fd(accepted.fd) });
        Ok((stream, accepted.peer_addr))
    }

    /// Local address the listener is bound to (useful after binding port 0).
    pub fn local_addr(&self) -> io::Result<SocketAddr> {
        crate::sys::linux::net::local_addr(self.raw_fd())
    }

    /// Current IP TTL applied to accepted connections' outgoing packets.
    pub fn ttl(&self) -> io::Result<u32> {
        crate::sys::linux::net::ttl(self.raw_fd())
    }

    /// Sets the IP TTL on the listening socket.
    pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
        crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
    }

    fn from_owned_fd(fd: OwnedFd) -> Self {
        Self {
            inner: Arc::new(TcpListenerInner { fd }),
        }
    }

    fn raw_fd(&self) -> RawFd {
        self.inner.fd.as_raw_fd()
    }
}
|
||||
|
||||
impl UdpSocket {
    /// Resolves `addr` and binds a datagram socket on the first endpoint
    /// that works.
    pub async fn bind<A>(addr: A) -> io::Result<Self>
    where
        A: ToSocketAddrs + Send + 'static,
    {
        let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
        let mut last_error = None;
        for addr in addrs {
            match crate::sys::linux::net::bind_datagram(addr).await {
                Ok(fd) => return Ok(Self::from_owned_fd(fd)),
                Err(error) => last_error = Some(error),
            }
        }

        Err(last_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::AddrNotAvailable,
                "address resolution returned no usable UDP endpoints",
            )
        }))
    }

    /// Sets the default peer for `send`/`recv` (UDP "connect"): resolves
    /// `addr` and connects to the first endpoint that works.
    pub async fn connect<A>(&self, addr: A) -> io::Result<()>
    where
        A: ToSocketAddrs + Send + 'static,
    {
        let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
        let mut last_error = None;
        for addr in addrs {
            match crate::sys::linux::net::connect(NetOp::Connect {
                fd: self.raw_fd(),
                addr,
            })
            .await
            {
                Ok(()) => return Ok(()),
                Err(error) => last_error = Some(error),
            }
        }

        Err(last_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::AddrNotAvailable,
                "address resolution returned no usable UDP peers",
            )
        }))
    }

    /// Sends a datagram to the connected peer; honors the write timeout.
    pub async fn send(&self, buf: &[u8]) -> io::Result<usize> {
        match self.write_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::send_timeout(self.raw_fd(), buf.to_vec(), 0, timeout).await
            }
            None => {
                crate::sys::linux::net::send(NetOp::Send {
                    fd: self.raw_fd(),
                    data: buf.to_vec(),
                    flags: 0,
                })
                .await
            }
        }
    }

    /// Receives a datagram from the connected peer into `buf`; returns the
    /// number of bytes received. Honors the read timeout.
    pub async fn recv(&self, buf: &mut [u8]) -> io::Result<usize> {
        let data = match self.read_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::recv_timeout(self.raw_fd(), buf.len(), 0, timeout).await?
            }
            None => {
                crate::sys::linux::net::recv(NetOp::Recv {
                    fd: self.raw_fd(),
                    len: buf.len(),
                    flags: 0,
                })
                .await?
            }
        };
        let read = data.len();
        buf[..read].copy_from_slice(&data);
        Ok(read)
    }

    /// Like `recv`, but leaves the datagram queued (MSG_PEEK).
    pub async fn peek(&self, buf: &mut [u8]) -> io::Result<usize> {
        let data = match self.read_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::recv_timeout(
                    self.raw_fd(),
                    buf.len(),
                    libc::MSG_PEEK,
                    timeout,
                )
                .await?
            }
            None => {
                crate::sys::linux::net::recv(NetOp::Recv {
                    fd: self.raw_fd(),
                    len: buf.len(),
                    flags: libc::MSG_PEEK,
                })
                .await?
            }
        };
        let read = data.len();
        buf[..read].copy_from_slice(&data);
        Ok(read)
    }

    /// Resolves `addr` and sends the datagram to the first destination that
    /// accepts it. Honors the write timeout.
    pub async fn send_to<A>(&self, buf: &[u8], addr: A) -> io::Result<usize>
    where
        A: ToSocketAddrs + Send + 'static,
    {
        let addrs = crate::sys::linux::net::resolve_addrs(addr).await?;
        let mut last_error = None;
        // Snapshot the timeout once; resolution may yield several targets.
        let timeout = self.write_timeout_value();
        for addr in addrs {
            let result = match timeout {
                Some(timeout) => {
                    crate::sys::linux::net::send_to_timeout(
                        self.raw_fd(),
                        buf.to_vec(),
                        addr,
                        0,
                        timeout,
                    )
                    .await
                }
                None => {
                    crate::sys::linux::net::send_to(NetOp::SendTo {
                        fd: self.raw_fd(),
                        target: addr,
                        data: buf.to_vec(),
                        flags: 0,
                    })
                    .await
                }
            };
            match result {
                Ok(sent) => return Ok(sent),
                Err(error) => last_error = Some(error),
            }
        }

        Err(last_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::AddrNotAvailable,
                "address resolution returned no usable UDP destinations",
            )
        }))
    }

    /// Receives one datagram; returns bytes received and the sender address.
    pub async fn recv_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
        let datagram = match self.read_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::recv_from_timeout(self.raw_fd(), buf.len(), 0, timeout)
                    .await?
            }
            None => {
                crate::sys::linux::net::recv_from(NetOp::RecvFrom {
                    fd: self.raw_fd(),
                    len: buf.len(),
                    flags: 0,
                })
                .await?
            }
        };
        let read = datagram.data.len();
        buf[..read].copy_from_slice(&datagram.data);
        Ok((read, datagram.peer_addr))
    }

    /// Like `recv_from`, but leaves the datagram queued (MSG_PEEK).
    pub async fn peek_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
        let datagram = match self.read_timeout_value() {
            Some(timeout) => {
                crate::sys::linux::net::recv_from_timeout(
                    self.raw_fd(),
                    buf.len(),
                    libc::MSG_PEEK,
                    timeout,
                )
                .await?
            }
            None => {
                crate::sys::linux::net::recv_from(NetOp::RecvFrom {
                    fd: self.raw_fd(),
                    len: buf.len(),
                    flags: libc::MSG_PEEK,
                })
                .await?
            }
        };
        let read = datagram.data.len();
        buf[..read].copy_from_slice(&datagram.data);
        Ok((read, datagram.peer_addr))
    }

    /// Duplicates the underlying fd into an independent `UdpSocket`.
    /// NOTE: the clone starts with default (no) timeouts.
    pub async fn try_clone(&self) -> io::Result<Self> {
        crate::sys::linux::net::duplicate(self.raw_fd())
            .await
            .map(Self::from_owned_fd)
    }

    /// Local address of this socket.
    pub fn local_addr(&self) -> io::Result<SocketAddr> {
        crate::sys::linux::net::local_addr(self.raw_fd())
    }

    /// Connected peer address (errors if not connected — backend-defined).
    pub fn peer_addr(&self) -> io::Result<SocketAddr> {
        crate::sys::linux::net::peer_addr(self.raw_fd())
    }

    /// Current SO_BROADCAST setting.
    pub fn broadcast(&self) -> io::Result<bool> {
        crate::sys::linux::net::broadcast(self.raw_fd())
    }

    /// Enables or disables SO_BROADCAST.
    pub fn set_broadcast(&self, enabled: bool) -> io::Result<()> {
        crate::sys::linux::net::set_broadcast(self.raw_fd(), enabled)
    }

    /// Current IP TTL for outgoing datagrams.
    pub fn ttl(&self) -> io::Result<u32> {
        crate::sys::linux::net::ttl(self.raw_fd())
    }

    /// Sets the IP TTL for outgoing datagrams.
    pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
        crate::sys::linux::net::set_ttl(self.raw_fd(), ttl)
    }

    /// Configured read timeout (`None` = block indefinitely).
    pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
        Ok(self.read_timeout_value())
    }

    /// Sets the read timeout; rejects `Some(Duration::ZERO)` like std.
    pub fn set_read_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
        validate_optional_timeout(timeout)?;
        self.inner.timeouts.lock().unwrap().read = timeout;
        Ok(())
    }

    /// Configured write timeout (`None` = block indefinitely).
    pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
        Ok(self.write_timeout_value())
    }

    /// Sets the write timeout; rejects `Some(Duration::ZERO)` like std.
    pub fn set_write_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
        validate_optional_timeout(timeout)?;
        self.inner.timeouts.lock().unwrap().write = timeout;
        Ok(())
    }

    fn from_owned_fd(fd: OwnedFd) -> Self {
        Self {
            inner: Arc::new(UdpSocketInner {
                fd,
                timeouts: Mutex::new(SocketTimeouts::default()),
            }),
        }
    }

    fn raw_fd(&self) -> RawFd {
        self.inner.fd.as_raw_fd()
    }

    fn read_timeout_value(&self) -> Option<Duration> {
        self.inner.timeouts.lock().unwrap().read
    }

    fn write_timeout_value(&self) -> Option<Duration> {
        self.inner.timeouts.lock().unwrap().write
    }
}
|
||||
|
||||
// hyper integration: adapts the future-per-operation backend to hyper's
// poll-based Read trait by stashing the in-flight future in `pending_read`.
impl HyperRead for TcpStream {
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        mut buf: ReadBufCursor<'_>,
    ) -> Poll<Result<(), io::Error>> {
        let this = self.get_mut();
        if buf.remaining() == 0 {
            return Poll::Ready(Ok(()));
        }

        // Start a new backend recv only if none is already in flight. The
        // requested length is captured from `buf.remaining()` at creation
        // time; NOTE(review): if a later poll supplies a smaller cursor,
        // `put_slice` below could panic — presumably hyper re-polls with the
        // same buffer, but confirm.
        if this.pending_read.is_none() {
            this.pending_read = Some(match this.read_timeout_value() {
                Some(timeout) => Box::pin(crate::sys::linux::net::recv_timeout(
                    this.raw_fd(),
                    buf.remaining(),
                    0,
                    timeout,
                )),
                None => crate::sys::linux::net::recv_future(this.raw_fd(), buf.remaining()),
            });
        }

        let poll = this
            .pending_read
            .as_mut()
            .expect("pending read future should exist")
            .as_mut()
            .poll(cx);
        match poll {
            Poll::Ready(Ok(data)) => {
                // Clear the slot so the next poll_read starts a fresh recv.
                this.pending_read = None;
                buf.put_slice(&data);
                Poll::Ready(Ok(()))
            }
            Poll::Ready(Err(error)) => {
                this.pending_read = None;
                Poll::Ready(Err(error))
            }
            Poll::Pending => Poll::Pending,
        }
    }
}
|
||||
|
||||
// hyper integration: same stash-the-pending-future scheme as HyperRead.
impl HyperWrite for TcpStream {
    fn poll_write(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<Result<usize, io::Error>> {
        let this = self.get_mut();
        if buf.is_empty() {
            return Poll::Ready(Ok(0));
        }

        // Start a new backend send only if none is in flight. The data is
        // copied out (`buf.to_vec()`) when the future is created; a re-poll
        // with different bytes would still send the original copy —
        // NOTE(review): assumes hyper re-polls with the same data.
        if this.pending_write.is_none() {
            this.pending_write = Some(match this.write_timeout_value() {
                Some(timeout) => Box::pin(crate::sys::linux::net::send_timeout(
                    this.raw_fd(),
                    buf.to_vec(),
                    0,
                    timeout,
                )),
                None => crate::sys::linux::net::send_future(this.raw_fd(), buf.to_vec()),
            });
        }

        let poll = this
            .pending_write
            .as_mut()
            .expect("pending write future should exist")
            .as_mut()
            .poll(cx);
        match poll {
            Poll::Ready(Ok(written)) => {
                this.pending_write = None;
                Poll::Ready(Ok(written))
            }
            Poll::Ready(Err(error)) => {
                this.pending_write = None;
                Poll::Ready(Err(error))
            }
            Poll::Pending => Poll::Pending,
        }
    }

    // Writes go straight to the backend; there is no userspace buffer to
    // flush, so flush is always immediately ready.
    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }

    // Graceful shutdown of the write half only; the read half stays open.
    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        let this = self.get_mut();
        if this.pending_shutdown.is_none() {
            this.pending_shutdown = Some(crate::sys::linux::net::shutdown_future(
                this.raw_fd(),
                Shutdown::Write,
            ));
        }

        let poll = this
            .pending_shutdown
            .as_mut()
            .expect("pending shutdown future should exist")
            .as_mut()
            .poll(cx);
        match poll {
            Poll::Ready(Ok(())) => {
                this.pending_shutdown = None;
                Poll::Ready(Ok(()))
            }
            Poll::Ready(Err(error)) => {
                this.pending_shutdown = None;
                Poll::Ready(Err(error))
            }
            Poll::Pending => Poll::Pending,
        }
    }
}
|
||||
|
||||
fn validate_optional_timeout(timeout: Option<Duration>) -> io::Result<()> {
|
||||
if let Some(timeout) = timeout {
|
||||
validate_timeout(timeout)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rejects zero-length timeouts with `InvalidInput`, mirroring the
/// `std::net` convention that a zero `Duration` is not a valid timeout.
fn validate_timeout(timeout: Duration) -> io::Result<()> {
    if !timeout.is_zero() {
        return Ok(());
    }
    Err(io::Error::new(
        io::ErrorKind::InvalidInput,
        "zero-duration timeouts are not supported",
    ))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    use crate::{queue_future, queue_task, run};

    use super::{TcpListener, TcpStream, UdpSocket};
    use std::io::ErrorKind;
    use std::net::SocketAddr;

    // Full TCP exchange on loopback: accept + read + write on the server
    // side, connect + nodelay + write + read_exact on the client side.
    // Results are smuggled out of the runtime via an Arc<Mutex<..>>.
    #[test]
    fn tcp_listener_and_stream_round_trip() {
        let received = Arc::new(Mutex::new(None::<Vec<u8>>));
        let received_for_task = Arc::clone(&received);

        queue_task(move || {
            let received_for_task = Arc::clone(&received_for_task);
            queue_future(async move {
                // Port 0 lets the OS pick a free port.
                let listener = Arc::new(
                    TcpListener::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                        .await
                        .expect("listener should bind"),
                );
                let local_addr = listener
                    .local_addr()
                    .expect("listener should expose address");

                let listener_for_accept = Arc::clone(&listener);
                let server = queue_future(async move {
                    let (mut stream, peer_addr) = listener_for_accept
                        .accept()
                        .await
                        .expect("listener should accept");
                    assert_eq!(peer_addr.ip().to_string(), "127.0.0.1");

                    let mut buffer = [0; 32];
                    let read = stream
                        .read(&mut buffer)
                        .await
                        .expect("server read should succeed");
                    stream
                        .write_all(b"pong")
                        .await
                        .expect("server write should succeed");
                    buffer[..read].to_vec()
                });

                let mut client = TcpStream::connect(local_addr)
                    .await
                    .expect("client should connect");
                client
                    .set_nodelay(true)
                    .expect("setting TCP_NODELAY should succeed");
                assert!(
                    client
                        .nodelay()
                        .expect("reading TCP_NODELAY should succeed"),
                    "TCP_NODELAY should be enabled",
                );
                client
                    .write_all(b"ping")
                    .await
                    .expect("client write should succeed");
                let mut response = [0; 4];
                client
                    .read_exact(&mut response)
                    .await
                    .expect("client read should succeed");
                assert_eq!(&response, b"pong");

                let server_bytes = server.await;
                *received_for_task
                    .lock()
                    .expect("received buffer should not be poisoned") = Some(server_bytes);
            });
        });
        // Drives the runtime until all queued work completes.
        run();

        let received = received
            .lock()
            .expect("received buffer should not be poisoned");
        assert_eq!(received.as_deref(), Some(b"ping".as_slice()));
    }

    // Verifies that the hostname path of connect() resolves "localhost"
    // and actually reaches the loopback listener.
    #[test]
    fn tcp_connect_resolves_localhost() {
        let peer = Arc::new(Mutex::new(None::<String>));
        let peer_for_task = Arc::clone(&peer);

        queue_task(move || {
            let peer_for_task = Arc::clone(&peer_for_task);
            queue_future(async move {
                let listener = Arc::new(
                    TcpListener::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                        .await
                        .expect("listener should bind"),
                );
                let port = listener
                    .local_addr()
                    .expect("listener should expose address")
                    .port();

                let listener_for_accept = Arc::clone(&listener);
                let server = queue_future(async move {
                    let (stream, peer_addr) = listener_for_accept
                        .accept()
                        .await
                        .expect("listener should accept");
                    drop(stream);
                    peer_addr
                });

                let _client = TcpStream::connect(format!("localhost:{port}"))
                    .await
                    .expect("localhost DNS connect should succeed");
                let peer_addr = server.await;
                *peer_for_task
                    .lock()
                    .expect("peer buffer should not be poisoned") =
                    Some(peer_addr.ip().to_string());
            });
        });
        run();

        let peer = peer.lock().expect("peer buffer should not be poisoned");
        assert_eq!(peer.as_deref(), Some("127.0.0.1"));
    }

    // UDP exchange with explicit addressing: peek_from must not consume the
    // datagram, recv_from must; also exercises broadcast/ttl get+set.
    #[test]
    fn udp_send_to_and_recv_from_round_trip() {
        let server_received = Arc::new(Mutex::new(None::<Vec<u8>>));
        let server_received_for_task = Arc::clone(&server_received);

        queue_task(move || {
            let server_received_for_task = Arc::clone(&server_received_for_task);
            queue_future(async move {
                let server = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                    .await
                    .expect("server udp socket should bind");
                let client = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                    .await
                    .expect("client udp socket should bind");

                server
                    .set_broadcast(true)
                    .expect("enabling broadcast should succeed");
                assert!(
                    server
                        .broadcast()
                        .expect("reading broadcast should succeed"),
                    "broadcast should be enabled",
                );
                client.set_ttl(42).expect("setting ttl should succeed");
                assert_eq!(client.ttl().expect("reading ttl should succeed"), 42);

                let server_addr = server.local_addr().expect("server should expose address");
                let client_addr = client.local_addr().expect("client should expose address");

                let server_task = queue_future(async move {
                    // Peek first: the same datagram must still be readable
                    // by the recv_from below.
                    let mut peek_buffer = [0; 32];
                    let (peeked, peek_peer) = server
                        .peek_from(&mut peek_buffer)
                        .await
                        .expect("server peek_from should succeed");
                    assert_eq!(&peek_buffer[..peeked], b"ping");
                    assert_eq!(peek_peer, client_addr);

                    let mut buffer = [0; 32];
                    let (read, peer) = server
                        .recv_from(&mut buffer)
                        .await
                        .expect("server recv_from should succeed");
                    assert_eq!(peer, client_addr);
                    server
                        .send_to(b"pong", peer)
                        .await
                        .expect("server send_to should succeed");
                    buffer[..read].to_vec()
                });

                client
                    .send_to(b"ping", server_addr)
                    .await
                    .expect("client send_to should succeed");
                let mut response = [0; 32];
                let (read, peer) = client
                    .recv_from(&mut response)
                    .await
                    .expect("client recv_from should succeed");
                assert_eq!(peer, server_addr);
                assert_eq!(&response[..read], b"pong");

                let received = server_task.await;
                *server_received_for_task.lock().unwrap() = Some(received);
            });
        });
        run();

        let server_received = server_received.lock().unwrap();
        assert_eq!(server_received.as_deref(), Some(b"ping".as_slice()));
    }

    // Connected-UDP path: connect() both ends, then verify that a read
    // timeout fires as TimedOut when nothing arrives, and that peek/recv
    // succeed once a datagram is actually sent.
    #[test]
    fn udp_connected_sockets_and_timeouts_work() {
        let observed = Arc::new(Mutex::new(Vec::new()));
        let observed_for_task = Arc::clone(&observed);

        queue_task(move || {
            let observed_for_task = Arc::clone(&observed_for_task);
            queue_future(async move {
                let server = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                    .await
                    .expect("server udp socket should bind");
                let client = UdpSocket::bind(SocketAddr::from(([127, 0, 0, 1], 0)))
                    .await
                    .expect("client udp socket should bind");

                let server_addr = server.local_addr().expect("server should expose address");
                let client_addr = client.local_addr().expect("client should expose address");

                client
                    .connect(server_addr)
                    .await
                    .expect("client udp connect should succeed");
                server
                    .connect(client_addr)
                    .await
                    .expect("server udp connect should succeed");

                client
                    .set_read_timeout(Some(Duration::from_millis(5)))
                    .expect("setting read timeout should succeed");
                assert_eq!(
                    client
                        .read_timeout()
                        .expect("reading read timeout should succeed"),
                    Some(Duration::from_millis(5))
                );

                let mut buffer = [0; 16];
                let error = client
                    .recv(&mut buffer)
                    .await
                    .expect_err("recv should time out before any datagram arrives");
                assert_eq!(error.kind(), ErrorKind::TimedOut);
                observed_for_task
                    .lock()
                    .unwrap()
                    .push("timed out".to_string());

                server
                    .send(b"hello")
                    .await
                    .expect("server send should succeed");

                let peeked = client.peek(&mut buffer).await.expect("peek should succeed");
                assert_eq!(&buffer[..peeked], b"hello");

                let read = client.recv(&mut buffer).await.expect("recv should succeed");
                assert_eq!(&buffer[..read], b"hello");
                observed_for_task
                    .lock()
                    .unwrap()
                    .push("received".to_string());
            });
        });
        run();

        // The order proves the timeout fired before the successful receive.
        let observed = observed.lock().unwrap();
        assert_eq!(observed.as_slice(), ["timed out", "received"]);
    }
}
|
||||
147
lib/runtime/src/op/completion.rs
Normal file
147
lib/runtime/src/op/completion.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll, Waker};
|
||||
|
||||
use crate::platform::linux_x86_64::runtime::{ThreadHandle, current_thread_handle};
|
||||
|
||||
type CancelCallback = Box<dyn FnOnce() + Send + 'static>;
|
||||
|
||||
/// Shared state between a [`CompletionFuture`] (consumer) and its
/// [`CompletionHandle`] (producer), which may live on different threads.
struct CompletionState<T> {
    // Thread that owns the future; wakes are marshalled back to it.
    owner: ThreadHandle,
    // False once the future is dropped; producer then discards its result.
    interested: AtomicBool,
    // Set exactly once when the producer finishes (see `finish`).
    finished: AtomicBool,
    // Debounces queue_wake so at most one wake microtask is in flight.
    wake_queued: AtomicBool,
    // The produced value, parked until the future polls it out.
    result: Mutex<Option<T>>,
    // Waker registered by the most recent poll.
    waker: Mutex<Option<Waker>>,
    // Best-effort cancellation hook, run if the future drops before finish.
    cancel: Mutex<Option<CancelCallback>>,
}
|
||||
|
||||
impl<T: Send + 'static> CompletionState<T> {
    /// Schedules a waker invocation on the owning thread, coalescing
    /// concurrent requests so only one wake microtask is queued at a time.
    fn queue_wake(self: &Arc<Self>) {
        // swap(true) returns the previous value: if a wake is already
        // queued, this call is a no-op.
        if self.wake_queued.swap(true, Ordering::AcqRel) {
            return;
        }

        let state = Arc::clone(self);
        if !self.owner.queue_microtask(move || {
            // Clear the debounce flag *before* waking so a wake requested
            // during the waker call queues a fresh microtask.
            state.wake_queued.store(false, Ordering::Release);
            if let Some(waker) = state.waker.lock().unwrap().take() {
                waker.wake();
            }
        }) {
            // Owning thread refused the microtask (presumably shutting
            // down); reset the flag so a later attempt isn't suppressed.
            self.wake_queued.store(false, Ordering::Release);
        }
    }
}
|
||||
|
||||
/// Consumer half of a completion pair: resolves when the matching
/// [`CompletionHandle`] delivers a value.
pub(crate) struct CompletionFuture<T> {
    state: Arc<CompletionState<T>>,
}
|
||||
|
||||
/// Producer half of a completion pair: call `complete`/`finish` once to
/// deliver a value to the paired [`CompletionFuture`]. Cloneable so a
/// backend can hand it to multiple potential completers.
pub(crate) struct CompletionHandle<T> {
    state: Arc<CompletionState<T>>,
}
|
||||
|
||||
impl<T> Clone for CompletionHandle<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
state: Arc::clone(&self.state),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a linked future/handle pair whose wakes are delivered to the
/// given owner thread.
///
/// Registers an in-flight async operation on `owner` up front; the matching
/// `finish_async_operation` runs when the handle finishes (see
/// `CompletionHandle::finish`), keeping the owner's run loop alive until then.
pub(crate) fn completion<T: Send + 'static>(
    owner: ThreadHandle,
) -> (CompletionFuture<T>, CompletionHandle<T>) {
    owner.begin_async_operation();
    let state = Arc::new(CompletionState {
        owner,
        interested: AtomicBool::new(true),
        finished: AtomicBool::new(false),
        wake_queued: AtomicBool::new(false),
        result: Mutex::new(None),
        waker: Mutex::new(None),
        cancel: Mutex::new(None),
    });

    (
        CompletionFuture {
            state: Arc::clone(&state),
        },
        CompletionHandle { state },
    )
}
|
||||
|
||||
/// Convenience wrapper for [`completion`] owned by the calling thread.
pub(crate) fn completion_for_current_thread<T: Send + 'static>()
-> (CompletionFuture<T>, CompletionHandle<T>) {
    completion(current_thread_handle())
}
|
||||
|
||||
impl<T: Send + 'static> CompletionHandle<T> {
    /// Delivers `value` to the paired future.
    pub(crate) fn complete(self, value: T) {
        self.finish(Some(value));
    }

    /// Finishes the operation, optionally with a value. Idempotent across
    /// clones: only the first call does anything. `None` resolves the future
    /// with no stored result (the future will keep pending; presumably used
    /// with cancellation paths where the future has already been dropped).
    pub(crate) fn finish(self, value: Option<T>) {
        // swap(true) guarantees the body below runs at most once even if
        // several clones race to finish.
        if self.state.finished.swap(true, Ordering::AcqRel) {
            return;
        }

        // Only store + wake if the future still exists; otherwise the value
        // is simply dropped.
        let interested = self.state.interested.load(Ordering::Acquire);
        if interested {
            *self.state.result.lock().unwrap() = value;
            self.state.queue_wake();
        }

        // The operation is over: the cancel hook can never be needed again,
        // and the owner's in-flight counter (taken in `completion`) is
        // released here.
        let _ = self.state.cancel.lock().unwrap().take();
        self.state.owner.finish_async_operation();
    }

    /// Installs a best-effort cancellation callback, invoked if the paired
    /// future is dropped before the operation finishes.
    pub(crate) fn set_cancel(&self, cancel: impl FnOnce() + Send + 'static) {
        *self.state.cancel.lock().unwrap() = Some(Box::new(cancel));
    }

    /// True while the paired future is still alive and wants the result.
    pub(crate) fn is_interested(&self) -> bool {
        self.state.interested.load(Ordering::Acquire)
    }
}
|
||||
|
||||
impl<T> Future for CompletionFuture<T> {
    type Output = T;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        // Fast path: result already delivered.
        if let Some(value) = self.state.result.lock().unwrap().take() {
            return Poll::Ready(value);
        }

        // Register the waker, then re-check the result. The re-check closes
        // the race where the producer stored the result (and consumed or
        // missed the old waker) between the first check and the
        // registration — without it this poll could sleep forever.
        *self.state.waker.lock().unwrap() = Some(cx.waker().clone());

        if let Some(value) = self.state.result.lock().unwrap().take() {
            // Remove the now-unneeded waker so a queued wake is a no-op.
            let _ = self.state.waker.lock().unwrap().take();
            return Poll::Ready(value);
        }

        Poll::Pending
    }
}
|
||||
|
||||
impl<T> Drop for CompletionFuture<T> {
    fn drop(&mut self) {
        // Withdraw interest exactly once; if it was already false this
        // drop has nothing to do.
        if !self.state.interested.swap(false, Ordering::AcqRel) {
            return;
        }

        // Discard any result/waker delivered but never consumed.
        let _ = self.state.result.lock().unwrap().take();
        let _ = self.state.waker.lock().unwrap().take();

        // If the producer hasn't finished yet, run the best-effort cancel
        // hook. (A producer finishing concurrently may still race past the
        // `finished` load; the hook's `take()` keeps it at-most-once.)
        if !self.state.finished.load(Ordering::Acquire)
            && let Some(cancel) = self.state.cancel.lock().unwrap().take()
        {
            cancel();
        }
    }
}
|
||||
105
lib/runtime/src/op/fs.rs
Normal file
105
lib/runtime/src/op/fs.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
//! Logical filesystem operations.
|
||||
//!
|
||||
//! This layer owns request data so the public API can keep borrowed buffers while platform
|
||||
//! backends pin, stage, or offload as needed.
|
||||
|
||||
use std::ffi::OsString;
|
||||
use std::os::fd::RawFd;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Flag set describing how a file should be opened.
///
/// Mirrors the intent of `std::fs::OpenOptions`; all flags default to
/// `false`. Being a plain set of booleans it is `Copy` (matching the
/// sibling `FileType`) and `Hash`, so it can be passed by value and used
/// as a map key without cloning.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct OpenOptions {
    /// Open for reading.
    pub read: bool,
    /// Open for writing.
    pub write: bool,
    /// Every write appends to the end of the file.
    pub append: bool,
    /// Truncate the file to zero length on open.
    pub truncate: bool,
    /// Create the file if it does not already exist.
    pub create: bool,
    /// Create the file, failing if it already exists.
    pub create_new: bool,
}
|
||||
|
||||
/// What a metadata request is resolved against: a path on disk or an
/// already-open file descriptor.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MetadataTarget {
    /// Resolve the target by path lookup.
    Path(PathBuf),
    /// Query an open descriptor directly.
    File(RawFd),
}
|
||||
|
||||
/// Classification of a filesystem object as reported by metadata.
///
/// Fieldless and `Copy`; derives `Hash` so it can be used directly in
/// hashed collections alongside its existing `Eq`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum FileType {
    /// Regular file.
    File,
    /// Directory.
    Directory,
    /// Symbolic link.
    Symlink,
    /// Block device node.
    BlockDevice,
    /// Character device node.
    CharacterDevice,
    /// FIFO / named pipe.
    Fifo,
    /// Unix-domain socket.
    Socket,
    /// Mode bits matched no recognized type.
    Unknown,
}
|
||||
|
||||
/// Minimal stat-style metadata carried back from the platform backend.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RawMetadata {
    /// Kind of filesystem object.
    pub file_type: FileType,
    /// Permission/mode bits — presumably the low bits of `st_mode`;
    /// confirm the exact layout against the platform backend.
    pub mode: u16,
    /// Length in bytes as reported by the backend.
    pub len: u64,
}
|
||||
|
||||
/// One entry produced by a directory read.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RawDirEntry {
    /// Full path of the entry — presumably the scanned directory joined
    /// with `file_name`; confirm against the backend that fills this in.
    pub path: PathBuf,
    /// Bare name of the entry within its directory.
    pub file_name: OsString,
}
|
||||
|
||||
/// A logical filesystem operation.
///
/// Each variant owns its request data (paths, buffers) so platform
/// backends can pin, stage, or offload it without borrowing from callers.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FsOp {
    /// Open (or create) the file at `path` according to `options`.
    Open {
        path: PathBuf,
        options: OpenOptions,
    },
    /// Read up to `len` bytes from `fd`. `offset: None` presumably means
    /// "at the current file cursor" — confirm against the backend.
    Read {
        fd: RawFd,
        offset: Option<u64>,
        len: usize,
    },
    /// Write `data` to `fd`. `offset: None` presumably means "at the
    /// current file cursor" — confirm against the backend.
    Write {
        fd: RawFd,
        offset: Option<u64>,
        data: Vec<u8>,
    },
    /// Stat a path or open descriptor; `follow_symlinks` selects whether a
    /// trailing symlink is followed for path targets.
    Metadata {
        target: MetadataTarget,
        follow_symlinks: bool,
    },
    /// Truncate or extend `fd` to exactly `len` bytes.
    SetLen {
        fd: RawFd,
        len: u64,
    },
    /// Flush file data and metadata (fsync-style).
    SyncAll {
        fd: RawFd,
    },
    /// Flush file data only (fdatasync-style).
    SyncData {
        fd: RawFd,
    },
    /// Duplicate the descriptor.
    Duplicate {
        fd: RawFd,
    },
    /// Create a directory with permission bits `mode`; `recursive` also
    /// creates missing parent directories.
    CreateDir {
        path: PathBuf,
        recursive: bool,
        mode: u32,
    },
    /// Unlink a file.
    RemoveFile {
        path: PathBuf,
    },
    /// Remove a directory — NOTE(review): whether it must be empty is
    /// backend-defined; confirm.
    RemoveDir {
        path: PathBuf,
    },
    /// Rename/move `from` to `to`.
    Rename {
        from: PathBuf,
        to: PathBuf,
    },
    /// Enumerate the entries of a directory.
    ReadDir {
        path: PathBuf,
    },
    /// Close the descriptor.
    Close {
        fd: RawFd,
    },
}
|
||||
8
lib/runtime/src/op/mod.rs
Normal file
8
lib/runtime/src/op/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
//! Internal and public operation-layer building blocks.
|
||||
//!
|
||||
//! The operation layer defines logical work units that bridge user-facing APIs and platform
|
||||
//! backends without leaking platform details upward.
|
||||
|
||||
pub(crate) mod completion;
|
||||
pub mod fs;
|
||||
pub mod net;
|
||||
69
lib/runtime/src/op/net.rs
Normal file
69
lib/runtime/src/op/net.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
//! Logical networking operations shared between the public API and Linux backend.
|
||||
|
||||
use std::net::{Shutdown, SocketAddr};
|
||||
use std::os::fd::RawFd;
|
||||
|
||||
/// A logical networking operation.
///
/// Domain/type/protocol/flag fields are raw C-style integers handed through
/// to the platform backend (libc-style values); addresses use the typed
/// `SocketAddr` form.
#[derive(Debug)]
pub enum NetOp {
    /// Create a socket from raw `socket(2)`-style arguments.
    Socket {
        domain: i32,
        socket_type: i32,
        protocol: i32,
        flags: u32,
    },
    /// Connect `fd` to `addr`.
    Connect {
        fd: RawFd,
        addr: SocketAddr,
    },
    /// Bind `fd` to the local address `addr`.
    Bind {
        fd: RawFd,
        addr: SocketAddr,
    },
    /// Mark `fd` as a passive listener with the given backlog.
    Listen {
        fd: RawFd,
        backlog: i32,
    },
    /// Accept one connection from listener `fd`.
    Accept {
        fd: RawFd,
    },
    /// Send `data` on a connected socket; `flags` are raw send flags.
    Send {
        fd: RawFd,
        data: Vec<u8>,
        flags: i32,
    },
    /// Send a datagram to an explicit `target` address.
    SendTo {
        fd: RawFd,
        target: SocketAddr,
        data: Vec<u8>,
        flags: i32,
    },
    /// Receive up to `len` bytes from a connected socket.
    Recv {
        fd: RawFd,
        len: usize,
        flags: i32,
    },
    /// Receive up to `len` bytes plus the sender's address (datagram).
    RecvFrom {
        fd: RawFd,
        len: usize,
        flags: i32,
    },
    /// Shut down one or both directions of the socket.
    Shutdown {
        fd: RawFd,
        how: Shutdown,
    },
    /// Close the descriptor.
    Close {
        fd: RawFd,
    },
}
|
||||
|
||||
/// Result of a successful `NetOp::Accept`: the new connection's descriptor
/// and the peer's address.
#[derive(Clone, Debug)]
pub struct AcceptedSocket {
    /// Descriptor of the accepted connection.
    pub fd: RawFd,
    /// Remote address of the connecting peer.
    pub peer_addr: SocketAddr,
}
|
||||
|
||||
/// Result of a successful `NetOp::RecvFrom`: the payload and its sender.
#[derive(Clone, Debug)]
pub struct ReceivedDatagram {
    /// Bytes received (already truncated to the actual received length —
    /// presumably; confirm against the backend).
    pub data: Vec<u8>,
    /// Address the datagram was received from.
    pub peer_addr: SocketAddr,
}
|
||||
864
lib/runtime/src/platform/linux_x86_64/mesh_alloc/allocator.rs
Normal file
864
lib/runtime/src/platform/linux_x86_64/mesh_alloc/allocator.rs
Normal file
@@ -0,0 +1,864 @@
|
||||
use core::alloc::Layout;
|
||||
use core::mem::size_of;
|
||||
use core::ptr::copy_nonoverlapping;
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use super::arena::Arena;
|
||||
use super::constants::{
|
||||
MAX_ATTACHED_MINIHEAPS_PER_CLASS, MAX_SMALL_ALLOCATION, MIN_SHUFFLE_VECTOR_LENGTH,
|
||||
MINIHEAP_REFILL_GOAL_SIZE, NUM_SIZE_CLASSES, is_below_partial_threshold,
|
||||
};
|
||||
use super::fault::{self, ActiveMeshGuard};
|
||||
use super::meshing::bitmaps_meshable;
|
||||
use super::miniheap::{MiniHeap, MiniHeapId};
|
||||
use super::page::{page_count, page_size, runtime_slots_per_span};
|
||||
use super::platform;
|
||||
use super::pool::MiniHeapPool;
|
||||
use super::raw_sys;
|
||||
use super::rng::Mwc;
|
||||
use super::shuffle::ShuffleEntry;
|
||||
use super::size_map::{byte_size_for_class, size_class_for};
|
||||
use super::stats::{MeshStats, StatsState};
|
||||
use super::sync::{FutexMutex, futex_wait_for_value, futex_wake_all};
|
||||
use super::thread_local_heap::ThreadLocalHeap;
|
||||
|
||||
/// Top-level state of the meshing allocator.
#[derive(Debug)]
pub struct MeshAllocator {
    /// Virtual-memory arena all spans are carved from.
    arena: Arena,
    /// Pool of `MiniHeap` records, addressed by `MiniHeapId`.
    pool: MiniHeapPool,
    /// Fallback thread-local heap used by the non-`_with_thread` entry
    /// points; manually mapped and manually unmapped in `Drop`.
    bootstrap_thread: *mut ThreadLocalHeap,
    /// Scratch array (one slot per pooled miniheap) used while collecting
    /// and shuffling meshing candidates; manually mapped, freed in `Drop`.
    compaction_candidates: *mut MiniHeapId,
    /// RNG used to shuffle meshing candidates.
    meshing_rng: Mwc,
    /// Odd while a meshing pass is in progress; read like a seqlock by
    /// `resolve_pointer` and waited on via futex.
    mesh_epoch: AtomicU32,
    /// Serializes miniheap pool allocate/release.
    pool_lock: FutexMutex,
    /// Allocation/deallocation/mesh counters.
    stats: StatsState,
}
|
||||
|
||||
/// Result of mapping a user pointer back to its miniheap: the owning
/// miniheap id and the object slot index within that heap's span family.
#[derive(Clone, Copy, Debug)]
struct ResolvedPtr {
    owner_id: MiniHeapId,
    slot: usize,
}
|
||||
|
||||
impl MeshAllocator {
    /// Builds an allocator with an `arena_size`-byte arena and room for
    /// `miniheap_capacity` miniheaps, installing the fault-mediation
    /// handler and mapping the bootstrap thread heap and the candidate
    /// scratch array.
    ///
    /// NOTE(review): if a later `?` in this constructor fails, the two
    /// anonymous mappings made above it are not unmapped — confirm whether
    /// that leak on the error path is acceptable.
    pub fn new(arena_size: usize, miniheap_capacity: u32) -> raw_sys::Result<Self> {
        fault::ensure_fault_mediation_installed()?;
        let bootstrap_thread = unsafe {
            platform::map_anonymous(
                size_of::<ThreadLocalHeap>(),
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
            )? as *mut ThreadLocalHeap
        };
        unsafe {
            bootstrap_thread.write(ThreadLocalHeap::new()?);
        }
        let compaction_candidates = unsafe {
            platform::map_anonymous(
                miniheap_capacity as usize * size_of::<MiniHeapId>(),
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
            )? as *mut MiniHeapId
        };

        Ok(Self {
            arena: Arena::with_size(arena_size)?,
            pool: MiniHeapPool::with_capacity(miniheap_capacity)?,
            bootstrap_thread,
            compaction_candidates,
            meshing_rng: Mwc::from_os_seed()?,
            mesh_epoch: AtomicU32::new(0),
            pool_lock: FutexMutex::new(),
            stats: StatsState::new(),
        })
    }

    /// The backing arena.
    #[inline(always)]
    pub fn arena(&self) -> &Arena {
        &self.arena
    }

    /// The miniheap pool.
    #[inline(always)]
    pub fn pool(&self) -> &MiniHeapPool {
        &self.pool
    }

    /// Number of currently live miniheaps.
    #[inline(always)]
    pub fn live_miniheap_count(&self) -> u32 {
        self.pool.live_len()
    }

    /// Builds a full statistics snapshot by combining the stats counters
    /// with a walk over every live miniheap (liveness, meshing state, and
    /// per-class compaction-candidate totals).
    pub fn stats(&self) -> MeshStats {
        let page = page_size();
        let (reusable_span_count, reusable_pages) = self.arena.reusable_span_stats();
        let counters = self.stats.snapshot();
        let mut stats = MeshStats {
            arena_size: self.arena.arena_size(),
            reserved_bytes: self.arena.reserved_pages() as usize * page,
            reusable_span_count,
            reusable_span_bytes: reusable_pages as usize * page,
            live_miniheaps: self.pool.live_len(),
            small_allocations: counters.small_allocations,
            small_deallocations: counters.small_deallocations,
            large_allocations: counters.large_allocations,
            large_deallocations: counters.large_deallocations,
            compact_calls: counters.compact_calls,
            meshes_performed: counters.meshes_performed,
            meshed_pages: counters.meshed_pages,
            meshed_bytes: counters.meshed_bytes,
            ..MeshStats::default()
        };

        // Per-size-class accumulators for compaction candidates.
        let mut candidate_heaps_by_class = [0u32; NUM_SIZE_CLASSES];
        let mut candidate_pages_by_class = [0u32; NUM_SIZE_CLASSES];
        let mut candidate_free_bytes_by_class = [0usize; NUM_SIZE_CLASSES];
        let mut candidate_span_bytes_by_class = [0usize; NUM_SIZE_CLASSES];

        // Walk every pool slot (ids are 1-based).
        let len = self.pool.len();
        let mut id_value = 1u32;
        while id_value <= len {
            let id = MiniHeapId::new(id_value);
            if let Some(heap) = self.pool.get(id) {
                if heap.is_large_alloc() {
                    stats.live_large_allocations += 1;
                    stats.live_large_bytes += heap.span_size();
                    stats.retained_large_span_bytes += heap.span_size();
                    id_value += 1;
                    continue;
                }

                stats.live_small_heaps += 1;
                stats.live_small_bytes += heap.in_use_count() as usize * heap.object_size();
                stats.virtual_small_span_bytes += heap.span_size();

                if heap.is_meshed() {
                    stats.meshed_small_heaps += 1;
                } else {
                    stats.retained_small_span_bytes += heap.span_size();
                }

                if heap.is_full() {
                    stats.full_small_heaps += 1;
                } else if !heap.is_empty() {
                    stats.partial_small_heaps += 1;
                }

                if !heap.is_attached() && !heap.is_full() && !heap.is_meshed() {
                    stats.reusable_small_heaps += 1;
                }

                if self.heap_is_compaction_candidate(heap.size_class(), heap) {
                    let class = heap.size_class() as usize;
                    stats.compaction.candidate_heaps += 1;
                    stats.compaction.candidate_pages += heap.span().length;
                    stats.compaction.candidate_free_bytes += heap.bytes_free();
                    candidate_heaps_by_class[class] += 1;
                    candidate_pages_by_class[class] += heap.span().length;
                    candidate_free_bytes_by_class[class] += heap.bytes_free();
                    // Plain assignment: span size is uniform within a class.
                    candidate_span_bytes_by_class[class] = heap.span_size();
                }
            }
            id_value += 1;
        }

        // Derive best-case meshing estimates per class (class 0 unused).
        let mut class = 1usize;
        while class < NUM_SIZE_CLASSES {
            let span_bytes = candidate_span_bytes_by_class[class];
            // checked_div skips classes with no candidates (span_bytes == 0).
            if let Some(pair_bound_by_free) =
                candidate_free_bytes_by_class[class].checked_div(span_bytes)
            {
                let pair_bound_by_count = candidate_heaps_by_class[class] / 2;
                let best_case_meshes = pair_bound_by_count.min(pair_bound_by_free as u32);
                let pages_per_mesh =
                    candidate_pages_by_class[class] / candidate_heaps_by_class[class].max(1);
                stats.compaction.best_case_meshes += best_case_meshes;
                stats.compaction.best_case_reclaimable_pages += best_case_meshes * pages_per_mesh;
                stats.compaction.best_case_reclaimable_bytes +=
                    best_case_meshes as usize * span_bytes;
            }
            class += 1;
        }

        stats
    }

    /// Allocates `size` bytes using the bootstrap thread heap.
    pub fn allocate(&mut self, size: usize) -> Option<*mut u8> {
        let thread_heap = unsafe { &mut *self.bootstrap_thread };
        self.allocate_with_thread(size, thread_heap)
    }

    /// Allocates `size` bytes: small sizes go through the per-thread
    /// shuffle vectors; anything above `MAX_SMALL_ALLOCATION` gets its own
    /// large span.
    pub fn allocate_with_thread(
        &mut self,
        size: usize,
        thread_heap: &mut ThreadLocalHeap,
    ) -> Option<*mut u8> {
        let size = size.max(1);
        if size <= MAX_SMALL_ALLOCATION {
            let class = size_class_for(size)?;
            if let Some(ptr) = self.try_allocate_small_local(thread_heap, class) {
                return Some(ptr);
            }
            return self.allocate_small_with_thread(thread_heap, class);
        }

        self.allocate_large(size)
    }

    /// Layout-aware allocation using the bootstrap thread heap.
    pub fn allocate_layout(&mut self, layout: Layout) -> Option<*mut u8> {
        let thread_heap = unsafe { &mut *self.bootstrap_thread };
        self.allocate_layout_with_thread(thread_heap, layout)
    }

    /// Layout-aware allocation: uses the small path only when the rounded
    /// size fits a size class whose stride already satisfies the requested
    /// alignment; otherwise falls back to an aligned large allocation.
    pub fn allocate_layout_with_thread(
        &mut self,
        thread_heap: &mut ThreadLocalHeap,
        layout: Layout,
    ) -> Option<*mut u8> {
        let aligned_size = round_up_to_alignment(layout.size().max(1), layout.align())?;
        if aligned_size <= MAX_SMALL_ALLOCATION && layout.align() <= page_size() {
            let class = size_class_for(aligned_size)?;
            // Small objects are naturally aligned to the class stride, so
            // the class works only if that stride is a multiple of align.
            if byte_size_for_class(class).is_multiple_of(layout.align()) {
                if let Some(ptr) = self.try_allocate_small_local(thread_heap, class) {
                    return Some(ptr);
                }
                return self.allocate_small_with_thread(thread_heap, class);
            }
        }

        self.allocate_large_aligned(aligned_size, layout.align())
    }

    /// Frees `ptr` using the bootstrap thread heap.
    pub fn deallocate(&mut self, ptr: *mut u8) {
        let thread_heap = unsafe { &mut *self.bootstrap_thread };
        self.deallocate_with_thread(ptr, thread_heap);
    }

    /// Frees `ptr`. Large allocations release their whole span; small
    /// objects are either cached in the owning thread's shuffle vector or
    /// returned directly to their miniheap (possibly reclaiming it).
    pub fn deallocate_with_thread(&mut self, ptr: *mut u8, thread_heap: &mut ThreadLocalHeap) {
        if ptr.is_null() {
            return;
        }

        // Unknown pointers are silently ignored.
        let Some(resolved) = self.resolve_pointer(ptr) else {
            return;
        };
        let id = resolved.owner_id;
        let Some(heap) = self.pool.get(id) else {
            return;
        };

        if heap.is_large_alloc() {
            let span = heap.span();
            self.stats.record_large_deallocation();
            let _ = heap.free_offset(0);
            self.arena.clear_miniheap(span);
            self.arena.release_span(span);
            let _ = {
                let _guard = self.pool_lock.lock();
                self.pool.release(id)
            };
            return;
        }

        let slot = resolved.slot;
        let class = heap.size_class();
        let thread_id = thread_heap.thread_id();
        self.stats.record_small_deallocation();

        // Fast path: the heap is attached to this very thread — try to
        // park the slot in the shuffle vector instead of touching the
        // bitmap.
        if heap.current_thread() == thread_id && heap.is_attached() && !heap.is_meshed() {
            let state = thread_heap.class_mut(class);
            let attached_idx = state.find_attached(id);
            if let Some(attached_idx) = attached_idx
                && !state.shuffle.is_full()
            {
                let cached = state.shuffle.count_entries_for_offset(attached_idx as u16);
                // This free would make every slot of the heap cached:
                // detach the class, apply the free, and reclaim the heap.
                if cached + 1 == heap.max_count() as usize {
                    self.release_class_attached(thread_heap, class);
                    if let Some(heap) = self.pool.get(id) {
                        let _ = heap.free_offset(slot);
                    }
                    self.reclaim_empty_detached_heap(id);
                    return;
                }
                if cached + 1 < heap.max_count() as usize {
                    state
                        .shuffle
                        .push(ShuffleEntry::new(attached_idx as u16, slot as u16));
                    return;
                }
            }
        }

        // Slow path: free directly into the heap's bitmap.
        let state = thread_heap.class_mut(class);
        let _ = heap.free_offset(slot);
        // A mostly-empty attached heap is detached so it becomes a
        // compaction candidate.
        if heap.is_attached()
            && is_below_partial_threshold(heap.in_use_count(), heap.max_count() as u32)
        {
            heap.unset_attached();
            if let Some(attached_idx) = state.find_attached(id) {
                state.attached_ids[attached_idx as usize] = MiniHeapId::new(0);
                state.attached_heaps[attached_idx as usize] = core::ptr::null();
            }
        }

        if heap.is_empty() && !heap.is_meshed() {
            self.reclaim_empty_detached_heap(id);
        }
    }

    /// Layout-aware free; the layout is not needed to locate the owner.
    pub fn deallocate_layout(&mut self, ptr: *mut u8, _layout: Layout) {
        self.deallocate(ptr);
    }

    /// Attempts a purely thread-local free (shuffle-vector push only).
    /// Returns `false` when the caller must take the full deallocation
    /// path instead; returns `true` for null/unknown pointers, which are
    /// treated as handled.
    pub fn try_deallocate_local(&self, ptr: *mut u8, thread_heap: &mut ThreadLocalHeap) -> bool {
        if ptr.is_null() {
            return true;
        }

        // Odd epoch: a meshing pass is in flight; don't race it.
        if self.mesh_epoch.load(Ordering::Acquire) & 1 != 0 {
            return false;
        }

        let Some(resolved) = self.resolve_pointer(ptr) else {
            return true;
        };
        let id = resolved.owner_id;
        let Some(heap) = self.pool.get(id) else {
            return true;
        };
        if heap.is_large_alloc()
            || heap.current_thread() != thread_heap.thread_id()
            || heap.is_meshed()
            || !heap.contains_ptr(self.arena.base_ptr() as usize, ptr)
        {
            return false;
        }

        let class = heap.size_class();
        let slot = resolved.slot;
        let state = thread_heap.class_mut(class);
        let Some(attached_idx) = state.find_attached(id) else {
            return false;
        };
        if state.shuffle.is_full() {
            return false;
        }

        // Refuse to cache the heap's final in-use slot locally — full
        // emptiness must go through the global path so the heap can be
        // reclaimed.
        let cached = state.shuffle.count_entries_for_offset(attached_idx as u16);
        if cached + 1 >= heap.max_count() as usize {
            return false;
        }

        state
            .shuffle
            .push(ShuffleEntry::new(attached_idx as u16, slot as u16));
        self.stats.record_small_deallocation();
        true
    }

    /// # Safety
    ///
    /// `ptr` must have been allocated by this allocator with `layout`, and must not be used
    /// after this call if a new allocation is returned.
    pub unsafe fn reallocate(
        &mut self,
        ptr: *mut u8,
        layout: Layout,
        new_size: usize,
    ) -> Option<*mut u8> {
        let thread_heap = unsafe { &mut *self.bootstrap_thread };
        unsafe { self.reallocate_with_thread(ptr, layout, new_size, thread_heap) }
    }

    /// # Safety
    ///
    /// `ptr` must have been allocated by this allocator with `layout`, and must not be used
    /// after this call if a new allocation is returned.
    pub unsafe fn reallocate_with_thread(
        &mut self,
        ptr: *mut u8,
        layout: Layout,
        new_size: usize,
        thread_heap: &mut ThreadLocalHeap,
    ) -> Option<*mut u8> {
        // realloc(NULL, n) behaves like malloc(n).
        if ptr.is_null() {
            return self.allocate_layout_with_thread(
                thread_heap,
                Layout::from_size_align(new_size.max(1), layout.align()).ok()?,
            );
        }
        // realloc(p, 0) behaves like free(p).
        if new_size == 0 {
            self.deallocate_with_thread(ptr, thread_heap);
            return None;
        }

        // Always allocate-copy-free; no in-place growth path here.
        let new_layout = Layout::from_size_align(new_size, layout.align()).ok()?;
        let new_ptr = self.allocate_layout_with_thread(thread_heap, new_layout)?;
        unsafe {
            copy_nonoverlapping(ptr, new_ptr, layout.size().min(new_size));
        }
        self.deallocate_with_thread(ptr, thread_heap);
        Some(new_ptr)
    }

    /// Runs a compaction pass with the bootstrap thread heap; returns the
    /// number of meshes performed.
    pub fn compact(&mut self) -> usize {
        let thread_heap = unsafe { &mut *self.bootstrap_thread };
        self.compact_with_thread(thread_heap)
    }

    /// Runs a compaction pass: bumps the mesh epoch (blocking lock-free
    /// readers), detaches everything this thread holds, then meshes
    /// candidates class by class. Returns the number of meshes performed.
    pub fn compact_with_thread(&mut self, thread_heap: &mut ThreadLocalHeap) -> usize {
        // Guard makes the epoch odd for the duration of the pass and wakes
        // futex waiters when it goes even again.
        let _epoch_guard = MeshingEpochGuard::new(core::ptr::addr_of!(self.mesh_epoch));
        self.stats.record_compact_call();
        self.shutdown_thread(thread_heap);

        let mut meshes = 0usize;
        for class_idx in 1..NUM_SIZE_CLASSES {
            meshes += self.mesh_class_candidates(class_idx as u8);
        }

        meshes
    }

    /// Pops one slot for `class` from the thread's shuffle vector,
    /// refilling locally from already-attached heaps if needed. Fails when
    /// a meshing pass is in flight or no local capacity remains.
    pub fn try_allocate_small_local(
        &self,
        thread_heap: &mut ThreadLocalHeap,
        class: u8,
    ) -> Option<*mut u8> {
        if self.mesh_epoch.load(Ordering::Acquire) & 1 != 0 {
            return None;
        }
        if thread_heap.class(class).shuffle.is_exhausted() && !self.local_refill(thread_heap, class)
        {
            return None;
        }

        let state = thread_heap.class_mut(class);
        let entry = state.shuffle.pop()?;
        let heap = state.heap_at(entry.miniheap_offset as usize)?;
        self.stats.record_small_allocation();
        Some(heap.ptr_from_offset(self.arena.base_ptr() as usize, entry.slot_index as usize))
    }

    /// Slow small-allocation path: refill the class globally, then retry
    /// the local path.
    fn allocate_small_with_thread(
        &mut self,
        thread_heap: &mut ThreadLocalHeap,
        class: u8,
    ) -> Option<*mut u8> {
        self.global_refill(thread_heap, class)?;
        self.try_allocate_small_local(thread_heap, class)
    }

    /// Large allocation with no alignment requirement beyond a page.
    fn allocate_large(&mut self, size: usize) -> Option<*mut u8> {
        self.allocate_large_aligned(size, 1)
    }

    /// Allocates a dedicated span for a large object: one single-slot
    /// miniheap tracks the whole span.
    fn allocate_large_aligned(&mut self, size: usize, align: usize) -> Option<*mut u8> {
        let page_align = page_alignment_for(align)?;
        let (_, span) = self.arena.allocate_bytes(size, page_align)?;
        let (id, heap) = {
            let _guard = self.pool_lock.lock();
            self.pool.allocate(span, 1, size)?
        };
        self.arena.track_miniheap(span, id);
        self.stats.record_large_allocation();
        heap.malloc_at(self.arena.base_ptr() as usize, 0)
    }

    /// Refills the class shuffle vector from heaps already attached to
    /// this thread, scanning round-robin from the saved cursor. Returns
    /// whether any slots became available.
    fn local_refill(&self, thread_heap: &mut ThreadLocalHeap, class: u8) -> bool {
        let state = thread_heap.class_mut(class);
        let count = state.attached_len as usize;
        if count == 0 {
            return false;
        }

        let mut scanned = 0usize;
        while scanned < count && state.shuffle.is_exhausted() {
            let idx = (state.attached_cursor as usize) % count;
            state.attached_cursor = ((idx + 1) % count) as u8;
            let heap_ptr = state.attached_heaps[idx];
            if !heap_ptr.is_null() {
                let heap = unsafe { &*heap_ptr };
                if !heap.is_full() {
                    let _ = state.shuffle.refill_from_heap(idx as u16, heap);
                }
            }
            scanned += 1;
        }

        !state.shuffle.is_exhausted()
    }

    /// Replaces the thread's attached heap set for `class`: detach
    /// everything, re-attach reusable detached heaps, then allocate fresh
    /// spans until the free-byte goal (or attachment capacity) is reached,
    /// and finally refill the shuffle vector.
    fn global_refill(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) -> Option<()> {
        self.release_class_attached(thread_heap, class);

        let object_size = byte_size_for_class(class);
        let object_count = miniheap_object_count(object_size);
        let page_count = page_count(object_size * object_count) as u32;
        let thread_id = thread_heap.thread_id();

        let mut bytes_free = self.attach_reusable_heaps(thread_heap, class);
        while bytes_free < MINIHEAP_REFILL_GOAL_SIZE && !thread_heap.class(class).attached_full() {
            let (_, span) = self.arena.page_alloc(page_count, 1)?;
            let (id, heap) = {
                let _guard = self.pool_lock.lock();
                self.pool.allocate(span, object_count as u16, object_size)?
            };
            self.arena.track_miniheap(span, id);

            let slot = thread_heap
                .class_mut(class)
                .push_attached(id, heap as *const MiniHeap)?;
            heap.set_attached(thread_id);
            heap.set_shuffle_vector_offset(slot);
            bytes_free += heap.bytes_free();
        }

        thread_heap.class_mut(class).shuffle.clear();
        if self.local_refill(thread_heap, class) {
            Some(())
        } else {
            None
        }
    }

    /// Scans the pool for detached, non-full, non-meshed heaps of `class`
    /// and attaches them to this thread until the free-byte goal or the
    /// attachment capacity is hit. Returns the free bytes gathered.
    fn attach_reusable_heaps(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) -> usize {
        let mut bytes_free = 0usize;
        let len = self.pool.len();
        let thread_id = thread_heap.thread_id();

        let mut id_val = 1u32;
        while id_val <= len
            && bytes_free < MINIHEAP_REFILL_GOAL_SIZE
            && !thread_heap.class(class).attached_full()
        {
            let id = MiniHeapId::new(id_val);
            if let Some(heap) = self.pool.get(id)
                && heap.size_class() == class
                && !heap.is_attached()
                && !heap.is_full()
                && !heap.is_meshed()
                && let Some(slot) = thread_heap
                    .class_mut(class)
                    .push_attached(id, heap as *const MiniHeap)
            {
                heap.set_attached(thread_id);
                heap.set_shuffle_vector_offset(slot);
                bytes_free += heap.bytes_free();
            }
            id_val += 1;
        }

        bytes_free
    }

    /// Detaches every class of the given thread heap (class 0 unused).
    fn release_all_attached(&mut self, thread_heap: &mut ThreadLocalHeap) {
        for class in 1..NUM_SIZE_CLASSES as u8 {
            self.release_class_attached(thread_heap, class);
        }
    }

    /// Tears down a thread's association with the allocator by detaching
    /// all of its heaps.
    pub fn shutdown_thread(&mut self, thread_heap: &mut ThreadLocalHeap) {
        self.release_all_attached(thread_heap);
    }

    /// Releases the span and pool slot of a heap that is fully empty,
    /// detached, and not part of a mesh family; otherwise does nothing.
    fn reclaim_empty_detached_heap(&mut self, id: MiniHeapId) {
        let Some(heap) = self.pool.get(id) else {
            return;
        };
        if !heap.is_empty() || heap.is_meshed() || heap.is_attached() || heap.has_meshed_partner() {
            return;
        }

        let span = heap.span();
        self.arena.clear_miniheap(span);
        self.arena.release_span(span);
        let _ = {
            let _guard = self.pool_lock.lock();
            self.pool.release(id)
        };
    }

    /// Detaches all heaps of `class` from the thread: replays every cached
    /// shuffle-vector free into the heaps' bitmaps, unsets attachment, and
    /// reclaims any heap left empty. Ids are copied out first because
    /// reclamation needs `&mut self` while the class state is borrowed.
    fn release_class_attached(&mut self, thread_heap: &mut ThreadLocalHeap, class: u8) {
        let mut released_ids = [MiniHeapId::new(0); MAX_ATTACHED_MINIHEAPS_PER_CLASS];
        let released_len;
        {
            let state = thread_heap.class_mut(class);
            // Flush cached frees back into the owning heaps.
            for entry in state.shuffle.active_entries() {
                let attached_idx = entry.miniheap_offset as usize;
                if attached_idx >= state.attached_len as usize {
                    continue;
                }

                if let Some(heap) = state.heap_at(attached_idx) {
                    let _ = heap.free_offset(entry.slot_index as usize);
                }
            }

            released_len = state.attached_len as usize;
            for (idx, id) in released_ids.iter_mut().enumerate().take(released_len) {
                *id = state.attached_ids[idx];
                if let Some(heap) = state.heap_at(idx) {
                    heap.unset_attached();
                }
            }
            state.clear_attached();
        }

        for id in released_ids.into_iter().take(released_len) {
            if id != MiniHeapId::new(0) {
                self.reclaim_empty_detached_heap(id);
            }
        }
    }

    /// A heap qualifies for meshing when it is a detached, partially-used,
    /// sub-page small heap of the right class that is not already meshed.
    fn heap_is_compaction_candidate(&self, class: u8, heap: &MiniHeap) -> bool {
        heap.size_class() == class
            && !heap.is_attached()
            && !heap.is_full()
            && !heap.is_meshed()
            && heap.object_size() < page_size()
            && is_below_partial_threshold(heap.in_use_count(), heap.max_count() as u32)
    }

    /// Meshes `src` into `dst`: copies src's live objects into dst's span
    /// (their bitmaps are disjoint by precondition), links the mesh family,
    /// remaps src's virtual span onto dst's physical pages, and frees src's
    /// physical memory. The fault barrier covers the window where src's
    /// pages are being moved.
    fn mesh_pair(&mut self, dst_id: MiniHeapId, src_id: MiniHeapId) -> raw_sys::Result<()> {
        let dst = self.pool.get(dst_id).expect("valid dst id");
        let src = self.pool.get(src_id).expect("valid src id");
        let span_size = dst.span_size();
        let arena_base = self.arena.base_ptr() as usize;
        let object_size = dst.object_size();
        let src_snapshot = src.bitmap().snapshot();
        let keep = dst.ptr_from_offset(arena_base, 0);
        let remove = src.ptr_from_offset(arena_base, 0);
        let barrier = ActiveMeshGuard::begin(remove, span_size)?;
        let scratch = match unsafe { self.arena.begin_mesh(remove, span_size) } {
            Ok(scratch) => scratch,
            Err(error) => {
                barrier.finish();
                return Err(error);
            }
        };

        // Copy each live src object into the same slot of dst's span,
        // reading from the scratch copy of src's pages.
        for slot in src_snapshot.iter_set_bits() {
            let src_ptr = unsafe { scratch.add(slot * object_size) };
            let dst_ptr = dst.ptr_from_offset(arena_base, slot);
            unsafe {
                copy_nonoverlapping(src_ptr, dst_ptr, object_size);
            }
            let _ = dst.bitmap().try_set(slot);
            let _ = src.free_offset(slot);
        }

        // Splice src into dst's mesh-family list.
        let previous_family_head = dst.next_meshed();
        src.track_meshed_span(previous_family_head);
        dst.track_meshed_span(src_id);
        src.set_meshed();

        if let Err(error) = unsafe { self.arena.finalize_mesh(keep, remove, scratch, span_size) } {
            let _ = unsafe { self.arena.abort_mesh(remove, scratch, span_size) };
            barrier.finish();
            return Err(error);
        }
        barrier.finish();
        self.arena.free_phys(remove, span_size)?;
        self.stats.record_mesh(dst.span().length, span_size);
        Ok(())
    }

    /// Collects and shuffles meshing candidates for `class`, then greedily
    /// pairs each candidate with the first compatible partner (disjoint
    /// bitmaps), preferring to keep the heap that already has a mesh
    /// family. Returns the number of successful meshes.
    fn mesh_class_candidates(&mut self, class: u8) -> usize {
        let mut candidate_len = self.collect_compaction_candidates(class);
        if candidate_len < 2 {
            return 0;
        }
        self.shuffle_compaction_candidates(candidate_len);

        let mut meshes = 0usize;
        while candidate_len > 1 {
            // Take the last candidate and search the rest for a partner.
            let left_index = candidate_len - 1;
            let left_id = unsafe { *self.compaction_candidates.add(left_index) };
            candidate_len -= 1;

            let mut match_index = 0usize;
            while match_index < candidate_len {
                let right_id = unsafe { *self.compaction_candidates.add(match_index) };
                // EAGAIN is used as a generic "not meshable" sentinel below.
                let mesh_result = if let (Some(left), Some(right)) =
                    (self.pool.get(left_id), self.pool.get(right_id))
                {
                    if self.heap_is_compaction_candidate(class, left)
                        && self.heap_is_compaction_candidate(class, right)
                        && bitmaps_meshable(left.bitmap(), right.bitmap())
                    {
                        // Direction rules: the heap that already anchors a
                        // mesh family stays as the destination; two
                        // anchored heaps cannot be merged.
                        if left.has_meshed_partner() && !right.has_meshed_partner() {
                            self.mesh_pair(left_id, right_id)
                        } else if right.has_meshed_partner() && !left.has_meshed_partner() {
                            self.mesh_pair(right_id, left_id)
                        } else if !right.has_meshed_partner() {
                            self.mesh_pair(left_id, right_id)
                        } else if !left.has_meshed_partner() {
                            self.mesh_pair(right_id, left_id)
                        } else {
                            Err(raw_sys::Error(raw_sys::EAGAIN))
                        }
                    } else {
                        Err(raw_sys::Error(raw_sys::EAGAIN))
                    }
                } else {
                    Err(raw_sys::Error(raw_sys::EAGAIN))
                };

                if mesh_result.is_ok() {
                    meshes += 1;
                    // swap-remove the consumed partner from the candidate
                    // array, then move on to a fresh left candidate.
                    if match_index != candidate_len - 1 {
                        unsafe {
                            let replacement = *self.compaction_candidates.add(candidate_len - 1);
                            self.compaction_candidates
                                .add(match_index)
                                .write(replacement);
                        }
                    }
                    candidate_len -= 1;
                    break;
                }
                match_index += 1;
            }
        }

        meshes
    }

    /// Writes the ids of all compaction candidates for `class` into the
    /// scratch array and returns how many were found.
    fn collect_compaction_candidates(&mut self, class: u8) -> usize {
        let len = self.pool.len();
        let mut candidate_len = 0usize;
        let mut id_val = 1u32;
        while id_val <= len {
            let id = MiniHeapId::new(id_val);
            if let Some(heap) = self.pool.get(id)
                && self.heap_is_compaction_candidate(class, heap)
            {
                unsafe {
                    self.compaction_candidates.add(candidate_len).write(id);
                }
                candidate_len += 1;
            }
            id_val += 1;
        }
        candidate_len
    }

    /// Fisher–Yates-style shuffle of the first `len` candidate ids so the
    /// greedy pairing is randomized.
    fn shuffle_compaction_candidates(&mut self, len: usize) {
        if len <= 1 {
            return;
        }

        let mut index = len - 1;
        while index > 0 {
            let swap_index = self.meshing_rng.in_range(0, index);
            unsafe {
                let left = *self.compaction_candidates.add(index);
                let right = *self.compaction_candidates.add(swap_index);
                self.compaction_candidates.add(index).write(right);
                self.compaction_candidates.add(swap_index).write(left);
            }
            index -= 1;
        }
    }

    /// Maps a user pointer to its owning miniheap id and slot, seqlock
    /// style: if the mesh epoch is odd (meshing in progress) wait on the
    /// futex; if the epoch changed during resolution, retry.
    fn resolve_pointer(&self, ptr: *mut u8) -> Option<ResolvedPtr> {
        loop {
            let start_epoch = self.mesh_epoch.load(Ordering::Acquire);
            if start_epoch & 1 != 0 {
                futex_wait_for_value(&self.mesh_epoch, start_epoch);
                continue;
            }

            let owner_id = self.arena.miniheap_id_for_ptr(ptr)?;
            let slot = self.resolve_family_slot(owner_id, ptr)?;
            let end_epoch = self.mesh_epoch.load(Ordering::Acquire);
            if start_epoch == end_epoch {
                return Some(ResolvedPtr { owner_id, slot });
            }
        }
    }

    /// Finds the slot index of `ptr` within the owner heap or, failing
    /// that, within any member of its mesh family (followed via
    /// `next_meshed` links).
    fn resolve_family_slot(&self, owner_id: MiniHeapId, ptr: *mut u8) -> Option<usize> {
        let arena_base = self.arena.base_ptr() as usize;
        let owner = self.pool.get(owner_id)?;
        if owner.contains_ptr(arena_base, ptr) {
            return Some(owner.slot_for_ptr(arena_base, ptr));
        }

        let mut current = owner.next_meshed();
        while current.has_value() {
            let heap = self.pool.get(current)?;
            if heap.contains_ptr(arena_base, ptr) {
                return Some(heap.slot_for_ptr(arena_base, ptr));
            }
            current = heap.next_meshed();
        }

        None
    }
}
|
||||
|
||||
/// Number of object slots a fresh small-object miniheap should hold:
/// enough to fill one page, but at least `MIN_SHUFFLE_VECTOR_LENGTH` and
/// never more than the per-span bitmap can track.
fn miniheap_object_count(object_size: usize) -> usize {
    let bitmap_limit = runtime_slots_per_span();
    // max-then-min (not clamp): if the minimum exceeds the bitmap limit,
    // the limit wins rather than panicking.
    (page_size() / object_size)
        .max(MIN_SHUFFLE_VECTOR_LENGTH)
        .min(bitmap_limit)
}
|
||||
|
||||
/// Rounds `size` up to the next multiple of `align` (a power of two),
/// returning `None` if the rounded value would overflow `usize`.
#[inline(always)]
fn round_up_to_alignment(size: usize, align: usize) -> Option<usize> {
    debug_assert!(align.is_power_of_two());
    let rem_mask = align - 1;
    match size.checked_add(rem_mask) {
        Some(padded) => Some(padded & !rem_mask),
        None => None,
    }
}
|
||||
|
||||
#[inline(always)]
|
||||
fn page_alignment_for(align: usize) -> Option<u32> {
|
||||
let page = page_size();
|
||||
if align <= page {
|
||||
return Some(1);
|
||||
}
|
||||
|
||||
let pages = align / page;
|
||||
if pages * page != align {
|
||||
return None;
|
||||
}
|
||||
|
||||
u32::try_from(pages).ok()
|
||||
}
|
||||
|
||||
/// RAII guard that brackets a mesh operation in the seqlock epoch:
/// construction makes the epoch odd (mesh in progress); drop makes it even
/// again and wakes readers parked in `resolve_pointer`.
struct MeshingEpochGuard {
    // Raw pointer (not a reference) so the guard is not tied to a borrow of
    // the allocator; must point at a live AtomicU32 for the guard's lifetime.
    epoch: *const AtomicU32,
}
|
||||
|
||||
impl MeshingEpochGuard {
    /// Enters the "mesh in progress" state by bumping the epoch from even to
    /// odd. `epoch` must point at a live `AtomicU32` that outlives the guard.
    fn new(epoch: *const AtomicU32) -> MeshingEpochGuard {
        // SAFETY: caller guarantees `epoch` is valid for the guard's lifetime.
        let epoch_ref = unsafe { &*epoch };
        // AcqRel pairs with the Acquire loads in resolve_pointer.
        let previous = epoch_ref.fetch_add(1, Ordering::AcqRel);
        // Guards must not nest: the epoch must have been even (no mesh active).
        debug_assert_eq!(previous & 1, 0);
        MeshingEpochGuard { epoch }
    }
}
|
||||
|
||||
impl Drop for MeshingEpochGuard {
    /// Leaves the "mesh in progress" state: bumps the epoch back to even and
    /// wakes every reader parked on the epoch futex.
    fn drop(&mut self) {
        // SAFETY: `new`'s contract requires the epoch to outlive the guard.
        let epoch = unsafe { &*self.epoch };
        let previous = epoch.fetch_add(1, Ordering::AcqRel);
        // Construction made the epoch odd; it must still be odd here.
        debug_assert_eq!(previous & 1, 1);
        futex_wake_all(epoch);
    }
}
|
||||
|
||||
impl Drop for MeshAllocator {
    /// Tears down the allocator's out-of-band mappings. munmap failures are
    /// ignored: nothing useful can be done with them during teardown.
    fn drop(&mut self) {
        unsafe {
            // Run the bootstrap heap's destructor in place first;
            // drop_in_place does not free the backing memory, so the
            // subsequent munmap of the same pointer is sound.
            core::ptr::drop_in_place(self.bootstrap_thread);
            let _ = platform::munmap(
                self.bootstrap_thread.cast::<u8>(),
                size_of::<ThreadLocalHeap>(),
            );
            // Scratch candidate array sized to the miniheap pool capacity.
            let _ = platform::munmap(
                self.compaction_candidates.cast::<u8>(),
                self.pool.capacity() as usize * size_of::<MiniHeapId>(),
            );
        }
    }
}
|
||||
428
lib/runtime/src/platform/linux_x86_64/mesh_alloc/arena.rs
Normal file
428
lib/runtime/src/platform/linux_x86_64/mesh_alloc/arena.rs
Normal file
@@ -0,0 +1,428 @@
|
||||
use core::cell::UnsafeCell;
|
||||
use core::mem::size_of;
|
||||
use core::ptr::null_mut;
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use super::constants::DEFAULT_ARENA_SIZE;
|
||||
use super::miniheap::MiniHeapId;
|
||||
use super::page::{PageConfig, page_count, round_up_to_page};
|
||||
use super::platform;
|
||||
use super::raw_sys;
|
||||
use super::span::Span;
|
||||
use super::sync::FutexMutex;
|
||||
|
||||
const MAX_FREE_SPANS: usize = 4096;
|
||||
|
||||
/// Page-granular backing store for the allocator.
///
/// The arena is one large MAP_SHARED mapping of a memfd, so any page range
/// can later be remapped/aliased through the same file during meshing. Side
/// tables (per-page owner ids, recycled free spans) live in separate
/// anonymous mappings.
#[derive(Debug)]
pub struct Arena {
    // Runtime page size/shift used for offset <-> pointer math.
    config: PageConfig,
    // Total arena size in bytes (a multiple of the page size).
    arena_size: usize,
    // arena_size expressed in pages.
    page_count: u32,
    // memfd backing the whole arena.
    fd: i32,
    // Base address of the arena mapping.
    base: *mut u8,
    // Per-page owning miniheap id (0 = unowned); page_count entries.
    owners: *mut AtomicU32,
    // Bump cursor: first page never handed out.
    next_page: AtomicU32,
    // Fixed-capacity (MAX_FREE_SPANS) array of recycled spans.
    free_spans: *mut Span,
    // Live entries in free_spans; guarded by free_span_lock.
    free_span_count: UnsafeCell<u32>,
    free_span_lock: FutexMutex,
}
|
||||
|
||||
impl Arena {
    /// Creates an arena of `DEFAULT_ARENA_SIZE` bytes.
    #[inline]
    pub fn new() -> raw_sys::Result<Self> {
        Self::with_size(DEFAULT_ARENA_SIZE)
    }

    /// Creates an arena of `arena_size` bytes (must be a positive multiple of
    /// the page size), backed by a MAP_SHARED mapping of an anonymous memfd
    /// so page ranges can later be remapped through the same file.
    ///
    /// NOTE(review): on the error paths after `memfd_create` succeeds, the fd
    /// and any mappings already created are leaked — confirm this is
    /// acceptable for a once-per-process setup path.
    #[inline]
    pub fn with_size(arena_size: usize) -> raw_sys::Result<Self> {
        let config = PageConfig::get();
        assert!(arena_size > 0);
        assert_eq!(arena_size % config.size(), 0);

        let page_count = page_count(arena_size) as u32;
        let fd = platform::memfd_create(c"rust-mesh-alloc".as_ptr().cast(), raw_sys::MFD_CLOEXEC)?;
        platform::ftruncate(fd, arena_size as u64)?;

        // SAFETY: fresh fd of exactly `arena_size` bytes; null hint lets the
        // kernel pick the address, so no existing mapping is clobbered.
        let base = unsafe {
            platform::mmap(
                null_mut(),
                arena_size,
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
                raw_sys::MAP_SHARED,
                fd,
                0,
            )?
        };

        // One AtomicU32 owner entry per page, zero-initialized (0 = unowned).
        let owner_bytes = page_count as usize * size_of::<AtomicU32>();
        let owners = unsafe {
            platform::map_anonymous(owner_bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
                as *mut AtomicU32
        };
        let free_span_bytes = MAX_FREE_SPANS * size_of::<Span>();
        let free_spans = unsafe {
            platform::map_anonymous(free_span_bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
                as *mut Span
        };

        Ok(Self {
            config,
            arena_size,
            page_count,
            fd,
            base,
            owners,
            next_page: AtomicU32::new(0),
            free_spans,
            free_span_count: UnsafeCell::new(0),
            free_span_lock: FutexMutex::new(),
        })
    }

    /// Page size/shift configuration this arena was created with.
    #[inline(always)]
    pub const fn config(&self) -> PageConfig {
        self.config
    }

    /// Total arena size in bytes.
    #[inline(always)]
    pub const fn arena_size(&self) -> usize {
        self.arena_size
    }

    /// Base address of the arena mapping.
    #[inline(always)]
    pub const fn base_ptr(&self) -> *mut u8 {
        self.base
    }

    /// Whether `ptr` falls inside the arena (half-open `[base, base+size)`).
    #[inline(always)]
    pub fn contains(&self, ptr: *const u8) -> bool {
        let start = self.base as usize;
        let end = start + self.arena_size;
        let value = ptr as usize;
        start <= value && value < end
    }

    /// Reserves `page_count` pages aligned to `page_alignment` pages
    /// (rounded up to a power of two): recycled free spans first, then the
    /// bump cursor. None when the arena's virtual space is exhausted.
    #[inline]
    pub fn reserve_pages(&self, page_count: u32, page_alignment: u32) -> Option<Span> {
        assert!(page_count > 0);
        assert!(page_alignment > 0);
        let alignment = page_alignment.next_power_of_two();

        if let Some(span) = self.take_free_span(page_count, alignment) {
            return Some(span);
        }

        // CAS loop on the bump cursor. Pages skipped for alignment are not
        // returned to the free list here.
        loop {
            let current = self.next_page.load(Ordering::Acquire);
            let aligned = align_up_u32(current, alignment);
            let end = aligned.checked_add(page_count)?;
            if end > self.page_count {
                return None;
            }

            match self
                .next_page
                .compare_exchange(current, end, Ordering::AcqRel, Ordering::Acquire)
            {
                Ok(_) => return Some(Span::new(aligned, page_count)),
                Err(_) => continue,
            }
        }
    }

    /// Like `reserve_pages`, but also returns the span's base pointer.
    #[inline]
    pub fn page_alloc(&self, page_count: u32, page_alignment: u32) -> Option<(*mut u8, Span)> {
        let span = self.reserve_pages(page_count, page_alignment)?;
        Some((self.ptr_from_offset(span.offset as usize), span))
    }

    /// Byte-sized wrapper over `page_alloc` (size rounded up to whole pages).
    #[inline]
    pub fn allocate_bytes(&self, size: usize, page_alignment: u32) -> Option<(*mut u8, Span)> {
        let pages = page_count(size) as u32;
        self.page_alloc(pages, page_alignment)
    }

    /// Pages ever claimed from the bump cursor (including later-freed spans).
    #[inline(always)]
    pub fn reserved_pages(&self) -> u32 {
        self.next_page.load(Ordering::Acquire)
    }

    /// Returns `(free_span_count, total_free_pages)` for the recycle list.
    pub fn reusable_span_stats(&self) -> (u32, u32) {
        let _guard = self.free_span_lock.lock();
        let count = unsafe { *self.free_span_count.get() };
        let mut pages = 0u32;
        let mut index = 0usize;
        while index < count as usize {
            let span = unsafe { *self.free_spans.add(index) };
            pages += span.length;
            index += 1;
        }
        (count, pages)
    }

    /// Records `id` as the owner of every page in `span`.
    #[inline]
    pub fn track_miniheap(&self, span: Span, id: MiniHeapId) {
        for page in 0..span.length {
            self.owner_at_offset(span.offset + page)
                .store(id.value(), Ordering::Release);
        }
    }

    /// Clears ownership (back to 0 = unowned) for every page in `span`.
    #[inline]
    pub fn clear_miniheap(&self, span: Span) {
        for page in 0..span.length {
            self.owner_at_offset(span.offset + page)
                .store(0, Ordering::Release);
        }
    }

    /// Returns `span` to the free list, merging it with directly adjacent
    /// free spans found in one pass over the list.
    ///
    /// NOTE(review): a single pass can miss transitive merges (an already
    /// visited entry can become adjacent after a later merge); this only
    /// costs fragmentation, not correctness.
    #[inline]
    pub fn release_span(&self, span: Span) {
        if span.empty() {
            return;
        }

        let _guard = self.free_span_lock.lock();
        let count = unsafe { &mut *self.free_span_count.get() };
        let mut merged = span;
        let mut index = 0usize;
        while index < *count as usize {
            let other = unsafe { *self.free_spans.add(index) };
            // `other` ends exactly where `merged` starts: absorb on the left.
            if other.offset + other.length == merged.offset {
                merged = Span::new(other.offset, other.length + merged.length);
                // Swap-remove pulls a new entry into `index`; re-check it.
                self.remove_free_span_at(index, count);
                continue;
            }
            // `merged` ends exactly where `other` starts: absorb on the right.
            if merged.offset + merged.length == other.offset {
                merged = Span::new(merged.offset, merged.length + other.length);
                self.remove_free_span_at(index, count);
                continue;
            }
            index += 1;
        }

        self.push_free_span(merged, count);
    }

    /// Looks up the miniheap owning the page `ptr` falls on; None when the
    /// pointer is outside the arena or the page is unowned.
    #[inline]
    pub fn miniheap_id_for_ptr(&self, ptr: *const u8) -> Option<MiniHeapId> {
        if !self.contains(ptr) {
            return None;
        }
        let off = self.offset_for(ptr);
        let value = self.owner_at_offset(off).load(Ordering::Acquire);
        if value == 0 {
            None
        } else {
            Some(MiniHeapId::new(value))
        }
    }

    /// # Safety
    ///
    /// `remove..remove+size` must describe a valid, page-aligned mapping within this arena.
    /// The returned alias is a read-only MAP_SHARED scratch mapping of the old source backing.
    /// The original `remove` range is protected with `PROT_NONE` and must be restored or
    /// remapped by the caller before any blocked mutators are allowed to resume.
    #[inline]
    pub unsafe fn begin_mesh(&self, remove: *mut u8, size: usize) -> raw_sys::Result<*mut u8> {
        let rounded = round_up_to_page(size);
        let remove_off = self.offset_for(remove);
        // Fault out mutator access to the source range, then create a
        // read-only alias of its file backing so objects can still be copied
        // out of it while the range itself is inaccessible.
        unsafe {
            platform::mprotect(remove, rounded, raw_sys::PROT_NONE)?;
            platform::mmap(
                core::ptr::null_mut(),
                rounded,
                raw_sys::PROT_READ,
                raw_sys::MAP_SHARED,
                self.fd,
                (remove_off as usize * self.config.size()) as u64,
            )
        }
    }

    /// # Safety
    ///
    /// Restores the source mapping to its original backing after a failed mesh attempt.
    #[inline]
    pub unsafe fn abort_mesh(
        &self,
        remove: *mut u8,
        scratch: *mut u8,
        size: usize,
    ) -> raw_sys::Result<()> {
        let rounded = round_up_to_page(size);
        let remove_off = self.offset_for(remove);
        unsafe {
            if !scratch.is_null() {
                platform::munmap(scratch, rounded)?;
            }
            // MAP_FIXED over the PROT_NONE range re-establishes the identity
            // read/write mapping of the original file offset.
            platform::mmap(
                remove,
                rounded,
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
                raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
                self.fd,
                (remove_off as usize * self.config.size()) as u64,
            )?;
        }
        Ok(())
    }

    /// # Safety
    ///
    /// `keep` and `remove` must each point to valid page-aligned ranges of at least `size`
    /// bytes within this arena. The caller must ensure that aliasing these ranges is valid for
    /// the current allocator state and that any required object copying has already completed.
    #[inline]
    pub unsafe fn finalize_mesh(
        &self,
        keep: *mut u8,
        remove: *mut u8,
        scratch: *mut u8,
        size: usize,
    ) -> raw_sys::Result<()> {
        let rounded = round_up_to_page(size);
        let keep_off = self.offset_for(keep);
        let remove_off = self.offset_for(remove);
        let pages = page_count(rounded);

        // Point the `remove` virtual range at the `keep` file pages: both
        // virtual ranges now alias the same physical backing.
        unsafe {
            platform::mmap(
                remove,
                rounded,
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
                raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
                self.fd,
                (keep_off as usize * self.config.size()) as u64,
            )?;
            if !scratch.is_null() {
                platform::munmap(scratch, rounded)?;
            }
        }

        // Both ranges now belong to the surviving (keep) miniheap.
        let keep_id = self.owner_at_offset(keep_off).load(Ordering::Acquire);
        for page in 0..pages {
            self.owner_at_offset(remove_off + page as u32)
                .store(keep_id, Ordering::Release);
        }

        Ok(())
    }

    /// Releases the physical backing of `ptr..ptr+size` (rounded to whole
    /// pages) by punching a hole in the memfd; the virtual mapping remains.
    #[inline]
    pub fn free_phys(&self, ptr: *mut u8, size: usize) -> raw_sys::Result<()> {
        let rounded = round_up_to_page(size);
        let offset = (ptr as usize).wrapping_sub(self.base as usize);
        platform::fallocate(
            self.fd,
            raw_sys::FALLOC_FL_PUNCH_HOLE | raw_sys::FALLOC_FL_KEEP_SIZE,
            offset as u64,
            rounded as u64,
        )
    }

    /// Re-establishes the 1:1 virtual-to-file mapping for `span`, undoing
    /// any aliasing left behind by meshing.
    #[inline]
    pub fn reset_identity_mapping(&self, span: Span) -> raw_sys::Result<()> {
        let ptr = self.ptr_from_offset(span.offset as usize);
        // SAFETY: the span lies inside the arena, so MAP_FIXED only replaces
        // arena-owned mappings.
        unsafe {
            platform::mmap(
                ptr,
                span.byte_length_for_page_size(self.config.size()),
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
                raw_sys::MAP_SHARED | raw_sys::MAP_FIXED,
                self.fd,
                (span.offset as usize * self.config.size()) as u64,
            )?;
        }
        Ok(())
    }

    /// Page index of `ptr` within the arena. No bounds check: the caller
    /// must ensure the pointer is inside the arena.
    #[inline(always)]
    pub fn offset_for(&self, ptr: *const u8) -> u32 {
        let delta = (ptr as usize).wrapping_sub(self.base as usize);
        (delta >> self.config.shift()) as u32
    }

    /// Pointer to the first byte of page `offset`.
    #[inline(always)]
    pub fn ptr_from_offset(&self, offset: usize) -> *mut u8 {
        unsafe { self.base.add(offset << self.config.shift()) }
    }

    /// Owner-table entry for page `offset` (bounds-checked).
    #[inline(always)]
    fn owner_at_offset(&self, offset: u32) -> &AtomicU32 {
        assert!(offset < self.page_count);
        // SAFETY: offset < page_count, and the owner table has page_count
        // zero-initialized AtomicU32 entries.
        unsafe { &*self.owners.add(offset as usize) }
    }
}
|
||||
|
||||
impl Drop for Arena {
    /// Unmaps the side tables and the arena itself, then closes the backing
    /// memfd. Failures are deliberately ignored: there is no useful recovery
    /// during teardown.
    fn drop(&mut self) {
        let owner_bytes = self.page_count as usize * size_of::<AtomicU32>();
        let free_span_bytes = MAX_FREE_SPANS * size_of::<Span>();
        // SAFETY: these are exactly the mappings created in `with_size`,
        // with the same sizes, and nothing references them after drop.
        unsafe {
            let _ = platform::munmap(self.free_spans as *mut u8, free_span_bytes);
            let _ = platform::munmap(self.owners as *mut u8, owner_bytes);
            let _ = platform::munmap(self.base, self.arena_size);
        }
        let _ = platform::close(self.fd);
    }
}
|
||||
|
||||
/// Rounds `value` up to the next multiple of `alignment` (a power of two).
/// Wraps on overflow in release builds, like the original expression.
#[inline(always)]
fn align_up_u32(value: u32, alignment: u32) -> u32 {
    debug_assert!(alignment.is_power_of_two());
    let mask = alignment - 1;
    (value + mask) & !mask
}
|
||||
|
||||
impl Arena {
|
||||
fn take_free_span(&self, page_count: u32, alignment: u32) -> Option<Span> {
|
||||
let _guard = self.free_span_lock.lock();
|
||||
let count = unsafe { &mut *self.free_span_count.get() };
|
||||
let mut index = 0usize;
|
||||
while index < *count as usize {
|
||||
let span = unsafe { *self.free_spans.add(index) };
|
||||
let aligned = align_up_u32(span.offset, alignment);
|
||||
let prefix = aligned.checked_sub(span.offset)?;
|
||||
let total = prefix.checked_add(page_count)?;
|
||||
if total <= span.length {
|
||||
self.remove_free_span_at(index, count);
|
||||
|
||||
if prefix > 0 {
|
||||
self.push_free_span(Span::new(span.offset, prefix), count);
|
||||
}
|
||||
|
||||
let suffix_offset = aligned + page_count;
|
||||
let suffix_length = span.length - total;
|
||||
if suffix_length > 0 {
|
||||
self.push_free_span(Span::new(suffix_offset, suffix_length), count);
|
||||
}
|
||||
|
||||
return Some(Span::new(aligned, page_count));
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn push_free_span(&self, span: Span, count: &mut u32) {
|
||||
assert!((*count as usize) < MAX_FREE_SPANS);
|
||||
unsafe {
|
||||
self.free_spans.add(*count as usize).write(span);
|
||||
}
|
||||
*count += 1;
|
||||
}
|
||||
|
||||
fn remove_free_span_at(&self, index: usize, count: &mut u32) {
|
||||
debug_assert!(index < *count as usize);
|
||||
let last = *count as usize - 1;
|
||||
if index != last {
|
||||
let replacement = unsafe { *self.free_spans.add(last) };
|
||||
unsafe {
|
||||
self.free_spans.add(index).write(replacement);
|
||||
}
|
||||
}
|
||||
*count -= 1;
|
||||
}
|
||||
}
|
||||
236
lib/runtime/src/platform/linux_x86_64/mesh_alloc/bitmap.rs
Normal file
236
lib/runtime/src/platform/linux_x86_64/mesh_alloc/bitmap.rs
Normal file
@@ -0,0 +1,236 @@
|
||||
use core::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use super::constants::MAX_OBJECT_SLOTS_PER_SPAN;
|
||||
|
||||
// Width of one bitmap storage word, in bits.
const USIZE_BITS: usize = usize::BITS as usize;
// Fixed word count sized for the worst case; bitmaps tracking fewer bits
// simply keep their tail words zeroed.
const BITMAP_WORDS: usize = MAX_OBJECT_SLOTS_PER_SPAN / USIZE_BITS;
|
||||
|
||||
/// Fixed-capacity, non-atomic bitmap of object slots within one span.
/// Storage is always `BITMAP_WORDS` words; only the first `bit_count` bits
/// are meaningful (tail bits are kept zero by `mask_unused_bits`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RelaxedBitmap {
    // Valid bit count (<= MAX_OBJECT_SLOTS_PER_SPAN, so it fits in u16).
    bit_count: u16,
    words: [usize; BITMAP_WORDS],
}
|
||||
|
||||
impl RelaxedBitmap {
    /// Creates an all-clear bitmap tracking `bit_count` bits
    /// (at most `MAX_OBJECT_SLOTS_PER_SPAN`).
    #[inline]
    pub fn new(bit_count: usize) -> Self {
        assert!(bit_count <= MAX_OBJECT_SLOTS_PER_SPAN);
        Self {
            bit_count: bit_count as u16,
            words: [0; BITMAP_WORDS],
        }
    }

    /// Number of tracked bits.
    #[inline(always)]
    pub const fn bit_count(&self) -> usize {
        self.bit_count as usize
    }

    /// Read access to the raw storage words.
    #[inline(always)]
    pub fn words(&self) -> &[usize; BITMAP_WORDS] {
        &self.words
    }

    /// Mutable access to the raw storage words. Callers writing past
    /// `bit_count` are responsible for keeping tail bits clear.
    #[inline(always)]
    pub fn words_mut(&mut self) -> &mut [usize; BITMAP_WORDS] {
        &mut self.words
    }

    /// Clears every bit.
    #[inline]
    pub fn clear(&mut self) {
        self.words = [0; BITMAP_WORDS];
    }

    /// Sets every valid bit (tail bits beyond `bit_count` stay clear).
    #[inline]
    pub fn set_all(&mut self) {
        self.words = [usize::MAX; BITMAP_WORDS];
        self.mask_unused_bits();
    }

    /// Flips every valid bit, then clears the tail so inverted unused bits
    /// never leak into counts or iteration.
    #[inline]
    pub fn invert_masked(&mut self) {
        for word in &mut self.words {
            *word = !*word;
        }
        self.mask_unused_bits();
    }

    /// Sets bit `index`; returns true iff it was previously clear.
    #[inline(always)]
    pub fn try_set(&mut self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        let old = self.words[word];
        self.words[word] = old | mask;
        old & mask == 0
    }

    /// Clears bit `index`; returns true iff it was previously set.
    #[inline(always)]
    pub fn unset(&mut self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        let old = self.words[word];
        self.words[word] = old & !mask;
        old & mask != 0
    }

    /// Whether bit `index` is set.
    #[inline(always)]
    pub fn is_set(&self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        self.words[word] & mask != 0
    }

    /// Total number of set bits.
    #[inline]
    pub fn in_use_count(&self) -> u32 {
        self.words.iter().map(|word| word.count_ones()).sum()
    }

    /// Iterator over the indices of set bits, in ascending order.
    #[inline]
    pub fn iter_set_bits(&self) -> BitIter {
        BitIter::new(self.words, self.bit_count())
    }

    /// Zeroes every bit at index >= `bit_count` so whole-word operations
    /// (`set_all`, `invert_masked`) never report phantom slots.
    #[inline]
    fn mask_unused_bits(&mut self) {
        let valid_bits = self.bit_count();
        if valid_bits == MAX_OBJECT_SLOTS_PER_SPAN {
            return;
        }

        let used_words = valid_bits / USIZE_BITS;
        let remainder = valid_bits % USIZE_BITS;
        // Clear the fully-unused tail words...
        for word in self
            .words
            .iter_mut()
            .skip(used_words + usize::from(remainder != 0))
        {
            *word = 0;
        }
        // ...then mask the partially-used boundary word, if any.
        if remainder != 0 {
            self.words[used_words] &= (1usize << remainder) - 1;
        }
    }
}
|
||||
|
||||
/// Atomic counterpart of `RelaxedBitmap`: per-word atomic slot bitmap shared
/// between threads. Individual bit operations are atomic; multi-word
/// operations are not a single consistent snapshot.
#[derive(Debug)]
pub struct AtomicBitmap {
    // Valid bit count (<= MAX_OBJECT_SLOTS_PER_SPAN).
    bit_count: u16,
    words: [AtomicUsize; BITMAP_WORDS],
}
|
||||
|
||||
impl AtomicBitmap {
    /// Creates an all-clear atomic bitmap tracking `bit_count` bits.
    #[inline]
    pub fn new(bit_count: usize) -> Self {
        assert!(bit_count <= MAX_OBJECT_SLOTS_PER_SPAN);
        Self {
            bit_count: bit_count as u16,
            words: [const { AtomicUsize::new(0) }; BITMAP_WORDS],
        }
    }

    /// Number of tracked bits.
    #[inline(always)]
    pub const fn bit_count(&self) -> usize {
        self.bit_count as usize
    }

    /// Atomically sets bit `index`; returns true iff it was previously clear
    /// (i.e. this caller won the claim).
    #[inline(always)]
    pub fn try_set(&self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        let old = self.words[word].fetch_or(mask, Ordering::AcqRel);
        old & mask == 0
    }

    /// Atomically clears bit `index`; returns true iff it was previously set.
    #[inline(always)]
    pub fn unset(&self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        let old = self.words[word].fetch_and(!mask, Ordering::AcqRel);
        old & mask != 0
    }

    /// Whether bit `index` is set (may be stale by the time the caller acts).
    #[inline(always)]
    pub fn is_set(&self, index: usize) -> bool {
        let (word, mask) = word_and_mask(index);
        self.words[word].load(Ordering::Acquire) & mask != 0
    }

    /// Total set bits. Each word load is atomic, but the sum is not a single
    /// consistent snapshot under concurrent mutation.
    #[inline]
    pub fn in_use_count(&self) -> u32 {
        self.words
            .iter()
            .map(|word| word.load(Ordering::Acquire).count_ones())
            .sum()
    }

    /// Swaps in `new_words` word-by-word and returns the previous contents.
    /// Each word swap is atomic; the whole-array exchange is not.
    #[inline]
    pub fn swap_words(&self, new_words: &[usize; BITMAP_WORDS]) -> [usize; BITMAP_WORDS] {
        let mut old_words = [0usize; BITMAP_WORDS];
        let mut i = 0;
        while i < BITMAP_WORDS {
            old_words[i] = self.words[i].swap(new_words[i], Ordering::AcqRel);
            i += 1;
        }
        old_words
    }

    /// Copies the current words into a non-atomic `RelaxedBitmap`
    /// (word-level atomic, not a globally consistent snapshot).
    #[inline]
    pub fn snapshot(&self) -> RelaxedBitmap {
        let mut bitmap = RelaxedBitmap::new(self.bit_count());
        let mut i = 0;
        while i < BITMAP_WORDS {
            bitmap.words_mut()[i] = self.words[i].load(Ordering::Acquire);
            i += 1;
        }
        bitmap
    }

    /// Marks every slot used (word-by-word swap) and returns a bitmap of the
    /// slots that were free at swap time — i.e. claims all remaining free
    /// slots for the caller.
    #[inline]
    pub fn take_free_bits(&self) -> RelaxedBitmap {
        let mut all_used = RelaxedBitmap::new(self.bit_count());
        all_used.set_all();
        let previous = self.swap_words(all_used.words());

        // Invert the previous contents: set bit = slot that was free.
        let mut free_bits = RelaxedBitmap::new(self.bit_count());
        *free_bits.words_mut() = previous;
        free_bits.invert_masked();
        free_bits
    }
}
|
||||
|
||||
/// Iterator over the set-bit indices of a captured word array. Holds its own
/// copy of the words, so it is unaffected by later bitmap mutation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BitIter {
    words: [usize; BITMAP_WORDS],
    // Exclusive upper bound on indices to visit.
    bit_count: usize,
    // Next bit index to examine.
    next_index: usize,
}
|
||||
|
||||
impl BitIter {
|
||||
#[inline(always)]
|
||||
pub fn new(words: [usize; BITMAP_WORDS], bit_count: usize) -> Self {
|
||||
Self {
|
||||
words,
|
||||
bit_count,
|
||||
next_index: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for BitIter {
|
||||
type Item = usize;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while self.next_index < self.bit_count {
|
||||
let current = self.next_index;
|
||||
self.next_index += 1;
|
||||
let (word, mask) = word_and_mask(current);
|
||||
if self.words[word] & mask != 0 {
|
||||
return Some(current);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn word_and_mask(index: usize) -> (usize, usize) {
|
||||
debug_assert!(index < MAX_OBJECT_SLOTS_PER_SPAN);
|
||||
let word = index / USIZE_BITS;
|
||||
let bit = index % USIZE_BITS;
|
||||
(word, 1usize << bit)
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
// Bounds on the runtime page size the allocator is prepared to handle.
pub const MIN_SUPPORTED_PAGE_SIZE: usize = 4096;
pub const MAX_SUPPORTED_PAGE_SIZE: usize = 16384;
// Smallest object granule; also fixes the per-span slot bound below.
pub const MIN_OBJECT_SIZE: usize = 16;
pub const MAX_SMALL_ALLOCATION: usize = 16_384;
pub const NUM_SIZE_CLASSES: usize = 25;
// Occupancy cutoff ratio (4/5) used by is_below_partial_threshold.
pub const OCCUPANCY_CUTOFF_NUMERATOR: u32 = 4;
pub const OCCUPANCY_CUTOFF_DENOMINATOR: u32 = 5;
pub const MIN_SHUFFLE_VECTOR_LENGTH: usize = 8;
pub const MAX_ATTACHED_MINIHEAPS_PER_CLASS: usize = 48;
// Worst-case object slots per span: largest page / smallest object.
pub const MAX_OBJECT_SLOTS_PER_SPAN: usize = MAX_SUPPORTED_PAGE_SIZE / MIN_OBJECT_SIZE;
pub const MAX_SHUFFLE_VECTOR_LENGTH: usize = MAX_OBJECT_SLOTS_PER_SPAN;
// 64 GiB of virtual address space for the arena (backed lazily by the OS).
pub const DEFAULT_ARENA_SIZE: usize = 64 * 1024 * 1024 * 1024;
pub const MINIHEAP_REFILL_GOAL_SIZE: usize = 16 * 1024;

/// True when occupancy `in_use / max_count` is strictly below the 4/5
/// cutoff. Cross-multiplied with saturating math so no division and no
/// overflow is possible.
#[inline(always)]
pub const fn is_below_partial_threshold(in_use: u32, max_count: u32) -> bool {
    in_use.saturating_mul(OCCUPANCY_CUTOFF_DENOMINATOR)
        < max_count.saturating_mul(OCCUPANCY_CUTOFF_NUMERATOR)
}
|
||||
132
lib/runtime/src/platform/linux_x86_64/mesh_alloc/fault.rs
Normal file
132
lib/runtime/src/platform/linux_x86_64/mesh_alloc/fault.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
|
||||
|
||||
use super::raw_sys;
|
||||
use super::sync::{futex_wait_for_value, futex_wake_all};
|
||||
|
||||
// One-shot installation states for the fault-mediation machinery.
const INSTALL_UNINITIALIZED: u32 = 0;
const INSTALL_READY: u32 = 1;
const INSTALL_FAILED: u32 = 2;

static INSTALL_STATE: AtomicU32 = AtomicU32::new(INSTALL_UNINITIALIZED);
// Seqlock-style counter: odd while a mesh is in flight. Also the futex word
// that ok_to_proceed waiters sleep on.
static ACTIVE_MESH_SEQ: AtomicU32 = AtomicU32::new(0);
// Start address / byte length of the range being meshed; only meaningful
// while ACTIVE_MESH_SEQ is odd.
static ACTIVE_MESH_START: AtomicUsize = AtomicUsize::new(0);
static ACTIVE_MESH_LEN: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
/// RAII guard for the global "a mesh is in flight over this range" window
/// that the EFAULT-retry helpers consult.
#[derive(Debug)]
pub struct ActiveMeshGuard {
    // False once released, making release()/drop idempotent.
    active: bool,
}
|
||||
|
||||
impl ActiveMeshGuard {
    /// Publishes `[start, start + len)` as the range currently being meshed.
    ///
    /// Waits for any previously active mesh window to close (the sequence is
    /// odd while one is open), stores the range, then bumps the sequence to
    /// odd. Pairs with `ok_to_proceed`, which reads the same sequence/range.
    ///
    /// NOTE(review): the wait-then-publish here is not atomic; two threads
    /// could both observe an even sequence and race — confirm an outer lock
    /// serializes mesh operations.
    pub fn begin(start: *mut u8, len: usize) -> raw_sys::Result<Self> {
        ensure_fault_mediation_installed()?;
        loop {
            let seq = ACTIVE_MESH_SEQ.load(Ordering::Acquire);
            if seq & 1 == 0 {
                break;
            }
            futex_wait_for_value(&ACTIVE_MESH_SEQ, seq);
        }
        ACTIVE_MESH_START.store(start as usize, Ordering::Release);
        ACTIVE_MESH_LEN.store(len, Ordering::Release);
        let previous = ACTIVE_MESH_SEQ.fetch_add(1, Ordering::AcqRel);
        debug_assert_eq!(previous & 1, 0);
        Ok(Self { active: true })
    }

    /// Consumes the guard, closing the active-mesh window immediately.
    pub fn finish(mut self) {
        self.release();
    }

    /// Clears the published range, bumps the sequence back to even, and
    /// wakes every thread parked in `ok_to_proceed`. Idempotent.
    fn release(&mut self) {
        if !self.active {
            return;
        }

        ACTIVE_MESH_START.store(0, Ordering::Release);
        ACTIVE_MESH_LEN.store(0, Ordering::Release);
        let previous = ACTIVE_MESH_SEQ.fetch_add(1, Ordering::AcqRel);
        debug_assert_eq!(previous & 1, 1);
        futex_wake_all(&ACTIVE_MESH_SEQ);
        self.active = false;
    }
}
|
||||
|
||||
impl Drop for ActiveMeshGuard {
    /// Ensures the active-mesh window is closed even when the guard is
    /// dropped without an explicit `finish()` (e.g. on an error path).
    fn drop(&mut self) {
        self.release();
    }
}
|
||||
|
||||
pub fn ensure_fault_mediation_installed() -> raw_sys::Result<()> {
|
||||
match INSTALL_STATE.compare_exchange(
|
||||
INSTALL_UNINITIALIZED,
|
||||
INSTALL_READY,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
) {
|
||||
Ok(_) | Err(INSTALL_READY) => Ok(()),
|
||||
Err(_) => {
|
||||
INSTALL_STATE.store(INSTALL_FAILED, Ordering::Release);
|
||||
Err(raw_sys::Error(raw_sys::EAGAIN))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Called after an EFAULT to decide whether the fault plausibly came from an
/// in-flight mesh covering `ptr` and the operation should be retried.
///
/// Returns true only if this call actually waited for an overlapping mesh to
/// finish (so a retry may now succeed); false means no overlapping mesh was
/// active and the fault should be surfaced to the caller.
pub fn ok_to_proceed(ptr: *const u8) -> bool {
    let address = ptr as usize;
    let mut waited = false;
    loop {
        let seq = ACTIVE_MESH_SEQ.load(Ordering::Acquire);
        // Even sequence: no mesh in flight right now.
        if seq & 1 == 0 {
            return waited;
        }

        // Odd: a mesh is active; check whether its range covers `address`.
        let start = ACTIVE_MESH_START.load(Ordering::Acquire);
        let len = ACTIVE_MESH_LEN.load(Ordering::Acquire);
        let end = start.saturating_add(len);
        if address < start || address >= end {
            return waited;
        }

        // Overlap: block until the mesh advances the sequence, then recheck.
        waited = true;
        futex_wait_for_value(&ACTIVE_MESH_SEQ, seq);
    }
}
|
||||
|
||||
pub fn retry_on_efault<T, F>(ptr: *const u8, mut op: F) -> raw_sys::Result<T>
|
||||
where
|
||||
F: FnMut() -> raw_sys::Result<T>,
|
||||
{
|
||||
loop {
|
||||
match op() {
|
||||
Ok(value) => return Ok(value),
|
||||
Err(error) if error.errno() == raw_sys::EFAULT && ok_to_proceed(ptr) => continue,
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn retry_on_efault_ptrs<T, F>(ptrs: &[*const u8], mut op: F) -> raw_sys::Result<T>
|
||||
where
|
||||
F: FnMut() -> raw_sys::Result<T>,
|
||||
{
|
||||
loop {
|
||||
match op() {
|
||||
Ok(value) => return Ok(value),
|
||||
Err(error) if error.errno() == raw_sys::EFAULT => {
|
||||
let mut waited = false;
|
||||
for &ptr in ptrs {
|
||||
if !ptr.is_null() {
|
||||
waited |= ok_to_proceed(ptr);
|
||||
}
|
||||
}
|
||||
if waited {
|
||||
continue;
|
||||
}
|
||||
return Err(error);
|
||||
}
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
}
|
||||
}
|
||||
453
lib/runtime/src/platform/linux_x86_64/mesh_alloc/global.rs
Normal file
453
lib/runtime/src/platform/linux_x86_64/mesh_alloc/global.rs
Normal file
@@ -0,0 +1,453 @@
|
||||
use core::alloc::{GlobalAlloc, Layout};
|
||||
use core::cell::UnsafeCell;
|
||||
use core::mem::MaybeUninit;
|
||||
use core::ptr::{addr_of_mut, drop_in_place, null, null_mut};
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use super::allocator::MeshAllocator;
|
||||
use super::constants::DEFAULT_ARENA_SIZE;
|
||||
use super::stats::{
|
||||
CompactionAdvice, CompactionSkipReason, MeshStats, RuntimeCompactionPolicy,
|
||||
RuntimeCompactionResult,
|
||||
};
|
||||
use super::sync::{FutexMutex, futex_wait_for_value, futex_wake_all};
|
||||
use super::thread_local_heap::ThreadLocalHeap;
|
||||
|
||||
// One-time construction states for the inner allocator (init_state).
const INIT_UNINITIALIZED: u32 = 0;
const INIT_IN_PROGRESS: u32 = 1;
const INIT_READY: u32 = 2;
const INIT_FAILED: u32 = 3;

// Default miniheap pool capacity for the global allocator.
pub const DEFAULT_GLOBAL_MINIHEAP_CAPACITY: u32 = 4096;

// Per-thread heap lifecycle states (THREAD_HEAP_STATE).
const TLS_UNINITIALIZED: u32 = 0;
const TLS_READY: u32 = 1;
const TLS_FAILED: u32 = 2;
// Per-thread quiescent-compaction flag (THREAD_SAFEPOINT_STATE).
const SAFEPOINT_INACTIVE: u32 = 0;
const SAFEPOINT_ACTIVE: u32 = 1;
|
||||
|
||||
// Per-thread allocator state; all access goes through addr_of_mut! in the
// methods of GlobalMeshAllocator.
#[thread_local]
static mut THREAD_HEAP_STATE: u32 = TLS_UNINITIALIZED;
// Lazily initialized heap; only dropped when THREAD_HEAP_STATE == TLS_READY.
#[thread_local]
static mut THREAD_HEAP: MaybeUninit<ThreadLocalHeap> = MaybeUninit::uninit();
// Whether this thread has entered the cooperative compaction safepoint.
#[thread_local]
static mut THREAD_SAFEPOINT_STATE: u32 = SAFEPOINT_INACTIVE;
// The GlobalMeshAllocator that owns this thread's heap, if any.
#[thread_local]
static mut THREAD_HEAP_OWNER: *const GlobalMeshAllocator = null();
|
||||
|
||||
/// Lazily-initialized, process-wide wrapper around `MeshAllocator`, plus
/// per-thread heap registration and cooperative-compaction bookkeeping.
#[derive(Debug)]
pub struct GlobalMeshAllocator {
    // Construction parameters for the inner MeshAllocator.
    arena_size: usize,
    miniheap_capacity: u32,
    // INIT_* state machine guarding one-time construction of `allocator`.
    init_state: AtomicU32,
    // Threads currently holding a registered ThreadLocalHeap.
    registered_threads: AtomicU32,
    // Registered threads currently parked in the quiescent/safepoint state.
    quiescent_threads: AtomicU32,
    lock: FutexMutex,
    // Inner allocator storage; presumably only initialized once init_state
    // reaches INIT_READY — TODO confirm against the accessor methods.
    allocator: UnsafeCell<MaybeUninit<MeshAllocator>>,
}
|
||||
|
||||
impl GlobalMeshAllocator {
|
||||
pub const fn new(arena_size: usize, miniheap_capacity: u32) -> Self {
|
||||
Self {
|
||||
arena_size,
|
||||
miniheap_capacity,
|
||||
init_state: AtomicU32::new(INIT_UNINITIALIZED),
|
||||
registered_threads: AtomicU32::new(0),
|
||||
quiescent_threads: AtomicU32::new(0),
|
||||
lock: FutexMutex::new(),
|
||||
allocator: UnsafeCell::new(MaybeUninit::uninit()),
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn with_default_config() -> Self {
|
||||
Self::new(DEFAULT_ARENA_SIZE, DEFAULT_GLOBAL_MINIHEAP_CAPACITY)
|
||||
}
|
||||
|
||||
pub fn init_thread(&self) -> bool {
|
||||
self.thread_heap().is_some()
|
||||
}
|
||||
|
||||
pub fn shutdown_thread(&self) {
|
||||
unsafe {
|
||||
let state_ptr = addr_of_mut!(THREAD_HEAP_STATE);
|
||||
let heap_ptr = addr_of_mut!(THREAD_HEAP);
|
||||
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
|
||||
let owner_ptr = addr_of_mut!(THREAD_HEAP_OWNER);
|
||||
if *safepoint_ptr == SAFEPOINT_ACTIVE {
|
||||
self.quiescent_threads.fetch_sub(1, Ordering::AcqRel);
|
||||
*safepoint_ptr = SAFEPOINT_INACTIVE;
|
||||
}
|
||||
if *state_ptr == TLS_READY {
|
||||
let _ = self.with_existing_allocator_mut(|allocator| {
|
||||
allocator.shutdown_thread((*heap_ptr).assume_init_mut());
|
||||
});
|
||||
drop_in_place((*heap_ptr).as_mut_ptr());
|
||||
self.registered_threads.fetch_sub(1, Ordering::AcqRel);
|
||||
}
|
||||
*state_ptr = TLS_UNINITIALIZED;
|
||||
*owner_ptr = null();
|
||||
}
|
||||
}
|
||||
|
||||
/// Marks the current thread quiescent for cooperative compaction.
|
||||
///
|
||||
/// This is **not** true concurrent compaction: arbitrary loads/stores through existing raw
|
||||
/// pointers remain outside allocator control, so compaction is only safe once all registered
|
||||
/// allocator threads have voluntarily entered this state.
|
||||
pub fn enter_quiescent_compaction_state(&self) -> bool {
|
||||
if self.thread_heap().is_none() {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
|
||||
if *safepoint_ptr == SAFEPOINT_ACTIVE {
|
||||
return true;
|
||||
}
|
||||
*safepoint_ptr = SAFEPOINT_ACTIVE;
|
||||
}
|
||||
self.quiescent_threads.fetch_add(1, Ordering::AcqRel);
|
||||
true
|
||||
}
|
||||
|
||||
/// Leaves the cooperative quiescent compaction state for the current thread.
|
||||
pub fn leave_quiescent_compaction_state(&self) {
|
||||
unsafe {
|
||||
let safepoint_ptr = addr_of_mut!(THREAD_SAFEPOINT_STATE);
|
||||
if *safepoint_ptr == SAFEPOINT_ACTIVE {
|
||||
*safepoint_ptr = SAFEPOINT_INACTIVE;
|
||||
self.quiescent_threads.fetch_sub(1, Ordering::AcqRel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true only when every registered allocator thread is quiescent.
|
||||
///
|
||||
/// This is a cooperative global-quiescence check, not a proof that active mutators can safely
|
||||
/// race with remap/migration.
|
||||
pub fn quiescent_compaction_ready(&self) -> bool {
|
||||
let registered = self.registered_threads.load(Ordering::Acquire);
|
||||
registered != 0 && registered == self.quiescent_threads.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Runs cooperative quiescent compaction when the current thread and every other registered
/// allocator thread have voluntarily stopped allocator-visible activity.
///
/// This API intentionally does **not** claim to provide concurrent compaction with active
/// mutators. Achieving that would require heavier machinery such as page-fault mediation,
/// signal handling, syscall retry/interposition, or equivalent runtime coordination for raw
/// pointer accesses and kernel I/O into moving pages.
///
/// Returns `Skipped` with a reason when any precondition fails; otherwise consults `policy`
/// and, if it approves, compacts and reports the resulting meshes.
pub fn compact_when_quiescent(
    &self,
    policy: RuntimeCompactionPolicy,
) -> RuntimeCompactionResult {
    // Precondition 1: this thread must itself be at the safepoint.
    if !self.current_thread_in_safepoint() {
        return RuntimeCompactionResult::Skipped {
            reason: CompactionSkipReason::NotAtSafepoint,
            advice: self.compaction_advice(),
        };
    }
    // Precondition 2: every registered thread must be quiescent.
    if !self.quiescent_compaction_ready() {
        return RuntimeCompactionResult::Skipped {
            reason: CompactionSkipReason::ThreadsActive,
            advice: self.compaction_advice(),
        };
    }

    let Some(thread_heap) = self.thread_heap() else {
        return RuntimeCompactionResult::Skipped {
            reason: CompactionSkipReason::ThreadUnavailable,
            advice: self.compaction_advice(),
        };
    };
    // `with_existing_allocator_mut` (rather than `with_allocator`) is used so a
    // never-initialized allocator is reported as unavailable instead of lazily created.
    let Some(result) = self.with_existing_allocator_mut(|allocator| {
        // Flush this thread's local state into the global allocator before
        // measuring and compacting.
        allocator.shutdown_thread(thread_heap);
        let advice = allocator.stats().compaction_advice();
        if !policy.should_compact(&advice) {
            return RuntimeCompactionResult::Skipped {
                reason: CompactionSkipReason::Policy,
                advice: Some(advice),
            };
        }

        let meshes = allocator.compact_with_thread(thread_heap);
        RuntimeCompactionResult::Compacted { meshes, advice }
    }) else {
        return RuntimeCompactionResult::Skipped {
            reason: CompactionSkipReason::AllocatorUnavailable,
            advice: None,
        };
    };

    result
}
|
||||
|
||||
/// Compatibility alias for `enter_quiescent_compaction_state`.
///
/// Pure delegation kept for older callers; prefer the quiescent-named API.
pub fn enter_compaction_safepoint(&self) -> bool {
    self.enter_quiescent_compaction_state()
}
|
||||
|
||||
/// Compatibility alias for `leave_quiescent_compaction_state`.
///
/// Pure delegation kept for older callers; prefer the quiescent-named API.
pub fn leave_compaction_safepoint(&self) {
    self.leave_quiescent_compaction_state();
}
|
||||
|
||||
/// Compatibility alias for `quiescent_compaction_ready`.
///
/// Pure delegation kept for older callers; prefer the quiescent-named API.
pub fn compaction_safepoint_ready(&self) -> bool {
    self.quiescent_compaction_ready()
}
|
||||
|
||||
/// Compatibility alias for `compact_when_quiescent`.
///
/// Pure delegation kept for older callers; prefer the quiescent-named API.
pub fn compact_at_safepoint(&self, policy: RuntimeCompactionPolicy) -> RuntimeCompactionResult {
    self.compact_when_quiescent(policy)
}
|
||||
|
||||
/// Unconditionally compacts using the current thread's heap, lazily
/// initializing the allocator if needed.
///
/// Returns the number of meshes performed, or 0 when no thread heap or
/// allocator is available. Unlike `compact_when_quiescent`, this performs
/// no safepoint/quiescence checks.
pub fn compact(&self) -> usize {
    let Some(thread_heap) = self.thread_heap() else {
        return 0;
    };
    self.with_allocator(|allocator| allocator.compact_with_thread(thread_heap))
        .unwrap_or_default()
}
|
||||
|
||||
/// Returns allocator statistics, or `None` if the allocator has never been
/// initialized (this does not trigger lazy initialization).
pub fn stats(&self) -> Option<MeshStats> {
    self.with_existing_allocator(|allocator| allocator.stats())
}
|
||||
|
||||
/// Derives compaction advice from current stats; `None` when the allocator
/// is not initialized.
pub fn compaction_advice(&self) -> Option<CompactionAdvice> {
    self.stats().map(|stats| stats.compaction_advice())
}
|
||||
|
||||
/// Runs `f` against the global allocator under the lock, lazily initializing
/// the allocator first. Returns `None` only if initialization failed.
fn with_allocator<R>(&self, f: impl FnOnce(&mut MeshAllocator) -> R) -> Option<R> {
    if !self.ensure_initialized() {
        return None;
    }

    let _guard = self.lock.lock();
    // SAFETY: `ensure_initialized` returned true, so the MaybeUninit slot has
    // been written and `init_state` is READY; the lock guard gives exclusive
    // access to the allocator cell.
    let allocator = unsafe { (&mut *self.allocator.get()).assume_init_mut() };
    Some(f(allocator))
}
|
||||
|
||||
/// Runs `f` against the allocator only if it is already initialized; never
/// triggers lazy initialization. Shared-reference sibling of
/// `with_existing_allocator_mut`.
fn with_existing_allocator<R>(&self, f: impl FnOnce(&MeshAllocator) -> R) -> Option<R> {
    // Cheap pre-check before taking the lock.
    if self.init_state.load(Ordering::Acquire) != INIT_READY {
        return None;
    }

    let _guard = self.lock.lock();
    // Re-check under the lock: Drop may have torn the allocator down in between.
    if self.init_state.load(Ordering::Acquire) != INIT_READY {
        return None;
    }
    // SAFETY: state is READY under the lock, so the slot is initialized and
    // the guard serializes access.
    let allocator = unsafe { (&*self.allocator.get()).assume_init_ref() };
    Some(f(allocator))
}
|
||||
|
||||
/// Mutable twin of `with_existing_allocator`: runs `f` only if the allocator
/// is already initialized, without triggering lazy init.
// NOTE(review): body duplicates the shared variant except for mutability;
// kept separate because one closure takes &MeshAllocator and the other &mut.
fn with_existing_allocator_mut<R>(&self, f: impl FnOnce(&mut MeshAllocator) -> R) -> Option<R> {
    // Cheap pre-check before taking the lock.
    if self.init_state.load(Ordering::Acquire) != INIT_READY {
        return None;
    }

    let _guard = self.lock.lock();
    // Re-check under the lock: Drop may have torn the allocator down in between.
    if self.init_state.load(Ordering::Acquire) != INIT_READY {
        return None;
    }
    // SAFETY: state is READY under the lock, so the slot is initialized and
    // the guard gives exclusive access.
    let allocator = unsafe { (&mut *self.allocator.get()).assume_init_mut() };
    Some(f(allocator))
}
|
||||
|
||||
/// Lazily initializes the global allocator exactly once across all threads.
///
/// State machine over `init_state`: UNINITIALIZED -> IN_PROGRESS (won by CAS)
/// -> READY or FAILED. Losers of the CAS, and threads arriving during
/// IN_PROGRESS, block on a futex until the winner publishes the outcome.
/// Returns true iff the allocator is usable.
fn ensure_initialized(&self) -> bool {
    loop {
        match self.init_state.load(Ordering::Acquire) {
            INIT_READY => return true,
            INIT_FAILED => return false,
            INIT_UNINITIALIZED => {
                // Try to claim the initialization slot; exactly one thread wins.
                if self
                    .init_state
                    .compare_exchange(
                        INIT_UNINITIALIZED,
                        INIT_IN_PROGRESS,
                        Ordering::AcqRel,
                        Ordering::Acquire,
                    )
                    .is_ok()
                {
                    let result = MeshAllocator::new(self.arena_size, self.miniheap_capacity);
                    match result {
                        // SAFETY: we hold the IN_PROGRESS claim, so no other
                        // thread reads or writes the allocator cell until the
                        // Release store below publishes it.
                        Ok(allocator) => unsafe {
                            (*self.allocator.get()).write(allocator);
                            self.init_state.store(INIT_READY, Ordering::Release);
                            futex_wake_all(&self.init_state);
                            return true;
                        },
                        Err(_) => {
                            // Publish failure so waiters stop sleeping; FAILED is sticky.
                            self.init_state.store(INIT_FAILED, Ordering::Release);
                            futex_wake_all(&self.init_state);
                            return false;
                        }
                    }
                }
                // Lost the CAS: loop and re-read the new state.
            }
            INIT_IN_PROGRESS => futex_wait_for_value(&self.init_state, INIT_IN_PROGRESS),
            _ => return false,
        }
    }
}
|
||||
|
||||
/// Returns the calling thread's heap, creating and registering it on first use.
///
/// TLS state machine: UNINITIALIZED -> READY (heap built, thread counted in
/// `registered_threads`, owner recorded) or FAILED (sticky). A READY heap owned
/// by a *different* GlobalMeshAllocator is torn down and rebuilt for `self`.
/// The `&'static mut` is sound only because the backing storage is thread-local
/// and this is only handed to the calling thread.
fn thread_heap(&self) -> Option<&'static mut ThreadLocalHeap> {
    // SAFETY: all statics touched here are thread-local to the calling thread;
    // raw pointers via addr_of_mut! avoid references to mutable statics.
    unsafe {
        let owner_ptr = addr_of_mut!(THREAD_HEAP_OWNER);
        let heap_ptr = addr_of_mut!(THREAD_HEAP);
        match THREAD_HEAP_STATE {
            // Fast path: heap exists and belongs to this allocator instance.
            TLS_READY if core::ptr::eq(*owner_ptr, self) => Some((*heap_ptr).assume_init_mut()),
            // Heap belongs to another allocator: reset, then recurse (the
            // recursion takes the TLS_UNINITIALIZED arm, so depth is at most 2).
            TLS_READY => {
                self.reset_foreign_thread_heap();
                self.thread_heap()
            }
            TLS_FAILED => None,
            TLS_UNINITIALIZED => match ThreadLocalHeap::new() {
                Ok(heap) => {
                    (*heap_ptr).write(heap);
                    *addr_of_mut!(THREAD_SAFEPOINT_STATE) = SAFEPOINT_INACTIVE;
                    *owner_ptr = self as *const _;
                    self.registered_threads.fetch_add(1, Ordering::AcqRel);
                    THREAD_HEAP_STATE = TLS_READY;
                    Some((*heap_ptr).assume_init_mut())
                }
                Err(_) => {
                    // Sticky failure: do not retry heap construction on this thread.
                    THREAD_HEAP_STATE = TLS_FAILED;
                    None
                }
            },
            _ => None,
        }
    }
}
|
||||
|
||||
fn current_thread_in_safepoint(&self) -> bool {
|
||||
let _ = self;
|
||||
unsafe {
|
||||
core::ptr::eq(THREAD_HEAP_OWNER, self) && THREAD_SAFEPOINT_STATE == SAFEPOINT_ACTIVE
|
||||
}
|
||||
}
|
||||
|
||||
/// Tears down a thread heap that was registered against a *different*
/// allocator instance, returning the TLS state machine to UNINITIALIZED so
/// `thread_heap` can rebuild it for `self`.
// NOTE(review): this does not decrement the previous owner's
// `registered_threads` counter — confirm whether the foreign owner accounts
// for that elsewhere.
fn reset_foreign_thread_heap(&self) {
    // `self` is intentionally unused; the method is on `&self` for call-site symmetry.
    let _ = self;
    // SAFETY: all statics are thread-local; the drop_in_place target was fully
    // initialized because the state is TLS_READY.
    unsafe {
        if THREAD_HEAP_STATE == TLS_READY {
            drop_in_place(addr_of_mut!(THREAD_HEAP).cast::<ThreadLocalHeap>());
        }
        THREAD_HEAP_STATE = TLS_UNINITIALIZED;
        THREAD_SAFEPOINT_STATE = SAFEPOINT_INACTIVE;
        THREAD_HEAP_OWNER = null();
    }
}
|
||||
}
|
||||
|
||||
impl Drop for GlobalMeshAllocator {
    /// Detaches the dropping thread's heap (if owned by this instance) and
    /// tears down the inner allocator, marking the state FAILED so concurrent
    /// `with_existing_allocator*` calls observe it as unavailable.
    fn drop(&mut self) {
        // SAFETY: THREAD_HEAP_OWNER is thread-local and read by value.
        unsafe {
            if core::ptr::eq(THREAD_HEAP_OWNER, self) {
                self.shutdown_thread();
            }
        }

        if self.init_state.load(Ordering::Acquire) == INIT_READY {
            let _guard = self.lock.lock();
            // Re-check under the lock in case of a concurrent teardown.
            if self.init_state.load(Ordering::Acquire) == INIT_READY {
                // SAFETY: READY guarantees the MaybeUninit slot was written;
                // the lock serializes this drop against other accessors.
                unsafe {
                    drop_in_place((&mut *self.allocator.get()).as_mut_ptr());
                }
                // FAILED doubles as "torn down": nothing will re-initialize after drop.
                self.init_state.store(INIT_FAILED, Ordering::Release);
            }
        }
    }
}
|
||||
|
||||
// SAFETY (NOTE, review): cross-thread access to the inner allocator cell is
// serialized through `lock` plus the atomic `init_state`, and per-thread heap
// state lives in thread-local statics outside this struct — confirm those
// invariants cover every field before relying on these impls.
unsafe impl Sync for GlobalMeshAllocator {}
unsafe impl Send for GlobalMeshAllocator {}
|
||||
|
||||
unsafe impl GlobalAlloc for GlobalMeshAllocator {
    /// Allocates via a lock-free thread-local fast path for small size classes,
    /// falling back to the locked global allocator. Returns null on any failure,
    /// as the `GlobalAlloc` contract requires.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        if !self.ensure_initialized() {
            return null_mut();
        }
        let Some(thread_heap) = self.thread_heap() else {
            return null_mut();
        };

        // SAFETY: ensure_initialized() returned true, so the slot is written.
        // Taken without the lock — fast path relies on try_allocate_small_local
        // being safe under shared access.
        let allocator_ref = unsafe { (&*self.allocator.get()).assume_init_ref() };
        if let Some(class) = size_class_for_layout(layout)
            && let Some(ptr) = allocator_ref.try_allocate_small_local(thread_heap, class)
        {
            return ptr;
        }

        // Slow path: locked global allocation.
        self.with_allocator(|allocator| {
            allocator
                .allocate_layout_with_thread(thread_heap, layout)
                .unwrap_or(null_mut())
        })
        .unwrap_or(null_mut())
    }

    /// Frees via the thread-local fast path when possible, else the locked
    /// global path. Silently drops the pointer if the allocator or thread heap
    /// is unavailable (no safe recovery exists inside `dealloc`).
    unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
        if !self.ensure_initialized() {
            return;
        }
        let Some(thread_heap) = self.thread_heap() else {
            return;
        };

        // SAFETY: ensure_initialized() returned true, so the slot is written.
        let allocator_ref = unsafe { (&*self.allocator.get()).assume_init_ref() };
        if allocator_ref.try_deallocate_local(ptr, thread_heap) {
            return;
        }

        let _ = self.with_allocator(|allocator| allocator.deallocate_with_thread(ptr, thread_heap));
    }

    /// `alloc` followed by an explicit zero fill; no zero-page shortcut is taken.
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        let ptr = unsafe { self.alloc(layout) };
        if !ptr.is_null() {
            // SAFETY: ptr is non-null and points to at least layout.size() bytes
            // freshly returned by alloc.
            unsafe {
                ptr.write_bytes(0, layout.size());
            }
        }
        ptr
    }

    /// Delegates reallocation to the global allocator under the lock; returns
    /// null on failure (the original block then remains valid, per the
    /// `GlobalAlloc` contract).
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        if !self.ensure_initialized() {
            return null_mut();
        }
        let Some(thread_heap) = self.thread_heap() else {
            return null_mut();
        };

        self.with_allocator(|allocator| unsafe {
            allocator
                .reallocate_with_thread(ptr, layout, new_size, thread_heap)
                .unwrap_or(null_mut())
        })
        .unwrap_or(null_mut())
    }
}
|
||||
|
||||
fn size_class_for_layout(layout: Layout) -> Option<u8> {
|
||||
if layout.align() > super::page::page_size() {
|
||||
return None;
|
||||
}
|
||||
let aligned_size = layout
|
||||
.size()
|
||||
.max(1)
|
||||
.checked_add(layout.align() - 1)
|
||||
.map(|value| value & !(layout.align() - 1))?;
|
||||
if aligned_size > super::constants::MAX_SMALL_ALLOCATION {
|
||||
return None;
|
||||
}
|
||||
let class = super::size_map::size_class_for(aligned_size)?;
|
||||
if super::size_map::byte_size_for_class(class).is_multiple_of(layout.align()) {
|
||||
Some(class)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
15
lib/runtime/src/platform/linux_x86_64/mesh_alloc/meshing.rs
Normal file
15
lib/runtime/src/platform/linux_x86_64/mesh_alloc/meshing.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
use super::bitmap::AtomicBitmap;
|
||||
|
||||
#[inline]
|
||||
pub fn bitmaps_meshable(left: &AtomicBitmap, right: &AtomicBitmap) -> bool {
|
||||
let left_words = left.snapshot();
|
||||
let right_words = right.snapshot();
|
||||
|
||||
for (lhs, rhs) in left_words.words().iter().zip(right_words.words().iter()) {
|
||||
if lhs & rhs != 0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
374
lib/runtime/src/platform/linux_x86_64/mesh_alloc/miniheap.rs
Normal file
374
lib/runtime/src/platform/linux_x86_64/mesh_alloc/miniheap.rs
Normal file
@@ -0,0 +1,374 @@
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use super::bitmap::AtomicBitmap;
|
||||
use super::page::page_size;
|
||||
use super::size_map::{byte_size_for_class, size_class_for};
|
||||
use super::span::Span;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
/// Which freelist a miniheap currently lives on. The discriminants are packed
/// into a 2-bit field of `MiniHeapFlags`, so they must stay in 0..=3.
pub enum FreelistId {
    /// No free slots remain.
    Full = 0,
    /// Some, but not all, slots are in use.
    Partial = 1,
    /// No slots are in use.
    Empty = 2,
    /// Currently attached to a thread-local heap.
    Attached = 3,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
/// Index-based handle to a miniheap. The value 0 doubles as the "no miniheap"
/// sentinel (see `has_value`), which `Default` produces.
pub struct MiniHeapId(u32);

impl MiniHeapId {
    /// Wraps a raw index; 0 means "none".
    #[inline(always)]
    pub const fn new(id: u32) -> Self {
        Self(id)
    }

    /// Raw index value (0 when this is the sentinel).
    #[inline(always)]
    pub const fn value(self) -> u32 {
        self.0
    }

    /// True when this handle refers to an actual miniheap (non-zero).
    #[inline(always)]
    pub const fn has_value(self) -> bool {
        self.0 != 0
    }
}
|
||||
|
||||
#[derive(Debug)]
/// Atomically-updated packed miniheap metadata. Bit layout of `bits`
/// (see the shift constants in the impl):
///   [0..6)  size class, [6..8) freelist id, [8..16) shuffle-vector offset,
///   [16..27) max object count, bit 27 pending flag, bit 30 meshed flag.
pub struct MiniHeapFlags {
    bits: AtomicU32,
}
|
||||
|
||||
impl MiniHeapFlags {
    // Bit-field shifts within `bits`; field widths are enforced by the masks
    // used in the accessors below (6, 2, 8, 11 bits, then single flag bits).
    const SIZE_CLASS_SHIFT: u32 = 0;
    const FREELIST_ID_SHIFT: u32 = 6;
    const SHUFFLE_OFFSET_SHIFT: u32 = 8;
    const MAX_COUNT_SHIFT: u32 = 16;
    const PENDING_OFFSET: u32 = 27;
    const MESHED_OFFSET: u32 = 30;

    /// Packs the initial field values; pending and meshed flags start clear.
    // NOTE(review): inputs are not masked here — a max_count > 0x7ff or
    // size_class > 0x3f would spill into neighboring fields; confirm callers
    // guarantee the ranges.
    #[inline]
    pub fn new(
        max_count: u16,
        size_class: u8,
        shuffle_offset: u8,
        freelist_id: FreelistId,
    ) -> Self {
        let bits = ((max_count as u32) << Self::MAX_COUNT_SHIFT)
            | ((shuffle_offset as u32) << Self::SHUFFLE_OFFSET_SHIFT)
            | ((freelist_id as u32) << Self::FREELIST_ID_SHIFT)
            | ((size_class as u32) << Self::SIZE_CLASS_SHIFT);
        Self {
            bits: AtomicU32::new(bits),
        }
    }

    #[inline(always)]
    fn load(&self) -> u32 {
        self.bits.load(Ordering::Acquire)
    }

    /// CAS loop that replaces the bits selected by `!mask` with `value` while
    /// preserving the bits selected by `mask`.
    #[inline(always)]
    fn update_masked(&self, mask: u32, value: u32) {
        let mut old = self.bits.load(Ordering::Relaxed);
        loop {
            let new = (old & mask) | value;
            match self
                .bits
                .compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Relaxed)
            {
                Ok(_) => return,
                Err(next) => old = next,
            }
        }
    }

    /// Maximum number of objects this miniheap can hold (11-bit field).
    #[inline(always)]
    pub fn max_count(&self) -> u16 {
        ((self.load() >> Self::MAX_COUNT_SHIFT) & 0x7ff) as u16
    }

    /// Size class index (6-bit field).
    #[inline(always)]
    pub fn size_class(&self) -> u8 {
        ((self.load() >> Self::SIZE_CLASS_SHIFT) & 0x3f) as u8
    }

    /// Current freelist membership (2-bit field decoded back to the enum).
    #[inline(always)]
    pub fn freelist_id(&self) -> FreelistId {
        match (self.load() >> Self::FREELIST_ID_SHIFT) & 0x3 {
            0 => FreelistId::Full,
            1 => FreelistId::Partial,
            2 => FreelistId::Empty,
            _ => FreelistId::Attached,
        }
    }

    #[inline(always)]
    pub fn set_freelist_id(&self, id: FreelistId) {
        let mask = !(0x3 << Self::FREELIST_ID_SHIFT);
        self.update_masked(mask, (id as u32) << Self::FREELIST_ID_SHIFT);
    }

    #[inline(always)]
    pub fn shuffle_vector_offset(&self) -> u8 {
        ((self.load() >> Self::SHUFFLE_OFFSET_SHIFT) & 0xff) as u8
    }

    #[inline(always)]
    pub fn set_shuffle_vector_offset(&self, offset: u8) {
        let mask = !(0xff << Self::SHUFFLE_OFFSET_SHIFT);
        self.update_masked(mask, (offset as u32) << Self::SHUFFLE_OFFSET_SHIFT);
    }

    #[inline(always)]
    pub fn is_pending(&self) -> bool {
        self.load() & (1 << Self::PENDING_OFFSET) != 0
    }

    #[inline(always)]
    pub fn clear_pending(&self) {
        self.bits
            .fetch_and(!(1 << Self::PENDING_OFFSET), Ordering::AcqRel);
    }

    /// Atomically sets the pending flag, but only if the miniheap is currently
    /// on the Full freelist and not already pending. Returns true on the
    /// transition, false otherwise (including CAS races that changed either
    /// condition).
    #[inline(always)]
    pub fn try_set_pending_from_full(&self) -> bool {
        let full = (FreelistId::Full as u32) << Self::FREELIST_ID_SHIFT;
        let pending = 1 << Self::PENDING_OFFSET;
        let freelist_mask = 0x3 << Self::FREELIST_ID_SHIFT;

        let mut old = self.bits.load(Ordering::Relaxed);
        loop {
            if (old & freelist_mask) != full || (old & pending) != 0 {
                return false;
            }
            let new = old | pending;
            match self
                .bits
                .compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Relaxed)
            {
                Ok(_) => return true,
                Err(next) => old = next,
            }
        }
    }

    #[inline(always)]
    pub fn is_meshed(&self) -> bool {
        self.load() & (1 << Self::MESHED_OFFSET) != 0
    }

    /// One-way transition: the meshed flag is only ever set, never cleared here.
    #[inline(always)]
    pub fn set_meshed(&self) {
        self.bits
            .fetch_or(1 << Self::MESHED_OFFSET, Ordering::AcqRel);
    }
}
|
||||
|
||||
#[derive(Debug)]
/// Metadata for one span of objects: its arena placement, the owning thread,
/// packed flags, intrusive links to meshed/pending partners, and the per-slot
/// occupancy bitmap.
pub struct MiniHeap {
    // Page range within the arena that backs this miniheap.
    span: Span,
    // Id of the attached thread; 0 means detached (see `is_attached`).
    current_thread: AtomicU32,
    flags: MiniHeapFlags,
    // MiniHeapId of the meshed partner (0 = none).
    next_meshed: AtomicU32,
    // MiniHeapId link for the pending list (0 = none).
    pending_next: AtomicU32,
    // One bit per object slot; set = allocated.
    bitmap: AtomicBitmap,
}
|
||||
|
||||
impl MiniHeap {
    /// Builds a detached miniheap over `span` holding `object_count` objects.
    /// An `object_count` of 1 marks a large allocation, whose size is the whole
    /// span rather than a size class (see `object_size`).
    #[inline]
    pub fn new(span: Span, object_count: u16, object_size: usize) -> Self {
        let size_class = if object_count > 1 {
            // Fallback class 1 if the size has no class; only reachable when
            // size_class_for disagrees with the caller's sizing.
            size_class_for(object_size).unwrap_or(1)
        } else {
            1
        };

        Self {
            span,
            current_thread: AtomicU32::new(0),
            flags: MiniHeapFlags::new(object_count, size_class, 0, FreelistId::Attached),
            next_meshed: AtomicU32::new(0),
            pending_next: AtomicU32::new(0),
            bitmap: AtomicBitmap::new(object_count as usize),
        }
    }

    #[inline(always)]
    pub const fn span(&self) -> Span {
        self.span
    }

    #[inline(always)]
    pub fn flags(&self) -> &MiniHeapFlags {
        &self.flags
    }

    #[inline(always)]
    pub fn bitmap(&self) -> &AtomicBitmap {
        &self.bitmap
    }

    #[inline(always)]
    pub fn max_count(&self) -> u16 {
        self.flags.max_count()
    }

    #[inline(always)]
    pub fn size_class(&self) -> u8 {
        self.flags.size_class()
    }

    /// Large allocations are represented as a single-object miniheap.
    #[inline(always)]
    pub fn is_large_alloc(&self) -> bool {
        self.max_count() == 1
    }

    /// Bytes per object: the whole span for large allocations, otherwise the
    /// size-class byte size.
    #[inline(always)]
    pub fn object_size(&self) -> usize {
        if self.is_large_alloc() {
            self.span.byte_length_for_page_size(page_size())
        } else {
            byte_size_for_class(self.size_class())
        }
    }

    #[inline(always)]
    pub fn span_size(&self) -> usize {
        self.span.byte_length_for_page_size(page_size())
    }

    #[inline(always)]
    pub fn in_use_count(&self) -> u32 {
        self.bitmap.in_use_count()
    }

    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.in_use_count() == 0
    }

    #[inline(always)]
    pub fn is_full(&self) -> bool {
        self.in_use_count() == self.max_count() as u32
    }

    #[inline(always)]
    pub fn bytes_free(&self) -> usize {
        (self.max_count() as usize - self.in_use_count() as usize) * self.object_size()
    }

    #[inline(always)]
    pub fn current_thread(&self) -> u32 {
        self.current_thread.load(Ordering::Acquire)
    }

    /// Attaches this miniheap to `thread_id` and moves it to the Attached list.
    #[inline(always)]
    pub fn set_attached(&self, thread_id: u32) {
        self.current_thread.store(thread_id, Ordering::Release);
        self.flags.set_freelist_id(FreelistId::Attached);
    }

    /// Detaches by clearing the owner id; the freelist id is left unchanged.
    #[inline(always)]
    pub fn unset_attached(&self) {
        self.current_thread.store(0, Ordering::Release);
    }

    // Thread id 0 is reserved to mean "detached".
    #[inline(always)]
    pub fn is_attached(&self) -> bool {
        self.current_thread() != 0
    }

    #[inline(always)]
    pub fn set_shuffle_vector_offset(&self, offset: u8) {
        self.flags.set_shuffle_vector_offset(offset);
    }

    #[inline(always)]
    pub fn shuffle_vector_offset(&self) -> u8 {
        self.flags.shuffle_vector_offset()
    }

    #[inline(always)]
    pub fn set_pending_next(&self, next: MiniHeapId) {
        self.pending_next.store(next.value(), Ordering::Release);
    }

    #[inline(always)]
    pub fn pending_next(&self) -> MiniHeapId {
        MiniHeapId::new(self.pending_next.load(Ordering::Acquire))
    }

    /// Links `next` as this miniheap's meshed partner.
    #[inline(always)]
    pub fn track_meshed_span(&self, next: MiniHeapId) {
        self.next_meshed.store(next.value(), Ordering::Release);
    }

    #[inline(always)]
    pub fn next_meshed(&self) -> MiniHeapId {
        MiniHeapId::new(self.next_meshed.load(Ordering::Acquire))
    }

    #[inline(always)]
    pub fn has_meshed_partner(&self) -> bool {
        self.next_meshed().has_value()
    }

    #[inline(always)]
    pub fn set_meshed(&self) {
        self.flags.set_meshed();
    }

    #[inline(always)]
    pub fn is_meshed(&self) -> bool {
        self.flags.is_meshed()
    }

    /// Only detached miniheaps with sub-page objects may be meshed.
    #[inline(always)]
    pub fn is_meshing_candidate(&self) -> bool {
        !self.is_attached() && self.object_size() < page_size()
    }

    /// Occupancy fraction in [0, 1].
    #[inline(always)]
    pub fn fullness(&self) -> f32 {
        self.in_use_count() as f32 / self.max_count() as f32
    }

    /// Claims `slot` atomically; returns the object pointer, or `None` if the
    /// slot was already taken.
    #[inline(always)]
    pub fn malloc_at(&self, arena_begin: usize, slot: usize) -> Option<*mut u8> {
        if !self.bitmap.try_set(slot) {
            return None;
        }
        Some(self.ptr_from_offset(arena_begin, slot))
    }

    /// Address of `slot` within the arena; does not check slot bounds.
    #[inline(always)]
    pub fn ptr_from_offset(&self, arena_begin: usize, slot: usize) -> *mut u8 {
        let span_start = arena_begin + (self.span.offset as usize * page_size());
        (span_start + slot * self.object_size()) as *mut u8
    }

    #[inline(always)]
    pub fn contains_ptr(&self, arena_begin: usize, ptr: *const u8) -> bool {
        let span_start = arena_begin + (self.span.offset as usize * page_size());
        let span_end = span_start + self.span_size();
        let ptr = ptr as usize;
        span_start <= ptr && ptr < span_end
    }

    /// Frees `slot`; returns the bitmap's `unset` result.
    #[inline(always)]
    pub fn free_offset(&self, slot: usize) -> bool {
        self.bitmap.unset(slot)
    }

    // NOTE(review): identical body to `free_offset`; presumably kept as a
    // separately-named entry point for the meshing path — confirm intent.
    #[inline(always)]
    pub fn clear_if_not_free(&self, slot: usize) -> bool {
        self.bitmap.unset(slot)
    }

    /// Inverse of `ptr_from_offset`; assumes `ptr` lies inside this span
    /// (see `contains_ptr`), otherwise the subtraction underflows.
    #[inline(always)]
    pub fn slot_for_ptr(&self, arena_begin: usize, ptr: *const u8) -> usize {
        let span_start = arena_begin + (self.span.offset as usize * page_size());
        ((ptr as usize) - span_start) / self.object_size()
    }
}
|
||||
45
lib/runtime/src/platform/linux_x86_64/mesh_alloc/mod.rs
Normal file
45
lib/runtime/src/platform/linux_x86_64/mesh_alloc/mod.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
pub mod allocator;
|
||||
pub mod arena;
|
||||
pub mod bitmap;
|
||||
pub mod constants;
|
||||
pub mod fault;
|
||||
pub mod global;
|
||||
pub mod meshing;
|
||||
pub mod miniheap;
|
||||
pub mod page;
|
||||
pub mod platform;
|
||||
pub mod pool;
|
||||
pub mod raw_sys;
|
||||
pub mod rng;
|
||||
pub mod shuffle;
|
||||
pub mod size_map;
|
||||
pub mod span;
|
||||
pub mod stats;
|
||||
pub mod sync;
|
||||
pub mod thread_local_heap;
|
||||
|
||||
pub use allocator::MeshAllocator;
|
||||
pub use arena::Arena;
|
||||
pub use bitmap::{AtomicBitmap, BitIter, RelaxedBitmap};
|
||||
pub use constants::*;
|
||||
pub use fault::{
|
||||
ActiveMeshGuard, ensure_fault_mediation_installed, ok_to_proceed, retry_on_efault,
|
||||
retry_on_efault_ptrs,
|
||||
};
|
||||
pub use global::{DEFAULT_GLOBAL_MINIHEAP_CAPACITY, GlobalMeshAllocator};
|
||||
pub use meshing::bitmaps_meshable;
|
||||
pub use miniheap::{FreelistId, MiniHeap, MiniHeapFlags, MiniHeapId};
|
||||
pub use page::{
|
||||
PageConfig, page_count, page_shift, page_size, round_up_to_page, runtime_slots_per_span,
|
||||
};
|
||||
pub use platform::{PlatformHooks, PlatformInstallError, install_platform_hooks};
|
||||
pub use rng::{Mwc, Mwc64};
|
||||
pub use shuffle::{ShuffleEntry, ShuffleVector};
|
||||
pub use size_map::{CLASS_TO_SIZE, NUM_SIZE_CLASSES, byte_size_for_class, size_class_for};
|
||||
pub use span::Span;
|
||||
pub use stats::{
|
||||
CompactionAdvice, CompactionEstimate, CompactionRecommendation, CompactionSkipReason,
|
||||
MeshStats, RuntimeCompactionPolicy, RuntimeCompactionResult,
|
||||
};
|
||||
pub use sync::FutexMutex;
|
||||
pub use thread_local_heap::ThreadLocalHeap;
|
||||
94
lib/runtime/src/platform/linux_x86_64/mesh_alloc/page.rs
Normal file
94
lib/runtime/src/platform/linux_x86_64/mesh_alloc/page.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
use core::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use super::constants::{MAX_SUPPORTED_PAGE_SIZE, MIN_OBJECT_SIZE, MIN_SUPPORTED_PAGE_SIZE};
|
||||
use super::platform;
|
||||
static PAGE_SIZE_CACHE: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Snapshot of the page geometry derived from the runtime page size.
pub struct PageConfig {
    // Page size in bytes (power of two, validated in query_page_size).
    size: usize,
    // log2 of `size`.
    shift: u32,
    // How many MIN_OBJECT_SIZE slots fit in one page.
    slots_per_span: usize,
}

impl PageConfig {
    /// Builds the config from the cached runtime page size.
    #[inline(always)]
    pub fn get() -> Self {
        let size = page_size();
        Self {
            size,
            // trailing_zeros == log2 because page_size() is a power of two.
            shift: size.trailing_zeros(),
            slots_per_span: size / MIN_OBJECT_SIZE,
        }
    }

    #[inline(always)]
    pub const fn size(self) -> usize {
        self.size
    }

    #[inline(always)]
    pub const fn shift(self) -> u32 {
        self.shift
    }

    #[inline(always)]
    pub const fn slots_per_span(self) -> usize {
        self.slots_per_span
    }
}
|
||||
|
||||
#[inline]
/// Returns the system page size, querying the platform once and caching the
/// result in an atomic (0 means "not yet queried"). Racing initializers all
/// compute the same value; the CAS just picks one canonical store.
pub fn page_size() -> usize {
    let cached = PAGE_SIZE_CACHE.load(Ordering::Acquire);
    if cached != 0 {
        return cached;
    }

    let size = query_page_size();
    match PAGE_SIZE_CACHE.compare_exchange(0, size, Ordering::AcqRel, Ordering::Acquire) {
        Ok(_) => size,
        // Another thread cached first; use its value for consistency.
        Err(existing) => existing,
    }
}
|
||||
|
||||
#[inline(always)]
/// log2 of the page size (valid because page_size() is a validated power of two).
pub fn page_shift() -> u32 {
    page_size().trailing_zeros()
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn page_count(size: usize) -> usize {
|
||||
let page = page_size();
|
||||
size.div_ceil(page)
|
||||
}
|
||||
|
||||
#[inline(always)]
/// Rounds `size` up to a whole number of pages (0 stays 0).
pub fn round_up_to_page(size: usize) -> usize {
    page_count(size) * page_size()
}
|
||||
|
||||
#[inline(always)]
/// Minimum-size object slots per page (exact: query_page_size asserts the page
/// size is MIN_OBJECT_SIZE-aligned).
pub fn runtime_slots_per_span() -> usize {
    page_size() / MIN_OBJECT_SIZE
}
|
||||
|
||||
/// Queries the platform page size and validates the invariants the allocator
/// relies on (power of two, within the supported range, MIN_OBJECT_SIZE
/// aligned). Panics on violation — a wrong page size would corrupt all
/// span arithmetic, so failing fast is deliberate.
fn query_page_size() -> usize {
    let size = platform::page_size();
    assert!(
        size.is_power_of_two(),
        "page size is not a power of two: {size}"
    );
    assert!(
        (MIN_SUPPORTED_PAGE_SIZE..=MAX_SUPPORTED_PAGE_SIZE).contains(&size),
        "unsupported page size {size}; supported range is {}..={}",
        MIN_SUPPORTED_PAGE_SIZE,
        MAX_SUPPORTED_PAGE_SIZE
    );
    assert_eq!(
        size % MIN_OBJECT_SIZE,
        0,
        "page size {size} is not MIN_OBJECT_SIZE-aligned"
    );
    size
}
|
||||
168
lib/runtime/src/platform/linux_x86_64/mesh_alloc/platform.rs
Normal file
168
lib/runtime/src/platform/linux_x86_64/mesh_alloc/platform.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
use core::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
|
||||
|
||||
use super::raw_sys;
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
/// Function-pointer table for every OS primitive the allocator uses, so tests
/// or embedders can substitute implementations via `install_platform_hooks`.
/// The `unsafe fn` entries carry the same contracts as the raw syscalls they
/// wrap (see the wrapper functions below).
pub struct PlatformHooks {
    pub page_size: fn() -> usize,
    pub gettid: fn() -> raw_sys::Result<u32>,
    pub getrandom: fn(&mut [u8], u32) -> raw_sys::Result<usize>,
    pub memfd_create: fn(*const u8, u32) -> raw_sys::Result<i32>,
    pub ftruncate: fn(i32, u64) -> raw_sys::Result<()>,
    pub fallocate: fn(i32, u32, u64, u64) -> raw_sys::Result<()>,
    pub close: fn(i32) -> raw_sys::Result<()>,
    pub futex_wait: unsafe fn(*const u32, u32, u32) -> raw_sys::Result<()>,
    pub futex_wake: unsafe fn(*const u32, u32, u32) -> raw_sys::Result<u32>,
    pub mmap: unsafe fn(*mut u8, usize, u32, u32, i32, u64) -> raw_sys::Result<*mut u8>,
    pub map_anonymous: unsafe fn(usize, u32) -> raw_sys::Result<*mut u8>,
    pub mprotect: unsafe fn(*mut u8, usize, u32) -> raw_sys::Result<()>,
    pub munmap: unsafe fn(*mut u8, usize) -> raw_sys::Result<()>,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Why `install_platform_hooks` refused to install a hook table.
pub enum PlatformInstallError {
    /// A different non-default table was already installed.
    AlreadyConfigured,
    /// Hooks have already been used (frozen), so swapping is no longer allowed.
    AlreadyInUse,
}
|
||||
|
||||
// Default table wired straight to the raw syscall layer.
static DEFAULT_PLATFORM_HOOKS: PlatformHooks = PlatformHooks {
    page_size: default_page_size,
    gettid: raw_sys::gettid,
    getrandom: raw_sys::getrandom,
    memfd_create: raw_sys::memfd_create,
    ftruncate: raw_sys::ftruncate,
    fallocate: raw_sys::fallocate,
    close: raw_sys::close,
    futex_wait: raw_sys::futex_wait,
    futex_wake: raw_sys::futex_wake,
    mmap: raw_sys::mmap,
    map_anonymous: raw_sys::map_anonymous,
    mprotect: raw_sys::mprotect,
    munmap: raw_sys::munmap,
};

// Active table; the pointer is only ever swapped away from the default, at
// most once, by install_platform_hooks.
static PLATFORM_HOOKS: AtomicPtr<PlatformHooks> =
    AtomicPtr::new((&DEFAULT_PLATFORM_HOOKS as *const PlatformHooks).cast_mut());
// Set on first hook use; after that, installation is refused.
static PLATFORM_FROZEN: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// Installs a custom hook table. Succeeds only before any hook has been used
/// and while the default table is still active; re-installing the same table
/// is an idempotent success.
// NOTE(review): the frozen check and the CAS are not one atomic step — a
// thread calling platform_hooks() between them could observe the default
// table while this install still succeeds; confirm whether that window is
// acceptable for intended callers (e.g. single-threaded early startup).
pub fn install_platform_hooks(hooks: &'static PlatformHooks) -> Result<(), PlatformInstallError> {
    if PLATFORM_FROZEN.load(Ordering::Acquire) {
        return Err(PlatformInstallError::AlreadyInUse);
    }

    let default_ptr = (&DEFAULT_PLATFORM_HOOKS as *const PlatformHooks).cast_mut();
    let hooks_ptr = (hooks as *const PlatformHooks).cast_mut();
    match PLATFORM_HOOKS.compare_exchange(
        default_ptr,
        hooks_ptr,
        Ordering::AcqRel,
        Ordering::Acquire,
    ) {
        Ok(_) => Ok(()),
        // Same table already installed: treat as success.
        Err(existing) if existing == hooks_ptr => Ok(()),
        Err(_) => Err(PlatformInstallError::AlreadyConfigured),
    }
}
|
||||
|
||||
#[inline(always)]
/// Dispatches to the installed hook table (freezing it on first use).
pub fn page_size() -> usize {
    (platform_hooks().page_size)()
}
|
||||
|
||||
#[inline(always)]
/// Dispatches to the installed hook table (freezing it on first use).
pub fn gettid() -> raw_sys::Result<u32> {
    (platform_hooks().gettid)()
}
|
||||
|
||||
#[inline(always)]
/// Fills `buf` with random bytes via the installed hook table.
pub fn getrandom(buf: &mut [u8], flags: u32) -> raw_sys::Result<usize> {
    (platform_hooks().getrandom)(buf, flags)
}
|
||||
|
||||
#[inline(always)]
/// Creates an anonymous memory fd via the installed hook table.
// NOTE(review): `name` is a raw pointer presumably to a NUL-terminated string
// (memfd_create(2) contract) — confirm callers guarantee that.
pub fn memfd_create(name: *const u8, flags: u32) -> raw_sys::Result<i32> {
    (platform_hooks().memfd_create)(name, flags)
}
|
||||
|
||||
#[inline(always)]
/// Resizes `fd` to `len` bytes via the installed hook table.
pub fn ftruncate(fd: i32, len: u64) -> raw_sys::Result<()> {
    (platform_hooks().ftruncate)(fd, len)
}
|
||||
|
||||
#[inline(always)]
/// Manipulates fd space (fallocate(2)) via the installed hook table.
pub fn fallocate(fd: i32, mode: u32, offset: u64, len: u64) -> raw_sys::Result<()> {
    (platform_hooks().fallocate)(fd, mode, offset, len)
}
|
||||
|
||||
#[inline(always)]
/// Closes `fd` via the installed hook table.
pub fn close(fd: i32) -> raw_sys::Result<()> {
    (platform_hooks().close)(fd)
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
|
||||
pub unsafe fn futex_wait(uaddr: *const u32, op: u32, expected: u32) -> raw_sys::Result<()> {
|
||||
unsafe { (platform_hooks().futex_wait)(uaddr, op, expected) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
|
||||
pub unsafe fn futex_wake(uaddr: *const u32, op: u32, count: u32) -> raw_sys::Result<u32> {
|
||||
unsafe { (platform_hooks().futex_wake)(uaddr, op, count) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must ensure the mapping arguments satisfy the platform `mmap(2)` contract.
|
||||
pub unsafe fn mmap(
|
||||
addr: *mut u8,
|
||||
len: usize,
|
||||
prot: u32,
|
||||
flags: u32,
|
||||
fd: i32,
|
||||
offset: u64,
|
||||
) -> raw_sys::Result<*mut u8> {
|
||||
unsafe { (platform_hooks().mmap)(addr, len, prot, flags, fd, offset) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must later unmap the returned memory exactly once.
|
||||
pub unsafe fn map_anonymous(len: usize, prot: u32) -> raw_sys::Result<*mut u8> {
|
||||
unsafe { (platform_hooks().map_anonymous)(len, prot) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `addr..addr+len` must refer to a valid mapped region.
|
||||
pub unsafe fn mprotect(addr: *mut u8, len: usize, prot: u32) -> raw_sys::Result<()> {
|
||||
unsafe { (platform_hooks().mprotect)(addr, len, prot) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `addr..addr+len` must refer to a valid mapping that may be unmapped exactly once.
|
||||
pub unsafe fn munmap(addr: *mut u8, len: usize) -> raw_sys::Result<()> {
|
||||
unsafe { (platform_hooks().munmap)(addr, len) }
|
||||
}
|
||||
|
||||
#[inline(always)]
/// Returns the active hook table, freezing installation as a side effect so
/// hooks cannot be swapped after they have been observed.
// NOTE(review): the Release store runs unconditionally on every hook call —
// consider a load-then-store guard if this shows up in profiles.
fn platform_hooks() -> &'static PlatformHooks {
    PLATFORM_FROZEN.store(true, Ordering::Release);
    let hooks = PLATFORM_HOOKS.load(Ordering::Acquire);
    // SAFETY: the pointer always targets a 'static PlatformHooks (the default
    // static or the &'static passed to install_platform_hooks).
    unsafe { &*hooks }
}
|
||||
|
||||
fn default_page_size() -> usize {
|
||||
let size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
|
||||
assert!(size > 0, "sysconf(_SC_PAGESIZE) failed");
|
||||
size as usize
|
||||
}
|
||||
152
lib/runtime/src/platform/linux_x86_64/mesh_alloc/pool.rs
Normal file
152
lib/runtime/src/platform/linux_x86_64/mesh_alloc/pool.rs
Normal file
@@ -0,0 +1,152 @@
|
||||
use core::mem::size_of;
|
||||
use core::sync::atomic::{AtomicU8, AtomicU32, Ordering};
|
||||
|
||||
use super::miniheap::{MiniHeap, MiniHeapId};
|
||||
use super::platform;
|
||||
use super::raw_sys;
|
||||
use super::span::Span;
|
||||
|
||||
#[derive(Debug)]
/// Fixed-capacity slab of `MiniHeap` records backed by anonymous mappings.
///
/// Ids are 1-based (`id - 1` is the slot index). Slot liveness lives in a
/// parallel `AtomicU8` array, and released ids are recycled via a free-id
/// stack.
pub struct MiniHeapPool {
    // Backing array of `capacity` MiniHeap slots (anonymous mapping).
    base: *mut MiniHeap,
    // One liveness byte per slot: 1 = occupied, 0 = free/released.
    live: *mut AtomicU8,
    // Stack of recycled 1-based ids; `free_len` is its current depth.
    free_ids: *mut u32,
    // Total number of slots, fixed at construction.
    capacity: u32,
    // High-water mark: number of slots ever handed out (never decreases).
    len: AtomicU32,
    // Number of currently live miniheaps.
    live_len: AtomicU32,
    // Depth of the `free_ids` recycle stack.
    free_len: u32,
}
|
||||
|
||||
impl MiniHeapPool {
    /// Maps three anonymous regions (heap slots, liveness bytes, free-id
    /// stack) sized for `capacity` entries.
    ///
    /// NOTE(review): if a later mapping fails, earlier mappings are leaked by
    /// the `?` early return — acceptable only if allocator construction
    /// failure is effectively fatal; confirm.
    ///
    /// # Panics
    /// Panics if `capacity` is zero.
    #[inline]
    pub fn with_capacity(capacity: u32) -> raw_sys::Result<Self> {
        assert!(capacity > 0);
        let bytes = capacity as usize * size_of::<MiniHeap>();
        // One liveness byte per slot.
        let live = unsafe {
            platform::map_anonymous(capacity as usize, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
        };
        // Recycled-id stack, at most `capacity` entries deep.
        let free_ids = unsafe {
            platform::map_anonymous(
                capacity as usize * size_of::<u32>(),
                raw_sys::PROT_READ | raw_sys::PROT_WRITE,
            )? as *mut u32
        };
        // The MiniHeap slot array itself.
        let base = unsafe {
            platform::map_anonymous(bytes, raw_sys::PROT_READ | raw_sys::PROT_WRITE)?
                as *mut MiniHeap
        };

        Ok(Self {
            base,
            live: live as *mut AtomicU8,
            free_ids,
            capacity,
            len: AtomicU32::new(0),
            live_len: AtomicU32::new(0),
            free_len: 0,
        })
    }

    /// High-water count of slots ever handed out (monotonic; release does
    /// not decrease it).
    #[inline(always)]
    pub fn len(&self) -> u32 {
        self.len.load(Ordering::Acquire)
    }

    /// True when no slot has ever been handed out.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Maximum number of simultaneously live miniheaps.
    #[inline(always)]
    pub const fn capacity(&self) -> u32 {
        self.capacity
    }

    /// Number of currently live (allocated, not yet released) miniheaps.
    #[inline(always)]
    pub fn live_len(&self) -> u32 {
        self.live_len.load(Ordering::Acquire)
    }

    /// Places a fresh `MiniHeap` in a free slot, preferring recycled ids over
    /// extending the high-water mark.
    ///
    /// Returns `None` when the pool is full. Requires `&mut self`, so the
    /// non-atomic free-list fields need no synchronization; the atomics let
    /// concurrent `get` callers observe slots safely.
    #[inline]
    pub fn allocate(
        &mut self,
        span: Span,
        object_count: u16,
        object_size: usize,
    ) -> Option<(MiniHeapId, &MiniHeap)> {
        let (id, index) = if self.free_len > 0 {
            // Pop a recycled id off the free stack.
            self.free_len -= 1;
            let id = unsafe { *self.free_ids.add(self.free_len as usize) };
            (MiniHeapId::new(id), (id - 1) as usize)
        } else {
            // No recycled ids: extend the high-water mark by one slot.
            let len = self.len.load(Ordering::Relaxed);
            if len >= self.capacity {
                return None;
            }
            let id = len + 1;
            self.len.store(id, Ordering::Release);
            (MiniHeapId::new(id), (id - 1) as usize)
        };
        self.live_len.fetch_add(1, Ordering::AcqRel);

        // SAFETY: `index < capacity`, so both the slot and its liveness byte
        // are in bounds of their mappings. The slot contents are written
        // before the liveness byte is flipped (Release), so a racing `get`
        // that observes live == 1 sees a fully initialized MiniHeap.
        unsafe {
            let ptr = self.base.add(index);
            ptr.write(MiniHeap::new(span, object_count, object_size));
            (&*self.live.add(index)).store(1, Ordering::Release);
            Some((id, &*ptr))
        }
    }

    /// Looks up a live miniheap by id.
    ///
    /// Returns `None` for the null id, ids beyond the high-water mark, and
    /// released slots.
    #[inline]
    pub fn get(&self, id: MiniHeapId) -> Option<&MiniHeap> {
        if !id.has_value() {
            return None;
        }

        let index = id.value() - 1;
        if index >= self.len.load(Ordering::Acquire) {
            return None;
        }
        // Acquire pairs with the Release store of the liveness byte in
        // `allocate`.
        if unsafe { (&*self.live.add(index as usize)).load(Ordering::Acquire) } == 0 {
            return None;
        }

        // SAFETY: the slot is within the mapping and was initialized before
        // its liveness byte was set.
        unsafe { Some(&*self.base.add(index as usize)) }
    }

    /// Marks a miniheap slot free and pushes its id onto the recycle stack.
    ///
    /// Returns `false` for null, out-of-range, or already-released ids.
    #[inline]
    pub fn release(&mut self, id: MiniHeapId) -> bool {
        if !id.has_value() {
            return false;
        }

        let index = id.value() - 1;
        if index >= self.len.load(Ordering::Acquire) {
            return false;
        }
        if unsafe { (&*self.live.add(index as usize)).load(Ordering::Acquire) } == 0 {
            return false;
        }

        // SAFETY: `index` is in bounds (checked above), and the free stack
        // has room because at most `len <= capacity` ids can be outstanding.
        unsafe {
            (&*self.live.add(index as usize)).store(0, Ordering::Release);
            self.free_ids.add(self.free_len as usize).write(id.value());
        }
        self.free_len += 1;
        self.live_len.fetch_sub(1, Ordering::AcqRel);
        true
    }
}
|
||||
|
||||
impl Drop for MiniHeapPool {
    /// Unmaps the three backing mappings; unmap errors are ignored because
    /// there is no way to report them from `drop`.
    ///
    /// NOTE(review): live `MiniHeap` values are not individually dropped —
    /// fine only if `MiniHeap` has no `Drop` impl; confirm.
    fn drop(&mut self) {
        let bytes = self.capacity as usize * size_of::<MiniHeap>();
        let live_bytes = self.capacity as usize;
        let free_bytes = self.capacity as usize * size_of::<u32>();
        // SAFETY: each pointer was produced by `map_anonymous` with exactly
        // these lengths in `with_capacity`, and is unmapped only here.
        unsafe {
            let _ = platform::munmap(self.free_ids as *mut u8, free_bytes);
            let _ = platform::munmap(self.live as *mut u8, live_bytes);
            let _ = platform::munmap(self.base as *mut u8, bytes);
        }
    }
}
|
||||
218
lib/runtime/src/platform/linux_x86_64/mesh_alloc/raw_sys.rs
Normal file
218
lib/runtime/src/platform/linux_x86_64/mesh_alloc/raw_sys.rs
Normal file
@@ -0,0 +1,218 @@
|
||||
use core::ptr::null_mut;
|
||||
|
||||
/// Convenience alias pairing `T` with the raw-syscall [`Error`] type.
pub type Result<T> = core::result::Result<T, Error>;

/// A raw OS error wrapping the `errno` value reported by a failed syscall.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Error(pub i32);

impl Error {
    /// Returns the underlying `errno` code.
    #[inline(always)]
    pub const fn errno(self) -> i32 {
        let Self(code) = self;
        code
    }
}
|
||||
|
||||
// mmap(2) protection flags, narrowed to the u32 ABI used by this module.
pub const PROT_NONE: u32 = libc::PROT_NONE as u32;
pub const PROT_READ: u32 = libc::PROT_READ as u32;
pub const PROT_WRITE: u32 = libc::PROT_WRITE as u32;

// mmap(2) mapping flags.
pub const MAP_SHARED: u32 = libc::MAP_SHARED as u32;
pub const MAP_PRIVATE: u32 = libc::MAP_PRIVATE as u32;
pub const MAP_FIXED: u32 = libc::MAP_FIXED as u32;
pub const MAP_ANONYMOUS: u32 = libc::MAP_ANONYMOUS as u32;

// memfd_create(2) flags.
pub const MFD_CLOEXEC: u32 = libc::MFD_CLOEXEC;

// fallocate(2) mode bits.
pub const FALLOC_FL_KEEP_SIZE: u32 = libc::FALLOC_FL_KEEP_SIZE as u32;
pub const FALLOC_FL_PUNCH_HOLE: u32 = libc::FALLOC_FL_PUNCH_HOLE as u32;

// futex(2) operations; the *_PRIVATE variants restrict the futex to this
// process (skipping the cross-process lookup path).
pub const FUTEX_WAIT: u32 = libc::FUTEX_WAIT as u32;
pub const FUTEX_WAKE: u32 = libc::FUTEX_WAKE as u32;
pub const FUTEX_PRIVATE_FLAG: u32 = libc::FUTEX_PRIVATE_FLAG as u32;
pub const FUTEX_WAIT_PRIVATE: u32 = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
pub const FUTEX_WAKE_PRIVATE: u32 = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;

// errno values this crate matches on explicitly.
pub const EINTR: i32 = libc::EINTR;
pub const EAGAIN: i32 = libc::EAGAIN;
pub const EFAULT: i32 = libc::EFAULT;
|
||||
|
||||
#[inline(always)]
|
||||
fn last_error() -> Error {
|
||||
Error(
|
||||
std::io::Error::last_os_error()
|
||||
.raw_os_error()
|
||||
.unwrap_or(libc::EINVAL),
|
||||
)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn map_c_int(result: libc::c_int) -> Result<libc::c_int> {
|
||||
if result == -1 {
|
||||
Err(last_error())
|
||||
} else {
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn map_c_long(result: libc::c_long) -> Result<libc::c_long> {
|
||||
if result == -1 {
|
||||
Err(last_error())
|
||||
} else {
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn to_off_t(value: u64) -> Result<libc::off_t> {
|
||||
value.try_into().map_err(|_| Error(libc::EINVAL))
|
||||
}
|
||||
|
||||
#[inline(always)]
/// Returns the current process id.
pub fn getpid() -> Result<u32> {
    // SAFETY: getpid takes no arguments and cannot fault.
    let pid = unsafe { libc::getpid() };
    // getpid is not expected to fail; the check keeps the wrapper uniform
    // with the other syscall wrappers in this module.
    if pid == -1 {
        Err(last_error())
    } else {
        Ok(pid as u32)
    }
}
|
||||
|
||||
#[inline(always)]
/// Returns the calling thread's kernel thread id via `SYS_gettid`.
pub fn gettid() -> Result<u32> {
    // SAFETY: SYS_gettid takes no arguments.
    unsafe { map_c_long(libc::syscall(libc::SYS_gettid)).map(|value| value as u32) }
}
|
||||
|
||||
#[inline(always)]
/// Closes file descriptor `fd`.
pub fn close(fd: i32) -> Result<()> {
    // SAFETY: close accepts any integer; an invalid fd yields EBADF.
    unsafe { map_c_int(libc::close(fd)).map(|_| ()) }
}
|
||||
|
||||
#[inline(always)]
/// Creates an anonymous memory-backed file via `SYS_memfd_create`, returning
/// its file descriptor.
///
/// `name` must point to a NUL-terminated byte string (a debugging label) —
/// NOTE(review): this fn takes a raw pointer but is not marked `unsafe`;
/// validity is silently the caller's responsibility. Consider `&CStr`.
pub fn memfd_create(name: *const u8, flags: u32) -> Result<i32> {
    // SAFETY: the kernel reads `name` as a C string; see note above.
    unsafe {
        map_c_long(libc::syscall(
            libc::SYS_memfd_create,
            name.cast::<libc::c_char>(),
            flags as libc::c_uint,
        ))
        .map(|fd| fd as i32)
    }
}
|
||||
|
||||
#[inline(always)]
/// Resizes the file behind `fd` to `len` bytes; fails with `EINVAL` when
/// `len` does not fit in `off_t`.
pub fn ftruncate(fd: i32, len: u64) -> Result<()> {
    let len = to_off_t(len)?;
    // SAFETY: ftruncate takes only integer arguments.
    unsafe { map_c_int(libc::ftruncate(fd, len)).map(|_| ()) }
}
|
||||
|
||||
#[inline(always)]
/// Manipulates file space for `fd` (e.g. hole punching with
/// `FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE`); offsets/lengths that do not
/// fit in `off_t` fail with `EINVAL`.
pub fn fallocate(fd: i32, mode: u32, offset: u64, len: u64) -> Result<()> {
    let offset = to_off_t(offset)?;
    let len = to_off_t(len)?;
    // SAFETY: fallocate takes only integer arguments.
    unsafe { map_c_int(libc::fallocate(fd, mode as libc::c_int, offset, len)).map(|_| ()) }
}
|
||||
|
||||
#[inline(always)]
/// Fills `buf` from `getrandom(2)`, returning the number of bytes written
/// (which may be fewer than requested — callers must loop).
pub fn getrandom(buf: &mut [u8], flags: u32) -> Result<usize> {
    // SAFETY: the pointer/length pair comes from a valid mutable slice.
    let result =
        unsafe { libc::getrandom(buf.as_mut_ptr().cast(), buf.len(), flags as libc::c_uint) };
    if result == -1 {
        Err(last_error())
    } else {
        Ok(result as usize)
    }
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `uaddr` must be valid for the kernel to read as a futex word for the duration of the call.
|
||||
pub unsafe fn futex_wait(uaddr: *const u32, op: u32, expected: u32) -> Result<()> {
|
||||
unsafe {
|
||||
map_c_long(libc::syscall(
|
||||
libc::SYS_futex,
|
||||
uaddr,
|
||||
op as libc::c_int,
|
||||
expected,
|
||||
null_mut::<libc::timespec>(),
|
||||
0,
|
||||
0,
|
||||
))
|
||||
.map(|_| ())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `uaddr` must be valid for the kernel to access as a futex word for the duration of the call.
|
||||
pub unsafe fn futex_wake(uaddr: *const u32, op: u32, count: u32) -> Result<u32> {
|
||||
unsafe {
|
||||
map_c_long(libc::syscall(
|
||||
libc::SYS_futex,
|
||||
uaddr,
|
||||
op as libc::c_int,
|
||||
count,
|
||||
null_mut::<libc::timespec>(),
|
||||
0,
|
||||
0,
|
||||
))
|
||||
.map(|woken| woken as u32)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must uphold the platform `mmap(2)` contract for the provided arguments and manage
|
||||
/// any returned mapping according to Rust aliasing and lifetime rules.
|
||||
pub unsafe fn mmap(
|
||||
addr: *mut u8,
|
||||
len: usize,
|
||||
prot: u32,
|
||||
flags: u32,
|
||||
fd: i32,
|
||||
offset: u64,
|
||||
) -> Result<*mut u8> {
|
||||
let offset = to_off_t(offset)?;
|
||||
let result = unsafe {
|
||||
libc::mmap(
|
||||
addr.cast(),
|
||||
len,
|
||||
prot as libc::c_int,
|
||||
flags as libc::c_int,
|
||||
fd,
|
||||
offset,
|
||||
)
|
||||
};
|
||||
if result == libc::MAP_FAILED {
|
||||
Err(last_error())
|
||||
} else {
|
||||
Ok(result.cast())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must later unmap the returned memory exactly once.
|
||||
pub unsafe fn map_anonymous(len: usize, prot: u32) -> Result<*mut u8> {
|
||||
unsafe { mmap(null_mut(), len, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `addr..addr+len` must refer to a valid mapped region.
|
||||
pub unsafe fn mprotect(addr: *mut u8, len: usize, prot: u32) -> Result<()> {
|
||||
unsafe { map_c_int(libc::mprotect(addr.cast(), len, prot as libc::c_int)).map(|_| ()) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// # Safety
|
||||
///
|
||||
/// `addr..addr+len` must refer to a valid mapping that can be unmapped exactly once.
|
||||
pub unsafe fn munmap(addr: *mut u8, len: usize) -> Result<()> {
|
||||
unsafe { map_c_int(libc::munmap(addr.cast(), len)).map(|_| ()) }
|
||||
}
|
||||
96
lib/runtime/src/platform/linux_x86_64/mesh_alloc/rng.rs
Normal file
96
lib/runtime/src/platform/linux_x86_64/mesh_alloc/rng.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
use super::platform;
|
||||
use super::raw_sys;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Multiply-with-carry PRNG producing 64-bit blocks, handed out as two
/// 32-bit halves per block.
pub struct Mwc64 {
    // Current MWC state word.
    x: u64,
    // Carry word.
    c: u64,
    // Scratch value used by the step function.
    t: u64,
    // Most recently generated 64-bit block.
    value: u64,
    // Which 32-bit half of `value` to hand out next; 2 forces a new block.
    index: u8,
}
|
||||
|
||||
impl Mwc64 {
    /// Seeds the generator directly; `index: 2` forces `next_u32` to compute
    /// a fresh block on first use.
    #[inline(always)]
    pub const fn new(seed1: u64, seed2: u64) -> Self {
        Self {
            x: (seed1 << 32).wrapping_add(seed2),
            c: 123_456_123_456_123_456,
            t: 0,
            value: 0,
            index: 2,
        }
    }

    /// Seeds from the OS entropy source, looping over short reads.
    ///
    /// NOTE(review): the bare `Error(5)` is EIO on Linux — consider a named
    /// constant in `raw_sys`.
    #[inline(always)]
    pub fn from_os_seed() -> raw_sys::Result<Self> {
        let mut buf = [0u8; 16];
        let mut filled = 0usize;
        while filled < buf.len() {
            let read = platform::getrandom(&mut buf[filled..], 0)?;
            if read == 0 {
                // Entropy source produced nothing: surface as an I/O error.
                return Err(raw_sys::Error(5));
            }
            filled += read;
        }

        // `.max(1)` keeps either seed word from being zero.
        let seed1 = u64::from_ne_bytes(buf[0..8].try_into().unwrap());
        let seed2 = u64::from_ne_bytes(buf[8..16].try_into().unwrap());
        Ok(Self::new(seed1.max(1), seed2.max(1)))
    }

    /// Advances the multiply-with-carry state and returns the next 64-bit
    /// block.
    #[inline(always)]
    fn next_block(&mut self) -> u64 {
        self.t = (self.x << 58).wrapping_add(self.c);
        self.c = self.x >> 6;
        self.x = self.x.wrapping_add(self.t);
        // Propagate the carry out of the wrapping addition above.
        self.c = self.c.wrapping_add((self.x < self.t) as u64);
        self.x
    }

    /// Returns the next 32 random bits, generating a new 64-bit block every
    /// second call.
    #[inline(always)]
    pub fn next_u32(&mut self) -> u32 {
        if self.index == 2 {
            self.value = self.next_block();
            self.index = 0;
        }

        let shift = (self.index as u32) * 32;
        let value = (self.value >> shift) as u32;
        self.index += 1;
        value
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Thin convenience wrapper over [`Mwc64`] adding range sampling.
pub struct Mwc {
    // Underlying 64-bit MWC generator.
    inner: Mwc64,
}
|
||||
|
||||
impl Mwc {
    /// See [`Mwc64::new`].
    #[inline(always)]
    pub const fn new(seed1: u64, seed2: u64) -> Self {
        Self {
            inner: Mwc64::new(seed1, seed2),
        }
    }

    /// See [`Mwc64::from_os_seed`].
    #[inline(always)]
    pub fn from_os_seed() -> raw_sys::Result<Self> {
        Ok(Self {
            inner: Mwc64::from_os_seed()?,
        })
    }

    /// Returns the next 32 random bits.
    #[inline(always)]
    pub fn next_u32(&mut self) -> u32 {
        self.inner.next_u32()
    }

    /// Value in `min..=max` using the multiply-shift range-reduction trick
    /// (slightly biased, which is acceptable for shuffling).
    ///
    /// NOTE(review): `1 + max - min` overflows if the range spans all of
    /// `usize`, and ranges wider than `u32::MAX` cannot be covered by one
    /// `next_u32` draw — confirm callers only use small ranges.
    #[inline(always)]
    pub fn in_range(&mut self, min: usize, max: usize) -> usize {
        debug_assert!(min <= max);
        let range = 1 + max - min;
        min + ((((self.next_u32() as u64) * (range as u64)) >> 32) as usize)
    }
}
|
||||
204
lib/runtime/src/platform/linux_x86_64/mesh_alloc/shuffle.rs
Normal file
204
lib/runtime/src/platform/linux_x86_64/mesh_alloc/shuffle.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
use super::bitmap::RelaxedBitmap;
|
||||
use super::constants::{
|
||||
MAX_OBJECT_SLOTS_PER_SPAN, MAX_SHUFFLE_VECTOR_LENGTH, MIN_OBJECT_SIZE,
|
||||
MIN_SHUFFLE_VECTOR_LENGTH,
|
||||
};
|
||||
use super::page::page_size;
|
||||
use super::rng::Mwc;
|
||||
|
||||
/// Identifies one free allocation slot: a miniheap (by small offset) plus an
/// object slot index within it.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct ShuffleEntry {
    pub miniheap_offset: u16,
    pub slot_index: u16,
}

impl ShuffleEntry {
    /// Sentinel marking an unused vector slot (both fields `u16::MAX`).
    pub const EMPTY: Self = Self {
        miniheap_offset: u16::MAX,
        slot_index: u16::MAX,
    };

    /// Builds an entry for `slot_index` within the miniheap at
    /// `miniheap_offset`.
    #[inline(always)]
    pub const fn new(miniheap_offset: u16, slot_index: u16) -> Self {
        Self {
            miniheap_offset,
            slot_index,
        }
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Randomized free list: pending slots live in the tail region
/// `entries[off..max_count]` in shuffled order; `pop` takes from `off`.
pub struct ShuffleVector {
    // Fixed backing storage; only `off..max_count` is meaningful.
    entries: [ShuffleEntry; MAX_SHUFFLE_VECTOR_LENGTH],
    // Logical capacity in use (<= MAX_SHUFFLE_VECTOR_LENGTH).
    max_count: u16,
    // Start of the active region; == max_count when empty, 0 when full.
    off: u16,
    // PRNG driving the shuffling.
    prng: Mwc,
}
|
||||
|
||||
impl ShuffleVector {
    /// Builds an empty vector sized for `object_size` objects (see
    /// [`Self::capacity_for_object_size`]).
    #[inline]
    pub fn for_object_size(object_size: usize, seed1: u64, seed2: u64) -> Self {
        Self::with_capacity(Self::capacity_for_object_size(object_size), seed1, seed2)
    }

    /// Builds an empty vector with logical capacity `max_count`.
    ///
    /// # Panics
    /// Panics if `max_count` exceeds `MAX_SHUFFLE_VECTOR_LENGTH`.
    #[inline]
    pub fn with_capacity(max_count: usize, seed1: u64, seed2: u64) -> Self {
        assert!(max_count <= MAX_SHUFFLE_VECTOR_LENGTH);
        Self {
            entries: [ShuffleEntry::EMPTY; MAX_SHUFFLE_VECTOR_LENGTH],
            max_count: max_count as u16,
            // off == max_count: the active region starts out empty.
            off: max_count as u16,
            prng: Mwc::new(seed1.max(1), seed2.max(1)),
        }
    }

    /// Objects-per-page for `object_size`, clamped to the configured
    /// minimum/maximum shuffle-vector bounds.
    #[inline(always)]
    pub fn capacity_for_object_size(object_size: usize) -> usize {
        let size = if object_size < MIN_OBJECT_SIZE {
            MIN_OBJECT_SIZE
        } else {
            object_size
        };
        let per_page = page_size() / size;
        let with_min = if per_page < MIN_SHUFFLE_VECTOR_LENGTH {
            MIN_SHUFFLE_VECTOR_LENGTH
        } else {
            per_page
        };
        if with_min > MAX_OBJECT_SLOTS_PER_SPAN {
            MAX_OBJECT_SLOTS_PER_SPAN
        } else {
            with_min
        }
    }

    /// Logical capacity of the vector.
    #[inline(always)]
    pub const fn max_count(&self) -> usize {
        self.max_count as usize
    }

    /// Number of entries currently queued.
    #[inline(always)]
    pub const fn len(&self) -> usize {
        self.max_count() - self.off as usize
    }

    /// True when no entries are queued.
    #[inline(always)]
    pub const fn is_empty(&self) -> bool {
        self.off == self.max_count
    }

    /// True when the vector cannot accept another entry.
    #[inline(always)]
    pub const fn is_full(&self) -> bool {
        self.off == 0
    }

    /// True when nothing is left to pop (`off` has reached `max_count`).
    #[inline(always)]
    pub const fn is_exhausted(&self) -> bool {
        self.off >= self.max_count
    }

    /// Discards all queued entries.
    #[inline(always)]
    pub fn clear(&mut self) {
        self.off = self.max_count;
    }

    /// The currently queued entries, in pop order.
    #[inline(always)]
    pub fn active_entries(&self) -> &[ShuffleEntry] {
        &self.entries[self.off as usize..self.max_count as usize]
    }

    /// Counts queued entries referencing the miniheap at `miniheap_offset`.
    #[inline]
    pub fn count_entries_for_offset(&self, miniheap_offset: u16) -> usize {
        self.active_entries()
            .iter()
            .filter(|entry| entry.miniheap_offset == miniheap_offset)
            .count()
    }

    /// Inserts `entry` at the front, then swaps it with a random active slot
    /// so the queue stays shuffled incrementally.
    ///
    /// # Panics
    /// Panics if the vector is full.
    #[inline]
    pub fn push(&mut self, entry: ShuffleEntry) {
        assert!(self.off > 0);
        self.off -= 1;
        let inserted = self.off as usize;
        self.entries[inserted] = entry;
        // Swap with a uniformly chosen position in [inserted, max_count).
        let swap_index = self.prng.in_range(inserted, self.max_count() - 1);
        self.entries.swap(inserted, swap_index);
    }

    /// Removes and returns the front entry, or `None` when empty.
    #[inline]
    pub fn pop(&mut self) -> Option<ShuffleEntry> {
        if self.is_exhausted() {
            return None;
        }
        let idx = self.off as usize;
        let value = self.entries[idx];
        self.off += 1;
        Some(value)
    }

    /// Moves free slots out of `bitmap` into the vector, marking the bitmap
    /// fully allocated; slots that do not fit are handed back to the bitmap.
    ///
    /// Returns the number of entries added; shuffles when more than one was
    /// added. Relies on `invert_masked`/`set_all`/`unset` semantics of
    /// `RelaxedBitmap` — assumed: set bit == allocated slot; TODO confirm.
    #[inline]
    pub fn refill_from_bitmap(
        &mut self,
        miniheap_offset: u16,
        bitmap: &mut RelaxedBitmap,
    ) -> usize {
        // Snapshot the free slots (set bits after inversion), then claim
        // everything in the source bitmap.
        let mut free_bits = *bitmap;
        free_bits.invert_masked();
        bitmap.set_all();

        let mut added = 0usize;
        for slot in free_bits.iter_set_bits() {
            if self.is_full() {
                // No room left: return the slot to the bitmap.
                let _ = bitmap.unset(slot);
                continue;
            }

            self.off -= 1;
            self.entries[self.off as usize] = ShuffleEntry::new(miniheap_offset, slot as u16);
            added += 1;
        }

        if added > 1 {
            self.shuffle_active();
        }

        added
    }

    /// Like [`Self::refill_from_bitmap`], but takes free bits directly from
    /// `heap`'s bitmap; overflow slots are released via `heap.free_offset`.
    #[inline]
    pub fn refill_from_heap(
        &mut self,
        miniheap_offset: u16,
        heap: &super::miniheap::MiniHeap,
    ) -> usize {
        let free_bits = heap.bitmap().take_free_bits();
        let mut added = 0usize;
        for slot in free_bits.iter_set_bits() {
            if self.is_full() {
                let _ = heap.free_offset(slot);
                continue;
            }

            self.off -= 1;
            self.entries[self.off as usize] = ShuffleEntry::new(miniheap_offset, slot as u16);
            added += 1;
        }

        if added > 1 {
            self.shuffle_active();
        }

        added
    }

    /// Fisher-Yates shuffle of the active region `entries[off..max_count]`.
    fn shuffle_active(&mut self) {
        let start = self.off as usize;
        let end = self.max_count();
        let mut i = start;
        while i < end {
            let swap_index = self.prng.in_range(i, end - 1);
            self.entries.swap(i, swap_index);
            i += 1;
        }
    }
}
|
||||
73
lib/runtime/src/platform/linux_x86_64/mesh_alloc/size_map.rs
Normal file
73
lib/runtime/src/platform/linux_x86_64/mesh_alloc/size_map.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use super::constants::MAX_SMALL_ALLOCATION;
|
||||
|
||||
/// Number of small-object size-class slots; index 0 is unused padding so
/// that valid class ids start at 1.
pub const NUM_SIZE_CLASSES: usize = 25;

/// Object byte size for each class id. `CLASS_TO_SIZE[0]` is a placeholder;
/// valid classes are `1..=24`, with sizes strictly increasing from index 1.
pub const CLASS_TO_SIZE: [usize; NUM_SIZE_CLASSES] = [
    16, 16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640, 768, 896,
    1024, 2048, 4096, 8192, 16384,
];

/// Returns the allocation byte size for `class`.
///
/// # Panics
/// Panics if `class >= NUM_SIZE_CLASSES`.
#[inline(always)]
pub const fn byte_size_for_class(class: u8) -> usize {
    CLASS_TO_SIZE[class as usize]
}

/// Maps a requested size to the smallest size class that fits it, or `None`
/// when the request exceeds the largest class (16384 bytes).
///
/// Derived directly from `CLASS_TO_SIZE`, so the table and the lookup cannot
/// drift apart (the previous hand-written if/else chain duplicated every
/// threshold literal).
#[inline(always)]
pub const fn size_class_for(size: usize) -> Option<u8> {
    // Class 0 is a placeholder; scan the real classes 1..NUM_SIZE_CLASSES.
    let mut class = 1;
    while class < NUM_SIZE_CLASSES {
        if size <= CLASS_TO_SIZE[class] {
            return Some(class as u8);
        }
        class += 1;
    }
    None
}
|
||||
|
||||
#[inline(always)]
/// True when `size` can be served by the small-object path rather than a
/// dedicated large allocation.
pub const fn is_small_allocation(size: usize) -> bool {
    size <= MAX_SMALL_ALLOCATION
}
|
||||
45
lib/runtime/src/platform/linux_x86_64/mesh_alloc/span.rs
Normal file
45
lib/runtime/src/platform/linux_x86_64/mesh_alloc/span.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
/// Number of distinct span length classes; longer spans all share the final
/// class.
pub const SPAN_CLASS_COUNT: u32 = 256;

/// A contiguous run of pages, addressed by page offset and page count.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span {
    pub offset: u32,
    pub length: u32,
}

impl Span {
    /// Creates a span covering `length` pages starting at page `offset`.
    #[inline(always)]
    pub const fn new(offset: u32, length: u32) -> Self {
        Self { offset, length }
    }

    /// True when the span covers no pages.
    #[inline(always)]
    pub const fn empty(self) -> bool {
        self.length == 0
    }

    /// Truncates `self` to its first `count` pages and returns the remainder
    /// as a new span.
    ///
    /// # Panics
    /// Panics if `count > self.length`.
    #[inline(always)]
    pub fn split_after(&mut self, count: u32) -> Self {
        assert!(count <= self.length);
        let tail = Self::new(self.offset + count, self.length - count);
        self.length = count;
        tail
    }

    /// Length class for this span: `length - 1`, saturating at
    /// `SPAN_CLASS_COUNT - 1`.
    ///
    /// NOTE(review): underflows for an empty span — callers are assumed to
    /// never classify a zero-length span; confirm.
    #[inline(always)]
    pub const fn span_class(self) -> u32 {
        let capped = if self.length > SPAN_CLASS_COUNT {
            SPAN_CLASS_COUNT
        } else {
            self.length
        };
        capped - 1
    }

    /// Total bytes covered by the span for the given page size.
    #[inline(always)]
    pub fn byte_length_for_page_size(self, page_size: usize) -> usize {
        self.length as usize * page_size
    }
}
|
||||
267
lib/runtime/src/platform/linux_x86_64/mesh_alloc/stats.rs
Normal file
267
lib/runtime/src/platform/linux_x86_64/mesh_alloc/stats.rs
Normal file
@@ -0,0 +1,267 @@
|
||||
use core::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use super::page::page_size;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
pub struct CompactionEstimate {
|
||||
pub candidate_heaps: u32,
|
||||
pub candidate_pages: u32,
|
||||
pub candidate_free_bytes: usize,
|
||||
pub best_case_meshes: u32,
|
||||
pub best_case_reclaimable_pages: u32,
|
||||
pub best_case_reclaimable_bytes: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
pub struct MeshStats {
|
||||
pub arena_size: usize,
|
||||
pub reserved_bytes: usize,
|
||||
pub reusable_span_count: u32,
|
||||
pub reusable_span_bytes: usize,
|
||||
pub live_miniheaps: u32,
|
||||
pub live_small_heaps: u32,
|
||||
pub partial_small_heaps: u32,
|
||||
pub full_small_heaps: u32,
|
||||
pub meshed_small_heaps: u32,
|
||||
pub reusable_small_heaps: u32,
|
||||
pub live_large_allocations: u32,
|
||||
pub live_small_bytes: usize,
|
||||
pub live_large_bytes: usize,
|
||||
pub retained_small_span_bytes: usize,
|
||||
pub retained_large_span_bytes: usize,
|
||||
pub virtual_small_span_bytes: usize,
|
||||
pub small_allocations: u64,
|
||||
pub small_deallocations: u64,
|
||||
pub large_allocations: u64,
|
||||
pub large_deallocations: u64,
|
||||
pub compact_calls: u64,
|
||||
pub meshes_performed: u64,
|
||||
pub meshed_pages: u64,
|
||||
pub meshed_bytes: u64,
|
||||
pub compaction: CompactionEstimate,
|
||||
}
|
||||
|
||||
impl MeshStats {
    /// Bytes currently handed out to callers (small + large paths).
    #[inline(always)]
    pub const fn live_bytes(&self) -> usize {
        self.live_small_bytes + self.live_large_bytes
    }

    /// Span bytes the allocator is retaining (small + large).
    #[inline(always)]
    pub const fn retained_bytes(&self) -> usize {
        self.retained_small_span_bytes + self.retained_large_span_bytes
    }

    /// Small-path overhead: retained span bytes not covered by live
    /// allocations (saturates at zero).
    #[inline(always)]
    pub const fn small_fragmentation_bytes(&self) -> usize {
        self.retained_small_span_bytes
            .saturating_sub(self.live_small_bytes)
    }

    /// Virtual bytes that exist only as meshed aliases over retained spans
    /// (saturates at zero).
    #[inline(always)]
    pub const fn mesh_alias_bytes(&self) -> usize {
        self.virtual_small_span_bytes
            .saturating_sub(self.retained_small_span_bytes)
    }

    /// Turns the raw counters into a compaction recommendation.
    ///
    /// Heuristic: `Compact` when >= 4 pages are reclaimable and either
    /// fragmentation >= 20% or reclaimable covers >= a quarter of retained
    /// memory; `Consider` when >= 1 page is reclaimable and either
    /// fragmentation >= 10% or at least two candidate heaps exist; otherwise
    /// `Idle`. The thresholds are tuning knobs, not guarantees.
    pub fn compaction_advice(&self) -> CompactionAdvice {
        let fragmented = self.small_fragmentation_bytes();
        let retained = self.retained_small_span_bytes;
        // Integer percentage, clamped to 100; 0 when nothing is retained.
        let fragmentation_percent = fragmented
            .saturating_mul(100)
            .checked_div(retained)
            .unwrap_or(0)
            .min(100) as u8;

        let reclaimable = self.compaction.best_case_reclaimable_bytes;
        let page = page_size();
        let recommendation = if self.compaction.best_case_meshes == 0 || reclaimable < page {
            CompactionRecommendation::Idle
        } else if reclaimable >= page * 4
            && (fragmentation_percent >= 20 || reclaimable.saturating_mul(4) >= retained.max(page))
        {
            CompactionRecommendation::Compact
        } else if reclaimable >= page
            && (fragmentation_percent >= 10 || self.compaction.candidate_heaps >= 2)
        {
            CompactionRecommendation::Consider
        } else {
            CompactionRecommendation::Idle
        };

        CompactionAdvice {
            recommendation,
            fragmentation_bytes: fragmented,
            fragmentation_percent,
            candidate_heaps: self.compaction.candidate_heaps,
            candidate_free_bytes: self.compaction.candidate_free_bytes,
            best_case_meshes: self.compaction.best_case_meshes,
            best_case_reclaimable_pages: self.compaction.best_case_reclaimable_pages,
            best_case_reclaimable_bytes: reclaimable,
        }
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
pub struct CompactionAdvice {
|
||||
pub recommendation: CompactionRecommendation,
|
||||
pub fragmentation_bytes: usize,
|
||||
pub fragmentation_percent: u8,
|
||||
pub candidate_heaps: u32,
|
||||
pub candidate_free_bytes: usize,
|
||||
pub best_case_meshes: u32,
|
||||
pub best_case_reclaimable_pages: u32,
|
||||
pub best_case_reclaimable_bytes: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
/// How urgently the allocator thinks a compaction pass is worthwhile.
pub enum CompactionRecommendation {
    // No useful meshing opportunity right now.
    #[default]
    Idle,
    // Some memory is reclaimable; compact when convenient.
    Consider,
    // Significant memory is reclaimable; compaction recommended.
    Compact,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub struct RuntimeCompactionPolicy {
|
||||
pub minimum_recommendation: CompactionRecommendation,
|
||||
pub min_fragmentation_bytes: usize,
|
||||
pub min_reclaimable_bytes: usize,
|
||||
pub min_candidate_heaps: u32,
|
||||
}
|
||||
|
||||
impl RuntimeCompactionPolicy {
    /// True when `advice` clears every configured threshold: recommendation
    /// strength, fragmentation bytes, reclaimable bytes, and candidate heap
    /// count.
    pub fn should_compact(&self, advice: &CompactionAdvice) -> bool {
        recommendation_rank(advice.recommendation)
            >= recommendation_rank(self.minimum_recommendation)
            && advice.fragmentation_bytes >= self.min_fragmentation_bytes
            && advice.best_case_reclaimable_bytes >= self.min_reclaimable_bytes
            && advice.candidate_heaps >= self.min_candidate_heaps
    }
}
|
||||
|
||||
impl Default for RuntimeCompactionPolicy {
    /// Conservative defaults: act on `Consider` or stronger, and only when at
    /// least one page of fragmentation/reclaimable memory and two candidate
    /// heaps exist.
    fn default() -> Self {
        let page = page_size();
        Self {
            minimum_recommendation: CompactionRecommendation::Consider,
            min_fragmentation_bytes: page,
            min_reclaimable_bytes: page,
            min_candidate_heaps: 2,
        }
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum RuntimeCompactionResult {
|
||||
Compacted {
|
||||
meshes: usize,
|
||||
advice: CompactionAdvice,
|
||||
},
|
||||
Skipped {
|
||||
reason: CompactionSkipReason,
|
||||
advice: Option<CompactionAdvice>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum CompactionSkipReason {
|
||||
AllocatorUnavailable,
|
||||
ThreadUnavailable,
|
||||
NotAtSafepoint,
|
||||
ThreadsActive,
|
||||
Policy,
|
||||
}
|
||||
|
||||
#[inline(always)]
/// Orders recommendations by urgency so they can be compared numerically.
const fn recommendation_rank(recommendation: CompactionRecommendation) -> u8 {
    match recommendation {
        CompactionRecommendation::Idle => 0,
        CompactionRecommendation::Consider => 1,
        CompactionRecommendation::Compact => 2,
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
pub(crate) struct CounterSnapshot {
|
||||
pub small_allocations: u64,
|
||||
pub small_deallocations: u64,
|
||||
pub large_allocations: u64,
|
||||
pub large_deallocations: u64,
|
||||
pub compact_calls: u64,
|
||||
pub meshes_performed: u64,
|
||||
pub meshed_pages: u64,
|
||||
pub meshed_bytes: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct StatsState {
|
||||
small_allocations: AtomicU64,
|
||||
small_deallocations: AtomicU64,
|
||||
large_allocations: AtomicU64,
|
||||
large_deallocations: AtomicU64,
|
||||
compact_calls: AtomicU64,
|
||||
meshes_performed: AtomicU64,
|
||||
meshed_pages: AtomicU64,
|
||||
meshed_bytes: AtomicU64,
|
||||
}
|
||||
|
||||
impl StatsState {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
small_allocations: AtomicU64::new(0),
|
||||
small_deallocations: AtomicU64::new(0),
|
||||
large_allocations: AtomicU64::new(0),
|
||||
large_deallocations: AtomicU64::new(0),
|
||||
compact_calls: AtomicU64::new(0),
|
||||
meshes_performed: AtomicU64::new(0),
|
||||
meshed_pages: AtomicU64::new(0),
|
||||
meshed_bytes: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_small_allocation(&self) {
|
||||
self.small_allocations.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_small_deallocation(&self) {
|
||||
self.small_deallocations.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_large_allocation(&self) {
|
||||
self.large_allocations.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_large_deallocation(&self) {
|
||||
self.large_deallocations.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_compact_call(&self) {
|
||||
self.compact_calls.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn record_mesh(&self, pages: u32, bytes: usize) {
|
||||
self.meshes_performed.fetch_add(1, Ordering::Relaxed);
|
||||
self.meshed_pages.fetch_add(pages as u64, Ordering::Relaxed);
|
||||
self.meshed_bytes.fetch_add(bytes as u64, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn snapshot(&self) -> CounterSnapshot {
|
||||
CounterSnapshot {
|
||||
small_allocations: self.small_allocations.load(Ordering::Relaxed),
|
||||
small_deallocations: self.small_deallocations.load(Ordering::Relaxed),
|
||||
large_allocations: self.large_allocations.load(Ordering::Relaxed),
|
||||
large_deallocations: self.large_deallocations.load(Ordering::Relaxed),
|
||||
compact_calls: self.compact_calls.load(Ordering::Relaxed),
|
||||
meshes_performed: self.meshes_performed.load(Ordering::Relaxed),
|
||||
meshed_pages: self.meshed_pages.load(Ordering::Relaxed),
|
||||
meshed_bytes: self.meshed_bytes.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
137
lib/runtime/src/platform/linux_x86_64/mesh_alloc/sync.rs
Normal file
137
lib/runtime/src/platform/linux_x86_64/mesh_alloc/sync.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use super::platform;
|
||||
use super::raw_sys;
|
||||
|
||||
// Three-state futex mutex word (cf. Drepper, "Futexes Are Tricky"):
// UNLOCKED -> LOCKED on the uncontended fast path; any thread that has to
// wait escalates the word to CONTENDED so the unlocker knows a wake is due.
const UNLOCKED: u32 = 0;
const LOCKED: u32 = 1;
const CONTENDED: u32 = 2;

/// A minimal mutex built directly on the Linux futex syscall.
///
/// No poisoning, no data payload -- callers pair it with the data they guard.
/// `new` is `const`, so it can back `static` items.
#[derive(Debug)]
pub struct FutexMutex {
    // One of UNLOCKED / LOCKED / CONTENDED; the futex wait/wake word.
    state: AtomicU32,
}
|
||||
|
||||
impl FutexMutex {
    /// Creates an unlocked mutex. `const` so it can initialize statics.
    pub const fn new() -> Self {
        Self {
            state: AtomicU32::new(UNLOCKED),
        }
    }

    /// Acquires the lock, sleeping on the futex if it is contended.
    ///
    /// Fast path is a single CAS UNLOCKED -> LOCKED; on failure the slow path
    /// marks the word CONTENDED before sleeping, which is what obliges the
    /// eventual unlocker to issue a futex wake.
    pub fn lock(&self) -> FutexMutexGuard<'_> {
        if self
            .state
            .compare_exchange(UNLOCKED, LOCKED, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            self.lock_contended();
        }

        FutexMutexGuard { mutex: self }
    }

    /// Slow path: swap in CONTENDED; if the previous value was UNLOCKED we
    /// now own the lock, otherwise sleep until woken and retry.
    fn lock_contended(&self) {
        loop {
            // Swapping to CONTENDED (never back down to LOCKED) is essential:
            // it guarantees the holder's unlock sees a non-LOCKED value and
            // performs the wake.
            if self.state.swap(CONTENDED, Ordering::Acquire) == UNLOCKED {
                return;
            }

            // EAGAIN = word no longer CONTENDED before we slept; EINTR =
            // interrupted by a signal. Every outcome -- including other
            // errors -- just re-runs the loop, which re-checks the state, so
            // the worst case is a busy retry, never a missed wakeup.
            match unsafe {
                platform::futex_wait(self.state_ptr(), raw_sys::FUTEX_WAIT_PRIVATE, CONTENDED)
            } {
                Ok(()) => {}
                Err(error) if matches!(error.errno(), raw_sys::EAGAIN | raw_sys::EINTR) => {}
                Err(_) => {}
            }
        }
    }

    /// Releases the lock.
    ///
    /// `fetch_sub(1)` takes LOCKED -> UNLOCKED with one atomic op in the
    /// uncontended case. A previous value other than LOCKED means CONTENDED:
    /// reset to UNLOCKED and wake one sleeper. The transient `1` another
    /// thread may observe between the fetch_sub and the store is harmless --
    /// its fast-path CAS fails and it enters the slow path, which re-checks.
    fn unlock(&self) {
        if self.state.fetch_sub(1, Ordering::Release) != LOCKED {
            self.state.store(UNLOCKED, Ordering::Release);
            // Wake result ignored: zero woken just means the waiter raced us
            // and will observe UNLOCKED on its next swap.
            let _ =
                unsafe { platform::futex_wake(self.state_ptr(), raw_sys::FUTEX_WAKE_PRIVATE, 1) };
        }
    }

    /// Raw pointer to the state word, as the futex syscalls require.
    #[inline(always)]
    fn state_ptr(&self) -> *const u32 {
        (&self.state as *const AtomicU32).cast::<u32>()
    }
}
|
||||
|
||||
impl Default for FutexMutex {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// RAII guard returned by [`FutexMutex::lock`]; releases the lock on drop.
pub struct FutexMutexGuard<'a> {
    mutex: &'a FutexMutex,
}

impl Drop for FutexMutexGuard<'_> {
    // Unlock exactly once when the guard leaves scope.
    fn drop(&mut self) {
        self.mutex.unlock();
    }
}
|
||||
|
||||
pub(crate) fn futex_wait_for_value(state: &AtomicU32, expected: u32) {
|
||||
match unsafe {
|
||||
platform::futex_wait(
|
||||
(state as *const AtomicU32).cast::<u32>(),
|
||||
raw_sys::FUTEX_WAIT_PRIVATE,
|
||||
expected,
|
||||
)
|
||||
} {
|
||||
Ok(()) => {}
|
||||
Err(error) if matches!(error.errno(), raw_sys::EAGAIN | raw_sys::EINTR) => {}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn futex_wake_all(state: &AtomicU32) {
|
||||
let _ = unsafe {
|
||||
platform::futex_wake(
|
||||
(state as *const AtomicU32).cast::<u32>(),
|
||||
raw_sys::FUTEX_WAKE_PRIVATE,
|
||||
i32::MAX as u32,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use core::sync::atomic::{AtomicUsize, Ordering};

    use std::sync::Arc;
    use std::thread;
    use std::vec::Vec;

    use super::FutexMutex;

    /// 4 threads x 5000 locked increments must总 up to exactly 20_000 if and
    /// only if the mutex actually serializes the critical sections (the
    /// Relaxed increments alone would not be enough).
    #[test]
    fn futex_mutex_serializes_multiple_threads() {
        let mutex = Arc::new(FutexMutex::new());
        let counter = Arc::new(AtomicUsize::new(0));
        let mut threads = Vec::new();

        for _ in 0..4 {
            let mutex = Arc::clone(&mutex);
            let counter = Arc::clone(&counter);
            threads.push(thread::spawn(move || {
                for _ in 0..5000 {
                    let _guard = mutex.lock();
                    counter.fetch_add(1, Ordering::Relaxed);
                }
            }));
        }

        for thread in threads {
            thread.join().unwrap();
        }

        assert_eq!(counter.load(Ordering::Acquire), 20_000);
    }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
use core::array;
|
||||
use core::ptr::null;
|
||||
|
||||
use super::constants::{MAX_ATTACHED_MINIHEAPS_PER_CLASS, MIN_OBJECT_SIZE, NUM_SIZE_CLASSES};
|
||||
use super::miniheap::{MiniHeap, MiniHeapId};
|
||||
use super::platform;
|
||||
use super::raw_sys;
|
||||
use super::rng::Mwc;
|
||||
use super::shuffle::ShuffleVector;
|
||||
use super::size_map::byte_size_for_class;
|
||||
|
||||
/// Per-size-class allocation state: the randomized free-slot order plus the
/// small fixed set of miniheaps currently attached to this thread.
///
/// `Copy` despite holding raw `*const MiniHeap` pointers -- copying only
/// duplicates the pointers; the miniheaps themselves are owned elsewhere.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ClassState {
    // Randomized offsets handed out by the allocator fast path.
    pub shuffle: ShuffleVector,
    // Ids of attached miniheaps; slots >= attached_len hold MiniHeapId::new(0).
    pub attached_ids: [MiniHeapId; MAX_ATTACHED_MINIHEAPS_PER_CLASS],
    // Cached pointers parallel to `attached_ids`; null above `attached_len`.
    pub attached_heaps: [*const MiniHeap; MAX_ATTACHED_MINIHEAPS_PER_CLASS],
    // Number of live entries in the two arrays above.
    pub attached_len: u8,
    // Round-robin position for picking the next attached heap.
    pub attached_cursor: u8,
}
|
||||
|
||||
impl ClassState {
|
||||
fn new(object_size: usize, seed1: u64, seed2: u64) -> Self {
|
||||
Self {
|
||||
shuffle: ShuffleVector::for_object_size(object_size, seed1, seed2),
|
||||
attached_ids: [MiniHeapId::new(0); MAX_ATTACHED_MINIHEAPS_PER_CLASS],
|
||||
attached_heaps: [null(); MAX_ATTACHED_MINIHEAPS_PER_CLASS],
|
||||
attached_len: 0,
|
||||
attached_cursor: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn clear_attached(&mut self) {
|
||||
self.attached_len = 0;
|
||||
self.attached_cursor = 0;
|
||||
self.shuffle.clear();
|
||||
|
||||
let mut index = 0usize;
|
||||
while index < MAX_ATTACHED_MINIHEAPS_PER_CLASS {
|
||||
self.attached_ids[index] = MiniHeapId::new(0);
|
||||
self.attached_heaps[index] = null();
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn attached_full(&self) -> bool {
|
||||
self.attached_len as usize == MAX_ATTACHED_MINIHEAPS_PER_CLASS
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn push_attached(&mut self, id: MiniHeapId, heap: *const MiniHeap) -> Option<u8> {
|
||||
if self.attached_full() {
|
||||
return None;
|
||||
}
|
||||
let index = self.attached_len as usize;
|
||||
self.attached_ids[index] = id;
|
||||
self.attached_heaps[index] = heap;
|
||||
self.attached_len += 1;
|
||||
Some(index as u8)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_attached(&self, id: MiniHeapId) -> Option<u8> {
|
||||
let len = self.attached_len as usize;
|
||||
let mut i = 0usize;
|
||||
while i < len {
|
||||
if self.attached_ids[i] == id {
|
||||
return Some(i as u8);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn heap_at(&self, index: usize) -> Option<&MiniHeap> {
|
||||
if index >= self.attached_len as usize {
|
||||
return None;
|
||||
}
|
||||
let heap = self.attached_heaps[index];
|
||||
if heap.is_null() {
|
||||
return None;
|
||||
}
|
||||
|
||||
unsafe { Some(&*heap) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-thread allocator front end: one [`ClassState`] for each size class,
/// keyed by the owning kernel thread id.
#[derive(Debug)]
pub struct ThreadLocalHeap {
    // gettid() of the owning thread, cached at construction.
    thread_id: u32,
    classes: [ClassState; NUM_SIZE_CLASSES],
}
|
||||
|
||||
impl ThreadLocalHeap {
    /// Builds the per-thread heap: caches the kernel thread id and seeds one
    /// shuffle vector per size class from OS entropy.
    ///
    /// # Errors
    /// Propagates failures from `gettid` or from seeding the RNG.
    pub fn new() -> raw_sys::Result<Self> {
        let thread_id = platform::gettid()?;
        let mut seed_rng = Mwc::from_os_seed()?;
        // array::from_fn visits indices in order, so seed draws are
        // deterministic per class given the RNG stream.
        let classes = array::from_fn(|class| {
            let object_size = byte_size_for_class(class as u8).max(MIN_OBJECT_SIZE);
            // `+ 1` presumably keeps seeds non-zero for the MWC stream --
            // NOTE(review): confirm Mwc's seed requirements.
            let seed1 = seed_rng.next_u32() as u64 + 1;
            let seed2 = seed_rng.next_u32() as u64 + 1;
            ClassState::new(object_size, seed1, seed2)
        });

        Ok(Self { thread_id, classes })
    }

    /// Kernel thread id this heap was built on.
    #[inline(always)]
    pub const fn thread_id(&self) -> u32 {
        self.thread_id
    }

    /// Shared view of the state for `class`; panics if out of range.
    #[inline(always)]
    pub(crate) fn class(&self, class: u8) -> &ClassState {
        &self.classes[class as usize]
    }

    /// Mutable view of the state for `class`; panics if out of range.
    #[inline(always)]
    pub(crate) fn class_mut(&mut self, class: u8) -> &mut ClassState {
        &mut self.classes[class as usize]
    }
}
|
||||
4
lib/runtime/src/platform/linux_x86_64/mod.rs
Normal file
4
lib/runtime/src/platform/linux_x86_64/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod mesh_alloc;
|
||||
pub mod reactor;
|
||||
pub mod runtime;
|
||||
pub(crate) mod uring;
|
||||
340
lib/runtime/src/platform/linux_x86_64/reactor.rs
Normal file
340
lib/runtime/src/platform/linux_x86_64/reactor.rs
Normal file
@@ -0,0 +1,340 @@
|
||||
use std::cell::Cell;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::BTreeMap;
|
||||
use std::io;
|
||||
use std::os::fd::RawFd;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::uring::{IORING_OP_ASYNC_CANCEL, IoUring, IoUringCqe, IoUringSqe};
|
||||
|
||||
// Fixed user_data value that cross-thread MSG_RING wakeups target; never
// produced by `make_token`, so it is recognized before kind decoding.
const WAKE_TARGET_TOKEN: u64 = 1;
// Completion tokens pack a kind into the top byte, a sequence number below.
const TOKEN_KIND_SHIFT: u64 = 56;
const TOKEN_KIND_MASK: u64 = 0xff << TOKEN_KIND_SHIFT;

/// Discriminates what a CQE's token refers to. Discriminant values must stay
/// in sync with `decode_token_kind`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum CompletionKind {
    // An armed coalesced timer expired.
    Timer = 1,
    // Ack for a timeout-remove request (bookkeeping only).
    TimerRemove = 2,
    // Ack for an outbound MSG_RING notify (bookkeeping only).
    NotifySend = 3,
    // A user-submitted I/O operation completed.
    Operation = 4,
    // Ack for an ASYNC_CANCEL request (bookkeeping only).
    OperationCancel = 5,
}
|
||||
|
||||
// One-shot callback invoked when an operation's CQE arrives.
type CompletionHandler = Box<dyn FnOnce(IoUringCqe) + Send + 'static>;

/// State shared between a [`Reactor`] and all of its [`ThreadNotifier`]s.
struct NotifierInner {
    // fd of the target reactor's ring, used as a MSG_RING destination.
    ring_fd: RawFd,
    // Set by Reactor::drop; after that, `ring_fd` must not be targeted.
    closed: AtomicBool,
}
|
||||
|
||||
impl NotifierInner {
    /// Sends a wake message to the target thread's ring.
    ///
    /// Submission happens on the *caller's* ring (thread-local or the global
    /// fallback) via `IORING_OP_MSG_RING`, so any thread may notify.
    ///
    /// # Errors
    /// `BrokenPipe` once the target reactor has been dropped, since its ring
    /// fd would then be stale. NOTE(review): a drop between this check and
    /// the submission below would race -- confirm callers keep the target
    /// reactor alive while notifying.
    fn notify(&self) -> io::Result<()> {
        if self.closed.load(Ordering::Acquire) {
            return Err(io::Error::new(
                io::ErrorKind::BrokenPipe,
                "target runtime ring is closed",
            ));
        }

        IoUring::with_submitter(|ring| {
            ring.submit_msg_ring(
                self.ring_fd,
                // Delivered as the CQE's user_data on the target ring.
                WAKE_TARGET_TOKEN,
                1,
                // Local completion token for the send ack (skipped on success).
                make_token(CompletionKind::NotifySend, 0),
            )
        })
    }
}
|
||||
|
||||
/// Cheap, cloneable handle for waking a [`Reactor`] from any thread.
#[derive(Clone)]
pub struct ThreadNotifier {
    inner: Arc<NotifierInner>,
}

impl ThreadNotifier {
    /// Wakes the target reactor's thread; see [`NotifierInner::notify`]
    /// for the error conditions.
    pub fn notify(&self) -> io::Result<()> {
        self.inner.notify()
    }
}
|
||||
|
||||
/// Readiness summary of one [`Reactor::poll`] batch.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct ReadyEvents {
    /// The armed timer expired during this batch.
    pub timer: bool,
    /// At least one cross-thread wake message arrived during this batch.
    pub wake: bool,
}
|
||||
|
||||
/// Single-threaded io_uring-backed event loop core.
///
/// Interior mutability is all `Cell`/`RefCell`: the reactor is only ever
/// driven from one thread; cross-thread interaction goes through
/// [`ThreadNotifier`] / MSG_RING.
pub struct Reactor {
    ring: IoUring,
    // Shared with every ThreadNotifier clone; Drop flips `closed` here.
    notifier: Arc<NotifierInner>,
    // Monotonic sequence used to mint completion tokens.
    next_token: Cell<u64>,
    // Token of the currently armed coalesced timer, if any.
    active_timer_token: Cell<Option<u64>>,
    // Wake completions seen by poll() but not yet drained.
    pending_wakes: Cell<u64>,
    // Timer expirations seen by poll() but not yet drained.
    pending_timers: Cell<u64>,
    // Operation token -> one-shot completion callback.
    completions: RefCell<BTreeMap<u64, CompletionHandler>>,
}
|
||||
|
||||
/// Convenience alias for [`create_reactor`].
pub fn create() -> io::Result<(Reactor, ThreadNotifier)> {
    create_reactor()
}
|
||||
|
||||
pub fn create_reactor() -> io::Result<(Reactor, ThreadNotifier)> {
|
||||
let ring = IoUring::new(64)?;
|
||||
let notifier = Arc::new(NotifierInner {
|
||||
ring_fd: ring.ring_fd(),
|
||||
closed: AtomicBool::new(false),
|
||||
});
|
||||
|
||||
Ok((
|
||||
Reactor {
|
||||
ring,
|
||||
notifier: Arc::clone(¬ifier),
|
||||
next_token: Cell::new(1),
|
||||
active_timer_token: Cell::new(None),
|
||||
pending_wakes: Cell::new(0),
|
||||
pending_timers: Cell::new(0),
|
||||
completions: RefCell::new(BTreeMap::new()),
|
||||
},
|
||||
ThreadNotifier { inner: notifier },
|
||||
))
|
||||
}
|
||||
|
||||
impl Reactor {
    /// Routes io_uring submissions made on this thread to this reactor's ring.
    pub(crate) fn bind_current_thread(&self) {
        self.ring.bind_current_thread();
    }

    /// Undoes [`Reactor::bind_current_thread`] (no-op if another ring is bound).
    pub(crate) fn unbind_current_thread(&self) {
        self.ring.unbind_current_thread();
    }

    /// Drains all currently available completions without blocking.
    ///
    /// Returns `Ok(None)` when the completion queue was empty, otherwise the
    /// accumulated readiness (timer fired / wake received) of this batch.
    pub fn poll(&self) -> io::Result<Option<ReadyEvents>> {
        let mut ready = ReadyEvents::default();
        let saw_any = self
            .ring
            .drain_completions(|cqe| self.process_cqe(cqe, &mut ready));
        if saw_any { Ok(Some(ready)) } else { Ok(None) }
    }

    /// Blocks until at least one completion is available on the ring.
    pub fn wait(&self) -> io::Result<()> {
        self.ring.wait_for_cqe()
    }

    /// Arms, re-arms, or disarms the single coalesced timer.
    ///
    /// `deadline` is absolute on the monotonic clock (see [`monotonic_now`]);
    /// `None` cancels any armed timer.
    pub fn rearm_timer(&self, deadline: Option<Duration>) -> io::Result<()> {
        match (self.active_timer_token.get(), deadline) {
            // Already armed: update the existing timeout in place.
            (Some(active), Some(deadline)) => {
                self.ring.submit_timeout_update(active, deadline)?;
            }
            // Armed but no longer wanted: remove it. The token is cleared
            // first so a late Timer CQE is ignored by process_cqe.
            (Some(active), None) => {
                self.active_timer_token.set(None);
                self.ring
                    .submit_timeout_remove(active, self.next_token(CompletionKind::TimerRemove))?;
            }
            // Not armed yet: submit a fresh absolute timeout.
            (None, Some(deadline)) => {
                let token = self.next_token(CompletionKind::Timer);
                self.active_timer_token.set(Some(token));
                self.ring.submit_timeout(token, deadline)?;
            }
            (None, None) => {}
        }

        Ok(())
    }

    /// Submits one SQE and registers `on_complete` for its CQE.
    ///
    /// Returns the operation token (see [`Reactor::cancel_operation`]). The
    /// callback is registered *before* submission and unregistered on
    /// submission failure, so it fires at most once and never for a failed
    /// submit.
    pub(crate) fn submit_operation(
        &self,
        fill: impl FnOnce(&mut IoUringSqe),
        on_complete: impl FnOnce(IoUringCqe) + Send + 'static,
    ) -> io::Result<u64> {
        let token = self.next_token(CompletionKind::Operation);
        self.completions
            .borrow_mut()
            .insert(token, Box::new(on_complete));

        if let Err(error) = self.ring.submit_with_token(token, fill) {
            let _ = self.completions.borrow_mut().remove(&token);
            return Err(error);
        }

        Ok(token)
    }

    /// Requests cancellation of a previously submitted operation.
    ///
    /// Best-effort: the operation may still complete normally; its registered
    /// callback fires either way (with the cancel error code on success).
    pub(crate) fn cancel_operation(&self, token: u64) -> io::Result<()> {
        self.ring
            .submit_with_token(self.next_token(CompletionKind::OperationCancel), |sqe| {
                sqe.opcode = IORING_OP_ASYNC_CANCEL;
                sqe.fd = -1;
                // ASYNC_CANCEL matches on the target SQE's user_data.
                sqe.addr = token;
            })
    }

    /// Consumes and returns the wake count accumulated by [`Reactor::poll`].
    ///
    /// # Errors
    /// `WouldBlock` when no wakes are pending.
    pub fn drain_wake(&self) -> io::Result<u64> {
        let wakes = self.pending_wakes.replace(0);
        if wakes == 0 {
            Err(io::Error::new(
                io::ErrorKind::WouldBlock,
                "no wake completions are pending",
            ))
        } else {
            Ok(wakes)
        }
    }

    /// Consumes and returns the timer-expiration count accumulated by poll.
    ///
    /// # Errors
    /// `WouldBlock` when no expirations are pending.
    pub fn drain_timer(&self) -> io::Result<u64> {
        let timers = self.pending_timers.replace(0);
        if timers == 0 {
            Err(io::Error::new(
                io::ErrorKind::WouldBlock,
                "no timer completions are pending",
            ))
        } else {
            Ok(timers)
        }
    }

    /// Dispatches one CQE: cross-thread wakes, timer expirations, and
    /// operation callbacks. Bookkeeping acks (remove/cancel/notify) and
    /// unknown tokens are dropped.
    fn process_cqe(&self, cqe: IoUringCqe, ready: &mut ReadyEvents) {
        // Wake messages target the fixed well-known token, not a minted one.
        if cqe.user_data == WAKE_TARGET_TOKEN {
            ready.wake = true;
            // `res` carries the MSG_RING payload; clamp to at least one wake.
            let wakes = cqe.res.max(1) as u64;
            self.pending_wakes
                .set(self.pending_wakes.get().saturating_add(wakes));
            return;
        }

        match decode_token_kind(cqe.user_data) {
            Some(CompletionKind::Timer) => {
                // Only the *currently* armed token clears the slot; a stale
                // CQE from a removed timer must not.
                if self.active_timer_token.get() == Some(cqe.user_data) {
                    self.active_timer_token.set(None);
                }
                // -ETIME is the normal "timeout expired" result; anything
                // else (e.g. -ECANCELED from a remove) is not an expiration.
                if cqe.res == -libc::ETIME {
                    ready.timer = true;
                    self.pending_timers
                        .set(self.pending_timers.get().saturating_add(1));
                }
            }
            Some(CompletionKind::Operation) => {
                // One-shot: the handler is removed before being invoked.
                if let Some(callback) = self.completions.borrow_mut().remove(&cqe.user_data) {
                    callback(cqe);
                }
            }
            Some(CompletionKind::TimerRemove)
            | Some(CompletionKind::NotifySend)
            | Some(CompletionKind::OperationCancel)
            | None => {}
        }
    }

    /// Mints the next completion token with `kind` packed into its top byte.
    fn next_token(&self, kind: CompletionKind) -> u64 {
        let seq = self.next_token.get();
        self.next_token.set(seq.wrapping_add(1));
        make_token(kind, seq)
    }
}
|
||||
|
||||
impl Drop for Reactor {
    // Mark the shared notifier state closed so outstanding ThreadNotifier
    // clones stop targeting the soon-to-be-closed ring fd.
    fn drop(&mut self) {
        self.notifier.closed.store(true, Ordering::Release);
    }
}
|
||||
|
||||
pub fn monotonic_now() -> io::Result<Duration> {
|
||||
let mut now = std::mem::MaybeUninit::<libc::timespec>::uninit();
|
||||
let result = unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, now.as_mut_ptr()) };
|
||||
if result == -1 {
|
||||
return Err(io::Error::last_os_error());
|
||||
}
|
||||
|
||||
let now = unsafe { now.assume_init() };
|
||||
Ok(Duration::new(now.tv_sec as u64, now.tv_nsec as u32))
|
||||
}
|
||||
|
||||
/// Packs `kind` into the top 8 bits of a token; the low 56 bits carry the
/// sequence number (masked, so a huge seq can never corrupt the kind byte).
fn make_token(kind: CompletionKind, seq: u64) -> u64 {
    ((kind as u64) << TOKEN_KIND_SHIFT) | (seq & !TOKEN_KIND_MASK)
}
|
||||
|
||||
/// Inverse of [`make_token`]: recovers the kind from a token's top byte.
///
/// Returns `None` for unknown discriminants -- notably `WAKE_TARGET_TOKEN`
/// and any foreign user_data. Arms must stay in sync with `CompletionKind`.
fn decode_token_kind(token: u64) -> Option<CompletionKind> {
    match ((token & TOKEN_KIND_MASK) >> TOKEN_KIND_SHIFT) as u8 {
        1 => Some(CompletionKind::Timer),
        2 => Some(CompletionKind::TimerRemove),
        3 => Some(CompletionKind::NotifySend),
        4 => Some(CompletionKind::Operation),
        5 => Some(CompletionKind::OperationCancel),
        _ => None,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{create_reactor, monotonic_now};
    use std::thread;
    use std::time::Duration;

    /// A bound sender ring can MSG_RING-wake a separate target reactor.
    #[test]
    fn notifier_wakes_target_ring() {
        let (sender, _) = create_reactor().expect("sender reactor should initialize");
        sender.bind_current_thread();

        let (target, notifier) = create_reactor().expect("target reactor should initialize");
        notifier.notify().expect("notify should succeed");

        // Spin-poll: delivery is asynchronous via the kernel.
        let ready = loop {
            if let Some(ready) = target.poll().expect("poll should succeed") {
                break ready;
            }
            thread::sleep(Duration::from_millis(1));
        };

        assert!(ready.wake);
        assert!(!ready.timer);
        assert_eq!(target.drain_wake().expect("wake drain should succeed"), 1);
        sender.unbind_current_thread();
    }

    /// A thread with no bound ring falls back to the global submitter.
    #[test]
    fn notifier_wakes_target_ring_from_plain_thread() {
        let (target, notifier) = create_reactor().expect("target reactor should initialize");

        thread::spawn(move || {
            notifier.notify().expect("notify should succeed");
        })
        .join()
        .expect("notifier thread should exit cleanly");

        let ready = loop {
            if let Some(ready) = target.poll().expect("poll should succeed") {
                break ready;
            }
            thread::sleep(Duration::from_millis(1));
        };

        assert!(ready.wake);
        assert!(!ready.timer);
        assert_eq!(target.drain_wake().expect("wake drain should succeed"), 1);
    }

    /// An armed absolute timer eventually reports a timer-ready poll batch.
    #[test]
    fn timeout_reports_deadlines() {
        let (reactor, _notifier) = create_reactor().expect("reactor should initialize");
        let deadline = monotonic_now().expect("clock should work") + Duration::from_millis(20);
        reactor
            .rearm_timer(Some(deadline))
            .expect("timer should arm");

        let ready = loop {
            if let Some(ready) = reactor.poll().expect("poll should succeed") {
                break ready;
            }
            thread::sleep(Duration::from_millis(5));
        };

        assert!(ready.timer);
        assert!(!ready.wake);
        assert_eq!(
            reactor.drain_timer().expect("timer drain should succeed"),
            1
        );
    }
}
|
||||
1067
lib/runtime/src/platform/linux_x86_64/runtime.rs
Normal file
1067
lib/runtime/src/platform/linux_x86_64/runtime.rs
Normal file
File diff suppressed because it is too large
Load Diff
478
lib/runtime/src/platform/linux_x86_64/uring.rs
Normal file
478
lib/runtime/src/platform/linux_x86_64/uring.rs
Normal file
@@ -0,0 +1,478 @@
|
||||
use std::cell::Cell;
|
||||
use std::io;
|
||||
use std::os::fd::RawFd;
|
||||
use std::ptr;
|
||||
use std::sync::atomic::{Ordering, compiler_fence};
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
use std::time::Duration;
|
||||
|
||||
// mmap offsets understood by the fd returned from io_uring_setup(2);
// values are fixed by the kernel ABI (include/uapi/linux/io_uring.h).
const IORING_OFF_SQ_RING: libc::off_t = 0;
const IORING_OFF_CQ_RING: libc::off_t = 0x0800_0000;
const IORING_OFF_SQES: libc::off_t = 0x1000_0000;

// io_uring_enter(2) flag: block until min_complete CQEs are available.
const IORING_ENTER_GETEVENTS: u32 = 1 << 0;
// io_uring_setup(2) flag: clamp oversized entry counts instead of failing.
const IORING_SETUP_CLAMP: u32 = 1 << 4;

// Feature bit: SQ and CQ rings share a single mmap region.
const IORING_FEAT_SINGLE_MMAP: u32 = 1 << 0;

// SQE opcode numbers (kernel ABI); only the subset this runtime submits.
pub(crate) const IORING_OP_FSYNC: u8 = 3;
pub(crate) const IORING_OP_TIMEOUT: u8 = 11;
pub(crate) const IORING_OP_TIMEOUT_REMOVE: u8 = 12;
pub(crate) const IORING_OP_ACCEPT: u8 = 13;
pub(crate) const IORING_OP_ASYNC_CANCEL: u8 = 14;
pub(crate) const IORING_OP_CONNECT: u8 = 16;
pub(crate) const IORING_OP_OPENAT: u8 = 18;
pub(crate) const IORING_OP_CLOSE: u8 = 19;
pub(crate) const IORING_OP_STATX: u8 = 21;
pub(crate) const IORING_OP_READ: u8 = 22;
pub(crate) const IORING_OP_WRITE: u8 = 23;
pub(crate) const IORING_OP_SEND: u8 = 26;
pub(crate) const IORING_OP_RECV: u8 = 27;
pub(crate) const IORING_OP_SHUTDOWN: u8 = 34;
pub(crate) const IORING_OP_RENAMEAT: u8 = 35;
pub(crate) const IORING_OP_UNLINKAT: u8 = 36;
pub(crate) const IORING_OP_MKDIRAT: u8 = 37;
pub(crate) const IORING_OP_MSG_RING: u8 = 40;
pub(crate) const IORING_OP_SOCKET: u8 = 45;
pub(crate) const IORING_OP_FTRUNCATE: u8 = 55;
pub(crate) const IORING_OP_BIND: u8 = 56;
pub(crate) const IORING_OP_LISTEN: u8 = 57;

// MSG_RING sub-command: deliver (user_data, res) to the target CQ.
const IORING_MSG_DATA: u64 = 0;
// Per-op flags used by this module.
pub(crate) const IORING_FSYNC_DATASYNC: u32 = 1 << 0;
pub(crate) const IORING_TIMEOUT_ABS: u32 = 1 << 0;
pub(crate) const IORING_TIMEOUT_UPDATE: u32 = 1 << 1;
// SQE flag: suppress the CQE when the operation succeeds.
pub(crate) const IOSQE_CQE_SKIP_SUCCESS: u8 = 1 << 6;
||||
|
||||
thread_local! {
    // Ring bound to the current runtime thread via bind_current_thread;
    // null while unbound.
    static CURRENT_SUBMITTER: Cell<*const IoUring> = const { Cell::new(ptr::null()) };
}

// Lazily created fallback ring for submissions from non-runtime threads.
static GLOBAL_SUBMITTER: OnceLock<Mutex<Option<IoUring>>> = OnceLock::new();
|
||||
|
||||
/// Kernel-ABI mirror of `struct io_sqring_offsets`: byte offsets of the SQ
/// ring's control words within the SQ ring mapping. Field order and sizes
/// must match the kernel exactly.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoSqringOffsets {
    head: u32,
    tail: u32,
    ring_mask: u32,
    ring_entries: u32,
    flags: u32,
    dropped: u32,
    array: u32,
    resv1: u32,
    user_addr: u64,
}
|
||||
|
||||
/// Kernel-ABI mirror of `struct io_cqring_offsets`: byte offsets of the CQ
/// ring's control words within the CQ ring mapping. Must match the kernel.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoCqringOffsets {
    head: u32,
    tail: u32,
    ring_mask: u32,
    ring_entries: u32,
    overflow: u32,
    cqes: u32,
    flags: u32,
    resv1: u32,
    user_addr: u64,
}
|
||||
|
||||
/// Kernel-ABI mirror of `struct io_uring_params`, passed to io_uring_setup;
/// the kernel fills in entry counts, feature bits, and ring offsets.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct IoUringParams {
    sq_entries: u32,
    cq_entries: u32,
    flags: u32,
    sq_thread_cpu: u32,
    sq_thread_idle: u32,
    features: u32,
    wq_fd: u32,
    resv: [u32; 3],
    sq_off: IoSqringOffsets,
    cq_off: IoCqringOffsets,
}
|
||||
|
||||
/// Kernel-ABI mirror of `struct io_uring_sqe` (64 bytes). Several kernel
/// unions are flattened to single fields here (`addr`, `op_flags`,
/// `file_index`); which member a field acts as depends on `opcode`.
#[repr(C)]
#[derive(Default, Clone, Copy)]
pub(crate) struct IoUringSqe {
    pub(crate) opcode: u8,
    pub(crate) flags: u8,
    pub(crate) ioprio: u16,
    pub(crate) fd: i32,
    pub(crate) off: u64,
    pub(crate) addr: u64,
    pub(crate) len: u32,
    pub(crate) op_flags: u32,
    // Echoed back verbatim in the matching CQE.
    pub(crate) user_data: u64,
    pub(crate) buf_index: u16,
    pub(crate) personality: u16,
    pub(crate) file_index: i32,
    pub(crate) pad2: [u64; 2],
}
|
||||
|
||||
/// Kernel-ABI mirror of `struct io_uring_cqe`.
#[repr(C)]
#[derive(Default, Clone, Copy)]
pub(crate) struct IoUringCqe {
    // Copied from the originating SQE's user_data.
    pub(crate) user_data: u64,
    // Result: >= 0 on success, negated errno on failure.
    pub(crate) res: i32,
    pub(crate) flags: u32,
}
|
||||
|
||||
/// Kernel-ABI mirror of `struct __kernel_timespec` (i64 fields regardless of
/// libc's timespec layout), used by the TIMEOUT opcodes.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct KernelTimespec {
    tv_sec: i64,
    tv_nsec: i64,
}
|
||||
|
||||
/// Owned io_uring instance: the ring fd plus the three mmapped regions and
/// cached raw pointers into the kernel-shared control words.
///
/// All pointer fields reference memory that stays mapped until `Drop`.
pub(crate) struct IoUring {
    ring_fd: RawFd,
    sq_ring_ptr: *mut u8,
    cq_ring_ptr: *mut u8,
    sqes_ptr: *mut IoUringSqe,
    // Mapping lengths, needed again at munmap time.
    sq_ring_size: usize,
    cq_ring_size: usize,
    sqes_size: usize,
    // When set, sq_ring_ptr == cq_ring_ptr (IORING_FEAT_SINGLE_MMAP).
    single_mmap: bool,
    // SQ control words (kernel-shared).
    sq_head: *mut u32,
    sq_tail: *mut u32,
    sq_ring_mask: *mut u32,
    sq_ring_entries: *mut u32,
    sq_array: *mut u32,
    // CQ control words (kernel-shared).
    cq_head: *mut u32,
    cq_tail: *mut u32,
    cq_ring_mask: *mut u32,
    cqes: *mut IoUringCqe,
}
|
||||
|
||||
impl IoUring {
    /// Creates a ring with up to `entries` SQ slots and maps its SQ ring,
    /// CQ ring, and SQE array into this process.
    ///
    /// `IORING_SETUP_CLAMP` lets the kernel shrink an oversized `entries`
    /// instead of failing. With `IORING_FEAT_SINGLE_MMAP` the SQ and CQ
    /// rings share one mapping sized to the larger of the two.
    ///
    /// NOTE(review): if a later mmap fails, the earlier mapping(s) and the
    /// ring fd leak; tolerable for a startup-only path, but worth confirming.
    pub(crate) fn new(entries: u32) -> io::Result<Self> {
        let mut params = IoUringParams {
            flags: IORING_SETUP_CLAMP,
            ..IoUringParams::default()
        };

        // SAFETY: io_uring_setup reads and writes `params`, a valid
        // #[repr(C)] struct that lives for the whole call.
        let ring_fd = cvt_long(unsafe {
            libc::syscall(
                libc::SYS_io_uring_setup,
                entries as libc::c_uint,
                &mut params as *mut IoUringParams,
            )
        })? as RawFd;

        // Sizes come from the kernel-filled offsets and entry counts, not
        // from the requested `entries` (which may have been clamped).
        let sq_ring_size =
            params.sq_off.array as usize + params.sq_entries as usize * std::mem::size_of::<u32>();
        let cq_ring_size = params.cq_off.cqes as usize
            + params.cq_entries as usize * std::mem::size_of::<IoUringCqe>();
        let single_mmap = params.features & IORING_FEAT_SINGLE_MMAP != 0;

        let sq_ring_ptr = mmap_ring(
            if single_mmap {
                sq_ring_size.max(cq_ring_size)
            } else {
                sq_ring_size
            },
            ring_fd,
            IORING_OFF_SQ_RING,
        )?;
        let cq_ring_ptr = if single_mmap {
            sq_ring_ptr
        } else {
            mmap_ring(cq_ring_size, ring_fd, IORING_OFF_CQ_RING)?
        };
        let sqes_size = params.sq_entries as usize * std::mem::size_of::<IoUringSqe>();
        let sqes_ptr = mmap_ring(sqes_size, ring_fd, IORING_OFF_SQES)? as *mut IoUringSqe;

        Ok(Self {
            ring_fd,
            sq_ring_ptr,
            cq_ring_ptr,
            sqes_ptr,
            sq_ring_size,
            cq_ring_size,
            sqes_size,
            single_mmap,
            // Cache direct pointers to every kernel-shared control word.
            sq_head: offset_ptr(sq_ring_ptr, params.sq_off.head),
            sq_tail: offset_ptr(sq_ring_ptr, params.sq_off.tail),
            sq_ring_mask: offset_ptr(sq_ring_ptr, params.sq_off.ring_mask),
            sq_ring_entries: offset_ptr(sq_ring_ptr, params.sq_off.ring_entries),
            sq_array: offset_ptr(sq_ring_ptr, params.sq_off.array),
            cq_head: offset_ptr(cq_ring_ptr, params.cq_off.head),
            cq_tail: offset_ptr(cq_ring_ptr, params.cq_off.tail),
            cq_ring_mask: offset_ptr(cq_ring_ptr, params.cq_off.ring_mask),
            cqes: offset_ptr(cq_ring_ptr, params.cq_off.cqes),
        })
    }

    /// The ring's file descriptor (used as a MSG_RING destination by peers).
    pub(crate) fn ring_fd(&self) -> RawFd {
        self.ring_fd
    }

    /// Makes this ring the submitter for the current thread.
    pub(crate) fn bind_current_thread(&self) {
        CURRENT_SUBMITTER.with(|submitter| submitter.set(self as *const Self));
    }

    /// Clears the thread binding, but only if it still points at `self`.
    pub(crate) fn unbind_current_thread(&self) {
        CURRENT_SUBMITTER.with(|submitter| {
            if ptr::eq(submitter.get(), self) {
                submitter.set(ptr::null());
            }
        });
    }

    /// Runs `f` with a submitter: the thread-bound ring when present,
    /// otherwise a lazily created, mutex-guarded global fallback ring.
    pub(crate) fn with_submitter<T>(f: impl FnOnce(&IoUring) -> io::Result<T>) -> io::Result<T> {
        CURRENT_SUBMITTER.with(|submitter| {
            let ptr = submitter.get();
            if !ptr.is_null() {
                // SAFETY: set by bind_current_thread; presumed cleared before
                // the ring is dropped. NOTE(review): relies on callers
                // pairing bind/unbind correctly -- confirm.
                let ring = unsafe { &*ptr };
                return f(ring);
            }

            let mut ring = global_submitter()
                .lock()
                .expect("global io_uring submitter should not be poisoned");
            if ring.is_none() {
                *ring = Some(IoUring::new(64)?);
            }

            f(ring
                .as_ref()
                .expect("global submitter ring should initialize"))
        })
    }

    /// Arms an absolute-deadline timeout whose CQE carries `token`.
    ///
    /// The timespec is stack-local; the kernel copies it while preparing the
    /// SQE inside io_uring_enter, which `submit_pending` below invokes before
    /// this function returns -- so the borrow never outlives the value.
    pub(crate) fn submit_timeout(&self, token: u64, deadline: Duration) -> io::Result<()> {
        let timespec = duration_to_kernel_timespec(deadline);
        self.push_sqe(|sqe| {
            sqe.opcode = IORING_OP_TIMEOUT;
            sqe.fd = -1;
            sqe.off = 0;
            sqe.user_data = token;
            sqe.addr = (&timespec as *const KernelTimespec) as u64;
            // len = 1: fire after one event (i.e. purely time-based here).
            sqe.len = 1;
            sqe.op_flags = IORING_TIMEOUT_ABS;
        })?;
        self.submit_pending().map(|_| ())
    }

    /// Removes the timeout whose user_data equals `token_to_remove`.
    /// The ack CQE (tagged `completion`) is suppressed on success.
    pub(crate) fn submit_timeout_remove(
        &self,
        token_to_remove: u64,
        completion: u64,
    ) -> io::Result<()> {
        self.push_sqe(|sqe| {
            sqe.opcode = IORING_OP_TIMEOUT_REMOVE;
            sqe.fd = -1;
            sqe.flags = IOSQE_CQE_SKIP_SUCCESS;
            sqe.user_data = completion;
            sqe.addr = token_to_remove;
        })?;
        self.submit_pending().map(|_| ())
    }

    /// Re-arms an existing timeout to a new absolute deadline.
    ///
    /// Per the kernel API, updates use the TIMEOUT_REMOVE opcode with the
    /// UPDATE flag: `addr` identifies the timeout, `off` points at the new
    /// timespec (copied during the same io_uring_enter as above).
    pub(crate) fn submit_timeout_update(
        &self,
        token_to_update: u64,
        deadline: Duration,
    ) -> io::Result<()> {
        let timespec = duration_to_kernel_timespec(deadline);
        self.push_sqe(|sqe| {
            sqe.opcode = IORING_OP_TIMEOUT_REMOVE;
            sqe.fd = -1;
            sqe.off = (&timespec as *const KernelTimespec) as u64;
            sqe.addr = token_to_update;
            sqe.op_flags = IORING_TIMEOUT_UPDATE | IORING_TIMEOUT_ABS;
        })?;
        self.submit_pending().map(|_| ())
    }

    /// Posts a CQE (`target_user_data`, res = `value`) onto another ring --
    /// the cross-thread wake primitive. The local ack CQE (tagged
    /// `completion`) is suppressed on success.
    pub(crate) fn submit_msg_ring(
        &self,
        target_ring_fd: RawFd,
        target_user_data: u64,
        value: u32,
        completion: u64,
    ) -> io::Result<()> {
        self.push_sqe(|sqe| {
            sqe.opcode = IORING_OP_MSG_RING;
            sqe.flags = IOSQE_CQE_SKIP_SUCCESS;
            sqe.fd = target_ring_fd;
            sqe.off = target_user_data;
            sqe.addr = IORING_MSG_DATA;
            sqe.len = value;
            sqe.user_data = completion;
        })?;
        self.submit_pending().map(|_| ())
    }

    /// Fills one SQE via `fill`, stamps `token` as its user_data (after
    /// `fill`, so it cannot be overridden), and submits immediately.
    pub(crate) fn submit_with_token(
        &self,
        token: u64,
        fill: impl FnOnce(&mut IoUringSqe),
    ) -> io::Result<()> {
        self.push_sqe(|sqe| {
            fill(sqe);
            sqe.user_data = token;
        })?;
        self.submit_pending().map(|_| ())
    }

    /// Invokes `f` for every available CQE and advances the CQ head.
    /// Returns whether any completion was seen. Non-blocking.
    pub(crate) fn drain_completions(&self, mut f: impl FnMut(IoUringCqe)) -> bool {
        let mut head = load_u32(self.cq_head);
        let tail = load_u32(self.cq_tail);
        if head == tail {
            return false;
        }
        let mask = load_u32(self.cq_ring_mask);

        while head != tail {
            let index = (head & mask) as usize;
            // SAFETY: index is masked into the mapped CQE array; the kernel
            // published entries up to `tail` before updating it.
            let cqe = unsafe { ptr::read_volatile(self.cqes.add(index)) };
            f(cqe);
            head = head.wrapping_add(1);
        }

        // Publishing the new head tells the kernel these slots are free.
        store_u32(self.cq_head, head);
        true
    }

    /// Blocks in io_uring_enter until at least one CQE is available,
    /// retrying transparently on EINTR.
    pub(crate) fn wait_for_cqe(&self) -> io::Result<()> {
        loop {
            match self.enter(0, 1, IORING_ENTER_GETEVENTS) {
                Ok(_) => return Ok(()),
                Err(error) if error.kind() == io::ErrorKind::Interrupted => continue,
                Err(error) => return Err(error),
            }
        }
    }

    /// Reserves the next SQ slot, zeroes it, lets `fill` populate it, and
    /// publishes the new tail. If the queue is full, one flush is attempted
    /// before giving up with `WouldBlock`.
    fn push_sqe(&self, fill: impl FnOnce(&mut IoUringSqe)) -> io::Result<()> {
        let head = load_u32(self.sq_head);
        let tail = load_u32(self.sq_tail);
        let entries = load_u32(self.sq_ring_entries);
        if tail.wrapping_sub(head) >= entries {
            // Queue full: submitting lets the kernel consume entries and
            // advance head (non-SQPOLL rings consume synchronously in enter).
            self.submit_pending()?;
            let head = load_u32(self.sq_head);
            let tail = load_u32(self.sq_tail);
            if tail.wrapping_sub(head) >= entries {
                return Err(io::Error::new(
                    io::ErrorKind::WouldBlock,
                    "io_uring submission queue is full",
                ));
            }
        }

        let tail = load_u32(self.sq_tail);
        let mask = load_u32(self.sq_ring_mask);
        let index = (tail & mask) as usize;
        // SAFETY: index is masked into the mapped SQE array, and the slot is
        // ours exclusively until the tail is published below.
        let sqe = unsafe { &mut *self.sqes_ptr.add(index) };
        // Start from a zeroed SQE so stale fields never leak between ops.
        *sqe = IoUringSqe::default();
        fill(sqe);
        unsafe {
            // The indirection array maps ring slot -> SQE index (identity here).
            ptr::write_volatile(self.sq_array.add(index), index as u32);
        }
        // The SQE and array entry must be visible before the tail moves.
        compiler_fence(Ordering::Release);
        store_u32(self.sq_tail, tail.wrapping_add(1));
        Ok(())
    }

    /// Submits everything between SQ head and tail; no-op when empty.
    fn submit_pending(&self) -> io::Result<u32> {
        let head = load_u32(self.sq_head);
        let tail = load_u32(self.sq_tail);
        let to_submit = tail.wrapping_sub(head);
        if to_submit == 0 {
            return Ok(0);
        }
        self.enter(to_submit, 0, 0)
    }

    /// Thin io_uring_enter(2) wrapper; returns the number of SQEs consumed.
    fn enter(&self, to_submit: u32, min_complete: u32, flags: u32) -> io::Result<u32> {
        // SAFETY: plain syscall on our owned ring fd; the null/0 tail args
        // are the "no sigset" form of io_uring_enter.
        cvt_long(unsafe {
            libc::syscall(
                libc::SYS_io_uring_enter,
                self.ring_fd,
                to_submit as libc::c_uint,
                min_complete as libc::c_uint,
                flags as libc::c_uint,
                ptr::null::<libc::c_void>(),
                0usize,
            )
        })
        .map(|value| value as u32)
    }
}
|
||||
|
||||
impl Drop for IoUring {
    // Unmap the SQE array and ring region(s) -- one shared mapping when
    // single_mmap, two otherwise -- then close the ring fd. munmap/close
    // errors are unreportable in drop and are ignored.
    fn drop(&mut self) {
        // SAFETY: pointers and sizes are exactly those returned by mmap in
        // `new`, and nothing uses them after drop.
        unsafe {
            libc::munmap(self.sqes_ptr.cast(), self.sqes_size);
            if self.single_mmap {
                libc::munmap(
                    self.sq_ring_ptr.cast(),
                    self.sq_ring_size.max(self.cq_ring_size),
                );
            } else {
                libc::munmap(self.sq_ring_ptr.cast(), self.sq_ring_size);
                libc::munmap(self.cq_ring_ptr.cast(), self.cq_ring_size);
            }
            libc::close(self.ring_fd);
        }
    }
}
|
||||
|
||||
// SAFETY: the raw pointers are auto-!Send, but they reference mappings owned
// by this value, and the API takes &self / &mut self without interior
// aliasing across threads -- NOTE(review): soundness also relies on callers
// not submitting from two threads at once; confirm the runtime guarantees
// single-threaded use per ring (it is deliberately not Sync).
unsafe impl Send for IoUring {}
|
||||
|
||||
/// Typed pointer at `base + offset` bytes.
///
/// Callers must pass an `offset` that stays inside the mapping `base` points
/// at (the kernel-provided ring offsets satisfy this); the add itself is
/// unchecked.
fn offset_ptr<T>(base: *mut u8, offset: u32) -> *mut T {
    unsafe { base.add(offset as usize).cast::<T>() }
}
|
||||
|
||||
/// Maps `length` bytes of the ring fd at the given io_uring mmap offset.
///
/// MAP_SHARED is required (the memory is shared with the kernel);
/// MAP_POPULATE pre-faults the pages so the hot path never page-faults.
///
/// # Errors
/// Last OS error when mmap fails.
fn mmap_ring(length: usize, fd: RawFd, offset: libc::off_t) -> io::Result<*mut u8> {
    // SAFETY: plain mmap call; all arguments are well-formed and the fd is a
    // live io_uring fd that supports these fixed offsets.
    let ptr = unsafe {
        libc::mmap(
            ptr::null_mut(),
            length,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_SHARED | libc::MAP_POPULATE,
            fd,
            offset,
        )
    };
    if ptr == libc::MAP_FAILED {
        Err(io::Error::last_os_error())
    } else {
        Ok(ptr.cast())
    }
}
|
||||
|
||||
/// Loads a ring control word (head/tail/mask) that the kernel updates
/// concurrently.
///
/// Uses an acquire atomic load instead of the previous `read_volatile` +
/// `compiler_fence` pair: volatile accesses provide no inter-thread
/// synchronization under the Rust memory model, whereas an `AtomicU32`
/// acquire load is the correct primitive for kernel-shared ring words and
/// compiles to the same plain `mov` on x86_64 -- same cost, defined behavior,
/// and it also orders the subsequent CQE/SQE reads.
fn load_u32(ptr: *const u32) -> u32 {
    use std::sync::atomic::AtomicU32;
    // SAFETY: callers pass a pointer into the live mapped ring region;
    // `u32` and `AtomicU32` have identical size and alignment, and atomic
    // access is precisely what shared kernel/user words require.
    unsafe { (*ptr.cast::<AtomicU32>()).load(Ordering::Acquire) }
}
|
||||
|
||||
/// Volatile store of a shared ring index; the release compiler fence keeps
/// earlier SQE writes from being reordered past the index publication.
///
/// NOTE(review): as with `load_u32`, ordering leans on x86_64 store semantics
/// rather than hardware fences or atomics.
fn store_u32(ptr: *mut u32, value: u32) {
    compiler_fence(Ordering::Release);
    // SAFETY: callers pass pointers into a live, kernel-shared ring mapping.
    unsafe {
        ptr::write_volatile(ptr, value);
    }
}
|
||||
|
||||
/// Translate a raw `syscall(2)`-style return value into an `io::Result`,
/// mapping the conventional `-1` sentinel to the current `errno`.
fn cvt_long(result: libc::c_long) -> io::Result<libc::c_long> {
    match result {
        -1 => Err(io::Error::last_os_error()),
        value => Ok(value),
    }
}
|
||||
|
||||
/// Lazily-initialised process-wide slot holding the shared `IoUring`
/// submitter. Starts out `None`; callers populate it on first use.
///
/// NOTE(review): `GLOBAL_SUBMITTER` is declared outside this view —
/// presumably a `OnceLock<Mutex<Option<IoUring>>>` given `get_or_init`.
fn global_submitter() -> &'static Mutex<Option<IoUring>> {
    GLOBAL_SUBMITTER.get_or_init(|| Mutex::new(None))
}
|
||||
|
||||
fn duration_to_kernel_timespec(duration: Duration) -> KernelTimespec {
|
||||
KernelTimespec {
|
||||
tv_sec: duration.as_secs() as i64,
|
||||
tv_nsec: duration.subsec_nanos() as i64,
|
||||
}
|
||||
}
|
||||
2
lib/runtime/src/platform/mod.rs
Normal file
2
lib/runtime/src/platform/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub mod linux_x86_64;
|
||||
10
lib/runtime/src/sys/linux/channel.rs
Normal file
10
lib/runtime/src/sys/linux/channel.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
//! Linux channel wake helpers.
|
||||
|
||||
use crate::op::completion::{CompletionFuture, CompletionHandle, completion};
|
||||
use crate::platform::linux_x86_64::runtime::try_current_thread_handle;
|
||||
|
||||
/// Build a completion future/handle pair owned by the current runtime thread,
/// used to park a channel operation until its counterpart wakes it.
///
/// Panics when called off a runtime thread — that is a programmer error for
/// async channel operations.
pub(crate) fn runtime_waiter<T: Send + 'static>() -> (CompletionFuture<T>, CompletionHandle<T>) {
    let owner = try_current_thread_handle()
        .expect("async channel operations must be polled on a runtime thread");
    completion(owner)
}
|
||||
586
lib/runtime/src/sys/linux/fs.rs
Normal file
586
lib/runtime/src/sys/linux/fs.rs
Normal file
@@ -0,0 +1,586 @@
|
||||
//! Linux filesystem backend.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::ffi::CString;
|
||||
use std::future::poll_fn;
|
||||
use std::io;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::os::fd::{FromRawFd, OwnedFd, RawFd};
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll, Waker};
|
||||
use std::thread;
|
||||
|
||||
use crate::op::completion::completion_for_current_thread;
|
||||
use crate::op::fs::{FileType, FsOp, MetadataTarget, OpenOptions, RawDirEntry, RawMetadata};
|
||||
use crate::platform::linux_x86_64::runtime::{
|
||||
ThreadHandle, current_thread_handle, with_current_reactor,
|
||||
};
|
||||
use crate::platform::linux_x86_64::uring::{
|
||||
IORING_FSYNC_DATASYNC, IORING_OP_CLOSE, IORING_OP_FSYNC, IORING_OP_FTRUNCATE,
|
||||
IORING_OP_MKDIRAT, IORING_OP_OPENAT, IORING_OP_READ, IORING_OP_RENAMEAT, IORING_OP_STATX,
|
||||
IORING_OP_UNLINKAT, IORING_OP_WRITE, IoUringCqe,
|
||||
};
|
||||
|
||||
const STATX_BASIC_MASK: u32 =
|
||||
libc::STATX_TYPE | libc::STATX_MODE | libc::STATX_SIZE | libc::STATX_NLINK;
|
||||
const FILE_CURSOR: u64 = u64::MAX;
|
||||
|
||||
/// How a filesystem operation is carried out by this backend.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExecutionPath {
    /// Submitted as an SQE on the current thread's io_uring reactor.
    IoUring,
    /// Executed synchronously on a helper thread.
    Offload,
}
|
||||
|
||||
pub fn execution_path(op: &FsOp) -> ExecutionPath {
|
||||
match op {
|
||||
FsOp::ReadDir { .. } | FsOp::Duplicate { .. } => ExecutionPath::Offload,
|
||||
FsOp::Open { .. }
|
||||
| FsOp::Read { .. }
|
||||
| FsOp::Write { .. }
|
||||
| FsOp::Metadata { .. }
|
||||
| FsOp::SetLen { .. }
|
||||
| FsOp::SyncAll { .. }
|
||||
| FsOp::SyncData { .. }
|
||||
| FsOp::CreateDir { .. }
|
||||
| FsOp::RemoveFile { .. }
|
||||
| FsOp::RemoveDir { .. }
|
||||
| FsOp::Rename { .. }
|
||||
| FsOp::Close { .. } => ExecutionPath::IoUring,
|
||||
}
|
||||
}
|
||||
|
||||
/// Open a file via IORING_OP_OPENAT, resolved relative to the current
/// working directory (`AT_FDCWD`).
pub async fn open(op: FsOp) -> io::Result<OwnedFd> {
    let FsOp::Open { path, options } = op else {
        unreachable!("open backend called with non-open op");
    };

    let path = path_to_c_string(&path)?;
    let path_ptr = path.as_ptr();
    let (flags, mode) = open_flags(&options)?;
    submit_uring::<OwnedFd, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_OPENAT;
            sqe.fd = libc::AT_FDCWD;
            sqe.addr = path_ptr as u64;
            // openat SQE layout: `len` carries the create mode,
            // `op_flags` the O_* open flags.
            sqe.len = mode;
            sqe.op_flags = flags as u32;
        },
        move |cqe| {
            // The CString is moved into the completion closure so the
            // pointer handed to the kernel stays valid until the CQE arrives.
            let _path = path;
            cqe_to_result(cqe).map(|fd| unsafe { OwnedFd::from_raw_fd(fd as RawFd) })
        },
    )
    .await
}
|
||||
|
||||
/// Read up to `len` bytes via IORING_OP_READ and return exactly the bytes
/// the kernel produced (the buffer is truncated to the CQE result).
pub async fn read(op: FsOp) -> io::Result<Vec<u8>> {
    let FsOp::Read { fd, offset, len } = op else {
        unreachable!("read backend called with non-read op");
    };

    let mut buffer = vec![0; len];
    let buffer_ptr = buffer.as_mut_ptr();
    let buffer_len = buffer.len();
    submit_uring::<Vec<u8>, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_READ;
            sqe.fd = fd;
            sqe.addr = buffer_ptr as u64;
            sqe.len = buffer_len as u32;
            // FILE_CURSOR (u64::MAX == offset -1) tells the kernel to read at
            // the file's current position.
            sqe.off = offset.unwrap_or(FILE_CURSOR);
        },
        move |cqe| {
            // `buffer` was moved into this closure, keeping its heap storage
            // (and thus `buffer_ptr`) alive for the duration of the op.
            let read = cqe_to_result(cqe)? as usize;
            buffer.truncate(read);
            Ok(buffer)
        },
    )
    .await
}
|
||||
|
||||
/// Write `data` via IORING_OP_WRITE; returns the number of bytes the kernel
/// actually wrote (may be short).
pub async fn write(op: FsOp) -> io::Result<usize> {
    let FsOp::Write { fd, offset, data } = op else {
        unreachable!("write backend called with non-write op");
    };
    let data_ptr = data.as_ptr();
    let data_len = data.len();

    submit_uring::<usize, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_WRITE;
            sqe.fd = fd;
            sqe.addr = data_ptr as u64;
            sqe.len = data_len as u32;
            // FILE_CURSOR (offset -1): write at the file's current position.
            sqe.off = offset.unwrap_or(FILE_CURSOR);
        },
        move |cqe| {
            // `data` is moved here so the source buffer outlives the kernel op.
            let _data = data;
            cqe_to_result(cqe).map(|written| written as usize)
        },
    )
    .await
}
|
||||
|
||||
/// Fetch metadata via IORING_OP_STATX for either a path or an open fd.
///
/// For `MetadataTarget::File` an empty path with AT_EMPTY_PATH makes statx
/// operate on the fd itself; `follow_symlinks` only applies to path targets.
pub async fn metadata(op: FsOp) -> io::Result<RawMetadata> {
    let FsOp::Metadata {
        target,
        follow_symlinks,
    } = op
    else {
        unreachable!("metadata backend called with non-metadata op");
    };

    // Boxed so the output struct has a stable heap address while the kernel
    // writes into it; zeroed because statx only fills the requested fields.
    let mut statx = Box::new(MaybeUninit::<libc::statx>::zeroed());
    let statx_ptr = statx.as_mut_ptr();
    let (fd, path, flags) = match target {
        MetadataTarget::Path(path) => (
            libc::AT_FDCWD,
            path_to_c_string(&path)?,
            metadata_flags(follow_symlinks),
        ),
        MetadataTarget::File(fd) => (
            fd,
            CString::new(Vec::<u8>::new()).expect("empty statx path should be valid"),
            libc::AT_EMPTY_PATH,
        ),
    };
    let path_ptr = path.as_ptr();

    submit_uring::<RawMetadata, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_STATX;
            sqe.fd = fd;
            sqe.addr = path_ptr as u64;
            // statx SQE layout: `len` is the STATX_* request mask, `off` the
            // user pointer receiving the struct.
            sqe.len = STATX_BASIC_MASK;
            sqe.off = statx_ptr as u64;
            sqe.op_flags = flags as u32;
        },
        move |cqe| {
            // `path` and `statx` are moved here so both kernel-visible
            // pointers stay valid until completion.
            let _path = path;
            cqe_to_result(cqe)?;
            // SAFETY: the kernel reported success, so the requested statx
            // fields have been written.
            let statx = unsafe { statx.assume_init() };
            Ok(raw_metadata_from_statx(&statx))
        },
    )
    .await
}
|
||||
|
||||
pub async fn sync_all(op: FsOp) -> io::Result<()> {
|
||||
let FsOp::SyncAll { fd } = op else {
|
||||
unreachable!("sync_all backend called with non-sync_all op");
|
||||
};
|
||||
|
||||
submit_sync(fd, 0).await
|
||||
}
|
||||
|
||||
pub async fn sync_data(op: FsOp) -> io::Result<()> {
|
||||
let FsOp::SyncData { fd } = op else {
|
||||
unreachable!("sync_data backend called with non-sync_data op");
|
||||
};
|
||||
|
||||
submit_sync(fd, IORING_FSYNC_DATASYNC).await
|
||||
}
|
||||
|
||||
/// Truncate or extend an open file to exactly `len` bytes via
/// IORING_OP_FTRUNCATE.
pub async fn set_len(op: FsOp) -> io::Result<()> {
    let FsOp::SetLen { fd, len } = op else {
        unreachable!("set_len backend called with non-set_len op");
    };

    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_FTRUNCATE;
            sqe.fd = fd;
            // ftruncate SQE layout: `off` carries the target length.
            sqe.off = len;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
}
|
||||
|
||||
/// Duplicate an fd with `F_DUPFD_CLOEXEC`. There is no uring opcode for dup,
/// so this always runs on an offload thread.
pub async fn try_clone(op: FsOp) -> io::Result<OwnedFd> {
    let FsOp::Duplicate { fd } = op else {
        unreachable!("try_clone backend called with non-duplicate op");
    };

    offload(move || {
        // SAFETY: fcntl(F_DUPFD_CLOEXEC) is a plain syscall on an fd owned by
        // the caller; on success the new fd is exclusively ours.
        let duplicated = cvt(unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 0) })?;
        Ok(unsafe { OwnedFd::from_raw_fd(duplicated) })
    })
    .await
}
|
||||
|
||||
/// Create a single directory via IORING_OP_MKDIRAT.
///
/// NOTE(review): the `recursive` flag is destructured and ignored here —
/// presumably parent-directory creation is expanded by the caller before the
/// op reaches this backend; confirm, otherwise recursive creates silently
/// degrade to a single mkdirat.
pub async fn create_dir(op: FsOp) -> io::Result<()> {
    let FsOp::CreateDir {
        path,
        recursive: _,
        mode,
    } = op
    else {
        unreachable!("create_dir backend called with non-create_dir op");
    };

    let path = path_to_c_string(&path)?;
    let path_ptr = path.as_ptr();
    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_MKDIRAT;
            sqe.fd = libc::AT_FDCWD;
            sqe.addr = path_ptr as u64;
            // mkdirat SQE layout: `len` carries the directory mode.
            sqe.len = mode;
        },
        move |cqe| {
            // Keep the CString alive until the kernel is done with the pointer.
            let _path = path;
            cqe_to_result(cqe).map(|_| ())
        },
    )
    .await
}
|
||||
|
||||
pub async fn remove_file(op: FsOp) -> io::Result<()> {
|
||||
let FsOp::RemoveFile { path } = op else {
|
||||
unreachable!("remove_file backend called with non-remove_file op");
|
||||
};
|
||||
|
||||
submit_unlink(path, 0).await
|
||||
}
|
||||
|
||||
pub async fn remove_dir(op: FsOp) -> io::Result<()> {
|
||||
let FsOp::RemoveDir { path } = op else {
|
||||
unreachable!("remove_dir backend called with non-remove_dir op");
|
||||
};
|
||||
|
||||
submit_unlink(path, libc::AT_REMOVEDIR).await
|
||||
}
|
||||
|
||||
/// Rename a path via IORING_OP_RENAMEAT, both ends relative to the current
/// working directory.
pub async fn rename(op: FsOp) -> io::Result<()> {
    let FsOp::Rename { from, to } = op else {
        unreachable!("rename backend called with non-rename op");
    };

    let from = path_to_c_string(&from)?;
    let to = path_to_c_string(&to)?;
    let from_ptr = from.as_ptr();
    let to_ptr = to.as_ptr();
    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_RENAMEAT;
            // renameat SQE layout: fd/addr are the old dirfd/path, len/off the
            // new dirfd/path; op_flags carries the RENAME_* flags (none here).
            sqe.fd = libc::AT_FDCWD;
            sqe.addr = from_ptr as u64;
            sqe.len = libc::AT_FDCWD as u32;
            sqe.off = to_ptr as u64;
            sqe.op_flags = 0;
        },
        move |cqe| {
            // Both CStrings are moved here so the pointers stay valid until
            // the kernel completes the op.
            let _from = from;
            let _to = to;
            cqe_to_result(cqe).map(|_| ())
        },
    )
    .await
}
|
||||
|
||||
/// Close an fd via IORING_OP_CLOSE. The caller must have relinquished
/// ownership of `fd` — after this completes the descriptor number may be
/// reused by the kernel.
pub async fn close(op: FsOp) -> io::Result<()> {
    let FsOp::Close { fd } = op else {
        unreachable!("close backend called with non-close op");
    };

    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_CLOSE;
            sqe.fd = fd;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
}
|
||||
|
||||
pub fn read_dir(op: FsOp) -> io::Result<ReadDirStream> {
|
||||
let FsOp::ReadDir { path } = op else {
|
||||
unreachable!("read_dir backend called with non-read_dir op");
|
||||
};
|
||||
|
||||
ReadDirStream::new(path)
|
||||
}
|
||||
|
||||
/// Async handle over a directory listing produced by a background thread.
pub struct ReadDirStream {
    // Shared with the producer thread; holds the entry queue and wake state.
    state: Arc<ReadDirState>,
}
|
||||
|
||||
impl ReadDirStream {
    /// Spawn the producer thread and return the consumer handle.
    ///
    /// The state is created against the current runtime thread so wake-ups
    /// are delivered as microtasks on that thread.
    fn new(path: PathBuf) -> io::Result<Self> {
        let state = Arc::new(ReadDirState::new(current_thread_handle()));
        let producer = Arc::clone(&state);

        thread::Builder::new()
            .name("ruin-runtime-read-dir".into())
            .spawn(move || produce_dir_entries(path, producer))
            .map_err(io::Error::other)?;

        Ok(Self { state })
    }

    /// Next entry, `Ok(None)` once the listing is exhausted. Per-entry errors
    /// from the underlying `read_dir` are surfaced in order.
    pub async fn next_entry(&mut self) -> io::Result<Option<RawDirEntry>> {
        poll_fn(|cx| self.state.poll_next(cx)).await
    }
}
|
||||
|
||||
/// State shared between the directory-reading thread and the async consumer.
struct ReadDirState {
    // Runtime thread that polls the stream; wake-ups are queued onto it.
    owner: ThreadHandle,
    // Entries (or per-entry errors) produced so far, FIFO.
    queue: Mutex<VecDeque<io::Result<RawDirEntry>>>,
    // Set once the producer has emitted everything.
    done: AtomicBool,
    // True while the stream counts as an outstanding async op on `owner`.
    pending: AtomicBool,
    // Coalesces wake microtasks: only one may be queued at a time.
    wake_queued: AtomicBool,
    // Waker of the consumer currently parked in `poll_next`, if any.
    waker: Mutex<Option<Waker>>,
}
|
||||
|
||||
impl ReadDirState {
    /// Register the stream as an outstanding async operation on `owner`
    /// (balanced by `release_pending`) and initialise empty queue/flags.
    fn new(owner: ThreadHandle) -> Self {
        owner.begin_async_operation();
        Self {
            owner,
            queue: Mutex::new(VecDeque::new()),
            done: AtomicBool::new(false),
            pending: AtomicBool::new(true),
            wake_queued: AtomicBool::new(false),
            waker: Mutex::new(None),
        }
    }

    /// Producer side: enqueue one entry (or error) and wake the consumer.
    fn push(self: &Arc<Self>, entry: io::Result<RawDirEntry>) {
        self.queue.lock().unwrap().push_back(entry);
        self.notify();
    }

    /// Producer side: mark the listing finished, drop the async-op count,
    /// and wake the consumer one last time.
    fn finish(self: &Arc<Self>) {
        self.done.store(true, Ordering::Release);
        self.release_pending();
        self.notify();
    }

    /// Balance `begin_async_operation` exactly once, whether triggered by
    /// `finish` or by the stream being dropped early (the swap makes this
    /// idempotent).
    fn release_pending(&self) {
        if self.pending.swap(false, Ordering::AcqRel) {
            self.owner.finish_async_operation();
        }
    }

    /// Queue a wake microtask on the owner thread, coalescing duplicates via
    /// `wake_queued`. If the owner refuses the microtask, the flag is reset
    /// so a later notify can retry.
    fn notify(self: &Arc<Self>) {
        if self.wake_queued.swap(true, Ordering::AcqRel) {
            return;
        }

        let state = Arc::clone(self);
        if !self.owner.queue_microtask(move || {
            state.wake_queued.store(false, Ordering::Release);
            if let Some(waker) = state.waker.lock().unwrap().take() {
                waker.wake();
            }
        }) {
            self.wake_queued.store(false, Ordering::Release);
        }
    }

    /// Consumer side. The double-check after installing the waker closes the
    /// race where the producer pushes/finishes between the first queue check
    /// and the waker registration.
    fn poll_next(&self, cx: &mut Context<'_>) -> Poll<io::Result<Option<RawDirEntry>>> {
        if let Some(entry) = self.queue.lock().unwrap().pop_front() {
            return Poll::Ready(entry.map(Some));
        }

        if self.done.load(Ordering::Acquire) {
            return Poll::Ready(Ok(None));
        }

        *self.waker.lock().unwrap() = Some(cx.waker().clone());

        // Re-check: the producer may have delivered work while we registered.
        if let Some(entry) = self.queue.lock().unwrap().pop_front() {
            let _ = self.waker.lock().unwrap().take();
            return Poll::Ready(entry.map(Some));
        }

        if self.done.load(Ordering::Acquire) {
            let _ = self.waker.lock().unwrap().take();
            return Poll::Ready(Ok(None));
        }

        Poll::Pending
    }
}
|
||||
|
||||
impl Drop for ReadDirStream {
    /// Release the outstanding-async-op count if the stream is dropped before
    /// the producer finishes; `release_pending` is idempotent, so a finished
    /// stream is a no-op here.
    fn drop(&mut self) {
        self.state.release_pending();
    }
}
|
||||
|
||||
fn produce_dir_entries(path: PathBuf, state: Arc<ReadDirState>) {
|
||||
match std::fs::read_dir(path) {
|
||||
Ok(entries) => {
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
let file_name = entry.file_name();
|
||||
state.push(Ok(RawDirEntry {
|
||||
path: entry.path(),
|
||||
file_name,
|
||||
}));
|
||||
}
|
||||
Err(error) => state.push(Err(error)),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(error) => state.push(Err(error)),
|
||||
}
|
||||
|
||||
state.finish();
|
||||
}
|
||||
|
||||
/// Shared helper for sync_all/sync_data: IORING_OP_FSYNC with the given
/// fsync flags (0 for full sync, IORING_FSYNC_DATASYNC for data-only).
async fn submit_sync(fd: RawFd, flags: u32) -> io::Result<()> {
    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_FSYNC;
            sqe.fd = fd;
            sqe.op_flags = flags;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
}
|
||||
|
||||
/// Shared helper for remove_file/remove_dir: IORING_OP_UNLINKAT relative to
/// the current directory, with `flags` selecting file vs AT_REMOVEDIR.
async fn submit_unlink(path: PathBuf, flags: i32) -> io::Result<()> {
    let path = path_to_c_string(&path)?;
    let path_ptr = path.as_ptr();
    submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_UNLINKAT;
            sqe.fd = libc::AT_FDCWD;
            sqe.addr = path_ptr as u64;
            sqe.op_flags = flags as u32;
        },
        move |cqe| {
            // Keep the CString alive until the kernel has consumed the pointer.
            let _path = path;
            cqe_to_result(cqe).map(|_| ())
        },
    )
    .await
}
|
||||
|
||||
/// Submit one SQE on the current thread's reactor and await its completion.
///
/// `fill` populates the SQE in place; `map` converts the CQE into the final
/// result and, by capturing them, owns any buffers that must stay alive until
/// the kernel completes the operation.
async fn submit_uring<T: Send + 'static, M>(
    fill: impl FnOnce(&mut crate::platform::linux_x86_64::uring::IoUringSqe),
    map: M,
) -> io::Result<T>
where
    M: FnOnce(IoUringCqe) -> io::Result<T> + Send + 'static,
{
    let (future, handle) = completion_for_current_thread::<io::Result<T>>();
    let callback_handle = handle.clone();
    let token = with_current_reactor(|reactor| {
        reactor.submit_operation(fill, move |cqe| {
            callback_handle.complete(map(cqe));
        })
    })?;

    // NOTE(review): `set_cancel` presumably arranges for the in-flight op to
    // be cancelled via `token` when the future is dropped before completion —
    // confirm against the completion module.
    handle.set_cancel(move || {
        let _ = with_current_reactor(|reactor| reactor.cancel_operation(token));
    });

    future.await
}
|
||||
|
||||
/// Run a blocking task on a freshly spawned thread and await its result.
///
/// NOTE(review): this spawns one OS thread per operation (no pool), and if
/// the task panics the completion handle is never resolved — confirm the
/// completion future handles an abandoned handle.
async fn offload<T: Send + 'static>(
    task: impl FnOnce() -> io::Result<T> + Send + 'static,
) -> io::Result<T> {
    let (future, handle) = completion_for_current_thread::<io::Result<T>>();
    thread::Builder::new()
        .name("ruin-runtime-fs-offload".into())
        .spawn(move || handle.complete(task()))
        .map_err(io::Error::other)?;
    future.await
}
|
||||
|
||||
/// Convert a path into the NUL-terminated C string form the kernel ABI
/// expects. Fails with `InvalidInput` when the path itself contains a NUL
/// byte, which `CString` cannot represent.
fn path_to_c_string(path: &Path) -> io::Result<CString> {
    let bytes = path.as_os_str().as_bytes();
    match CString::new(bytes) {
        Ok(c_path) => Ok(c_path),
        Err(_) => Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "paths containing NUL bytes are not supported",
        )),
    }
}
|
||||
|
||||
fn open_flags(options: &OpenOptions) -> io::Result<(i32, u32)> {
|
||||
if !options.read && !options.write && !options.append {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"OpenOptions requires read, write, or append access",
|
||||
));
|
||||
}
|
||||
|
||||
let mut flags = if options.read {
|
||||
if options.write || options.append {
|
||||
libc::O_RDWR
|
||||
} else {
|
||||
libc::O_RDONLY
|
||||
}
|
||||
} else {
|
||||
libc::O_WRONLY
|
||||
};
|
||||
|
||||
if options.append {
|
||||
flags |= libc::O_APPEND;
|
||||
}
|
||||
if options.truncate {
|
||||
flags |= libc::O_TRUNC;
|
||||
}
|
||||
if options.create_new {
|
||||
flags |= libc::O_CREAT | libc::O_EXCL;
|
||||
} else if options.create {
|
||||
flags |= libc::O_CREAT;
|
||||
}
|
||||
|
||||
Ok((flags | libc::O_CLOEXEC, 0o666))
|
||||
}
|
||||
|
||||
fn metadata_flags(follow_symlinks: bool) -> i32 {
|
||||
let mut flags = libc::AT_NO_AUTOMOUNT;
|
||||
if !follow_symlinks {
|
||||
flags |= libc::AT_SYMLINK_NOFOLLOW;
|
||||
}
|
||||
flags
|
||||
}
|
||||
|
||||
/// Project the statx fields this runtime exposes (type, mode, size) into the
/// backend-neutral `RawMetadata`.
fn raw_metadata_from_statx(statx: &libc::statx) -> RawMetadata {
    RawMetadata {
        file_type: file_type_from_mode(statx.stx_mode),
        mode: statx.stx_mode,
        len: statx.stx_size,
    }
}
|
||||
|
||||
/// Decode the S_IFMT bits of a statx `stx_mode` into a `FileType`.
///
/// Guards are used because the libc `S_IF*` constants are not `u16` and so
/// cannot appear directly as match patterns.
fn file_type_from_mode(mode: u16) -> FileType {
    match mode & libc::S_IFMT as u16 {
        value if value == libc::S_IFREG as u16 => FileType::File,
        value if value == libc::S_IFDIR as u16 => FileType::Directory,
        value if value == libc::S_IFLNK as u16 => FileType::Symlink,
        value if value == libc::S_IFBLK as u16 => FileType::BlockDevice,
        value if value == libc::S_IFCHR as u16 => FileType::CharacterDevice,
        value if value == libc::S_IFIFO as u16 => FileType::Fifo,
        value if value == libc::S_IFSOCK as u16 => FileType::Socket,
        _ => FileType::Unknown,
    }
}
|
||||
|
||||
fn cqe_to_result(cqe: IoUringCqe) -> io::Result<i32> {
|
||||
if cqe.res < 0 {
|
||||
Err(io::Error::from_raw_os_error(-cqe.res))
|
||||
} else {
|
||||
Ok(cqe.res)
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate a libc `c_int` return value into an `io::Result`, mapping the
/// conventional `-1` sentinel to the current `errno`.
fn cvt(value: libc::c_int) -> io::Result<libc::c_int> {
    match value {
        -1 => Err(io::Error::last_os_error()),
        ok => Ok(ok),
    }
}
|
||||
5
lib/runtime/src/sys/linux/mod.rs
Normal file
5
lib/runtime/src/sys/linux/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
//! Linux backend modules.
|
||||
|
||||
pub mod channel;
|
||||
pub mod fs;
|
||||
pub mod net;
|
||||
974
lib/runtime/src/sys/linux/net.rs
Normal file
974
lib/runtime/src/sys/linux/net.rs
Normal file
@@ -0,0 +1,974 @@
|
||||
//! Linux networking backend.
|
||||
|
||||
use std::ffi::c_void;
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::net::{
|
||||
Ipv4Addr, Ipv6Addr, Shutdown, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs,
|
||||
};
|
||||
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
|
||||
use std::pin::Pin;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::op::completion::completion_for_current_thread;
|
||||
use crate::op::net::{AcceptedSocket, NetOp, ReceivedDatagram};
|
||||
use crate::platform::linux_x86_64::runtime::with_current_reactor;
|
||||
use crate::platform::linux_x86_64::uring::{
|
||||
IORING_OP_ACCEPT, IORING_OP_BIND, IORING_OP_CLOSE, IORING_OP_CONNECT, IORING_OP_LISTEN,
|
||||
IORING_OP_RECV, IORING_OP_SEND, IORING_OP_SHUTDOWN, IORING_OP_SOCKET, IoUringCqe, IoUringSqe,
|
||||
};
|
||||
|
||||
const DEFAULT_LISTENER_BACKLOG: i32 = 1024;
|
||||
|
||||
/// How a networking operation is carried out by this backend.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExecutionPath {
    /// Submitted as an SQE on the current thread's io_uring reactor.
    IoUring,
    /// Executed synchronously on a helper thread.
    Offload,
}
|
||||
|
||||
pub fn execution_path(op: &NetOp) -> ExecutionPath {
|
||||
match op {
|
||||
NetOp::Socket { .. }
|
||||
| NetOp::Connect { .. }
|
||||
| NetOp::Bind { .. }
|
||||
| NetOp::Listen { .. }
|
||||
| NetOp::Accept { .. }
|
||||
| NetOp::Send { .. }
|
||||
| NetOp::Recv { .. }
|
||||
| NetOp::Shutdown { .. }
|
||||
| NetOp::Close { .. } => ExecutionPath::IoUring,
|
||||
NetOp::SendTo { .. } | NetOp::RecvFrom { .. } => ExecutionPath::Offload,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn resolve_addrs<A>(addr: A) -> io::Result<Vec<SocketAddr>>
|
||||
where
|
||||
A: ToSocketAddrs + Send + 'static,
|
||||
{
|
||||
offload(move || {
|
||||
let addrs = addr.to_socket_addrs()?.collect::<Vec<_>>();
|
||||
if addrs.is_empty() {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"address resolved to no socket addresses",
|
||||
))
|
||||
} else {
|
||||
Ok(addrs)
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Create a socket via IORING_OP_SOCKET, falling back to a synchronous
/// `socket(2)` on an offload thread when the kernel rejects the opcode
/// (per `should_fallback_to_offload`, defined outside this view).
pub async fn socket(op: NetOp) -> io::Result<OwnedFd> {
    let NetOp::Socket {
        domain,
        socket_type,
        protocol,
        flags,
    } = op
    else {
        unreachable!("socket backend called with non-socket op");
    };

    match submit_uring::<OwnedFd, _>(
        move |sqe| {
            // socket SQE layout: fd=domain, off=type, len=protocol,
            // op_flags=file-table flags.
            sqe.opcode = IORING_OP_SOCKET;
            sqe.fd = domain;
            sqe.off = socket_type as u64;
            sqe.len = protocol as u32;
            sqe.op_flags = flags;
        },
        move |cqe| cqe_to_result(cqe).map(|fd| unsafe { OwnedFd::from_raw_fd(fd as RawFd) }),
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || socket_sync(domain, socket_type, protocol, flags)).await
        }
        result => result,
    }
}
|
||||
|
||||
/// Connect a socket via IORING_OP_CONNECT, falling back to a blocking
/// `connect(2)` on an offload thread for kernels without the opcode.
pub async fn connect(op: NetOp) -> io::Result<()> {
    let NetOp::Connect { fd, addr } = op else {
        unreachable!("connect backend called with non-connect op");
    };

    let raw_addr = RawSocketAddr::from_socket_addr(addr);
    // RawSocketAddr is copied for the fallback path; `raw_addr` itself is
    // moved into the completion closure to keep its pointer alive.
    let fallback_addr = raw_addr;
    let addr_ptr = raw_addr.as_ptr();
    let addr_len = raw_addr.len();
    match submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_CONNECT;
            sqe.fd = fd;
            sqe.addr = addr_ptr as u64;
            // connect SQE layout: `off` carries the sockaddr length.
            sqe.off = addr_len as u64;
        },
        move |cqe| {
            let _raw_addr = raw_addr;
            cqe_to_result(cqe).map(|_| ())
        },
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || connect_sync(fd, fallback_addr)).await
        }
        result => result,
    }
}
|
||||
|
||||
/// Bind a socket via IORING_OP_BIND, falling back to `bind(2)` on an offload
/// thread for kernels without the opcode.
pub async fn bind(op: NetOp) -> io::Result<()> {
    let NetOp::Bind { fd, addr } = op else {
        unreachable!("bind backend called with non-bind op");
    };

    let raw_addr = RawSocketAddr::from_socket_addr(addr);
    // Copy for the fallback path; `raw_addr` moves into the completion
    // closure so the pointer passed to the kernel stays valid.
    let fallback_addr = raw_addr;
    let addr_ptr = raw_addr.as_ptr();
    let addr_len = raw_addr.len();
    match submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_BIND;
            sqe.fd = fd;
            sqe.addr = addr_ptr as u64;
            // bind SQE layout: `off` carries the sockaddr length.
            sqe.off = addr_len as u64;
        },
        move |cqe| {
            let _raw_addr = raw_addr;
            cqe_to_result(cqe).map(|_| ())
        },
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || bind_sync(fd, fallback_addr)).await
        }
        result => result,
    }
}
|
||||
|
||||
/// Start listening via IORING_OP_LISTEN, falling back to `listen(2)` on an
/// offload thread for kernels without the opcode.
pub async fn listen(op: NetOp) -> io::Result<()> {
    let NetOp::Listen { fd, backlog } = op else {
        unreachable!("listen backend called with non-listen op");
    };

    match submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_LISTEN;
            sqe.fd = fd;
            // listen SQE layout: `len` carries the backlog.
            sqe.len = backlog as u32;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || listen_sync(fd, backlog)).await
        }
        result => result,
    }
}
|
||||
|
||||
/// Accept one connection via IORING_OP_ACCEPT, returning the new fd plus the
/// peer address. Falls back to a blocking accept on an offload thread when
/// the kernel lacks the opcode.
pub async fn accept(op: NetOp) -> io::Result<AcceptedSocket> {
    let NetOp::Accept { fd } = op else {
        unreachable!("accept backend called with non-accept op");
    };

    // Both out-parameters are boxed so their addresses are stable while the
    // kernel writes to them; they move into the completion closure below.
    let mut storage = Box::new(MaybeUninit::<libc::sockaddr_storage>::zeroed());
    let mut addr_len = Box::new(std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t);
    let storage_ptr = storage.as_mut_ptr();
    let addr_len_ptr = addr_len.as_mut() as *mut libc::socklen_t;

    match submit_uring::<AcceptedSocket, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_ACCEPT;
            sqe.fd = fd;
            // accept SQE layout: addr=sockaddr buffer, off=socklen_t pointer.
            sqe.addr = storage_ptr as u64;
            sqe.off = addr_len_ptr as u64;
        },
        move |cqe| {
            let accepted_fd = cqe_to_result(cqe)? as RawFd;
            // SAFETY: on success the kernel filled the sockaddr buffer and
            // updated `addr_len` with its true length.
            let storage = unsafe { storage.assume_init() };
            let peer_addr = socket_addr_from_storage(&storage, *addr_len)?;
            Ok(AcceptedSocket {
                fd: accepted_fd,
                peer_addr,
            })
        },
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => offload(move || accept_sync(fd)).await,
        result => result,
    }
}
|
||||
|
||||
/// Send bytes via IORING_OP_SEND; returns the number of bytes the kernel
/// accepted. Falls back to a blocking send on an offload thread when the
/// kernel lacks the opcode.
pub async fn send(op: NetOp) -> io::Result<usize> {
    let NetOp::Send { fd, data, flags } = op else {
        unreachable!("send backend called with non-send op");
    };

    // The fallback path needs its own copy because `data` is consumed by the
    // completion closure to keep the buffer alive for the uring attempt.
    let fallback_data = data.clone();
    let data_ptr = data.as_ptr();
    let data_len = data.len();
    match submit_uring::<usize, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_SEND;
            sqe.fd = fd;
            sqe.addr = data_ptr as u64;
            sqe.len = data_len as u32;
            sqe.op_flags = flags as u32;
        },
        move |cqe| {
            let _data = data;
            cqe_to_result(cqe).map(|written| written as usize)
        },
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || send_sync(fd, fallback_data, flags)).await
        }
        result => result,
    }
}
|
||||
|
||||
pub async fn send_to(op: NetOp) -> io::Result<usize> {
|
||||
let NetOp::SendTo {
|
||||
fd,
|
||||
target,
|
||||
data,
|
||||
flags,
|
||||
} = op
|
||||
else {
|
||||
unreachable!("send_to backend called with non-send_to op");
|
||||
};
|
||||
|
||||
let raw_addr = RawSocketAddr::from_socket_addr(target);
|
||||
offload(move || send_to_sync(fd, data, raw_addr, flags)).await
|
||||
}
|
||||
|
||||
/// Receive up to `len` bytes via IORING_OP_RECV; the buffer is truncated to
/// the number of bytes actually received. Falls back to a blocking recv on an
/// offload thread when the kernel lacks the opcode.
pub async fn recv(op: NetOp) -> io::Result<Vec<u8>> {
    let NetOp::Recv { fd, len, flags } = op else {
        unreachable!("recv backend called with non-recv op");
    };

    let mut buffer = vec![0; len];
    let buffer_ptr = buffer.as_mut_ptr();
    let buffer_len = buffer.len();
    match submit_uring::<Vec<u8>, _>(
        move |sqe| {
            sqe.opcode = IORING_OP_RECV;
            sqe.fd = fd;
            sqe.addr = buffer_ptr as u64;
            sqe.len = buffer_len as u32;
            sqe.op_flags = flags as u32;
        },
        move |cqe| {
            // `buffer` moved here keeps the heap allocation (and the pointer
            // handed to the kernel) alive until completion.
            let read = cqe_to_result(cqe)? as usize;
            buffer.truncate(read);
            Ok(buffer)
        },
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || recv_sync(fd, len, flags)).await
        }
        result => result,
    }
}
|
||||
|
||||
pub async fn recv_from(op: NetOp) -> io::Result<ReceivedDatagram> {
|
||||
let NetOp::RecvFrom { fd, len, flags } = op else {
|
||||
unreachable!("recv_from backend called with non-recv_from op");
|
||||
};
|
||||
|
||||
offload(move || recv_from_sync(fd, len, flags)).await
|
||||
}
|
||||
|
||||
/// Shut down one or both directions of a socket via IORING_OP_SHUTDOWN,
/// falling back to `shutdown(2)` on an offload thread for kernels without
/// the opcode.
pub async fn shutdown(op: NetOp) -> io::Result<()> {
    let NetOp::Shutdown { fd, how } = op else {
        unreachable!("shutdown backend called with non-shutdown op");
    };

    let fallback_how = how;
    match submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_SHUTDOWN;
            sqe.fd = fd;
            // shutdown SQE layout: `len` carries the SHUT_* direction.
            sqe.len = shutdown_how(how) as u32;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => {
            offload(move || shutdown_sync(fd, fallback_how)).await
        }
        result => result,
    }
}
|
||||
|
||||
/// Close a socket fd via IORING_OP_CLOSE, falling back to `close(2)` on an
/// offload thread for kernels without the opcode. The caller must have
/// relinquished ownership of `fd`.
pub async fn close(op: NetOp) -> io::Result<()> {
    let NetOp::Close { fd } = op else {
        unreachable!("close backend called with non-close op");
    };

    match submit_uring::<(), _>(
        move |sqe| {
            sqe.opcode = IORING_OP_CLOSE;
            sqe.fd = fd;
        },
        move |cqe| cqe_to_result(cqe).map(|_| ()),
    )
    .await
    {
        Err(error) if should_fallback_to_offload(&error) => offload(move || close_sync(fd)).await,
        result => result,
    }
}
|
||||
|
||||
pub async fn connect_stream(addr: SocketAddr) -> io::Result<OwnedFd> {
|
||||
let socket = socket(NetOp::Socket {
|
||||
domain: socket_domain(addr),
|
||||
socket_type: libc::SOCK_STREAM,
|
||||
protocol: 0,
|
||||
flags: libc::SOCK_CLOEXEC as u32,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let connect_result = connect(NetOp::Connect {
|
||||
fd: socket.as_raw_fd(),
|
||||
addr,
|
||||
})
|
||||
.await;
|
||||
match connect_result {
|
||||
Ok(()) => Ok(socket),
|
||||
Err(error) => Err(error),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create, configure, bind, and start listening on a TCP socket; returns the
/// listening fd. `backlog` defaults to `DEFAULT_LISTENER_BACKLOG` (1024).
pub async fn bind_listener(addr: SocketAddr, backlog: Option<i32>) -> io::Result<OwnedFd> {
    let listener = socket(NetOp::Socket {
        domain: socket_domain(addr),
        socket_type: libc::SOCK_STREAM,
        protocol: 0,
        flags: libc::SOCK_CLOEXEC as u32,
    })
    .await?;

    // SO_REUSEADDR before bind so quick restarts don't fail with EADDRINUSE
    // while the previous socket sits in TIME_WAIT.
    set_reuse_addr(listener.as_raw_fd(), true)?;

    bind(NetOp::Bind {
        fd: listener.as_raw_fd(),
        addr,
    })
    .await?;
    listen(NetOp::Listen {
        fd: listener.as_raw_fd(),
        backlog: backlog.unwrap_or(DEFAULT_LISTENER_BACKLOG),
    })
    .await?;
    Ok(listener)
}
|
||||
|
||||
/// Create a UDP (`SOCK_DGRAM`) socket for `addr`'s family and bind it.
pub async fn bind_datagram(addr: SocketAddr) -> io::Result<OwnedFd> {
    let socket = socket(NetOp::Socket {
        domain: socket_domain(addr),
        socket_type: libc::SOCK_DGRAM,
        protocol: 0,
        flags: libc::SOCK_CLOEXEC as u32,
    })
    .await?;

    bind(NetOp::Bind {
        fd: socket.as_raw_fd(),
        addr,
    })
    .await?;
    Ok(socket)
}
|
||||
|
||||
/// Duplicate a socket fd with `F_DUPFD_CLOEXEC` on an offload thread (there
/// is no uring opcode for dup).
pub async fn duplicate(fd: RawFd) -> io::Result<OwnedFd> {
    offload(move || {
        // SAFETY: plain fcntl on a caller-owned fd; on success the new fd is
        // exclusively ours to wrap.
        let duplicated = cvt(unsafe { libc::fcntl(fd, libc::F_DUPFD_CLOEXEC, 0) })?;
        Ok(unsafe { OwnedFd::from_raw_fd(duplicated) })
    })
    .await
}
|
||||
|
||||
pub async fn recv_timeout(
|
||||
fd: RawFd,
|
||||
len: usize,
|
||||
flags: i32,
|
||||
timeout: Duration,
|
||||
) -> io::Result<Vec<u8>> {
|
||||
offload(move || {
|
||||
wait_socket(fd, libc::POLLIN, timeout)?;
|
||||
recv_sync(fd, len, flags)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn send_timeout(
|
||||
fd: RawFd,
|
||||
data: Vec<u8>,
|
||||
flags: i32,
|
||||
timeout: Duration,
|
||||
) -> io::Result<usize> {
|
||||
offload(move || {
|
||||
wait_socket(fd, libc::POLLOUT, timeout)?;
|
||||
send_sync(fd, data, flags)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn recv_from_timeout(
|
||||
fd: RawFd,
|
||||
len: usize,
|
||||
flags: i32,
|
||||
timeout: Duration,
|
||||
) -> io::Result<ReceivedDatagram> {
|
||||
offload(move || {
|
||||
wait_socket(fd, libc::POLLIN, timeout)?;
|
||||
recv_from_sync(fd, len, flags)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn send_to_timeout(
|
||||
fd: RawFd,
|
||||
data: Vec<u8>,
|
||||
target: SocketAddr,
|
||||
flags: i32,
|
||||
timeout: Duration,
|
||||
) -> io::Result<usize> {
|
||||
offload(move || {
|
||||
wait_socket(fd, libc::POLLOUT, timeout)?;
|
||||
send_to_sync(fd, data, RawSocketAddr::from_socket_addr(target), flags)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn connect_stream_timeout(addr: SocketAddr, timeout: Duration) -> io::Result<OwnedFd> {
|
||||
offload(move || connect_stream_timeout_sync(addr, timeout)).await
|
||||
}
|
||||
|
||||
/// Returns the local address the socket `fd` is bound to (`getsockname`).
pub fn local_addr(fd: RawFd) -> io::Result<SocketAddr> {
    socket_addr_with(libc::getsockname, fd)
}
|
||||
|
||||
/// Returns the remote address the socket `fd` is connected to (`getpeername`).
pub fn peer_addr(fd: RawFd) -> io::Result<SocketAddr> {
    socket_addr_with(libc::getpeername, fd)
}
|
||||
|
||||
pub fn nodelay(fd: RawFd) -> io::Result<bool> {
|
||||
let mut value = 0;
|
||||
let mut len = std::mem::size_of::<libc::c_int>() as libc::socklen_t;
|
||||
cvt(unsafe {
|
||||
libc::getsockopt(
|
||||
fd,
|
||||
libc::IPPROTO_TCP,
|
||||
libc::TCP_NODELAY,
|
||||
&mut value as *mut libc::c_int as *mut c_void,
|
||||
&mut len,
|
||||
)
|
||||
})?;
|
||||
Ok(value != 0)
|
||||
}
|
||||
|
||||
/// Reports whether `SO_BROADCAST` is enabled on `fd`.
pub fn broadcast(fd: RawFd) -> io::Result<bool> {
    getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_BROADCAST).map(|value| value != 0)
}
|
||||
|
||||
/// Enables or disables `SO_BROADCAST` on `fd`.
pub fn set_broadcast(fd: RawFd, enabled: bool) -> io::Result<()> {
    setsockopt_int(fd, libc::SOL_SOCKET, libc::SO_BROADCAST, enabled.into())
}
|
||||
|
||||
pub fn ttl(fd: RawFd) -> io::Result<u32> {
|
||||
match socket_family(fd)? {
|
||||
libc::AF_INET => {
|
||||
getsockopt_int(fd, libc::IPPROTO_IP, libc::IP_TTL).map(|value| value as u32)
|
||||
}
|
||||
libc::AF_INET6 => getsockopt_int(fd, libc::IPPROTO_IPV6, libc::IPV6_UNICAST_HOPS)
|
||||
.map(|value| value as u32),
|
||||
family => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!("unsupported socket family {family} for TTL"),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_ttl(fd: RawFd, ttl: u32) -> io::Result<()> {
|
||||
let ttl = i32::try_from(ttl)
|
||||
.map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "TTL exceeds i32 range"))?;
|
||||
match socket_family(fd)? {
|
||||
libc::AF_INET => setsockopt_int(fd, libc::IPPROTO_IP, libc::IP_TTL, ttl),
|
||||
libc::AF_INET6 => setsockopt_int(fd, libc::IPPROTO_IPV6, libc::IPV6_UNICAST_HOPS, ttl),
|
||||
family => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!("unsupported socket family {family} for TTL"),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_nodelay(fd: RawFd, enabled: bool) -> io::Result<()> {
|
||||
let value: libc::c_int = enabled.into();
|
||||
cvt(unsafe {
|
||||
libc::setsockopt(
|
||||
fd,
|
||||
libc::IPPROTO_TCP,
|
||||
libc::TCP_NODELAY,
|
||||
&value as *const libc::c_int as *const c_void,
|
||||
std::mem::size_of_val(&value) as libc::socklen_t,
|
||||
)
|
||||
})
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Boxed future yielding received bytes, as produced by [`recv_future`].
pub type RecvFuture = Pin<Box<dyn Future<Output = io::Result<Vec<u8>>> + 'static>>;
/// Boxed future yielding a sent-byte count, as produced by [`send_future`].
pub type SendFuture = Pin<Box<dyn Future<Output = io::Result<usize>> + 'static>>;
/// Boxed future for a socket shutdown, as produced by [`shutdown_future`].
pub type ShutdownFuture = Pin<Box<dyn Future<Output = io::Result<()>> + 'static>>;
|
||||
|
||||
pub fn recv_future(fd: RawFd, len: usize) -> RecvFuture {
|
||||
Box::pin(recv(NetOp::Recv { fd, len, flags: 0 }))
|
||||
}
|
||||
|
||||
pub fn send_future(fd: RawFd, data: Vec<u8>) -> SendFuture {
|
||||
Box::pin(send(NetOp::Send { fd, data, flags: 0 }))
|
||||
}
|
||||
|
||||
pub fn shutdown_future(fd: RawFd, how: Shutdown) -> ShutdownFuture {
|
||||
Box::pin(shutdown(NetOp::Shutdown { fd, how }))
|
||||
}
|
||||
|
||||
/// Submits one operation to the current thread's io_uring reactor and awaits
/// its completion.
///
/// `fill` populates the submission queue entry; `map` converts the completion
/// queue entry into the caller's result inside the reactor callback.
async fn submit_uring<T: Send + 'static, M>(
    fill: impl FnOnce(&mut IoUringSqe),
    map: M,
) -> io::Result<T>
where
    M: FnOnce(IoUringCqe) -> io::Result<T> + Send + 'static,
{
    let (future, handle) = completion_for_current_thread::<io::Result<T>>();
    let callback_handle = handle.clone();
    // Register the SQE; the reactor runs the closure with the CQE when done.
    let token = with_current_reactor(|reactor| {
        reactor.submit_operation(fill, move |cqe| {
            callback_handle.complete(map(cqe));
        })
    })?;

    // If the completion future is dropped before the operation finishes, ask
    // the reactor to cancel the in-flight operation identified by `token`.
    handle.set_cancel(move || {
        let _ = with_current_reactor(|reactor| reactor.cancel_operation(token));
    });

    future.await
}
|
||||
|
||||
/// Runs a blocking task on a dedicated thread and awaits its result.
///
/// Used as the fallback when an io_uring opcode is unavailable and for
/// inherently blocking work (poll-based timeouts, fcntl).
/// NOTE(review): this spawns one OS thread per invocation rather than using a
/// pool — fine for rare fallback paths, worth confirming for hot paths.
async fn offload<T: Send + 'static>(
    task: impl FnOnce() -> io::Result<T> + Send + 'static,
) -> io::Result<T> {
    let (future, handle) = completion_for_current_thread::<io::Result<T>>();
    thread::Builder::new()
        .name("ruin-runtime-net-offload".into())
        .spawn(move || handle.complete(task()))
        .map_err(io::Error::other)?;
    future.await
}
|
||||
|
||||
fn socket_domain(addr: SocketAddr) -> i32 {
|
||||
match addr {
|
||||
SocketAddr::V4(_) => libc::AF_INET,
|
||||
SocketAddr::V6(_) => libc::AF_INET6,
|
||||
}
|
||||
}
|
||||
|
||||
fn shutdown_how(how: Shutdown) -> i32 {
|
||||
match how {
|
||||
Shutdown::Read => libc::SHUT_RD,
|
||||
Shutdown::Write => libc::SHUT_WR,
|
||||
Shutdown::Both => libc::SHUT_RDWR,
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a `getsockname`/`getpeername`-shaped libc call against `fd` and
/// decodes the kernel-provided address into a `SocketAddr`.
fn socket_addr_with(
    op: unsafe extern "C" fn(RawFd, *mut libc::sockaddr, *mut libc::socklen_t) -> libc::c_int,
    fd: RawFd,
) -> io::Result<SocketAddr> {
    let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
    let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
    // SAFETY: `storage` and `len` form a valid out-buffer for the call.
    cvt(unsafe { op(fd, storage.as_mut_ptr().cast::<libc::sockaddr>(), &mut len) })?;
    // SAFETY: the buffer was zeroed, so every byte is initialized even if the
    // kernel wrote fewer than `size_of::<sockaddr_storage>()` bytes.
    let storage = unsafe { storage.assume_init() };
    socket_addr_from_storage(&storage, len)
}
|
||||
|
||||
/// Enables or disables `SO_REUSEADDR` on `fd`.
fn set_reuse_addr(fd: RawFd, enabled: bool) -> io::Result<()> {
    setsockopt_int(fd, libc::SOL_SOCKET, libc::SO_REUSEADDR, enabled.into())
}
|
||||
|
||||
/// Returns the address family (`AF_*`) of socket `fd`, read via `getsockname`.
fn socket_family(fd: RawFd) -> io::Result<i32> {
    let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
    let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
    // SAFETY: `storage` and `len` form a valid out-buffer for getsockname.
    cvt(unsafe { libc::getsockname(fd, storage.as_mut_ptr().cast::<libc::sockaddr>(), &mut len) })?;
    // SAFETY: the buffer was zero-initialized, so every byte is initialized.
    let storage = unsafe { storage.assume_init() };
    Ok(storage.ss_family as i32)
}
|
||||
|
||||
fn getsockopt_int(fd: RawFd, level: i32, name: i32) -> io::Result<i32> {
|
||||
let mut value = 0;
|
||||
let mut len = std::mem::size_of::<libc::c_int>() as libc::socklen_t;
|
||||
cvt(unsafe {
|
||||
libc::getsockopt(
|
||||
fd,
|
||||
level,
|
||||
name,
|
||||
&mut value as *mut libc::c_int as *mut c_void,
|
||||
&mut len,
|
||||
)
|
||||
})?;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn setsockopt_int(fd: RawFd, level: i32, name: i32, value: i32) -> io::Result<()> {
|
||||
cvt(unsafe {
|
||||
libc::setsockopt(
|
||||
fd,
|
||||
level,
|
||||
name,
|
||||
&value as *const libc::c_int as *const c_void,
|
||||
std::mem::size_of_val(&value) as libc::socklen_t,
|
||||
)
|
||||
})
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Decodes a kernel-filled `sockaddr_storage` into a std `SocketAddr`,
/// validating that `len` covers the family-specific struct before
/// reinterpreting the bytes.
fn socket_addr_from_storage(
    storage: &libc::sockaddr_storage,
    len: libc::socklen_t,
) -> io::Result<SocketAddr> {
    match storage.ss_family as i32 {
        libc::AF_INET => {
            // Reject truncated addresses before casting.
            if len < std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "short IPv4 socket address from kernel",
                ));
            }

            // SAFETY: family and length were checked above; sockaddr_in is a
            // plain-data struct readable from the storage bytes.
            let addr = unsafe { *(storage as *const _ as *const libc::sockaddr_in) };
            Ok(SocketAddr::V4(SocketAddrV4::new(
                // s_addr carries network-order bytes; map them through as-is.
                Ipv4Addr::from(addr.sin_addr.s_addr.to_ne_bytes()),
                u16::from_be(addr.sin_port),
            )))
        }
        libc::AF_INET6 => {
            if len < std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "short IPv6 socket address from kernel",
                ));
            }

            // SAFETY: family and length were checked above.
            let addr = unsafe { *(storage as *const _ as *const libc::sockaddr_in6) };
            Ok(SocketAddr::V6(SocketAddrV6::new(
                Ipv6Addr::from(addr.sin6_addr.s6_addr),
                u16::from_be(addr.sin6_port),
                addr.sin6_flowinfo,
                addr.sin6_scope_id,
            )))
        }
        family => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("unsupported socket address family {family}"),
        )),
    }
}
|
||||
|
||||
/// A `sockaddr_storage` plus the number of meaningful bytes, ready to pass to
/// libc calls taking `(*const sockaddr, socklen_t)`.
#[derive(Clone, Copy)]
struct RawSocketAddr {
    // Backing bytes, family-specific struct written at offset 0.
    storage: libc::sockaddr_storage,
    // Size of the family-specific struct actually stored.
    len: libc::socklen_t,
}

impl RawSocketAddr {
    /// Encodes a std `SocketAddr` into the matching libc sockaddr layout.
    fn from_socket_addr(addr: SocketAddr) -> Self {
        match addr {
            SocketAddr::V4(addr) => {
                let sockaddr = libc::sockaddr_in {
                    sin_family: libc::AF_INET as libc::sa_family_t,
                    // Ports travel in network byte order.
                    sin_port: addr.port().to_be(),
                    sin_addr: libc::in_addr {
                        // Octets are already network-order bytes; keep as-is.
                        s_addr: u32::from_ne_bytes(addr.ip().octets()),
                    },
                    sin_zero: [0; 8],
                };
                // SAFETY: sockaddr_storage is valid when fully zeroed.
                let mut storage =
                    unsafe { MaybeUninit::<libc::sockaddr_storage>::zeroed().assume_init() };
                // SAFETY: sockaddr_in fits inside sockaddr_storage and the
                // destination is suitably aligned for it.
                unsafe {
                    std::ptr::write(
                        &mut storage as *mut libc::sockaddr_storage as *mut libc::sockaddr_in,
                        sockaddr,
                    );
                }
                Self {
                    storage,
                    len: std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t,
                }
            }
            SocketAddr::V6(addr) => {
                let sockaddr = libc::sockaddr_in6 {
                    sin6_family: libc::AF_INET6 as libc::sa_family_t,
                    sin6_port: addr.port().to_be(),
                    sin6_flowinfo: addr.flowinfo(),
                    sin6_addr: libc::in6_addr {
                        s6_addr: addr.ip().octets(),
                    },
                    sin6_scope_id: addr.scope_id(),
                };
                // SAFETY: sockaddr_storage is valid when fully zeroed.
                let mut storage =
                    unsafe { MaybeUninit::<libc::sockaddr_storage>::zeroed().assume_init() };
                // SAFETY: sockaddr_in6 fits inside sockaddr_storage and the
                // destination is suitably aligned for it.
                unsafe {
                    std::ptr::write(
                        &mut storage as *mut libc::sockaddr_storage as *mut libc::sockaddr_in6,
                        sockaddr,
                    );
                }
                Self {
                    storage,
                    len: std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t,
                }
            }
        }
    }

    /// Pointer to the stored sockaddr for libc call arguments.
    fn as_ptr(&self) -> *const libc::sockaddr {
        &self.storage as *const libc::sockaddr_storage as *const libc::sockaddr
    }

    /// Number of meaningful bytes behind [`Self::as_ptr`].
    fn len(&self) -> libc::socklen_t {
        self.len
    }
}
|
||||
|
||||
fn cqe_to_result(cqe: IoUringCqe) -> io::Result<i32> {
|
||||
if cqe.res < 0 {
|
||||
Err(io::Error::from_raw_os_error(-cqe.res))
|
||||
} else {
|
||||
Ok(cqe.res)
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a libc integer return into a `Result`, reading errno on -1.
fn cvt(value: libc::c_int) -> io::Result<libc::c_int> {
    match value {
        -1 => Err(io::Error::last_os_error()),
        ok => Ok(ok),
    }
}
|
||||
|
||||
fn should_fallback_to_offload(error: &io::Error) -> bool {
|
||||
matches!(
|
||||
error.raw_os_error(),
|
||||
Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP)
|
||||
)
|
||||
}
|
||||
|
||||
fn socket_sync(domain: i32, socket_type: i32, protocol: i32, flags: u32) -> io::Result<OwnedFd> {
|
||||
let fd = cvt(unsafe { libc::socket(domain, socket_type | flags as i32, protocol) })?;
|
||||
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
|
||||
}
|
||||
|
||||
fn connect_sync(fd: RawFd, addr: RawSocketAddr) -> io::Result<()> {
|
||||
cvt(unsafe { libc::connect(fd, addr.as_ptr(), addr.len()) }).map(|_| ())
|
||||
}
|
||||
|
||||
fn bind_sync(fd: RawFd, addr: RawSocketAddr) -> io::Result<()> {
|
||||
cvt(unsafe { libc::bind(fd, addr.as_ptr(), addr.len()) }).map(|_| ())
|
||||
}
|
||||
|
||||
fn listen_sync(fd: RawFd, backlog: i32) -> io::Result<()> {
|
||||
cvt(unsafe { libc::listen(fd, backlog) }).map(|_| ())
|
||||
}
|
||||
|
||||
fn accept_sync(fd: RawFd) -> io::Result<AcceptedSocket> {
|
||||
let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
|
||||
let mut len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
|
||||
let accepted_fd = cvt(unsafe {
|
||||
libc::accept4(
|
||||
fd,
|
||||
storage.as_mut_ptr().cast::<libc::sockaddr>(),
|
||||
&mut len,
|
||||
libc::SOCK_CLOEXEC,
|
||||
)
|
||||
})?;
|
||||
let storage = unsafe { storage.assume_init() };
|
||||
let peer_addr = socket_addr_from_storage(&storage, len)?;
|
||||
Ok(AcceptedSocket {
|
||||
fd: accepted_fd,
|
||||
peer_addr,
|
||||
})
|
||||
}
|
||||
|
||||
fn send_sync(fd: RawFd, data: Vec<u8>, flags: i32) -> io::Result<usize> {
|
||||
let written = unsafe { libc::send(fd, data.as_ptr().cast::<c_void>(), data.len(), flags) };
|
||||
cvt_long(written).map(|written| written as usize)
|
||||
}
|
||||
|
||||
fn send_to_sync(fd: RawFd, data: Vec<u8>, target: RawSocketAddr, flags: i32) -> io::Result<usize> {
|
||||
let written = unsafe {
|
||||
libc::sendto(
|
||||
fd,
|
||||
data.as_ptr().cast::<c_void>(),
|
||||
data.len(),
|
||||
flags,
|
||||
target.as_ptr(),
|
||||
target.len(),
|
||||
)
|
||||
};
|
||||
cvt_long(written).map(|written| written as usize)
|
||||
}
|
||||
|
||||
fn recv_sync(fd: RawFd, len: usize, flags: i32) -> io::Result<Vec<u8>> {
|
||||
let mut buffer = vec![0; len];
|
||||
let read = unsafe {
|
||||
libc::recv(
|
||||
fd,
|
||||
buffer.as_mut_ptr().cast::<c_void>(),
|
||||
buffer.len(),
|
||||
flags,
|
||||
)
|
||||
};
|
||||
let read = cvt_long(read)? as usize;
|
||||
buffer.truncate(read);
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// Blocking fallback for recvfrom: reads one datagram (up to `len` bytes) and
/// decodes the sender's address.
fn recv_from_sync(fd: RawFd, len: usize, flags: i32) -> io::Result<ReceivedDatagram> {
    let mut buffer = vec![0; len];
    let mut storage = MaybeUninit::<libc::sockaddr_storage>::zeroed();
    let mut addr_len = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
    // SAFETY: buffer and the address storage are valid out-buffers for the
    // duration of the call.
    let read = unsafe {
        libc::recvfrom(
            fd,
            buffer.as_mut_ptr().cast::<c_void>(),
            buffer.len(),
            flags,
            storage.as_mut_ptr().cast::<libc::sockaddr>(),
            &mut addr_len,
        )
    };
    let read = cvt_long(read)? as usize;
    buffer.truncate(read);
    // SAFETY: the storage was zero-initialized, so all bytes are initialized.
    let storage = unsafe { storage.assume_init() };
    let peer_addr = socket_addr_from_storage(&storage, addr_len)?;
    Ok(ReceivedDatagram {
        data: buffer,
        peer_addr,
    })
}
|
||||
|
||||
fn shutdown_sync(fd: RawFd, how: Shutdown) -> io::Result<()> {
|
||||
cvt(unsafe { libc::shutdown(fd, shutdown_how(how)) }).map(|_| ())
|
||||
}
|
||||
|
||||
fn close_sync(fd: RawFd) -> io::Result<()> {
|
||||
cvt(unsafe { libc::close(fd) }).map(|_| ())
|
||||
}
|
||||
|
||||
fn connect_stream_timeout_sync(addr: SocketAddr, timeout: Duration) -> io::Result<OwnedFd> {
|
||||
let fd = cvt(unsafe {
|
||||
libc::socket(
|
||||
socket_domain(addr),
|
||||
libc::SOCK_STREAM | libc::SOCK_CLOEXEC | libc::SOCK_NONBLOCK,
|
||||
0,
|
||||
)
|
||||
})?;
|
||||
let raw_addr = RawSocketAddr::from_socket_addr(addr);
|
||||
let connect_result = unsafe { libc::connect(fd, raw_addr.as_ptr(), raw_addr.len()) };
|
||||
if connect_result == 0 {
|
||||
set_nonblocking(fd, false)?;
|
||||
return Ok(unsafe { OwnedFd::from_raw_fd(fd) });
|
||||
}
|
||||
|
||||
let error = io::Error::last_os_error();
|
||||
if error.raw_os_error() != Some(libc::EINPROGRESS) {
|
||||
let _ = close_sync(fd);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
let completion = wait_socket(fd, libc::POLLOUT, timeout)
|
||||
.and_then(|_| getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_ERROR));
|
||||
match completion {
|
||||
Ok(0) => {
|
||||
set_nonblocking(fd, false)?;
|
||||
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
|
||||
}
|
||||
Ok(code) => {
|
||||
let _ = close_sync(fd);
|
||||
Err(io::Error::from_raw_os_error(code))
|
||||
}
|
||||
Err(error) => {
|
||||
let _ = close_sync(fd);
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_nonblocking(fd: RawFd, enabled: bool) -> io::Result<()> {
|
||||
let flags = cvt(unsafe { libc::fcntl(fd, libc::F_GETFL) })?;
|
||||
let new_flags = if enabled {
|
||||
flags | libc::O_NONBLOCK
|
||||
} else {
|
||||
flags & !libc::O_NONBLOCK
|
||||
};
|
||||
cvt(unsafe { libc::fcntl(fd, libc::F_SETFL, new_flags) }).map(|_| ())
|
||||
}
|
||||
|
||||
/// Blocks until `fd` reports `events`, mapping a poll timeout to
/// `ErrorKind::TimedOut` and retrying on `EINTR`.
fn wait_socket(fd: RawFd, events: i16, timeout: Duration) -> io::Result<()> {
    // Clamp the timeout into poll's i32 millisecond range.
    let timeout_ms = timeout
        .as_millis()
        .min(i32::MAX as u128)
        .try_into()
        .unwrap_or(i32::MAX);

    loop {
        let mut poll_fd = libc::pollfd {
            fd,
            events,
            revents: 0,
        };
        // SAFETY: `poll_fd` is a valid pollfd for the duration of the call.
        let result = unsafe { libc::poll(&mut poll_fd, 1, timeout_ms) };
        // 0 ready descriptors means poll's timeout expired.
        if result == 0 {
            return Err(io::Error::new(
                io::ErrorKind::TimedOut,
                "socket operation timed out",
            ));
        }
        if result < 0 {
            let error = io::Error::last_os_error();
            // NOTE(review): the full `timeout` is re-armed after an EINTR
            // retry, so the total wait can exceed `timeout` — confirm that is
            // acceptable.
            if error.kind() == io::ErrorKind::Interrupted {
                continue;
            }
            return Err(error);
        }
        // Error-ish revents: surface the pending socket error when one
        // exists; otherwise fall through and report readiness (a plain HUP
        // with no SO_ERROR still counts as ready).
        if poll_fd.revents & (libc::POLLERR | libc::POLLHUP | libc::POLLNVAL) != 0 {
            let socket_error = getsockopt_int(fd, libc::SOL_SOCKET, libc::SO_ERROR).unwrap_or(0);
            if socket_error != 0 {
                return Err(io::Error::from_raw_os_error(socket_error));
            }
        }
        return Ok(());
    }
}
|
||||
|
||||
/// Converts a libc `ssize_t` return into a `Result`, reading errno on -1.
fn cvt_long(value: libc::ssize_t) -> io::Result<libc::ssize_t> {
    match value {
        -1 => Err(io::Error::last_os_error()),
        ok => Ok(ok),
    }
}
|
||||
4
lib/runtime/src/sys/mod.rs
Normal file
4
lib/runtime/src/sys/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
//! Platform backend implementations.
|
||||
|
||||
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
|
||||
pub mod linux;
|
||||
175
lib/runtime/src/time.rs
Normal file
175
lib/runtime/src/time.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
//! Runtime time primitives.
|
||||
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::fmt;
|
||||
use std::future::{Future, poll_fn};
|
||||
use std::io;
|
||||
use std::pin::Pin;
|
||||
use std::rc::Rc;
|
||||
use std::task::Waker;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::{clear_timeout, set_timeout};
|
||||
|
||||
/// Timer future returned by [`sleep`]; resolves once the runtime timer fires.
pub struct Sleep {
    /// Requested delay; consumed when the timer is armed on first poll.
    delay: Option<Duration>,
    /// Shared ready-flag/waker pair; `Some` once the timer has been armed.
    state: Option<Rc<SleepState>>,
    /// Timer registration, used to cancel a still-pending timer on drop.
    handle: Option<crate::TimeoutHandle>,
    /// Latched once the timer fired so repeated polls stay `Ready`.
    completed: bool,
}
|
||||
|
||||
/// Error returned by [`timeout`] when the deadline expires before the inner
/// future completes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Elapsed;
|
||||
|
||||
pub fn sleep(duration: Duration) -> Sleep {
|
||||
Sleep {
|
||||
delay: Some(duration),
|
||||
state: None,
|
||||
handle: None,
|
||||
completed: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Awaits `future`, returning `Err(Elapsed)` if `duration` passes first.
///
/// The future is polled before the timer on every wakeup, so if both are
/// ready in the same poll the future's output wins.
pub async fn timeout<F>(duration: Duration, future: F) -> Result<F::Output, Elapsed>
where
    F: Future,
{
    let mut future = std::pin::pin!(future);
    let mut sleeper = std::pin::pin!(sleep(duration));

    poll_fn(|cx| {
        if let Poll::Ready(output) = future.as_mut().poll(cx) {
            return Poll::Ready(Ok(output));
        }

        if let Poll::Ready(()) = sleeper.as_mut().poll(cx) {
            return Poll::Ready(Err(Elapsed));
        }

        // Both futures registered `cx`'s waker; whichever fires first re-polls.
        Poll::Pending
    })
    .await
}
|
||||
|
||||
/// Builds the standard `TimedOut` I/O error for a named operation.
pub fn timeout_error(action: &'static str) -> io::Error {
    let message = format!("{action} timed out");
    io::Error::new(io::ErrorKind::TimedOut, message)
}
|
||||
|
||||
impl Future for Sleep {
    type Output = ();

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        // Latched completion: later polls stay Ready without touching state.
        if self.completed {
            return Poll::Ready(());
        }

        // First poll: arm the runtime timer lazily.
        if self.state.is_none() {
            let delay = self.delay.take().unwrap_or(Duration::ZERO);
            let state = Rc::new(SleepState::default());
            let state_for_callback = Rc::clone(&state);
            let timeout_handle = set_timeout(delay, move || state_for_callback.complete());
            self.state = Some(state);
            self.handle = Some(timeout_handle);
        }

        let state = self
            .state
            .as_ref()
            .expect("sleep state should be initialized");
        if state.ready.get() {
            // Timer already fired: release the shared state and the handle.
            self.completed = true;
            self.state = None;
            self.handle = None;
            Poll::Ready(())
        } else {
            *state.waker.borrow_mut() = Some(cx.waker().clone());
            // Re-check after storing the waker. With the single-threaded
            // Rc/Cell state this looks defensive; presumably it guards a
            // re-entrant timer firing during registration — TODO confirm.
            if state.ready.get() {
                self.completed = true;
                self.state = None;
                self.handle = None;
                Poll::Ready(())
            } else {
                Poll::Pending
            }
        }
    }
}
|
||||
|
||||
impl Drop for Sleep {
|
||||
fn drop(&mut self) {
|
||||
if self.completed {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(handle) = self.handle.take() {
|
||||
clear_timeout(&handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Single-threaded state shared between a [`Sleep`] future and its timer
/// callback.
#[derive(Default)]
struct SleepState {
    /// Set by the timer callback once the deadline has passed.
    ready: Cell<bool>,
    /// Waker from the most recent poll, woken when the timer fires.
    waker: RefCell<Option<Waker>>,
}
|
||||
|
||||
impl SleepState {
|
||||
fn complete(&self) {
|
||||
self.ready.set(true);
|
||||
if let Some(waker) = self.waker.borrow_mut().take() {
|
||||
waker.wake();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Elapsed {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str("deadline elapsed")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Elapsed {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    use crate::{queue_future, queue_task, run};

    use super::{sleep, timeout};

    #[test]
    fn sleep_and_timeout_work() {
        // The runtime is driven on a dedicated thread; the closure returns
        // the recorded event log for assertions after `run()` finishes.
        let log = std::thread::spawn(|| {
            let log = Arc::new(Mutex::new(Vec::new()));
            let log_for_task = Arc::clone(&log);

            queue_task(move || {
                let log_for_task = Arc::clone(&log_for_task);
                queue_future(async move {
                    log_for_task.lock().unwrap().push("started");
                    sleep(Duration::from_millis(5)).await;
                    log_for_task.lock().unwrap().push("slept");

                    // A 20 ms sleep under a 5 ms deadline must be cut short.
                    let result = timeout(Duration::from_millis(5), async {
                        sleep(Duration::from_millis(20)).await;
                        42usize
                    })
                    .await;
                    assert!(result.is_err(), "timeout should fire first");
                    log_for_task.lock().unwrap().push("timed out");
                });
            });
            // Run the event loop to completion of all queued work.
            run();

            let log = log.lock().unwrap();
            log.clone()
        })
        .join()
        .expect("time test thread should join successfully");

        assert_eq!(log.as_slice(), ["started", "slept", "timed out"]);
    }
}
|
||||
Reference in New Issue
Block a user