Skip to content

Commit 339a5e3

Browse files
committed
Add infrastructure for well-aligned binary I/O
1 parent 856f219 commit 339a5e3

File tree

3 files changed

+231
-0
lines changed

3 files changed

+231
-0
lines changed

src/align/io.rs

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
//! Tools for reading and writing abomonated data in an alignment-aware way
2+
//!
3+
//! In order to enable UB-free in-place deserialization, abomonated objects
4+
//! follow Rust's normal memory alignment rules. This requires inserting padding
5+
//! bytes between serialized data and skipping them on readout. This module
6+
//! provides tools to take care of this.
7+
8+
use std::{
9+
io::Write,
10+
mem,
11+
ptr::NonNull,
12+
};
13+
14+
15+
/// Alignment-aware binary data writer
16+
///
17+
/// This wrapper around a standard Rust writer allows writing multiple binary
18+
/// objects in a sequence with a memory layout that is suitable for in-place
19+
/// readout. It does so by inserting padding bytes between the objects as if
20+
/// they were members of a well-aligned C-style struct whose alignment is the
21+
/// maximum of the alignment of all written objects.
22+
pub struct AlignedWriter<W: Write> {
23+
/// Inner writer to which data is eventually dispatched
24+
inner: W,
25+
26+
/// Amount of data that was sent to the inner writer so far
27+
written_so_far: usize,
28+
29+
/// Expected alignment of the output data
30+
#[cfg(debug_assertions)]
31+
output_alignment: usize,
32+
}
33+
34+
impl<W: Write> AlignedWriter<W> {
35+
/// Prepare a writer for alignment-aware binary writes
36+
///
37+
/// In debug builds, `AlignedWriter` will check that the output memory
38+
/// allocation is sufficiently well-aligned for the data that is written
39+
/// into it, as per the `output_alignment` parameter to this function.
40+
//
41+
// FIXME: output_alignment should be #[cfg(debug_assertions)], but at the
42+
// moment Rust 1.39 is a bit too freshly released to rely on that.
43+
#[allow(unused)]
44+
pub fn new(inner: W, output_alignment: usize) -> Self {
45+
Self {
46+
inner,
47+
written_so_far: 0,
48+
#[cfg(debug_assertions)] output_alignment,
49+
}
50+
}
51+
52+
/// Write arbitrary binary data into the inner writer
53+
///
54+
/// This is unsafe because Rust does not yet provide an UB-free way to
55+
/// expose the padding bytes of arbitrary T objects to writers.
56+
pub unsafe fn write_slice<T>(&mut self, data: &[T]) -> crate::IOResult<()> {
57+
// Check how aligned the binary data needs to be
58+
let alignment = mem::align_of_val::<[T]>(data);
59+
60+
// In debug builds, check that the output allocation has sufficiently
61+
// strong alignment for the data that's being written to it.
62+
//
63+
// If the output alignment is too low, readout may go wrong because the
64+
// AlignedReader will skip a number of padding bytes that may not be
65+
// in sync with the amount that AlignedWriter has inserted, in a manner
66+
// that depends on how the data being read out was _actually_ aligned.
67+
debug_assert!(
68+
if cfg!(debug_assertions) { alignment <= self.output_alignment } else { true },
69+
"Insufficient output alignment (output alignment is {}, got data of alignment {})",
70+
self.output_alignment, alignment
71+
);
72+
73+
// Inject padding bytes until the output is well-aligned, assuming that
74+
// the first byte that was written was well-aligned for all output data.
75+
while self.written_so_far % alignment != 0 {
76+
self.inner.write_all(&[0u8])?;
77+
self.written_so_far += 1;
78+
}
79+
80+
// Write down the binary data and exit
81+
// FIXME: Move write_bytes functionality here
82+
crate::write_bytes(&mut self.inner, data)?;
83+
self.written_so_far += mem::size_of_val::<[T]>(data);
84+
Ok(())
85+
}
86+
87+
/// Convenience function for non-slice data
88+
///
89+
/// This is unsafe for the same reason that `write_slice` is.
90+
pub unsafe fn write<T>(&mut self, data: &T) -> crate::IOResult<()> {
91+
self.write_slice(std::slice::from_ref(data))
92+
}
93+
94+
/// Query how much data was written so far
95+
pub fn written_so_far(&self) -> usize {
96+
self.written_so_far
97+
}
98+
}
99+
100+
impl<W: Write> Write for AlignedWriter<W> {
101+
fn write(&mut self, buf: &[u8]) -> crate::IOResult<usize> {
102+
// This will write buf.len() data because bytes are always well-aligned
103+
// It is safe because &[u8] has no padding bytes
104+
unsafe { self.write_slice(buf)? };
105+
Ok(buf.len())
106+
}
107+
108+
fn flush(&mut self) -> crate::IOResult<()> {
109+
// No flushing necessary, we don't buffer anything
110+
Ok(())
111+
}
112+
}
113+
114+
115+
/// Slice-of-bytes reader for data written by `AlignedWriter`
///
/// This reader takes as input a bunch of bytes that were written by
/// `AlignedWriter` and allows fetching back the corresponding binary data under
/// the assumption that the input bytes are aligned on the max of the alignment
/// of all the data that was written by `AlignedWriter`.
pub struct AlignedReader<'bytes> {
    /// Remaining bytes to be read
    bytes: &'bytes mut [u8],

    /// Expected alignment of the input data (only tracked in debug builds)
    #[cfg(debug_assertions)]
    input_alignment: usize,
}

impl<'bytes> AlignedReader<'bytes> {
    /// Prepare some bytes for alignment-aware readout
    ///
    /// In debug builds, `AlignedReader` will check that the input bytes were
    /// sufficiently well-aligned for the data that is being read from it, as
    /// per the `input_alignment` parameter to this function. In other builds,
    /// `input_alignment` is ignored.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if `bytes` does not start on an
    /// `input_alignment`-byte boundary.
    //
    // FIXME: input_alignment should be #[cfg(debug_assertions)], but at the
    //        moment Rust 1.39 is a bit too freshly released to rely on that.
    //        Until then, the parameter is unused when debug assertions are
    //        off, hence the lint suppression below.
    #[allow(unused)]
    pub fn new(bytes: &'bytes mut [u8], input_alignment: usize) -> Self {
        debug_assert_eq!((bytes.as_ptr() as usize) % input_alignment, 0,
                         "Input data is not aligned on a {}-byte boundary as expected",
                         input_alignment);
        Self {
            bytes,
            #[cfg(debug_assertions)]
            input_alignment,
        }
    }

    /// Check that the input allocation is aligned strongly enough for data of
    /// the given alignment (debug builds only).
    ///
    /// If the input alignment is too low, readout may go wrong because the
    /// `AlignedReader` will skip a number of padding bytes that may not be
    /// in sync with the amount that `AlignedWriter` has inserted, in a manner
    /// that depends on how the data being read out was _actually_ aligned.
    //
    // This is a cfg-gated method rather than a `debug_assert!` because the
    // arguments of `debug_assert!` are still type-checked when debug
    // assertions are disabled, and the `input_alignment` field does not exist
    // in such builds.
    #[cfg(debug_assertions)]
    fn check_alignment(&self, alignment: usize) {
        assert!(
            alignment <= self.input_alignment,
            "Insufficient input alignment (input alignment is {}, asked for data of alignment {})",
            self.input_alignment, alignment
        );
    }

    /// No-op counterpart of the alignment check for builds without debug
    /// assertions.
    #[cfg(not(debug_assertions))]
    fn check_alignment(&self, _alignment: usize) {}

    /// Read a slice of data of arbitrary type from the inner bytes, returns a
    /// pointer to the first element of the slice, or None if the request
    /// overflows the input bytes.
    ///
    /// Padding bytes leading up to the next `T`-aligned address are skipped
    /// first. If the padding fits in the remaining bytes but the requested
    /// data does not, `None` is returned and the padding stays consumed.
    ///
    /// The returned pointer is well-aligned for `T` and points to
    /// `len * size_of::<T>()` bytes of the input; whether these bytes form
    /// valid `T` values is for the caller to determine.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if `T` is more strictly aligned than the
    /// `input_alignment` that was given to `new()`.
    //
    // FIXME: This should return a NonNull<[T]>, but pointers to slices are not
    //        ergonomic enough at this point in time.
    pub fn read_slice<T>(&mut self, len: usize) -> Option<NonNull<T>> {
        // A slice is exactly as aligned as its elements, whatever its length
        let alignment = mem::align_of::<T>();
        self.check_alignment(alignment);

        // Drop the alignment padding bytes leading up to the inner T-typed data
        let misalignment = self.bytes.as_ptr() as usize % alignment;
        if misalignment != 0 {
            let offset = alignment - misalignment;
            if offset > self.bytes.len() { return None; }
            // Moving the slice out of `self` lets us keep the full 'bytes
            // lifetime on the sub-slice without resorting to unsafe code.
            let bytes = mem::replace(&mut self.bytes, &mut []);
            self.bytes = &mut bytes[offset..];
        }

        // Make sure that we still have enough bytes for readout. The size
        // computation is checked: a wrapped-around product could otherwise
        // slip past the bounds check and yield an undersized region.
        let size = mem::size_of::<T>().checked_mul(len)?;
        if size > self.bytes.len() { return None; }

        // Extract the inner T-typed data and keep the rest for later reads
        let bytes = mem::replace(&mut self.bytes, &mut []);
        let (out, rest) = bytes.split_at_mut(size);
        self.bytes = rest;

        // A pointer derived from a reference is never null
        Some(NonNull::from(out).cast::<T>())
    }

    /// Read arbitrary data from the inner bytes
    ///
    /// Equivalent to `read_slice` with a length of 1.
    pub fn read<T>(&mut self) -> Option<NonNull<T>> {
        self.read_slice(1)
    }

    /// Extract the remaining bytes
    pub fn remaining(self) -> &'bytes mut [u8] {
        self.bytes
    }
}
222+
223+
224+
// TODO: Add some tests

src/align/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
//! Utilities for handling alignment in abomonated data
2+
3+
mod io;
4+
5+
#[deprecated(note = "Made pub for internal unsafe_abomonate use only")]
6+
pub use self::io::{AlignedReader, AlignedWriter};

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ use std::num::*;
4343
use std::ptr::NonNull;
4444

4545
pub mod abomonated;
46+
pub mod align;
4647

4748
/// Encodes a typed reference into a binary buffer.
4849
///

0 commit comments

Comments
 (0)