Skip to content

Commit 51edac5

Browse files
committed
Provide abstractions for properly aligning abomonated bytes
1 parent 77583c2 commit 51edac5

File tree

4 files changed

+220
-4
lines changed

4 files changed

+220
-4
lines changed

src/align/alloc.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/// Tools for storing abomonated objects with correct alignment
2+
///
3+
/// Use of `decode::<T>()` requires that the input bytes are aligned on a
4+
/// `T::alignment()` boundary, or else undefined behavior will ensue.
5+
///
6+
/// This module provides tools for ensuring this alignment constraint on input
7+
/// bytes of unknown or known-incorrect alignment before calling `decode()`.
8+
9+
use crate::{
10+
Entomb,
11+
Exhume,
12+
};
13+
14+
use std::{
15+
alloc::{self, Layout},
16+
marker::PhantomData,
17+
ops::{Deref, DerefMut},
18+
ptr::NonNull,
19+
};
20+
21+
22+
/// Overaligned `Box<[u8]>` for abomonated objects of type T
23+
///
24+
/// Compared with a regular `Box<[u8]>`, this heap-allocated bag of bytes also
25+
/// ensures that the heap allocation is aligned on `T::alignment()`, and thus
26+
/// suitable for use as input to `decode::<T>()`.
27+
pub struct Coffin<T: Entomb>(NonNull<[u8]>, PhantomData<T>);
28+
29+
impl<T: Entomb> Coffin<T> {
    /// Copy abomonated bytes into a suitably aligned heap allocation
    ///
    /// The new allocation is `bytes.len()` bytes long, aligned on
    /// `T::alignment()`, and initialized with a copy of `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is empty, or if `bytes.len()` and `T::alignment()`
    /// do not form a valid `Layout` (see `Self::layout()`). In debug builds,
    /// it also panics if `bytes` is shorter than `size_of::<T>()`.
    ///
    /// May abort the computation if memory is exhausted or the system allocator
    /// is not able to satisfy the size or alignment requirements.
    pub fn new(bytes: &[u8]) -> Self {
        // Perform the memory allocation using the system allocator.
        let size = bytes.len();
        let layout = Self::layout(size);

        // SAFETY: `Self::layout()` has checked the preconditions of the
        // allocator: the size is nonzero and the layout is valid.
        let ptr = unsafe { alloc::alloc(layout) };

        // Abort on memory allocation errors the recommended way. Since the
        // system allocator may abort, no point in not aborting ourselves...
        if ptr.is_null() { alloc::handle_alloc_error(layout); }

        // Transfer the input bytes to our new allocation.
        //
        // SAFETY:
        // - `bytes.as_ptr()` has to be valid for `size` by slice construction
        // - `ptr` is non-null and must point to a memory region of `size` bytes
        // - Pointers are always byte-aligned, so alignment is irrelevant.
        // - Heap allocations may not overlap with existing objects.
        unsafe { ptr.copy_from_nonoverlapping(bytes.as_ptr(), size); }

        // Produce the output slice.
        //
        // SAFETY:
        // - We don't care about lifetimes as we want a NonNull in the end
        // - As discussed above, `ptr` is non-null and well-aligned.
        // - The bytes of the slice have been initialized above
        Self(unsafe { std::slice::from_raw_parts_mut(ptr, size) }.into(),
             PhantomData)
    }

    /// Compute the proper layout for a coffin allocation, checking the safety
    /// preconditions of the system memory allocator along the way.
    ///
    /// We handle errors via panics because they all emerge from edge cases that
    /// should only be encountered by users actively trying to break this code.
    ///
    /// # Panics
    ///
    /// Panics if `size` is zero, or if `Layout::from_size_align()` rejects the
    /// combination of `size` and `T::alignment()`. In debug builds, it also
    /// panics if `size` is smaller than `size_of::<T>()`.
    fn layout(size: usize) -> Layout {
        // Basic sanity check for debug builds. The comparison must not be
        // strict: a value without any out-of-line data is abomonated into
        // exactly `size_of::<T>()` bytes, which is a perfectly valid input.
        debug_assert!(size >= std::mem::size_of::<T>(),
                      "Requested size is quite obviously not big enough");

        // We're going to use the system allocator, so we cannot accept
        // zero-sized slices of bytes.
        assert!(size > 0, "Allocation size must be positive");

        // At this point, the only layout errors that remain are those caused by
        // a bad Abomonation::alignment implementation (alignment is zero or not
        // a power of 2) or by a huge input size (close to usize::MAX).
        Layout::from_size_align(size, T::alignment())
               .expect("Bad Abomonation::alignment() impl or excessive size")
    }
}
81+
82+
impl<T> Deref for Coffin<T>
where
    T: Entomb,
{
    type Target = [u8];

    /// Borrow the bytes stored in the coffin.
    fn deref(&self) -> &[u8] {
        // SAFETY:
        // - The allocation behind the pointer stays live until the Coffin is
        //   dropped.
        // - The returned borrow is tied to `&self`, so the usual borrow
        //   checking rules keep callers from aliasing or retaining it in an
        //   invalid way.
        //
        // ...the Drop implementation documents one possible edge case, though.
        unsafe { &*self.0.as_ptr() }
    }
}
95+
96+
impl<T> DerefMut for Coffin<T>
where
    T: Entomb,
{
    /// Mutably borrow the bytes stored in the coffin.
    fn deref_mut(&mut self) -> &mut [u8] {
        // SAFETY: Same reasoning as for Deref: the allocation is live for as
        // long as the Coffin is, and the borrow is tied to `&mut self`.
        unsafe { &mut *self.0.as_ptr() }
    }
}
102+
103+
impl<T> Drop for Coffin<T>
where
    T: Entomb,
{
    /// Release the heap allocation owned by the coffin.
    fn drop(&mut self) {
        // Looking at the stored slice should be safe for the same reason that
        // Deref is. There is a known wrinkle that affects all of those, though:
        //
        // For any form of Deref to be safe, the Rust compiler must prevent LLVM
        // from inserting memory reads from the slice after deallocation, and
        // currently it doesn't.
        //
        // There is no clear reason why LLVM would do this, and `std` faces the
        // same problem everywhere, so we'll take the risk...
        //
        // FIXME: Once the Rust team has figured out the right way to handle
        //        this, use it here if it requires manual action.
        //
        // Here's one ongoing discussion of this topic for reference:
        // https://github.com/rust-lang/rust/issues/55005
        let length = unsafe { self.0.as_ref() }.len();
        let start = self.0.as_ptr() as *mut u8;

        // The layout is recomputed in the same way as in `Coffin::new()`, from
        // a slice length which equals that of new's input bytes.
        let layout = Self::layout(length);

        // SAFETY: Every Coffin is created with its own allocation, only Drop
        // can liberate it, and Drop will only be called once. The layout
        // matches the one used at allocation time, as explained above.
        unsafe { alloc::dealloc(start, layout); }
    }
}
131+
132+
133+
/// `Cow`-style abstraction for aligning abomonated bytes before `decode()`
134+
///
135+
/// Often, one needs to decode input bytes which are _probably_ well-aligned,
136+
/// but may not always to be. For example, POSIX memory allocations are aligned
137+
/// on 16-byte boundaries, which is sufficient for most types... as long as
138+
/// multiple abomonated objects are not stored in a sequence without padding
139+
/// bytes in between.
140+
///
141+
/// In those circumstances, pessimistically using `Coffin<T>` all the time
142+
/// would cause unnecessarily intensive use of the system memory allocator.
143+
/// Instead, it is better to check if the input bytes are well-aligned and only
144+
/// reallocate them if necessary, which is what this abstraction does.
145+
pub enum AlignedBytes<'bytes, T: Exhume<'bytes>> {
146+
/// The orignal bytes were sufficiently well-aligned
147+
Borrowed(&'bytes mut [u8]),
148+
149+
/// The abomonated bytes were relocated into a well-aligned heap location
150+
Owned(Coffin<T>),
151+
}
152+
153+
impl<'bytes, T: Exhume<'bytes>> AlignedBytes<'bytes, T> {
154+
/// Prepare possibly misaligned bytes for decoding
155+
pub fn new(bytes: &'bytes mut [u8]) -> Self {
156+
let misalignment = (bytes.as_ptr() as usize) % T::alignment();
157+
if misalignment == 0 {
158+
Self::Borrowed(bytes)
159+
} else {
160+
Self::Owned(Coffin::new(bytes))
161+
}
162+
}
163+
}
164+
165+
impl<'bytes, T: Exhume<'bytes>> From<&'bytes mut [u8]> for AlignedBytes<'bytes, T> {
166+
fn from(bytes: &'bytes mut [u8]) -> Self {
167+
Self::new(bytes)
168+
}
169+
}
170+
171+
impl<'bytes, T: Exhume<'bytes>> From<Coffin<T>> for AlignedBytes<'bytes, T> {
172+
fn from(coffin: Coffin<T>) -> Self {
173+
Self::Owned(coffin)
174+
}
175+
}
176+
177+
impl<'bytes, T: Exhume<'bytes>> Deref for AlignedBytes<'bytes, T> {
178+
type Target = [u8];
179+
180+
fn deref(&self) -> &[u8] {
181+
match self {
182+
Self::Borrowed(b) => b,
183+
Self::Owned(o) => o,
184+
}
185+
}
186+
}
187+
188+
impl<'bytes, T: Exhume<'bytes>> DerefMut for AlignedBytes<'bytes, T> {
189+
fn deref_mut(&mut self) -> &mut [u8] {
190+
match self {
191+
Self::Borrowed(b) => b,
192+
Self::Owned(o) => o,
193+
}
194+
}
195+
}
196+
197+
198+
// TODO: Add tests

src/align/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
/// Utilities for handling alignment in abomonated data
22
33
mod io;
4+
mod alloc;
45

56
#[deprecated(note = "Made pub for internal unsafe_abomonate use only")]
67
pub use self::io::{AlignedReader, AlignedWriter};
8+
9+
pub use self::alloc::{AlignedBytes, Coffin};

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ pub unsafe fn encode<T: Entomb, W: Write>(typed: &T, write: W) -> IOResult<()> {
119119
/// abomonated data of type T, which you can check with `T::alignment()`.
120120
/// Failure to meet this requirement will result in undefined behavior.
121121
///
122+
/// If you are not able to guarantee sufficient alignment from your data source, you may find the
123+
/// `align::AlignedBytes<T>` utility useful. It checks if your data is well-aligned, and moves it
124+
/// into a well-aligned heap allocation otherwise.
125+
///
122126
/// # Examples
123127
/// ```
124128
/// use abomonation::{encode, decode};

tests/tests.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
extern crate abomonation;
22

33
use abomonation::*;
4+
use abomonation::align::AlignedBytes;
45
use std::fmt::Debug;
56

67
// Test struct for the unsafe_abomonate macro
@@ -135,10 +136,20 @@ fn test_multiple_encode_decode() {
135136
unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); }
136137
unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); }
137138

138-
let (t, r) = unsafe { decode::<u32>(&mut bytes) }.unwrap(); assert_eq!(*t, 0);
139-
let (t, r) = unsafe { decode::<u64>(r) }.unwrap(); assert_eq!(*t, 7);
140-
let (t, r) = unsafe { decode::<Vec<i32>>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]);
141-
let (t, _r) = unsafe { decode::<String>(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned());
139+
let (t, r) = unsafe { decode::<u32>(&mut bytes) }.unwrap();
140+
assert_eq!(*t, 0);
141+
142+
let mut r = AlignedBytes::<u64>::new(r);
143+
let (t, r) = unsafe { decode::<u64>(&mut r) }.unwrap();
144+
assert_eq!(*t, 7);
145+
146+
let mut r = AlignedBytes::<Vec<i32>>::new(r);
147+
let (t, r) = unsafe { decode::<Vec<i32>>(&mut r) }.unwrap();
148+
assert_eq!(*t, vec![1,2,3]);
149+
150+
let mut r = AlignedBytes::<String>::new(r);
151+
let (t, _r) = unsafe { decode::<String>(&mut r) }.unwrap();
152+
assert_eq!(*t, "grawwwwrr".to_owned());
142153
}
143154

144155
#[test]

0 commit comments

Comments
 (0)