|
//! Tools for reading and writing abomonated data in an alignment-aware way
//!
//! In order to enable UB-free in-place deserialization, abomonated objects
//! follow Rust's normal memory alignment rules. This requires inserting padding
//! bytes between serialized data and skipping them on readout. This module
//! provides tools to take care of this.
| 7 | +
|
| 8 | +use std::{ |
| 9 | + io::Write, |
| 10 | + mem, |
| 11 | + ptr::NonNull, |
| 12 | +}; |
| 13 | + |
| 14 | + |
/// Alignment-aware binary data writer
///
/// This wrapper around a standard Rust writer allows writing multiple binary
/// objects in a sequence with a memory layout that is suitable for in-place
/// readout. It does so by inserting padding bytes between the objects as if
/// they were members of a well-aligned C-style struct whose alignment is the
/// maximum of the alignment of all written objects.
#[derive(Debug)]
pub struct AlignedWriter<W: Write> {
    /// Inner writer to which data is eventually dispatched
    inner: W,

    /// Number of bytes (payload and padding) sent to the inner writer so far
    written_so_far: usize,

    /// Expected alignment of the output data
    ///
    /// This field only exists in debug builds, so any code that reads it must
    /// itself be compiled out of release builds.
    #[cfg(debug_assertions)]
    output_alignment: usize,
}
| 33 | + |
| 34 | +impl<W: Write> AlignedWriter<W> { |
| 35 | + /// Prepare a writer for alignment-aware binary writes |
| 36 | + /// |
| 37 | + /// In debug builds, `AlignedWriter` will check that the output memory |
| 38 | + /// allocation is sufficiently well-aligned for the data that is written |
| 39 | + /// into it, as per the `output_alignment` parameter to this function. |
| 40 | + // |
| 41 | + // FIXME: output_alignment should be #[cfg(debug_assertions)], but at the |
| 42 | + // moment Rust 1.39 is a bit too freshly released to rely on that. |
| 43 | + #[allow(unused)] |
| 44 | + pub fn new(inner: W, output_alignment: usize) -> Self { |
| 45 | + Self { |
| 46 | + inner, |
| 47 | + written_so_far: 0, |
| 48 | + #[cfg(debug_assertions)] output_alignment, |
| 49 | + } |
| 50 | + } |
| 51 | + |
| 52 | + /// Write arbitrary binary data into the inner writer |
| 53 | + /// |
| 54 | + /// This is unsafe because Rust does not yet provide an UB-free way to |
| 55 | + /// expose the padding bytes of arbitrary T objects to writers. |
| 56 | + pub unsafe fn write_slice<T>(&mut self, data: &[T]) -> crate::IOResult<()> { |
| 57 | + // Check how aligned the binary data needs to be |
| 58 | + let alignment = mem::align_of_val::<[T]>(data); |
| 59 | + |
| 60 | + // In debug builds, check that the output allocation has sufficiently |
| 61 | + // strong alignment for the data that's being written to it. |
| 62 | + // |
| 63 | + // If the output alignment is too low, readout may go wrong because the |
| 64 | + // AlignedReader will skip a number of padding bytes that may not be |
| 65 | + // in sync with the amount that AlignedWriter has inserted, in a manner |
| 66 | + // that depends on how the data being read out was _actually_ aligned. |
| 67 | + debug_assert!( |
| 68 | + if cfg!(debug_assertions) { alignment <= self.output_alignment } else { true }, |
| 69 | + "Insufficient output alignment (output alignment is {}, got data of alignment {})", |
| 70 | + self.output_alignment, alignment |
| 71 | + ); |
| 72 | + |
| 73 | + // Inject padding bytes until the output is well-aligned, assuming that |
| 74 | + // the first byte that was written was well-aligned for all output data. |
| 75 | + while self.written_so_far % alignment != 0 { |
| 76 | + self.inner.write_all(&[0u8])?; |
| 77 | + self.written_so_far += 1; |
| 78 | + } |
| 79 | + |
| 80 | + // Write down the binary data and exit |
| 81 | + // FIXME: Move write_bytes functionality here |
| 82 | + crate::write_bytes(&mut self.inner, data)?; |
| 83 | + self.written_so_far += mem::size_of_val::<[T]>(data); |
| 84 | + Ok(()) |
| 85 | + } |
| 86 | + |
| 87 | + /// Convenience function for non-slice data |
| 88 | + /// |
| 89 | + /// This is unsafe for the same reason that `write_slice` is. |
| 90 | + pub unsafe fn write<T>(&mut self, data: &T) -> crate::IOResult<()> { |
| 91 | + self.write_slice(std::slice::from_ref(data)) |
| 92 | + } |
| 93 | + |
| 94 | + /// Query how much data was written so far |
| 95 | + pub fn written_so_far(&self) -> usize { |
| 96 | + self.written_so_far |
| 97 | + } |
| 98 | +} |
| 99 | + |
| 100 | +impl<W: Write> Write for AlignedWriter<W> { |
| 101 | + fn write(&mut self, buf: &[u8]) -> crate::IOResult<usize> { |
| 102 | + // This will write buf.len() data because bytes are always well-aligned |
| 103 | + // It is safe because &[u8] has no padding bytes |
| 104 | + unsafe { self.write_slice(buf)? }; |
| 105 | + Ok(buf.len()) |
| 106 | + } |
| 107 | + |
| 108 | + fn flush(&mut self) -> crate::IOResult<()> { |
| 109 | + // No flushing necessary, we don't buffer anything |
| 110 | + Ok(()) |
| 111 | + } |
| 112 | +} |
| 113 | + |
| 114 | + |
/// Slice-of-bytes reader for data written by `AlignedWriter`
///
/// This reader takes as input a bunch of bytes that were written by
/// `AlignedWriter` and allows fetching back the corresponding binary data under
/// the assumption that the input bytes are aligned on the max of the alignment
/// of all the data that was written by `AlignedWriter`.
#[derive(Debug)]
pub struct AlignedReader<'bytes> {
    /// Remaining bytes to be read
    bytes: &'bytes mut [u8],

    /// Expected alignment of the input data
    ///
    /// This field only exists in debug builds, so any code that reads it must
    /// itself be compiled out of release builds.
    #[cfg(debug_assertions)]
    input_alignment: usize,
}
| 129 | + |
| 130 | +impl<'bytes> AlignedReader<'bytes> { |
| 131 | + /// Prepare some bytes for alignment-aware readout |
| 132 | + /// |
| 133 | + /// In debug builds, `AlignedReader` will check that the input bytes were |
| 134 | + /// sufficiently well-aligned for the data that is being read from it, as |
| 135 | + /// per the `input_alignment` parameter to this function. |
| 136 | + // |
| 137 | + // FIXME: input_alignment should be #[cfg(debug_assertions)], but at the |
| 138 | + // moment Rust 1.39 is a bit too freshly released to rely on that. |
| 139 | + #[allow(unused)] |
| 140 | + pub fn new(bytes: &'bytes mut [u8], input_alignment: usize) -> Self { |
| 141 | + debug_assert_eq!((bytes.as_ptr() as usize) % input_alignment, 0, |
| 142 | + "Input data is not aligned on a {}-byte boundary as expected", |
| 143 | + input_alignment); |
| 144 | + Self { |
| 145 | + bytes, |
| 146 | + #[cfg(debug_assertions)] input_alignment, |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + /// Read a slice of data of arbitrary type from the inner bytes, returns a |
| 151 | + /// pointer to the first element of the slice, or None if the request |
| 152 | + /// overflows the input bytes. |
| 153 | + // |
| 154 | + // FIXME: This should return a NonNull<[T]>, but pointers to slices are not |
| 155 | + // ergonomic enough at this point in time. |
| 156 | + pub fn read_slice<T>(&mut self, len: usize) -> Option<NonNull<T>> { |
| 157 | + // As far as I know, zero-length slices may be aligned differently but |
| 158 | + // all nonzero-length slices are aligned identically |
| 159 | + let alignment = if len == 0 { |
| 160 | + mem::align_of::<[T; 0]>() |
| 161 | + } else { |
| 162 | + mem::align_of::<[T; 1]>() |
| 163 | + }; |
| 164 | + |
| 165 | + // In debug builds, check that the input allocation has sufficiently |
| 166 | + // strong alignment for the data that's being read from it. |
| 167 | + // |
| 168 | + // If the input alignment is too low, readout may go wrong because the |
| 169 | + // AlignedReader will skip a number of padding bytes that may not be |
| 170 | + // in sync with the amount that AlignedWriter has inserted, in a manner |
| 171 | + // that depends on how the data being read out was _actually_ aligned. |
| 172 | + debug_assert!( |
| 173 | + if cfg!(debug_assertions) { alignment <= self.input_alignment } else { true }, |
| 174 | + "Insufficient input alignment (input alignment is {}, asked for data of alignment {})", |
| 175 | + self.input_alignment, alignment |
| 176 | + ); |
| 177 | + |
| 178 | + // Drop the alignment padding bytes leading up to the inner T-typed data |
| 179 | + let misalignment = self.bytes.as_ptr() as usize % alignment; |
| 180 | + if misalignment != 0 { |
| 181 | + let offset = alignment - misalignment; |
| 182 | + if offset > self.bytes.len() { return None; } |
| 183 | + // In an ideal world, one could just write: |
| 184 | + // self.bytes = &mut self.bytes[offset..] |
| 185 | + // Alas, in this world, we need... |
| 186 | + self.bytes = unsafe { |
| 187 | + mem::transmute::<&mut [u8], &'bytes mut [u8]>(&mut self.bytes[offset..]) |
| 188 | + }; |
| 189 | + } |
| 190 | + |
| 191 | + // Make sure that we sill have enough bytes for readout |
| 192 | + let size = mem::size_of::<T>() * len; |
| 193 | + if size > self.bytes.len() { return None; } |
| 194 | + |
| 195 | + // Extract the inner T-typed data |
| 196 | + // This is safe because we checked that the input size is large enough |
| 197 | + // and the first pointer of a slice cannot be null |
| 198 | + let (out, rest) = self.bytes.split_at_mut(size); |
| 199 | + let result: NonNull<T> = unsafe { |
| 200 | + NonNull::new_unchecked(out.as_mut_ptr() as *mut T) |
| 201 | + }; |
| 202 | + |
| 203 | + // Update the inner slice. In an ideal world, one could just write |
| 204 | + // self.bytes = rest |
| 205 | + // Alas, in this world, we need... |
| 206 | + self.bytes = unsafe { |
| 207 | + mem::transmute::<&mut [u8], &'bytes mut [u8]>(rest) |
| 208 | + }; |
| 209 | + Some(result) |
| 210 | + } |
| 211 | + |
| 212 | + /// Read arbitrary data from the inner bytes |
| 213 | + pub fn read<T>(&mut self) -> Option<NonNull<T>> { |
| 214 | + self.read_slice(1) |
| 215 | + } |
| 216 | + |
| 217 | + /// Extract the remaining bytes |
| 218 | + pub fn remaining(self) -> &'bytes mut [u8] { |
| 219 | + self.bytes |
| 220 | + } |
| 221 | +} |
| 222 | + |
| 223 | + |
| 224 | +// TODO: Add some tests |
0 commit comments