From 239a22926c5d7303c1563b3489c894d4fdeb5172 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov Date: Thu, 30 Dec 2021 13:22:22 +0300 Subject: [PATCH] Get rid of most unsafe in decoder (1% speed loss) --- src/decode.rs | 183 +++++++++++++++++++++----------------------------- 1 file changed, 77 insertions(+), 106 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 42ce006..d4f4fa3 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -7,41 +7,7 @@ use crate::consts::{ use crate::error::{Error, Result}; use crate::header::Header; use crate::pixel::{Pixel, SupportedChannels}; -use crate::utils::unlikely; - -struct ReadBuf { - current: *const u8, - end: *const u8, -} - -impl ReadBuf { - pub unsafe fn new(ptr: *const u8, len: usize) -> Self { - Self { current: ptr, end: ptr.add(len) } - } - - #[inline] - pub fn read(&mut self) -> u8 { - unsafe { - let v = self.current.read(); - self.current = self.current.add(1); - v - } - } - - #[inline] - pub fn read_array(&mut self) -> [u8; N] { - unsafe { - let v = self.current.cast::<[u8; N]>().read(); - self.current = self.current.add(N); - v - } - } - - #[inline] - pub fn within_bounds(&self) -> bool { - self.current < self.end - } -} +use crate::utils::{cold, likely, unlikely}; pub fn qoi_decode_impl(data: &[u8], n_pixels: usize) -> Result> where @@ -54,84 +20,89 @@ where }); } - let mut pixels = Vec::>::with_capacity(n_pixels); - unsafe { - // Safety: we have just allocated enough memory to set the length without problems - // We will also fill the entire array, and the data type is pod, so there's no UB. - pixels.set_len(n_pixels); - } - let mut buf = unsafe { - // Safety: we will check within the loop that there are no reads outside the slice - // (note that QOI_PADDING_SIZE covers all possible read options within a single op) - ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), data.len() - QOI_HEADER_SIZE) - }; - - let mut index = [Pixel::new(); 64]; + let mut pixels = vec![Pixel::::new(); n_pixels]; + let mut index = [Pixel::new(); 256]; let mut px = Pixel::new().with_a(0xff); - let mut run = 0_u8; - for px_out in &mut pixels { - if run != 0 { - run -= 1; - *px_out = px; - continue; - } else if unlikely(!buf.within_bounds()) { - return Err(Error::UnexpectedBufferEnd); + const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f; + const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f) + const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f; + const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f; + + { + let mut pixels = &mut pixels[..]; + let mut data = &data[QOI_HEADER_SIZE..]; + loop { + match pixels { + [px_out, tail @ ..] => { + pixels = tail; + match data { + [b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END, dtail @ ..] => { + px = index[usize::from(*b1)]; + *px_out = px; + data = dtail; + continue; + } + [QOI_OP_RGB, r, g, b, dtail @ ..] => { + px = Pixel::from_rgb(Pixel::from_array([*r, *g, *b]), px.a_or(0xff)); + data = dtail; + } + [QOI_OP_RGBA, r, g, b, a, dtail @ ..] => { + if N == 4 { + cold(); + } + px = Pixel::from_array([*r, *g, *b, *a]); + data = dtail; + } + [b1 @ QOI_OP_RUN..=QOI_OP_RUN_END, dtail @ ..] => { + let run = usize::from(b1 & 0x3f).min(pixels.len()); + *px_out = px; + if likely(run != 0) { + let (phead, ptail) = pixels.split_at_mut(run); // can't panic + phead.fill(px); + pixels = ptail; + } + data = dtail; + continue; + } + [b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END, dtail @ ..] => { + px.rgb_add( + ((b1 >> 4) & 0x03).wrapping_sub(2), + ((b1 >> 2) & 0x03).wrapping_sub(2), + (b1 & 0x03).wrapping_sub(2), + ); + data = dtail; + } + [b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END, b2, dtail @ ..] => { + let vg = (b1 & 0x3f).wrapping_sub(32); + let vg_8 = vg.wrapping_sub(8); + let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f); + let vb = vg_8.wrapping_add(b2 & 0x0f); + px.rgb_add(vr, vg, vb); + data = dtail; + } + _ => { + cold(); + if unlikely(data.len() < 8) { + return Err(Error::UnexpectedBufferEnd); + } + } + } + index[usize::from(px.hash_index())] = px; + *px_out = px; + } + _ => { + cold(); + break; + } + } } - - const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f; - const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f) - const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f; - const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f; - - match buf.read() { - b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END => { - px = unsafe { - // Safety: (b1 ^ QOI_INDEX) is guaranteed to be at most 6 bits - *index.get_unchecked(usize::from(b1 ^ QOI_OP_INDEX)) - }; - *px_out = px; - continue; - } - QOI_OP_RGB => { - px = Pixel::from_rgb(Pixel::from_array(buf.read_array::<3>()), px.a_or(0xff)); - } - QOI_OP_RGBA => { - px = Pixel::from_array(buf.read_array::<4>()); - } - b1 @ QOI_OP_RUN..=QOI_OP_RUN_END => { - run = b1 & 0x3f; - *px_out = px; - continue; - } - b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END => { - px.rgb_add( - ((b1 >> 4) & 0x03).wrapping_sub(2), - ((b1 >> 2) & 0x03).wrapping_sub(2), - (b1 & 0x03).wrapping_sub(2), - ); - } - b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END => { - let b2 = buf.read(); - let vg = (b1 & 0x3f).wrapping_sub(32); - let vg_8 = vg.wrapping_sub(8); - let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f); - let vb = vg_8.wrapping_add(b2 & 0x0f); - px.rgb_add(vr, vg, vb); - } - }; - - unsafe { - // Safety: hash_index() is computed mod 64, so it will never go out of bounds - *index.get_unchecked_mut(usize::from(px.hash_index())) = px; - } - *px_out = px; } + let ptr = pixels.as_mut_ptr(); + mem::forget(pixels); let bytes = unsafe { // Safety: this is safe because we have previously set all the lengths ourselves - let ptr = pixels.as_mut_ptr(); - mem::forget(pixels); Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N) };