Get rid of most unsafe in decoder (1% speed loss)

This commit is contained in:
Ivan Smirnov 2021-12-30 13:22:22 +03:00
parent b25b060945
commit 239a22926c

View file

@ -7,41 +7,7 @@ use crate::consts::{
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use crate::header::Header; use crate::header::Header;
use crate::pixel::{Pixel, SupportedChannels}; use crate::pixel::{Pixel, SupportedChannels};
use crate::utils::unlikely; use crate::utils::{cold, likely, unlikely};
/// Forward-only byte cursor over a raw memory range.
///
/// None of the read methods perform bounds checking; the only guard offered is
/// [`ReadBuf::within_bounds`], which the user of this type has to consult.
struct ReadBuf {
    /// Address of the next byte that will be handed out.
    current: *const u8,
    /// Address one past the last byte of the range given to `new`.
    end: *const u8,
}

impl ReadBuf {
    /// Creates a cursor positioned at `ptr` whose end marker is `ptr + len`.
    ///
    /// # Safety
    ///
    /// `ptr + len` is computed with `pointer::add`, so `ptr` and `ptr + len`
    /// must lie within (or one past the end of) the same allocation. Every
    /// later `read` / `read_array` call dereferences the cursor without any
    /// check, so the memory those calls touch must stay valid for reads.
    pub unsafe fn new(ptr: *const u8, len: usize) -> Self {
        let end = ptr.add(len);
        Self { current: ptr, end }
    }

    /// Returns the byte under the cursor and steps the cursor forward by one.
    ///
    /// No bounds check is performed here.
    #[inline]
    pub fn read(&mut self) -> u8 {
        let at = self.current;
        // SAFETY: not verified in this method; relies on the contract stated
        // on `new` and on the caller checking `within_bounds` beforehand.
        unsafe {
            self.current = at.add(1);
            at.read()
        }
    }

    /// Returns the next `N` bytes as an array and steps the cursor forward by `N`.
    ///
    /// No bounds check is performed here.
    #[inline]
    pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
        let at = self.current;
        // SAFETY: not verified in this method; relies on the contract stated
        // on `new`. `[u8; N]` has alignment 1, so the cast pointer is always
        // suitably aligned for the read.
        unsafe {
            self.current = at.add(N);
            at.cast::<[u8; N]>().read()
        }
    }

    /// Reports whether the cursor is still strictly before the end marker.
    #[inline]
    pub fn within_bounds(&self) -> bool {
        self.end > self.current
    }
}
pub fn qoi_decode_impl<const N: usize>(data: &[u8], n_pixels: usize) -> Result<Vec<u8>> pub fn qoi_decode_impl<const N: usize>(data: &[u8], n_pixels: usize) -> Result<Vec<u8>>
where where
@ -54,84 +20,89 @@ where
}); });
} }
let mut pixels = Vec::<Pixel<N>>::with_capacity(n_pixels); let mut pixels = vec![Pixel::<N>::new(); n_pixels];
unsafe { let mut index = [Pixel::new(); 256];
// Safety: we have just allocated enough memory to set the length without problems
// We will also fill the entire array, and the data type is pod, so there's no UB.
pixels.set_len(n_pixels);
}
let mut buf = unsafe {
// Safety: we will check within the loop that there are no reads outside the slice
// (note that QOI_PADDING_SIZE covers all possible read options within a single op)
ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), data.len() - QOI_HEADER_SIZE)
};
let mut index = [Pixel::new(); 64];
let mut px = Pixel::new().with_a(0xff); let mut px = Pixel::new().with_a(0xff);
let mut run = 0_u8;
for px_out in &mut pixels {
if run != 0 {
run -= 1;
*px_out = px;
continue;
} else if unlikely(!buf.within_bounds()) {
return Err(Error::UnexpectedBufferEnd);
}
const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f; const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f;
const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f) const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f)
const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f; const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f;
const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f; const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f;
match buf.read() { {
b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END => { let mut pixels = &mut pixels[..];
px = unsafe { let mut data = &data[QOI_HEADER_SIZE..];
// Safety: (b1 ^ QOI_INDEX) is guaranteed to be at most 6 bits loop {
*index.get_unchecked(usize::from(b1 ^ QOI_OP_INDEX)) match pixels {
}; [px_out, tail @ ..] => {
pixels = tail;
match data {
[b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END, dtail @ ..] => {
px = index[usize::from(*b1)];
*px_out = px; *px_out = px;
data = dtail;
continue; continue;
} }
QOI_OP_RGB => { [QOI_OP_RGB, r, g, b, dtail @ ..] => {
px = Pixel::from_rgb(Pixel::from_array(buf.read_array::<3>()), px.a_or(0xff)); px = Pixel::from_rgb(Pixel::from_array([*r, *g, *b]), px.a_or(0xff));
data = dtail;
} }
QOI_OP_RGBA => { [QOI_OP_RGBA, r, g, b, a, dtail @ ..] => {
px = Pixel::from_array(buf.read_array::<4>()); if N == 4 {
cold();
} }
b1 @ QOI_OP_RUN..=QOI_OP_RUN_END => { px = Pixel::from_array([*r, *g, *b, *a]);
run = b1 & 0x3f; data = dtail;
}
[b1 @ QOI_OP_RUN..=QOI_OP_RUN_END, dtail @ ..] => {
let run = usize::from(b1 & 0x3f).min(pixels.len());
*px_out = px; *px_out = px;
if likely(run != 0) {
let (phead, ptail) = pixels.split_at_mut(run); // can't panic
phead.fill(px);
pixels = ptail;
}
data = dtail;
continue; continue;
} }
b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END => { [b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END, dtail @ ..] => {
px.rgb_add( px.rgb_add(
((b1 >> 4) & 0x03).wrapping_sub(2), ((b1 >> 4) & 0x03).wrapping_sub(2),
((b1 >> 2) & 0x03).wrapping_sub(2), ((b1 >> 2) & 0x03).wrapping_sub(2),
(b1 & 0x03).wrapping_sub(2), (b1 & 0x03).wrapping_sub(2),
); );
data = dtail;
} }
b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END => { [b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END, b2, dtail @ ..] => {
let b2 = buf.read();
let vg = (b1 & 0x3f).wrapping_sub(32); let vg = (b1 & 0x3f).wrapping_sub(32);
let vg_8 = vg.wrapping_sub(8); let vg_8 = vg.wrapping_sub(8);
let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f); let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
let vb = vg_8.wrapping_add(b2 & 0x0f); let vb = vg_8.wrapping_add(b2 & 0x0f);
px.rgb_add(vr, vg, vb); px.rgb_add(vr, vg, vb);
data = dtail;
} }
}; _ => {
cold();
unsafe { if unlikely(data.len() < 8) {
// Safety: hash_index() is computed mod 64, so it will never go out of bounds return Err(Error::UnexpectedBufferEnd);
*index.get_unchecked_mut(usize::from(px.hash_index())) = px;
} }
}
}
index[usize::from(px.hash_index())] = px;
*px_out = px; *px_out = px;
} }
_ => {
cold();
break;
}
}
}
}
let bytes = unsafe {
// Safety: this is safe because we have previously set all the lengths ourselves
let ptr = pixels.as_mut_ptr(); let ptr = pixels.as_mut_ptr();
mem::forget(pixels); mem::forget(pixels);
let bytes = unsafe {
// Safety: this is safe because we have previously set all the lengths ourselves
Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N) Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N)
}; };