Get rid of most unsafe in decoder (1% speed loss)
This commit is contained in:
parent
b25b060945
commit
239a22926c
1 changed files with 77 additions and 106 deletions
183
src/decode.rs
183
src/decode.rs
|
@ -7,41 +7,7 @@ use crate::consts::{
|
|||
use crate::error::{Error, Result};
|
||||
use crate::header::Header;
|
||||
use crate::pixel::{Pixel, SupportedChannels};
|
||||
use crate::utils::unlikely;
|
||||
|
||||
/// Forward-only cursor over a raw byte range that reads without per-call
/// bounds checks in release builds.
///
/// `current` points at the next byte to be read; `end` points one past the
/// last byte of the range that was handed to [`ReadBuf::new`].
struct ReadBuf {
    current: *const u8,
    end: *const u8,
}

impl ReadBuf {
    /// Creates a cursor over the `len` bytes starting at `ptr`.
    ///
    /// # Safety
    ///
    /// * `ptr..ptr + len` must lie inside a single live allocation that stays
    ///   valid (and is not written to) for as long as the cursor is used.
    /// * Every subsequent [`read`](Self::read) / [`read_array`](Self::read_array)
    ///   call must stay inside that range. Those methods are not bounds-checked
    ///   in release builds, so the caller has to guard them, e.g. with
    ///   [`within_bounds`](Self::within_bounds) plus enough trailing padding to
    ///   cover the longest single read.
    pub unsafe fn new(ptr: *const u8, len: usize) -> Self {
        // SAFETY (caller contract): `ptr + len` is at most one past the end of
        // the allocation, which is what `add` requires.
        Self { current: ptr, end: ptr.add(len) }
    }

    /// Number of bytes left between the cursor and the end of the range.
    ///
    /// Saturates at zero if the cursor has already been advanced past `end`.
    #[inline]
    fn remaining(&self) -> usize {
        (self.end as usize).saturating_sub(self.current as usize)
    }

    /// Reads one byte and advances the cursor by one.
    ///
    /// The read is unchecked in release builds (see the contract on
    /// [`ReadBuf::new`]); debug builds panic on an out-of-range read instead
    /// of silently reading past the buffer.
    #[inline]
    pub fn read(&mut self) -> u8 {
        debug_assert!(self.remaining() >= 1, "ReadBuf::read past the end of the buffer");
        // SAFETY: the creator of this cursor promised that all reads stay
        // inside the range passed to `new`; checked above in debug builds.
        unsafe {
            let v = self.current.read();
            self.current = self.current.add(1);
            v
        }
    }

    /// Reads `N` consecutive bytes as an array and advances the cursor by `N`.
    ///
    /// The read is unchecked in release builds (see the contract on
    /// [`ReadBuf::new`]); debug builds panic on an out-of-range read.
    #[inline]
    pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
        debug_assert!(self.remaining() >= N, "ReadBuf::read_array past the end of the buffer");
        // SAFETY: same contract as `read`. `[u8; N]` has alignment 1, so the
        // cast pointer is always suitably aligned for `read`.
        unsafe {
            let v = self.current.cast::<[u8; N]>().read();
            self.current = self.current.add(N);
            v
        }
    }

    /// Returns `true` while the cursor has not yet reached `end`, i.e. at
    /// least one more byte of the range is available.
    #[inline]
    pub fn within_bounds(&self) -> bool {
        self.current < self.end
    }
}
|
||||
use crate::utils::{cold, likely, unlikely};
|
||||
|
||||
pub fn qoi_decode_impl<const N: usize>(data: &[u8], n_pixels: usize) -> Result<Vec<u8>>
|
||||
where
|
||||
|
@ -54,84 +20,89 @@ where
|
|||
});
|
||||
}
|
||||
|
||||
let mut pixels = Vec::<Pixel<N>>::with_capacity(n_pixels);
|
||||
unsafe {
|
||||
// Safety: we have just allocated enough memory to set the length without problems
|
||||
// We will also fill the entire array, and the data type is pod, so there's no UB.
|
||||
pixels.set_len(n_pixels);
|
||||
}
|
||||
let mut buf = unsafe {
|
||||
// Safety: we will check within the loop that there are no reads outside the slice
|
||||
// (note that QOI_PADDING_SIZE covers all possible read options within a single op)
|
||||
ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), data.len() - QOI_HEADER_SIZE)
|
||||
};
|
||||
|
||||
let mut index = [Pixel::new(); 64];
|
||||
let mut pixels = vec![Pixel::<N>::new(); n_pixels];
|
||||
let mut index = [Pixel::new(); 256];
|
||||
let mut px = Pixel::new().with_a(0xff);
|
||||
let mut run = 0_u8;
|
||||
|
||||
for px_out in &mut pixels {
|
||||
if run != 0 {
|
||||
run -= 1;
|
||||
*px_out = px;
|
||||
continue;
|
||||
} else if unlikely(!buf.within_bounds()) {
|
||||
return Err(Error::UnexpectedBufferEnd);
|
||||
const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f;
|
||||
const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f)
|
||||
const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f;
|
||||
const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f;
|
||||
|
||||
{
|
||||
let mut pixels = &mut pixels[..];
|
||||
let mut data = &data[QOI_HEADER_SIZE..];
|
||||
loop {
|
||||
match pixels {
|
||||
[px_out, tail @ ..] => {
|
||||
pixels = tail;
|
||||
match data {
|
||||
[b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END, dtail @ ..] => {
|
||||
px = index[usize::from(*b1)];
|
||||
*px_out = px;
|
||||
data = dtail;
|
||||
continue;
|
||||
}
|
||||
[QOI_OP_RGB, r, g, b, dtail @ ..] => {
|
||||
px = Pixel::from_rgb(Pixel::from_array([*r, *g, *b]), px.a_or(0xff));
|
||||
data = dtail;
|
||||
}
|
||||
[QOI_OP_RGBA, r, g, b, a, dtail @ ..] => {
|
||||
if N == 4 {
|
||||
cold();
|
||||
}
|
||||
px = Pixel::from_array([*r, *g, *b, *a]);
|
||||
data = dtail;
|
||||
}
|
||||
[b1 @ QOI_OP_RUN..=QOI_OP_RUN_END, dtail @ ..] => {
|
||||
let run = usize::from(b1 & 0x3f).min(pixels.len());
|
||||
*px_out = px;
|
||||
if likely(run != 0) {
|
||||
let (phead, ptail) = pixels.split_at_mut(run); // can't panic
|
||||
phead.fill(px);
|
||||
pixels = ptail;
|
||||
}
|
||||
data = dtail;
|
||||
continue;
|
||||
}
|
||||
[b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END, dtail @ ..] => {
|
||||
px.rgb_add(
|
||||
((b1 >> 4) & 0x03).wrapping_sub(2),
|
||||
((b1 >> 2) & 0x03).wrapping_sub(2),
|
||||
(b1 & 0x03).wrapping_sub(2),
|
||||
);
|
||||
data = dtail;
|
||||
}
|
||||
[b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END, b2, dtail @ ..] => {
|
||||
let vg = (b1 & 0x3f).wrapping_sub(32);
|
||||
let vg_8 = vg.wrapping_sub(8);
|
||||
let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
|
||||
let vb = vg_8.wrapping_add(b2 & 0x0f);
|
||||
px.rgb_add(vr, vg, vb);
|
||||
data = dtail;
|
||||
}
|
||||
_ => {
|
||||
cold();
|
||||
if unlikely(data.len() < 8) {
|
||||
return Err(Error::UnexpectedBufferEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
index[usize::from(px.hash_index())] = px;
|
||||
*px_out = px;
|
||||
}
|
||||
_ => {
|
||||
cold();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f;
|
||||
const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f)
|
||||
const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f;
|
||||
const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f;
|
||||
|
||||
match buf.read() {
|
||||
b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END => {
|
||||
px = unsafe {
|
||||
// Safety: (b1 ^ QOI_OP_INDEX) is guaranteed to be at most 6 bits
|
||||
*index.get_unchecked(usize::from(b1 ^ QOI_OP_INDEX))
|
||||
};
|
||||
*px_out = px;
|
||||
continue;
|
||||
}
|
||||
QOI_OP_RGB => {
|
||||
px = Pixel::from_rgb(Pixel::from_array(buf.read_array::<3>()), px.a_or(0xff));
|
||||
}
|
||||
QOI_OP_RGBA => {
|
||||
px = Pixel::from_array(buf.read_array::<4>());
|
||||
}
|
||||
b1 @ QOI_OP_RUN..=QOI_OP_RUN_END => {
|
||||
run = b1 & 0x3f;
|
||||
*px_out = px;
|
||||
continue;
|
||||
}
|
||||
b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END => {
|
||||
px.rgb_add(
|
||||
((b1 >> 4) & 0x03).wrapping_sub(2),
|
||||
((b1 >> 2) & 0x03).wrapping_sub(2),
|
||||
(b1 & 0x03).wrapping_sub(2),
|
||||
);
|
||||
}
|
||||
b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END => {
|
||||
let b2 = buf.read();
|
||||
let vg = (b1 & 0x3f).wrapping_sub(32);
|
||||
let vg_8 = vg.wrapping_sub(8);
|
||||
let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
|
||||
let vb = vg_8.wrapping_add(b2 & 0x0f);
|
||||
px.rgb_add(vr, vg, vb);
|
||||
}
|
||||
};
|
||||
|
||||
unsafe {
|
||||
// Safety: hash_index() is computed mod 64, so it will never go out of bounds
|
||||
*index.get_unchecked_mut(usize::from(px.hash_index())) = px;
|
||||
}
|
||||
*px_out = px;
|
||||
}
|
||||
|
||||
let ptr = pixels.as_mut_ptr();
|
||||
mem::forget(pixels);
|
||||
let bytes = unsafe {
|
||||
// Safety: this is safe because we have previously set all the lengths ourselves
|
||||
let ptr = pixels.as_mut_ptr();
|
||||
mem::forget(pixels);
|
||||
Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N)
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue