Get rid of most unsafe in decoder (1% speed loss)
This commit is contained in:
parent
b25b060945
commit
239a22926c
1 changed file with 77 additions and 106 deletions
131
src/decode.rs
131
src/decode.rs
|
@ -7,41 +7,7 @@ use crate::consts::{
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::header::Header;
|
use crate::header::Header;
|
||||||
use crate::pixel::{Pixel, SupportedChannels};
|
use crate::pixel::{Pixel, SupportedChannels};
|
||||||
use crate::utils::unlikely;
|
use crate::utils::{cold, likely, unlikely};
|
||||||
|
|
||||||
struct ReadBuf {
|
|
||||||
current: *const u8,
|
|
||||||
end: *const u8,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ReadBuf {
|
|
||||||
pub unsafe fn new(ptr: *const u8, len: usize) -> Self {
|
|
||||||
Self { current: ptr, end: ptr.add(len) }
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn read(&mut self) -> u8 {
|
|
||||||
unsafe {
|
|
||||||
let v = self.current.read();
|
|
||||||
self.current = self.current.add(1);
|
|
||||||
v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
|
|
||||||
unsafe {
|
|
||||||
let v = self.current.cast::<[u8; N]>().read();
|
|
||||||
self.current = self.current.add(N);
|
|
||||||
v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn within_bounds(&self) -> bool {
|
|
||||||
self.current < self.end
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn qoi_decode_impl<const N: usize>(data: &[u8], n_pixels: usize) -> Result<Vec<u8>>
|
pub fn qoi_decode_impl<const N: usize>(data: &[u8], n_pixels: usize) -> Result<Vec<u8>>
|
||||||
where
|
where
|
||||||
|
@ -54,84 +20,89 @@ where
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut pixels = Vec::<Pixel<N>>::with_capacity(n_pixels);
|
let mut pixels = vec![Pixel::<N>::new(); n_pixels];
|
||||||
unsafe {
|
let mut index = [Pixel::new(); 256];
|
||||||
// Safety: we have just allocated enough memory to set the length without problems
|
|
||||||
// We will also fill the entire array, and the data type is pod, so there's no UB.
|
|
||||||
pixels.set_len(n_pixels);
|
|
||||||
}
|
|
||||||
let mut buf = unsafe {
|
|
||||||
// Safety: we will check within the loop that there are no reads outside the slice
|
|
||||||
// (note that QOI_PADDING_SIZE covers all possible read options within a single op)
|
|
||||||
ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), data.len() - QOI_HEADER_SIZE)
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut index = [Pixel::new(); 64];
|
|
||||||
let mut px = Pixel::new().with_a(0xff);
|
let mut px = Pixel::new().with_a(0xff);
|
||||||
let mut run = 0_u8;
|
|
||||||
|
|
||||||
for px_out in &mut pixels {
|
|
||||||
if run != 0 {
|
|
||||||
run -= 1;
|
|
||||||
*px_out = px;
|
|
||||||
continue;
|
|
||||||
} else if unlikely(!buf.within_bounds()) {
|
|
||||||
return Err(Error::UnexpectedBufferEnd);
|
|
||||||
}
|
|
||||||
|
|
||||||
const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f;
|
const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f;
|
||||||
const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f)
|
const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // <- note, 0x3d (not 0x3f)
|
||||||
const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f;
|
const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f;
|
||||||
const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f;
|
const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f;
|
||||||
|
|
||||||
match buf.read() {
|
{
|
||||||
b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END => {
|
let mut pixels = &mut pixels[..];
|
||||||
px = unsafe {
|
let mut data = &data[QOI_HEADER_SIZE..];
|
||||||
// Safety: (b1 ^ QOI_INDEX) is guaranteed to be at most 6 bits
|
loop {
|
||||||
*index.get_unchecked(usize::from(b1 ^ QOI_OP_INDEX))
|
match pixels {
|
||||||
};
|
[px_out, tail @ ..] => {
|
||||||
|
pixels = tail;
|
||||||
|
match data {
|
||||||
|
[b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END, dtail @ ..] => {
|
||||||
|
px = index[usize::from(*b1)];
|
||||||
*px_out = px;
|
*px_out = px;
|
||||||
|
data = dtail;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
QOI_OP_RGB => {
|
[QOI_OP_RGB, r, g, b, dtail @ ..] => {
|
||||||
px = Pixel::from_rgb(Pixel::from_array(buf.read_array::<3>()), px.a_or(0xff));
|
px = Pixel::from_rgb(Pixel::from_array([*r, *g, *b]), px.a_or(0xff));
|
||||||
|
data = dtail;
|
||||||
}
|
}
|
||||||
QOI_OP_RGBA => {
|
[QOI_OP_RGBA, r, g, b, a, dtail @ ..] => {
|
||||||
px = Pixel::from_array(buf.read_array::<4>());
|
if N == 4 {
|
||||||
|
cold();
|
||||||
}
|
}
|
||||||
b1 @ QOI_OP_RUN..=QOI_OP_RUN_END => {
|
px = Pixel::from_array([*r, *g, *b, *a]);
|
||||||
run = b1 & 0x3f;
|
data = dtail;
|
||||||
|
}
|
||||||
|
[b1 @ QOI_OP_RUN..=QOI_OP_RUN_END, dtail @ ..] => {
|
||||||
|
let run = usize::from(b1 & 0x3f).min(pixels.len());
|
||||||
*px_out = px;
|
*px_out = px;
|
||||||
|
if likely(run != 0) {
|
||||||
|
let (phead, ptail) = pixels.split_at_mut(run); // can't panic
|
||||||
|
phead.fill(px);
|
||||||
|
pixels = ptail;
|
||||||
|
}
|
||||||
|
data = dtail;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END => {
|
[b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END, dtail @ ..] => {
|
||||||
px.rgb_add(
|
px.rgb_add(
|
||||||
((b1 >> 4) & 0x03).wrapping_sub(2),
|
((b1 >> 4) & 0x03).wrapping_sub(2),
|
||||||
((b1 >> 2) & 0x03).wrapping_sub(2),
|
((b1 >> 2) & 0x03).wrapping_sub(2),
|
||||||
(b1 & 0x03).wrapping_sub(2),
|
(b1 & 0x03).wrapping_sub(2),
|
||||||
);
|
);
|
||||||
|
data = dtail;
|
||||||
}
|
}
|
||||||
b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END => {
|
[b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END, b2, dtail @ ..] => {
|
||||||
let b2 = buf.read();
|
|
||||||
let vg = (b1 & 0x3f).wrapping_sub(32);
|
let vg = (b1 & 0x3f).wrapping_sub(32);
|
||||||
let vg_8 = vg.wrapping_sub(8);
|
let vg_8 = vg.wrapping_sub(8);
|
||||||
let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
|
let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
|
||||||
let vb = vg_8.wrapping_add(b2 & 0x0f);
|
let vb = vg_8.wrapping_add(b2 & 0x0f);
|
||||||
px.rgb_add(vr, vg, vb);
|
px.rgb_add(vr, vg, vb);
|
||||||
|
data = dtail;
|
||||||
}
|
}
|
||||||
};
|
_ => {
|
||||||
|
cold();
|
||||||
unsafe {
|
if unlikely(data.len() < 8) {
|
||||||
// Safety: hash_index() is computed mod 64, so it will never go out of bounds
|
return Err(Error::UnexpectedBufferEnd);
|
||||||
*index.get_unchecked_mut(usize::from(px.hash_index())) = px;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
index[usize::from(px.hash_index())] = px;
|
||||||
*px_out = px;
|
*px_out = px;
|
||||||
}
|
}
|
||||||
|
_ => {
|
||||||
|
cold();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let bytes = unsafe {
|
|
||||||
// Safety: this is safe because we have previously set all the lengths ourselves
|
|
||||||
let ptr = pixels.as_mut_ptr();
|
let ptr = pixels.as_mut_ptr();
|
||||||
mem::forget(pixels);
|
mem::forget(pixels);
|
||||||
|
let bytes = unsafe {
|
||||||
|
// Safety: this is safe because we have previously set all the lengths ourselves
|
||||||
Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N)
|
Vec::from_raw_parts(ptr.cast(), n_pixels * N, n_pixels * N)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue