From b0467472ae3fa6502e309944ffa448ce2c1836a2 Mon Sep 17 00:00:00 2001
From: Ivan Smirnov <i.s.smirnov@gmail.com>
Date: Wed, 29 Dec 2021 22:41:39 +0300
Subject: [PATCH] Big rework: everything now matches the latest spec

---
 src/consts.rs |  24 ++----
 src/decode.rs | 107 +++++++++++---------------
 src/encode.rs | 202 +++++++++++++-------------------------------------
 src/pixel.rs  |  72 ++++++++++--------
 4 files changed, 144 insertions(+), 261 deletions(-)

diff --git a/src/consts.rs b/src/consts.rs
index d7a5f05..b143f90 100644
--- a/src/consts.rs
+++ b/src/consts.rs
@@ -1,23 +1,11 @@
-// 2-bit tag
-pub const QOI_INDEX: u8 = 0x00; // (00)000000
-pub const QOI_DIFF_8: u8 = 0x80; // (10)000000
+pub const QOI_OP_INDEX: u8 = 0x00; // 00xxxxxx
+pub const QOI_OP_DIFF: u8 = 0x40; // 01xxxxxx
+pub const QOI_OP_LUMA: u8 = 0x80; // 10xxxxxx
+pub const QOI_OP_RUN: u8 = 0xc0; // 11xxxxxx
+pub const QOI_OP_RGB: u8 = 0xfe; // 11111110
+pub const QOI_OP_RGBA: u8 = 0xff; // 11111111
 
-// 3-bit tag
-pub const QOI_RUN_8: u8 = 0x40; // (010)00000
-pub const QOI_RUN_16: u8 = 0x60; // (011)00000
-pub const QOI_DIFF_16: u8 = 0xc0; // (110)00000
-
-// 4-bit tag
-pub const QOI_DIFF_24: u8 = 0xe0; // (1110)0000
-pub const QOI_COLOR: u8 = 0xf0; // (1111)0000
-
-// tag masks
-#[allow(unused)]
 pub const QOI_MASK_2: u8 = 0xc0; // (11)000000
-#[allow(unused)]
-pub const QOI_MASK_3: u8 = 0xe0; // (111)00000
-#[allow(unused)]
-pub const QOI_MASK_4: u8 = 0xf0; // (1111)0000
 
 pub const QOI_HEADER_SIZE: usize = 14;
 
diff --git a/src/decode.rs b/src/decode.rs
index f06b561..42ce006 100644
--- a/src/decode.rs
+++ b/src/decode.rs
@@ -1,6 +1,9 @@
 use std::mem;
 
-use crate::consts::{QOI_HEADER_SIZE, QOI_INDEX, QOI_PADDING, QOI_PADDING_SIZE};
+use crate::consts::{
+    QOI_HEADER_SIZE, QOI_OP_DIFF, QOI_OP_INDEX, QOI_OP_LUMA, QOI_OP_RGB, QOI_OP_RGBA, QOI_OP_RUN,
+    QOI_PADDING_SIZE,
+};
 use crate::error::{Error, Result};
 use crate::header::Header;
 use crate::pixel::{Pixel, SupportedChannels};
@@ -25,6 +28,15 @@ impl ReadBuf {
         }
     }
 
+    #[inline]
+    pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
+        unsafe {
+            let v = self.current.cast::<[u8; N]>().read();
+            self.current = self.current.add(N);
+            v
+        }
+    }
+
     #[inline]
     pub fn within_bounds(&self) -> bool {
         self.current < self.end
@@ -45,17 +57,18 @@ where
     let mut pixels = Vec::<Pixel<N>>::with_capacity(n_pixels);
     unsafe {
         // Safety: we have just allocated enough memory to set the length without problems
+        // We will also fill the entire array, and the data type is POD, so there's no UB.
         pixels.set_len(n_pixels);
     }
-    let encoded_data_size = data.len() - QOI_HEADER_SIZE - QOI_PADDING_SIZE;
     let mut buf = unsafe {
         // Safety: we will check within the loop that there are no reads outside the slice
-        ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), encoded_data_size)
+        let size = data.len() - QOI_HEADER_SIZE - QOI_PADDING_SIZE; // padding absorbs over-reads
+        ReadBuf::new(data.as_ptr().add(QOI_HEADER_SIZE), size)
     };
 
     let mut index = [Pixel::new(); 64];
     let mut px = Pixel::new().with_a(0xff);
-    let mut run = 0_u16;
+    let mut run = 0_u8;
 
     for px_out in &mut pixels {
         if run != 0 {
@@ -66,82 +79,52 @@ where
             return Err(Error::UnexpectedBufferEnd);
         }
 
-        let b1 = buf.read();
-        match b1 >> 4 {
-            0..=3 => {
-                // QOI_INDEX
+        const QOI_OP_INDEX_END: u8 = QOI_OP_INDEX | 0x3f; // tag 00: 0x00..=0x3f
+        const QOI_OP_RUN_END: u8 = QOI_OP_RUN | 0x3d; // 0x3d, not 0x3f: 0xfe/0xff are RGB/RGBA
+        const QOI_OP_DIFF_END: u8 = QOI_OP_DIFF | 0x3f; // tag 01: 0x40..=0x7f
+        const QOI_OP_LUMA_END: u8 = QOI_OP_LUMA | 0x3f; // tag 10: 0x80..=0xbf
+
+        match buf.read() {
+            b1 @ QOI_OP_INDEX..=QOI_OP_INDEX_END => {
                 px = unsafe {
                     // Safety: (b1 ^ QOI_INDEX) is guaranteed to be at most 6 bits
-                    *index.get_unchecked(usize::from(b1 ^ QOI_INDEX))
+                    *index.get_unchecked(usize::from(b1 ^ QOI_OP_INDEX))
                 };
-            }
-            15 => {
-                // QOI_COLOR
-                if b1 & 8 != 0 {
-                    px.set_r(buf.read());
-                }
-                if b1 & 4 != 0 {
-                    px.set_g(buf.read());
-                }
-                if b1 & 2 != 0 {
-                    px.set_b(buf.read());
-                }
-                if b1 & 1 != 0 {
-                    px.set_a(buf.read());
-                }
-            }
-            12..=13 => {
-                // QOI_DIFF_16
-                let b2 = buf.read();
-                px.rgb_add(
-                    (b1 & 0x1f).wrapping_sub(16),
-                    (b2 >> 4).wrapping_sub(8),
-                    (b2 & 0x0f).wrapping_sub(8),
-                );
-            }
-            14 => {
-                // QOI_DIFF_24
-                let (b2, b3) = (buf.read(), buf.read());
-                px.rgba_add(
-                    (((b1 & 0x0f) << 1) | (b2 >> 7)).wrapping_sub(16),
-                    ((b2 & 0x7c) >> 2).wrapping_sub(16),
-                    (((b2 & 0x03) << 3) | ((b3 & 0xe0) >> 5)).wrapping_sub(16),
-                    (b3 & 0x1f).wrapping_sub(16),
-                );
-            }
-            4..=5 => {
-                // QOI_RUN_8
-                run = u16::from(b1 & 0x1f);
                 *px_out = px;
                 continue;
             }
-            8..=11 => {
-                // QOI_DIFF_8
+            QOI_OP_RGB => {
+                px = Pixel::from_rgb(Pixel::from_array(buf.read_array::<3>()), px.a_or(0xff));
+            }
+            QOI_OP_RGBA => {
+                px = Pixel::from_array(buf.read_array::<4>());
+            }
+            b1 @ QOI_OP_RUN..=QOI_OP_RUN_END => {
+                run = b1 & 0x3f;
+                *px_out = px;
+                continue;
+            }
+            b1 @ QOI_OP_DIFF..=QOI_OP_DIFF_END => {
                 px.rgb_add(
                     ((b1 >> 4) & 0x03).wrapping_sub(2),
                     ((b1 >> 2) & 0x03).wrapping_sub(2),
                     (b1 & 0x03).wrapping_sub(2),
                 );
             }
-            6..=7 => {
-                // QOI_RUN_16
-                run = 32 + ((u16::from(b1 & 0x1f) << 8) | u16::from(buf.read()));
-                *px_out = px;
-                continue;
+            b1 @ QOI_OP_LUMA..=QOI_OP_LUMA_END => {
+                let b2 = buf.read();
+                let vg = (b1 & 0x3f).wrapping_sub(32);
+                let vg_8 = vg.wrapping_sub(8);
+                let vr = vg_8.wrapping_add((b2 >> 4) & 0x0f);
+                let vb = vg_8.wrapping_add(b2 & 0x0f);
+                px.rgb_add(vr, vg, vb);
             }
-            _ => {
-                unsafe {
-                    // the compiler should figure it out on its own, but just in case
-                    core::hint::unreachable_unchecked()
-                }
-            }
-        }
+        };
 
         unsafe {
             // Safety: hash_index() is computed mod 64, so it will never go out of bounds
             *index.get_unchecked_mut(usize::from(px.hash_index())) = px;
         }
-
         *px_out = px;
     }
 
diff --git a/src/encode.rs b/src/encode.rs
index e383d5f..b175392 100644
--- a/src/encode.rs
+++ b/src/encode.rs
@@ -2,8 +2,8 @@ use std::slice;
 
 use crate::colorspace::ColorSpace;
 use crate::consts::{
-    QOI_COLOR, QOI_DIFF_16, QOI_DIFF_24, QOI_DIFF_8, QOI_HEADER_SIZE, QOI_INDEX, QOI_PADDING,
-    QOI_PADDING_SIZE, QOI_PIXELS_MAX, QOI_RUN_16, QOI_RUN_8,
+    QOI_HEADER_SIZE, QOI_OP_DIFF, QOI_OP_INDEX, QOI_OP_LUMA, QOI_OP_RGB, QOI_OP_RGBA, QOI_OP_RUN,
+    QOI_PADDING, QOI_PADDING_SIZE, QOI_PIXELS_MAX,
 };
 use crate::error::{Error, Result};
 use crate::header::Header;
@@ -23,6 +23,7 @@ impl WriteBuf {
     #[inline]
     pub fn write<const N: usize>(&mut self, v: [u8; N]) {
         unsafe {
+            // TODO: single write via deref?
             let mut i = 0;
             while i < N {
                 self.current.add(i).write(v[i]);
@@ -46,84 +47,7 @@ impl WriteBuf {
     }
 }
 
-#[inline]
-fn encode_diff_canonical<const N: usize>(
-    px: Pixel<N>, px_prev: Pixel<N>, buf: &mut WriteBuf,
-) -> Option<(bool, bool, bool, bool)> {
-    let vr = i16::from(px.r()) - i16::from(px_prev.r());
-    let vg = i16::from(px.g()) - i16::from(px_prev.g());
-    let vb = i16::from(px.b()) - i16::from(px_prev.b());
-    let va = i16::from(px.a_or(0)) - i16::from(px_prev.a_or(0));
-
-    let (vr_16, vg_16, vb_16, va_16) = (vr + 16, vg + 16, vb + 16, va + 16);
-    if vr_16 | vg_16 | vb_16 | va_16 | 31 == 31 {
-        loop {
-            if va == 0 {
-                let (vr_2, vg_2, vb_2) = (vr + 2, vg + 2, vb + 2);
-                if vr_2 | vg_2 | vb_2 | 3 == 3 {
-                    buf.write([QOI_DIFF_8 | (vr_2 << 4 | vg_2 << 2 | vb_2) as u8]);
-                    break;
-                }
-                let (vg_8, vb_8) = (vg + 8, vb + 8);
-                if vg_8 | vb_8 | 15 == 15 {
-                    buf.write([QOI_DIFF_16 | vr_16 as u8, (vg_8 << 4 | vb_8) as u8]);
-                    break;
-                }
-            }
-            buf.write([
-                QOI_DIFF_24 | (vr_16 >> 1) as u8,
-                (vr_16 << 7 | vg_16 << 2 | vb_16 >> 3) as u8,
-                (vb_16 << 5 | va_16) as u8,
-            ]);
-            break;
-        }
-        None
-    } else {
-        Some((vr != 0, vg != 0, vb != 0, va != 0))
-    }
-}
-
-#[inline]
-fn encode_diff_wrapping<const N: usize>(
-    px: Pixel<N>, px_prev: Pixel<N>, buf: &mut WriteBuf,
-) -> Option<(bool, bool, bool, bool)> {
-    let vr = px.r().wrapping_sub(px_prev.r());
-    let vg = px.g().wrapping_sub(px_prev.g());
-    let vb = px.b().wrapping_sub(px_prev.b());
-    let va = px.a_or(0).wrapping_sub(px_prev.a_or(0));
-
-    let (vr_16, vg_16, vb_16, va_16) =
-        (vr.wrapping_add(16), vg.wrapping_add(16), vb.wrapping_add(16), va.wrapping_add(16));
-
-    if vr_16 | vg_16 | vb_16 | va_16 | 31 == 31 {
-        loop {
-            if va == 0 {
-                let (vr_2, vg_2, vb_2) =
-                    (vr.wrapping_add(2), vg.wrapping_add(2), vb.wrapping_add(2));
-                if vr_2 | vg_2 | vb_2 | 3 == 3 {
-                    buf.write([QOI_DIFF_8 | vr_2 << 4 | vg_2 << 2 | vb_2]);
-                    break;
-                }
-                let (vg_8, vb_8) = (vg.wrapping_add(8), vb.wrapping_add(8));
-                if vg_8 | vb_8 | 15 == 15 {
-                    buf.write([QOI_DIFF_16 | vr_16, vg_8 << 4 | vb_8]);
-                    break;
-                }
-            }
-            buf.write([
-                QOI_DIFF_24 | vr_16 >> 1,
-                vr_16 << 7 | vg_16 << 2 | vb_16 >> 3,
-                vb_16 << 5 | va_16,
-            ]);
-            break;
-        }
-        None
-    } else {
-        Some((vr != 0, vg != 0, vb != 0, va != 0))
-    }
-}
-
-fn qoi_encode_impl<const CHANNELS: usize, const CANONICAL: bool>(
+fn qoi_encode_impl<const CHANNELS: usize>(
     out: &mut [u8], data: &[u8], width: u32, height: u32, colorspace: ColorSpace,
 ) -> Result<usize>
 where
@@ -159,61 +83,56 @@ where
 
     let mut index = [Pixel::new(); 64];
     let mut px_prev = Pixel::new().with_a(0xff);
-    let mut run = 0_u16;
-
-    let next_run = |buf: &mut WriteBuf, run: &mut u16| {
-        let mut r = *run;
-        if r < 33 {
-            r -= 1;
-            buf.push(QOI_RUN_8 | (r as u8));
-        } else {
-            r -= 33;
-            buf.write([QOI_RUN_16 | ((r >> 8) as u8), (r & 0xff) as u8]);
-        }
-        *run = 0;
-    };
+    let mut run = 0_u8;
 
     for (i, &px) in pixels.iter().enumerate() {
         if px == px_prev {
             run += 1;
-            if run == 0x2020 || i == n_pixels - 1 {
-                next_run(&mut buf, &mut run);
+            if run == 62 || unlikely(i == n_pixels - 1) {
+                buf.push(QOI_OP_RUN | (run - 1));
+                run = 0;
             }
         } else {
             if run != 0 {
-                next_run(&mut buf, &mut run);
+                buf.push(QOI_OP_RUN | (run - 1));
+                run = 0;
             }
             let index_pos = px.hash_index();
             let index_px = unsafe {
                 // Safety: hash_index() is computed mod 64, so it will never go out of bounds
                 index.get_unchecked_mut(usize::from(index_pos))
             };
-            if *index_px == px {
-                buf.push(QOI_INDEX | index_pos);
+            let px4 = px.as_rgba(0xff);
+            if *index_px == px4 {
+                buf.push(QOI_OP_INDEX | index_pos);
             } else {
-                *index_px = px;
+                *index_px = px4;
 
-                let nonzero = if CANONICAL {
-                    encode_diff_canonical::<CHANNELS>(px, px_prev, &mut buf)
+                if px.a_or(0) == px_prev.a_or(0) {
+                    let vr = px.r().wrapping_sub(px_prev.r());
+                    let vg = px.g().wrapping_sub(px_prev.g());
+                    let vb = px.b().wrapping_sub(px_prev.b());
+
+                    let vg_r = vr.wrapping_sub(vg);
+                    let vg_b = vb.wrapping_sub(vg);
+
+                    // TODO maybe add an outer check for vg_32
+                    let (vr_2, vg_2, vb_2) =
+                        (vr.wrapping_add(2), vg.wrapping_add(2), vb.wrapping_add(2));
+                    if vr_2 | vg_2 | vb_2 | 3 == 3 {
+                        buf.push(QOI_OP_DIFF | vr_2 << 4 | vg_2 << 2 | vb_2);
+                    } else {
+                        let (vg_32, vg_r_8, vg_b_8) =
+                            (vg.wrapping_add(32), vg_r.wrapping_add(8), vg_b.wrapping_add(8));
+                        if vg_r_8 | vg_b_8 | 15 == 15 && vg_32 | 63 == 63 {
+                            buf.write([QOI_OP_LUMA | vg_32, vg_r_8 << 4 | vg_b_8]);
+                        } else {
+                            buf.write([QOI_OP_RGB, px.r(), px.g(), px.b()]);
+                        }
+                    }
                 } else {
-                    encode_diff_wrapping::<CHANNELS>(px, px_prev, &mut buf)
-                };
-
-                if let Some((r, g, b, a)) = nonzero {
-                    let c = ((r as u8) << 3) | ((g as u8) << 2) | ((b as u8) << 1) | (a as u8);
-                    buf.push(QOI_COLOR | c);
-                    if r {
-                        buf.push(px.r());
-                    }
-                    if g {
-                        buf.push(px.g());
-                    }
-                    if b {
-                        buf.push(px.b());
-                    }
-                    if a {
-                        buf.push(px.a_or(0));
-                    }
+                    // TODO: or 2 write ops? (QOI_OP_RGBA and px.into_array())
+                    buf.write([QOI_OP_RGBA, px.r(), px.g(), px.b(), px.a_or(0xff)]);
                 }
             }
             px_prev = px;
@@ -225,26 +144,32 @@ where
 }
 
 #[inline]
-pub fn encode_to_buf_impl<const CANONICAL: bool>(
-    out: &mut [u8], data: &[u8], width: u32, height: u32, channels: u8, colorspace: ColorSpace,
+pub fn qoi_encode_to_buf(
+    mut out: impl AsMut<[u8]>, data: impl AsRef<[u8]>, width: u32, height: u32, channels: u8,
+    colorspace: impl Into<ColorSpace>,
 ) -> Result<usize> {
+    let out = out.as_mut();
+    let data = data.as_ref();
+    let colorspace = colorspace.into();
     match channels {
-        3 => qoi_encode_impl::<3, CANONICAL>(out, data, width, height, colorspace),
-        4 => qoi_encode_impl::<4, CANONICAL>(out, data, width, height, colorspace),
+        3 => qoi_encode_impl::<3>(out, data, width, height, colorspace),
+        4 => qoi_encode_impl::<4>(out, data, width, height, colorspace),
         _ => Err(Error::InvalidChannels { channels }),
     }
 }
 
 #[inline]
-pub fn encode_to_vec_impl<const CANONICAL: bool>(
-    data: &[u8], width: u32, height: u32, channels: u8, colorspace: ColorSpace,
+pub fn qoi_encode_to_vec(
+    data: impl AsRef<[u8]>, width: u32, height: u32, channels: u8,
+    colorspace: impl Into<ColorSpace>,
 ) -> Result<Vec<u8>> {
+    let data = data.as_ref();
+    let colorspace = colorspace.into();
     let mut out = Vec::with_capacity(encode_size_required(width, height, channels));
     unsafe {
         out.set_len(out.capacity());
     }
-    let size =
-        encode_to_buf_impl::<CANONICAL>(&mut out, data, width, height, channels, colorspace)?;
+    let size = qoi_encode_to_buf(&mut out, data, width, height, channels, colorspace)?;
     out.truncate(size);
     Ok(out)
 }
@@ -255,26 +180,3 @@ pub fn encode_size_required(width: u32, height: u32, channels: u8) -> usize {
     let n_pixels = width.saturating_mul(height);
     QOI_HEADER_SIZE + n_pixels.saturating_mul(usize::from(channels)) + n_pixels + QOI_PADDING_SIZE
 }
-
-#[inline]
-pub fn qoi_encode_to_vec(
-    data: impl AsRef<[u8]>, width: u32, height: u32, channels: u8,
-    colorspace: impl Into<ColorSpace>,
-) -> Result<Vec<u8>> {
-    encode_to_vec_impl::<false>(data.as_ref(), width, height, channels, colorspace.into())
-}
-
-#[inline]
-pub fn qoi_encode_to_buf(
-    mut out: impl AsMut<[u8]>, data: impl AsRef<[u8]>, width: u32, height: u32, channels: u8,
-    colorspace: impl Into<ColorSpace>,
-) -> Result<usize> {
-    encode_to_buf_impl::<false>(
-        out.as_mut(),
-        data.as_ref(),
-        width,
-        height,
-        channels,
-        colorspace.into(),
-    )
-}
diff --git a/src/pixel.rs b/src/pixel.rs
index e353e16..2ab066b 100644
--- a/src/pixel.rs
+++ b/src/pixel.rs
@@ -8,6 +8,42 @@ impl<const N: usize> Pixel<N> {
         Self([0; N])
     }
 
+    #[inline]
+    pub const fn as_rgba(self, with_a: u8) -> Pixel<4> {
+        let mut i = 0;
+        let mut out = Pixel::new();
+        while i < N {
+            out.0[i] = self.0[i];
+            i += 1;
+        }
+        if N < 4 {
+            out.0[3] = with_a;
+        }
+        out
+    }
+
+    #[inline]
+    pub const fn from_rgb(px: Pixel<3>, with_a: u8) -> Self {
+        let mut i = 0;
+        let mut out = Self::new();
+        while i < 3 {
+            out.0[i] = px.0[i];
+            i += 1;
+        }
+        out.with_a(with_a)
+    }
+
+    #[inline]
+    pub const fn from_array<const M: usize>(arr: [u8; M]) -> Self {
+        let mut i = 0;
+        let mut out = Self::new();
+        while i < N && i < M {
+            out.0[i] = arr[i];
+            i += 1;
+        }
+        out
+    }
+
     #[inline]
     pub const fn r(self) -> u8 {
         self.0[0]
@@ -42,7 +78,11 @@ impl<const N: usize> Pixel<N> {
 
     #[inline]
     pub const fn hash_index(self) -> u8 {
-        (self.r() * 3 + self.g() * 5 + self.b() * 7 + self.a_or(0xff) * 11) % 64
+        let r = self.r().wrapping_mul(3); // wrapping: the plain u8 products could overflow
+        let g = self.g().wrapping_mul(5);
+        let b = self.b().wrapping_mul(7);
+        let a = self.a_or(0xff).wrapping_mul(11); // a_or(0xff): fallback when no alpha channel
+        r.wrapping_add(g).wrapping_add(b).wrapping_add(a) % 64 // exact: 64 divides 256
     }
 
     #[inline]
@@ -51,36 +91,6 @@ impl<const N: usize> Pixel<N> {
         self.0[1] = self.0[1].wrapping_add(g);
         self.0[2] = self.0[2].wrapping_add(b);
     }
-
-    #[inline]
-    pub fn rgba_add(&mut self, r: u8, g: u8, b: u8, a: u8) {
-        self.rgb_add(r, g, b);
-        if N >= 4 {
-            self.0[3] = self.0[3].wrapping_add(a);
-        }
-    }
-
-    #[inline]
-    pub fn set_r(&mut self, value: u8) {
-        self.0[0] = value;
-    }
-
-    #[inline]
-    pub fn set_g(&mut self, value: u8) {
-        self.0[1] = value;
-    }
-
-    #[inline]
-    pub fn set_b(&mut self, value: u8) {
-        self.0[2] = value;
-    }
-
-    #[inline]
-    pub fn set_a(&mut self, value: u8) {
-        if N >= 4 {
-            self.0[3] = value;
-        }
-    }
 }
 
 pub trait SupportedChannels {}