From 7db8e9c3ab0b55f75239595c2a567d197243f699 Mon Sep 17 00:00:00 2001 From: tuxmain Date: Fri, 14 Apr 2023 12:39:11 +0200 Subject: [PATCH] feat: generic collection --- Cargo.toml | 11 +++++- README.md | 19 ++++++++-- benches/comparison.rs | 4 +- src/lib.rs | 87 ++++++++++++++++++++++++++++++++++++------- src/traits.rs | 28 ++++++++++++++ 5 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 src/traits.rs diff --git a/Cargo.toml b/Cargo.toml index 59cf29d..46e9e18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,20 +1,27 @@ [package] name = "median-accumulator" -version = "0.1.0" +version = "0.2.0" edition = "2021" authors = ["tuxmain "] license = "AGPL-3.0-only" repository = "https://git.txmn.tk/tuxmain/median-accumulator" documentation = "https://docs.rs/median-accumulator/" description = "Simple, fast, space-efficient accumulator for computing median" -categories = ["algorithms"] +categories = ["algorithms", "data-structures", "no-std"] [dependencies] +cc-traits = "1.0.0" +smallvec = { version = "^1.6", optional = true } + +[features] +nostd = ["cc-traits/nostd"] +smallvec = ["dep:smallvec", "cc-traits/smallvec"] [dev-dependencies] criterion = { version = "0.4.0", features = ["html_reports"] } medianheap = "0.3.0" rand = "0.8.5" +smallvec = "^1.6" [[bench]] name = "comparison" diff --git a/README.md b/README.md index ca252c8..44cea41 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el * **Time-efficient**: push is `O(log(N))` * **Generic**: `T: Clone + Ord` * **Tested** -* **No unsafe**, no deps except `std` +* **No unsafe**, no deps except `cc-traits` +* **no_std** (optional): supports generic collections Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation. @@ -17,7 +18,7 @@ Faster than other implementations if there are samples having the same value. 
If ```rust use median_accumulator::*; -let mut acc = MedianAcc::new(); +let mut acc = vec::MedianAcc::new(); assert_eq!(acc.get_median(), None); acc.push(7); @@ -30,9 +31,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7))); If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail. +## no_std + +Example with [smallvec](https://crates.io/crates/smallvec): (`smallvec` feature required) + +```rust +use median_accumulator::*; + +let mut acc = MedianAcc::>::new(); +``` + +For collections other than `Vec` or `SmallVec`, you must implement the required [cc-traits](https://crates.io/crates/cc-traits) traits and `InsertIndex`. + ## License -CopyLeft 2022 Pascal Engélibert +CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. diff --git a/benches/comparison.rs b/benches/comparison.rs index 374b868..867ea07 100644 --- a/benches/comparison.rs +++ b/benches/comparison.rs @@ -11,7 +11,7 @@ fn compare_crates(c: &mut Criterion) { &samples, |b, _i| { b.iter(|| { - let mut median = median_accumulator::MedianAcc::new(); + let mut median = median_accumulator::vec::MedianAcc::new(); samples.iter().for_each(|s| median.push(*s)); median.get_median() }) @@ -35,7 +35,7 @@ fn compare_crates(c: &mut Criterion) { &samples, |b, _i| { b.iter(|| { - let mut median = median_accumulator::MedianAcc::new(); + let mut median = median_accumulator::vec::MedianAcc::new(); samples.iter().for_each(|s| median.push(*s)); median.get_median() }) diff --git a/src/lib.rs b/src/lib.rs index 1582262..80dfef7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ //! ```rust //! use median_accumulator::*; //! -//! let mut acc = MedianAcc::new(); +//! let mut acc = vec::MedianAcc::new(); //! //! assert_eq!(acc.get_median(), None); //! 
acc.push(7); @@ -14,14 +14,29 @@ //! //! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples. -use std::cmp::Ordering; +#![cfg_attr(feature = "nostd", no_std)] + +mod traits; + +pub use traits::*; + +use core::{cmp::Ordering, ops::DerefMut}; /// Accumulator for computing median #[derive(Clone, Debug, Default)] -pub struct MedianAcc { - samples: Vec<(T, u32)>, +pub struct MedianAcc< + T: Clone + Ord, + V: DerefMut + cc_traits::VecMut<(T, u32)> + InsertIndex, +> { + samples: V, median_index: Option, median_subindex: u32, + _t: core::marker::PhantomData, +} + +#[cfg(not(feature = "nostd"))] +pub mod vec { + pub type MedianAcc = crate::MedianAcc>; } /// Computed median @@ -34,17 +49,41 @@ pub enum MedianResult { Two(T, T), } -impl MedianAcc { +impl< + T: Clone + Ord, + V: DerefMut + cc_traits::VecMut<(T, u32)> + InsertIndex, + > MedianAcc +{ /// Create an empty accumulator /// /// _O(1)_ /// - /// Does not allocate until the first push. - pub fn new() -> Self { + /// If using `std::vec::Vec`, does not allocate until the first push. + pub fn new() -> Self + where + V: Default, + { Self { - samples: Vec::new(), + samples: Default::default(), median_index: None, median_subindex: 0, + _t: Default::default(), + } + } + + /// Create an empty accumulator from an existing (empty) collection + /// + /// _O(1)_ + /// + /// Useful when using fixed-length collections or to avoid allocations. 
+ pub fn new_from(collection: V) -> Self { + assert!(collection.is_empty(), "the collection must be empty"); + + Self { + samples: collection, + median_index: None, + median_subindex: 0, + _t: Default::default(), } } @@ -85,7 +124,7 @@ impl MedianAcc { } } Err(sample_index) => { - self.samples.insert(sample_index, (sample, 1)); + self.samples.insert_index(sample_index, (sample, 1)); if *median_index >= sample_index { if self.median_subindex == 0 { self.median_subindex = @@ -105,7 +144,7 @@ impl MedianAcc { } } } else { - self.samples.push((sample, 1)); + self.samples.push_back((sample, 1)); self.median_index = Some(0); } } @@ -148,19 +187,22 @@ impl MedianAcc { } /// Clear the data - pub fn clear(&mut self) { + pub fn clear(&mut self) + where + V: cc_traits::Clear, + { self.samples.clear(); self.median_index = None; self.median_subindex = 0; } - /// Access the underlying vec + /// Access the underlying collection /// /// Just in case you need finer allocation management. /// /// # Safety /// Leaving the vector in an invalid state may cause invalid result or panic (but no UB). 
- pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> { + pub unsafe fn get_samples_mut(&mut self) -> &mut V { &mut self.samples } } @@ -198,7 +240,24 @@ mod tests { let len: usize = rng.gen_range(0..100); let mut samples: Vec = (0..len).map(|_| rng.gen_range(-100..100)).collect(); - let mut median = MedianAcc::new(); + let mut median = vec::MedianAcc::new(); + for sample in samples.iter() { + median.push(*sample); + } + + assert_eq!(median.get_median(), naive_median(&mut samples)); + } + } + + #[test] + fn correctness_smallvec() { + let mut rng = rand::thread_rng(); + + for _ in 0..100_000 { + let len: usize = rng.gen_range(0..64); + let mut samples: Vec = (0..len).map(|_| rng.gen_range(-100..100)).collect(); + + let mut median = MedianAcc::>::new(); for sample in samples.iter() { median.push(*sample); } diff --git a/src/traits.rs b/src/traits.rs new file mode 100644 index 0000000..222f482 --- /dev/null +++ b/src/traits.rs @@ -0,0 +1,28 @@ +/// Collection where an item can be inserted at a given index. +pub trait InsertIndex: cc_traits::Collection { + type Output; + + fn insert_index( + &mut self, + index: usize, + element: ::Item, + ) -> Self::Output; +} + +#[cfg(not(feature = "nostd"))] +impl InsertIndex for Vec { + type Output = (); + + fn insert_index(&mut self, index: usize, element: T) { + self.insert(index, element) + } +} + +#[cfg(feature = "smallvec")] +impl> InsertIndex for smallvec::SmallVec { + type Output = (); + + fn insert_index(&mut self, index: usize, element: T) { + self.insert(index, element) + } +}