feat: generic collection

This commit is contained in:
Pascal Engélibert 2023-04-14 12:39:11 +02:00
parent 1594c39337
commit 7db8e9c3ab
Signed by: tuxmain
GPG key ID: 3504BC6D362F7DCA
5 changed files with 128 additions and 21 deletions

View file

@ -1,20 +1,27 @@
[package] [package]
name = "median-accumulator" name = "median-accumulator"
version = "0.1.0" version = "0.2.0"
edition = "2021" edition = "2021"
authors = ["tuxmain <tuxmain@zettascript.org>"] authors = ["tuxmain <tuxmain@zettascript.org>"]
license = "AGPL-3.0-only" license = "AGPL-3.0-only"
repository = "https://git.txmn.tk/tuxmain/median-accumulator" repository = "https://git.txmn.tk/tuxmain/median-accumulator"
documentation = "https://docs.rs/median-accumulator/" documentation = "https://docs.rs/median-accumulator/"
description = "Simple, fast, space-efficient accumulator for computing median" description = "Simple, fast, space-efficient accumulator for computing median"
categories = ["algorithms"] categories = ["algorithms", "data-structures", "no-std"]
[dependencies] [dependencies]
cc-traits = "1.0.0"
smallvec = { version = "^1.6", optional = true }
[features]
nostd = ["cc-traits/nostd"]
smallvec = ["dep:smallvec", "cc-traits/smallvec"]
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] } criterion = { version = "0.4.0", features = ["html_reports"] }
medianheap = "0.3.0" medianheap = "0.3.0"
rand = "0.8.5" rand = "0.8.5"
smallvec = "^1.6"
[[bench]] [[bench]]
name = "comparison" name = "comparison"

View file

@ -8,7 +8,8 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
* **Time-efficient**: push is `O(log(N))` * **Time-efficient**: push is `O(log(N))`
* **Generic**: `T: Clone + Ord` * **Generic**: `T: Clone + Ord`
* **Tested** * **Tested**
* **No unsafe**, no deps except `std` * **No unsafe**, no deps
* **no_std** (optional): supports generic collections
Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation. Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation.
@ -17,7 +18,7 @@ Faster than other implementations if there are samples having the same value. If
```rust ```rust
use median_accumulator::*; use median_accumulator::*;
let mut acc = MedianAcc::new(); let mut acc = vec::MedianAcc::new();
assert_eq!(acc.get_median(), None); assert_eq!(acc.get_median(), None);
acc.push(7); acc.push(7);
@ -30,9 +31,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail. If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
## no_std
Example with [smallvec](https://crates.io/crates/smallvec) (requires the `smallvec` feature):
```rust
use median_accumulator::*;
let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
```
For collections other than `Vec` or `SmallVec`, you must implement the required traits from [cc-traits](https://crates.io/crates/cc-traits) as well as `InsertIndex` for your collection type.
## License ## License
CopyLeft 2022 Pascal Engélibert CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

View file

@ -11,7 +11,7 @@ fn compare_crates(c: &mut Criterion) {
&samples, &samples,
|b, _i| { |b, _i| {
b.iter(|| { b.iter(|| {
let mut median = median_accumulator::MedianAcc::new(); let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s)); samples.iter().for_each(|s| median.push(*s));
median.get_median() median.get_median()
}) })
@ -35,7 +35,7 @@ fn compare_crates(c: &mut Criterion) {
&samples, &samples,
|b, _i| { |b, _i| {
b.iter(|| { b.iter(|| {
let mut median = median_accumulator::MedianAcc::new(); let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s)); samples.iter().for_each(|s| median.push(*s));
median.get_median() median.get_median()
}) })

View file

@ -1,7 +1,7 @@
//! ```rust //! ```rust
//! use median_accumulator::*; //! use median_accumulator::*;
//! //!
//! let mut acc = MedianAcc::new(); //! let mut acc = vec::MedianAcc::new();
//! //!
//! assert_eq!(acc.get_median(), None); //! assert_eq!(acc.get_median(), None);
//! acc.push(7); //! acc.push(7);
@ -14,14 +14,29 @@
//! //!
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples. //! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
use std::cmp::Ordering; #![cfg_attr(feature = "nostd", no_std)]
mod traits;
pub use traits::*;
use core::{cmp::Ordering, ops::DerefMut};
/// Accumulator for computing median /// Accumulator for computing median
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct MedianAcc<T: Clone + Ord> { pub struct MedianAcc<
samples: Vec<(T, u32)>, T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> {
samples: V,
median_index: Option<usize>, median_index: Option<usize>,
median_subindex: u32, median_subindex: u32,
_t: core::marker::PhantomData<T>,
}
#[cfg(not(feature = "nostd"))]
pub mod vec {
pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
} }
/// Computed median /// Computed median
@ -34,17 +49,41 @@ pub enum MedianResult<T: Clone + Ord> {
Two(T, T), Two(T, T),
} }
impl<T: Clone + Ord> MedianAcc<T> { impl<
T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> MedianAcc<T, V>
{
/// Create an empty accumulator /// Create an empty accumulator
/// ///
/// _O(1)_ /// _O(1)_
/// ///
/// Does not allocate until the first push. /// If using `std::vec::Vec`, does not allocate until the first push.
pub fn new() -> Self { pub fn new() -> Self
where
V: Default,
{
Self { Self {
samples: Vec::new(), samples: Default::default(),
median_index: None, median_index: None,
median_subindex: 0, median_subindex: 0,
_t: Default::default(),
}
}
/// Create an empty accumulator from an existing (empty) collection
///
/// _O(1)_
///
/// Useful when using fixed-length collections or to avoid allocations.
pub fn new_from(collection: V) -> Self {
assert!(collection.is_empty(), "the collection must be empty");
Self {
samples: collection,
median_index: None,
median_subindex: 0,
_t: Default::default(),
} }
} }
@ -85,7 +124,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
} }
} }
Err(sample_index) => { Err(sample_index) => {
self.samples.insert(sample_index, (sample, 1)); self.samples.insert_index(sample_index, (sample, 1));
if *median_index >= sample_index { if *median_index >= sample_index {
if self.median_subindex == 0 { if self.median_subindex == 0 {
self.median_subindex = self.median_subindex =
@ -105,7 +144,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
} }
} }
} else { } else {
self.samples.push((sample, 1)); self.samples.push_back((sample, 1));
self.median_index = Some(0); self.median_index = Some(0);
} }
} }
@ -148,19 +187,22 @@ impl<T: Clone + Ord> MedianAcc<T> {
} }
/// Clear the data /// Clear the data
pub fn clear(&mut self) { pub fn clear(&mut self)
where
V: cc_traits::Clear,
{
self.samples.clear(); self.samples.clear();
self.median_index = None; self.median_index = None;
self.median_subindex = 0; self.median_subindex = 0;
} }
/// Access the underlying vec /// Access the underlying collection
/// ///
/// Just in case you need finer allocation management. /// Just in case you need finer allocation management.
/// ///
/// # Safety /// # Safety
/// Leaving the collection in an invalid state may cause invalid results or a panic (but no UB).
pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> { pub unsafe fn get_samples_mut(&mut self) -> &mut V {
&mut self.samples &mut self.samples
} }
} }
@ -198,7 +240,24 @@ mod tests {
let len: usize = rng.gen_range(0..100); let len: usize = rng.gen_range(0..100);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect(); let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = MedianAcc::new(); let mut median = vec::MedianAcc::new();
for sample in samples.iter() {
median.push(*sample);
}
assert_eq!(median.get_median(), naive_median(&mut samples));
}
}
#[test]
fn correctness_smallvec() {
let mut rng = rand::thread_rng();
for _ in 0..100_000 {
let len: usize = rng.gen_range(0..64);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
for sample in samples.iter() { for sample in samples.iter() {
median.push(*sample); median.push(*sample);
} }

28
src/traits.rs Normal file
View file

@ -0,0 +1,28 @@
/// Collection where an item can be inserted at a given index.
///
/// This complements the `cc_traits` collection traits: implementors must
/// already be a `cc_traits::Collection`, and the inserted element has that
/// collection's `Item` type.
pub trait InsertIndex: cc_traits::Collection {
/// Value returned by [`InsertIndex::insert_index`].
///
/// Each implementation chooses this type; the `Vec` and `SmallVec`
/// implementations provided by this crate use `()`.
type Output;
/// Insert `element` into the collection at position `index`.
///
/// The exact behavior (shifting of later elements, handling of an
/// out-of-range `index`) is defined by the implementing collection.
fn insert_index(
&mut self,
index: usize,
element: <Self as cc_traits::Collection>::Item,
) -> Self::Output;
}
/// [`InsertIndex`] for the standard `Vec`.
///
/// Only compiled when the `nostd` feature is disabled.
#[cfg(not(feature = "nostd"))]
impl<T> InsertIndex for Vec<T> {
    /// Insertion into a `Vec` yields no value.
    type Output = ();

    /// Insert `element` at `index` by delegating to [`Vec::insert`].
    ///
    /// # Panics
    /// Panics under the same conditions as [`Vec::insert`]
    /// (when `index` is greater than the vector's length).
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        Vec::insert(self, index, element);
    }
}
/// [`InsertIndex`] for `smallvec::SmallVec`.
///
/// Only compiled when the `smallvec` feature is enabled.
#[cfg(feature = "smallvec")]
impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
    /// Insertion into a `SmallVec` yields no value.
    type Output = ();

    /// Insert `element` at `index` by delegating to `SmallVec::insert`.
    ///
    /// Any panic raised by `SmallVec::insert` (e.g. for an out-of-range
    /// `index`) propagates to the caller unchanged.
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        smallvec::SmallVec::insert(self, index, element);
    }
}