feat: generic collection

This commit is contained in:
Pascal Engélibert 2023-04-14 12:39:11 +02:00
parent 1594c39337
commit 7db8e9c3ab
Signed by: tuxmain
GPG key ID: 3504BC6D362F7DCA
5 changed files with 128 additions and 21 deletions

View file

@ -1,20 +1,27 @@
[package]
name = "median-accumulator"
version = "0.1.0"
version = "0.2.0"
edition = "2021"
authors = ["tuxmain <tuxmain@zettascript.org>"]
license = "AGPL-3.0-only"
repository = "https://git.txmn.tk/tuxmain/median-accumulator"
documentation = "https://docs.rs/median-accumulator/"
description = "Simple, fast, space-efficient accumulator for computing median"
categories = ["algorithms"]
categories = ["algorithms", "data-structures", "no-std"]
[dependencies]
cc-traits = "1.0.0"
smallvec = { version = "^1.6", optional = true }
[features]
nostd = ["cc-traits/nostd"]
smallvec = ["dep:smallvec", "cc-traits/smallvec"]
[dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] }
medianheap = "0.3.0"
rand = "0.8.5"
smallvec = "^1.6"
[[bench]]
name = "comparison"

View file

@ -8,7 +8,8 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
* **Time-efficient**: push is `O(log(N))`
* **Generic**: `T: Clone + Ord`
* **Tested**
* **No unsafe**, no deps except `std`
* **No unsafe**, minimal deps (only `cc-traits`; `smallvec` is optional)
* **no_std** (optional): supports generic collections
Faster than other implementations when many samples share the same value. If that is not your case, you should use another implementation.
@ -17,7 +18,7 @@ Faster than other implementations if there are samples having the same value. If
```rust
use median_accumulator::*;
let mut acc = MedianAcc::new();
let mut acc = vec::MedianAcc::new();
assert_eq!(acc.get_median(), None);
acc.push(7);
@ -30,9 +31,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
## no_std
Example with [smallvec](https://crates.io/crates/smallvec): (`smallvec` feature required)
```rust
use median_accumulator::*;
let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
```
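To use a collection other than `Vec` or `SmallVec`, its type must implement `DerefMut<Target = [(T, u32)]>`, the `VecMut<(T, u32)>` trait from [cc-traits](https://crates.io/crates/cc-traits) (plus `Clear` if you call `clear`), and this crate's `InsertIndex`.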
## License
CopyLeft 2022 Pascal Engélibert
CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

View file

@ -11,7 +11,7 @@ fn compare_crates(c: &mut Criterion) {
&samples,
|b, _i| {
b.iter(|| {
let mut median = median_accumulator::MedianAcc::new();
let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s));
median.get_median()
})
@ -35,7 +35,7 @@ fn compare_crates(c: &mut Criterion) {
&samples,
|b, _i| {
b.iter(|| {
let mut median = median_accumulator::MedianAcc::new();
let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s));
median.get_median()
})

View file

@ -1,7 +1,7 @@
//! ```rust
//! use median_accumulator::*;
//!
//! let mut acc = MedianAcc::new();
//! let mut acc = vec::MedianAcc::new();
//!
//! assert_eq!(acc.get_median(), None);
//! acc.push(7);
@ -14,14 +14,29 @@
//!
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
use std::cmp::Ordering;
#![cfg_attr(feature = "nostd", no_std)]
mod traits;
pub use traits::*;
use core::{cmp::Ordering, ops::DerefMut};
/// Accumulator for computing median
#[derive(Clone, Debug, Default)]
pub struct MedianAcc<T: Clone + Ord> {
samples: Vec<(T, u32)>,
pub struct MedianAcc<
T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> {
samples: V,
median_index: Option<usize>,
median_subindex: u32,
_t: core::marker::PhantomData<T>,
}
/// Convenience aliases backed by the standard library's `Vec`.
///
/// Only compiled when the `nostd` feature is disabled.
#[cfg(not(feature = "nostd"))]
pub mod vec {
    /// [`MedianAcc`](crate::MedianAcc) whose samples are stored in a `Vec<(T, u32)>`.
    pub type MedianAcc<T> = crate::MedianAcc<T, ::std::vec::Vec<(T, u32)>>;
}
/// Computed median
@ -34,17 +49,41 @@ pub enum MedianResult<T: Clone + Ord> {
Two(T, T),
}
impl<T: Clone + Ord> MedianAcc<T> {
impl<
T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> MedianAcc<T, V>
{
/// Create an empty accumulator
///
/// _O(1)_
///
/// Does not allocate until the first push.
pub fn new() -> Self {
/// If using `std::vec::Vec`, does not allocate until the first push.
pub fn new() -> Self
where
V: Default,
{
Self {
samples: Vec::new(),
samples: Default::default(),
median_index: None,
median_subindex: 0,
_t: Default::default(),
}
}
/// Build an empty accumulator on top of a caller-provided collection
///
/// _O(1)_
///
/// Handy for fixed-length collections, or when the caller wants to control
/// allocation itself.
///
/// # Panics
/// Panics with "the collection must be empty" if `collection` already holds items.
pub fn new_from(collection: V) -> Self {
    // The accumulator's indices assume they start from an empty store.
    assert!(collection.is_empty(), "the collection must be empty");

    Self {
        median_index: None,
        median_subindex: 0,
        _t: core::marker::PhantomData,
        samples: collection,
    }
}
@ -85,7 +124,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
}
}
Err(sample_index) => {
self.samples.insert(sample_index, (sample, 1));
self.samples.insert_index(sample_index, (sample, 1));
if *median_index >= sample_index {
if self.median_subindex == 0 {
self.median_subindex =
@ -105,7 +144,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
}
}
} else {
self.samples.push((sample, 1));
self.samples.push_back((sample, 1));
self.median_index = Some(0);
}
}
@ -148,19 +187,22 @@ impl<T: Clone + Ord> MedianAcc<T> {
}
/// Clear the data
pub fn clear(&mut self) {
pub fn clear(&mut self)
where
V: cc_traits::Clear,
{
self.samples.clear();
self.median_index = None;
self.median_subindex = 0;
}
/// Access the underlying vec
/// Access the underlying collection
///
/// Just in case you need finer allocation management.
///
/// # Safety
/// Leaving the collection in an invalid state may cause invalid results or panics (but no UB).
pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> {
pub unsafe fn get_samples_mut(&mut self) -> &mut V {
&mut self.samples
}
}
@ -198,7 +240,24 @@ mod tests {
let len: usize = rng.gen_range(0..100);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = MedianAcc::new();
let mut median = vec::MedianAcc::new();
for sample in samples.iter() {
median.push(*sample);
}
assert_eq!(median.get_median(), naive_median(&mut samples));
}
}
#[test]
fn correctness_smallvec() {
let mut rng = rand::thread_rng();
for _ in 0..100_000 {
let len: usize = rng.gen_range(0..64);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
for sample in samples.iter() {
median.push(*sample);
}

28
src/traits.rs Normal file
View file

@ -0,0 +1,28 @@
/// Collection where an item can be inserted at a given index.
pub trait InsertIndex: cc_traits::Collection {
type Output;
fn insert_index(
&mut self,
index: usize,
element: <Self as cc_traits::Collection>::Item,
) -> Self::Output;
}
/// Positional insertion for the standard `Vec` (unavailable with the `nostd` feature).
#[cfg(not(feature = "nostd"))]
impl<T> InsertIndex for Vec<T> {
    /// `Vec::insert` yields no value.
    type Output = ();

    /// Forwards to the inherent [`Vec::insert`].
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        Vec::insert(self, index, element);
    }
}
/// Positional insertion for `SmallVec` (requires the `smallvec` feature).
#[cfg(feature = "smallvec")]
impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
    /// `SmallVec::insert` yields no value.
    type Output = ();

    /// Forwards to the inherent `SmallVec::insert`.
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        smallvec::SmallVec::insert(self, index, element);
    }
}