feat: generic collection
This commit is contained in:
parent
1594c39337
commit
7db8e9c3ab
5 changed files with 128 additions and 21 deletions
11
Cargo.toml
11
Cargo.toml
|
@ -1,20 +1,27 @@
|
||||||
[package]
|
[package]
|
||||||
name = "median-accumulator"
|
name = "median-accumulator"
|
||||||
version = "0.1.0"
|
version = "0.2.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["tuxmain <tuxmain@zettascript.org>"]
|
authors = ["tuxmain <tuxmain@zettascript.org>"]
|
||||||
license = "AGPL-3.0-only"
|
license = "AGPL-3.0-only"
|
||||||
repository = "https://git.txmn.tk/tuxmain/median-accumulator"
|
repository = "https://git.txmn.tk/tuxmain/median-accumulator"
|
||||||
documentation = "https://docs.rs/median-accumulator/"
|
documentation = "https://docs.rs/median-accumulator/"
|
||||||
description = "Simple, fast, space-efficient accumulator for computing median"
|
description = "Simple, fast, space-efficient accumulator for computing median"
|
||||||
categories = ["algorithms"]
|
categories = ["algorithms", "data-structures", "no-std"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
cc-traits = "1.0.0"
|
||||||
|
smallvec = { version = "^1.6", optional = true }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
nostd = ["cc-traits/nostd"]
|
||||||
|
smallvec = ["dep:smallvec", "cc-traits/smallvec"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
criterion = { version = "0.4.0", features = ["html_reports"] }
|
criterion = { version = "0.4.0", features = ["html_reports"] }
|
||||||
medianheap = "0.3.0"
|
medianheap = "0.3.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
|
smallvec = "^1.6"
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "comparison"
|
name = "comparison"
|
||||||
|
|
19
README.md
19
README.md
|
@ -8,7 +8,8 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
|
||||||
* **Time-efficient**: push is `O(log(N))`
|
* **Time-efficient**: push is `O(log(N))`
|
||||||
* **Generic**: `T: Clone + Ord`
|
* **Generic**: `T: Clone + Ord`
|
||||||
* **Tested**
|
* **Tested**
|
||||||
* **No unsafe**, no deps except `std`
|
* **No unsafe**, no deps except [cc-traits](https://crates.io/crates/cc-traits)
|
||||||
|
* **no_std** (optional): supports generic collections
|
||||||
|
|
||||||
Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation.
|
Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation.
|
||||||
|
|
||||||
|
@ -17,7 +18,7 @@ Faster than other implementations if there are samples having the same value. If
|
||||||
```rust
|
```rust
|
||||||
use median_accumulator::*;
|
use median_accumulator::*;
|
||||||
|
|
||||||
let mut acc = MedianAcc::new();
|
let mut acc = vec::MedianAcc::new();
|
||||||
|
|
||||||
assert_eq!(acc.get_median(), None);
|
assert_eq!(acc.get_median(), None);
|
||||||
acc.push(7);
|
acc.push(7);
|
||||||
|
@ -30,9 +31,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));
|
||||||
|
|
||||||
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
|
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
|
||||||
|
|
||||||
|
## no_std
|
||||||
|
|
||||||
|
Example with [smallvec](https://crates.io/crates/smallvec) (requires the `smallvec` feature):
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use median_accumulator::*;
|
||||||
|
|
||||||
|
let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
|
||||||
|
```
|
||||||
|
|
||||||
|
For collections other than `Vec` or `SmallVec`, you must implement the relevant [cc-traits](https://crates.io/crates/cc-traits) traits and `InsertIndex` for your collection type.
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
CopyLeft 2022 Pascal Engélibert
|
CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.
|
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ fn compare_crates(c: &mut Criterion) {
|
||||||
&samples,
|
&samples,
|
||||||
|b, _i| {
|
|b, _i| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut median = median_accumulator::MedianAcc::new();
|
let mut median = median_accumulator::vec::MedianAcc::new();
|
||||||
samples.iter().for_each(|s| median.push(*s));
|
samples.iter().for_each(|s| median.push(*s));
|
||||||
median.get_median()
|
median.get_median()
|
||||||
})
|
})
|
||||||
|
@ -35,7 +35,7 @@ fn compare_crates(c: &mut Criterion) {
|
||||||
&samples,
|
&samples,
|
||||||
|b, _i| {
|
|b, _i| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut median = median_accumulator::MedianAcc::new();
|
let mut median = median_accumulator::vec::MedianAcc::new();
|
||||||
samples.iter().for_each(|s| median.push(*s));
|
samples.iter().for_each(|s| median.push(*s));
|
||||||
median.get_median()
|
median.get_median()
|
||||||
})
|
})
|
||||||
|
|
87
src/lib.rs
87
src/lib.rs
|
@ -1,7 +1,7 @@
|
||||||
//! ```rust
|
//! ```rust
|
||||||
//! use median_accumulator::*;
|
//! use median_accumulator::*;
|
||||||
//!
|
//!
|
||||||
//! let mut acc = MedianAcc::new();
|
//! let mut acc = vec::MedianAcc::new();
|
||||||
//!
|
//!
|
||||||
//! assert_eq!(acc.get_median(), None);
|
//! assert_eq!(acc.get_median(), None);
|
||||||
//! acc.push(7);
|
//! acc.push(7);
|
||||||
|
@ -14,14 +14,29 @@
|
||||||
//!
|
//!
|
||||||
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
|
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
|
||||||
|
|
||||||
use std::cmp::Ordering;
|
#![cfg_attr(feature = "nostd", no_std)]
|
||||||
|
|
||||||
|
mod traits;
|
||||||
|
|
||||||
|
pub use traits::*;
|
||||||
|
|
||||||
|
use core::{cmp::Ordering, ops::DerefMut};
|
||||||
|
|
||||||
/// Accumulator for computing median
|
/// Accumulator for computing median
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
pub struct MedianAcc<T: Clone + Ord> {
|
pub struct MedianAcc<
|
||||||
samples: Vec<(T, u32)>,
|
T: Clone + Ord,
|
||||||
|
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
|
||||||
|
> {
|
||||||
|
samples: V,
|
||||||
median_index: Option<usize>,
|
median_index: Option<usize>,
|
||||||
median_subindex: u32,
|
median_subindex: u32,
|
||||||
|
_t: core::marker::PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nostd"))]
|
||||||
|
pub mod vec {
|
||||||
|
pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computed median
|
/// Computed median
|
||||||
|
@ -34,17 +49,41 @@ pub enum MedianResult<T: Clone + Ord> {
|
||||||
Two(T, T),
|
Two(T, T),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Clone + Ord> MedianAcc<T> {
|
impl<
|
||||||
|
T: Clone + Ord,
|
||||||
|
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
|
||||||
|
> MedianAcc<T, V>
|
||||||
|
{
|
||||||
/// Create an empty accumulator
|
/// Create an empty accumulator
|
||||||
///
|
///
|
||||||
/// _O(1)_
|
/// _O(1)_
|
||||||
///
|
///
|
||||||
/// Does not allocate until the first push.
|
/// If using `std::vec::Vec`, does not allocate until the first push.
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self
|
||||||
|
where
|
||||||
|
V: Default,
|
||||||
|
{
|
||||||
Self {
|
Self {
|
||||||
samples: Vec::new(),
|
samples: Default::default(),
|
||||||
median_index: None,
|
median_index: None,
|
||||||
median_subindex: 0,
|
median_subindex: 0,
|
||||||
|
_t: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create an empty accumulator from an existing (empty) collection
|
||||||
|
///
|
||||||
|
/// _O(1)_
|
||||||
|
///
|
||||||
|
/// Useful when using fixed-length collections or to avoid allocations.
|
||||||
|
pub fn new_from(collection: V) -> Self {
|
||||||
|
assert!(collection.is_empty(), "the collection must be empty");
|
||||||
|
|
||||||
|
Self {
|
||||||
|
samples: collection,
|
||||||
|
median_index: None,
|
||||||
|
median_subindex: 0,
|
||||||
|
_t: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,7 +124,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(sample_index) => {
|
Err(sample_index) => {
|
||||||
self.samples.insert(sample_index, (sample, 1));
|
self.samples.insert_index(sample_index, (sample, 1));
|
||||||
if *median_index >= sample_index {
|
if *median_index >= sample_index {
|
||||||
if self.median_subindex == 0 {
|
if self.median_subindex == 0 {
|
||||||
self.median_subindex =
|
self.median_subindex =
|
||||||
|
@ -105,7 +144,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.samples.push((sample, 1));
|
self.samples.push_back((sample, 1));
|
||||||
self.median_index = Some(0);
|
self.median_index = Some(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -148,19 +187,22 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear the data
|
/// Clear the data
|
||||||
pub fn clear(&mut self) {
|
pub fn clear(&mut self)
|
||||||
|
where
|
||||||
|
V: cc_traits::Clear,
|
||||||
|
{
|
||||||
self.samples.clear();
|
self.samples.clear();
|
||||||
self.median_index = None;
|
self.median_index = None;
|
||||||
self.median_subindex = 0;
|
self.median_subindex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Access the underlying vec
|
/// Access the underlying collection
|
||||||
///
|
///
|
||||||
/// Just in case you need finer allocation management.
|
/// Just in case you need finer allocation management.
|
||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
/// Leaving the vector in an invalid state may cause invalid result or panic (but no UB).
|
/// Leaving the collection in an invalid state may cause invalid results or panics (but no UB).
|
||||||
pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> {
|
pub unsafe fn get_samples_mut(&mut self) -> &mut V {
|
||||||
&mut self.samples
|
&mut self.samples
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -198,7 +240,24 @@ mod tests {
|
||||||
let len: usize = rng.gen_range(0..100);
|
let len: usize = rng.gen_range(0..100);
|
||||||
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
|
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
|
||||||
|
|
||||||
let mut median = MedianAcc::new();
|
let mut median = vec::MedianAcc::new();
|
||||||
|
for sample in samples.iter() {
|
||||||
|
median.push(*sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(median.get_median(), naive_median(&mut samples));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn correctness_smallvec() {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
|
for _ in 0..100_000 {
|
||||||
|
let len: usize = rng.gen_range(0..64);
|
||||||
|
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
|
||||||
|
|
||||||
|
let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
|
||||||
for sample in samples.iter() {
|
for sample in samples.iter() {
|
||||||
median.push(*sample);
|
median.push(*sample);
|
||||||
}
|
}
|
||||||
|
|
28
src/traits.rs
Normal file
28
src/traits.rs
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
/// Collection where an item can be inserted at a given index.
|
||||||
|
pub trait InsertIndex: cc_traits::Collection {
|
||||||
|
type Output;
|
||||||
|
|
||||||
|
fn insert_index(
|
||||||
|
&mut self,
|
||||||
|
index: usize,
|
||||||
|
element: <Self as cc_traits::Collection>::Item,
|
||||||
|
) -> Self::Output;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nostd"))]
|
||||||
|
impl<T> InsertIndex for Vec<T> {
|
||||||
|
type Output = ();
|
||||||
|
|
||||||
|
fn insert_index(&mut self, index: usize, element: T) {
|
||||||
|
self.insert(index, element)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "smallvec")]
|
||||||
|
impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
|
||||||
|
type Output = ();
|
||||||
|
|
||||||
|
fn insert_index(&mut self, index: usize, element: T) {
|
||||||
|
self.insert(index, element)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue