feat: generic collection
This commit is contained in:
parent
1594c39337
commit
7db8e9c3ab
5 changed files with 128 additions and 21 deletions
11
Cargo.toml
11
Cargo.toml
|
@ -1,20 +1,27 @@
|
|||
[package]
|
||||
name = "median-accumulator"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
authors = ["tuxmain <tuxmain@zettascript.org>"]
|
||||
license = "AGPL-3.0-only"
|
||||
repository = "https://git.txmn.tk/tuxmain/median-accumulator"
|
||||
documentation = "https://docs.rs/median-accumulator/"
|
||||
description = "Simple, fast, space-efficient accumulator for computing median"
|
||||
categories = ["algorithms"]
|
||||
categories = ["algorithms", "data-structures", "no-std"]
|
||||
|
||||
[dependencies]
|
||||
cc-traits = "1.0.0"
|
||||
smallvec = { version = "^1.6", optional = true }
|
||||
|
||||
[features]
|
||||
nostd = ["cc-traits/nostd"]
|
||||
smallvec = ["dep:smallvec", "cc-traits/smallvec"]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.4.0", features = ["html_reports"] }
|
||||
medianheap = "0.3.0"
|
||||
rand = "0.8.5"
|
||||
smallvec = "^1.6"
|
||||
|
||||
[[bench]]
|
||||
name = "comparison"
|
||||
|
|
19
README.md
19
README.md
|
@ -8,7 +8,8 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
|
|||
* **Time-efficient**: push is `O(log(N))`
|
||||
* **Generic**: `T: Clone + Ord`
|
||||
* **Tested**
|
||||
* **No unsafe**, no deps except `std`
|
||||
* **No unsafe**, no deps except `cc-traits` (and, optionally, `smallvec`)
|
||||
* **no_std** (optional): supports generic collections
|
||||
|
||||
Faster than other implementations when samples often share the same value. If that is not your case, you should use another implementation.
|
||||
|
||||
|
@ -17,7 +18,7 @@ Faster than other implementations if there are samples having the same value. If
|
|||
```rust
|
||||
use median_accumulator::*;
|
||||
|
||||
let mut acc = MedianAcc::new();
|
||||
let mut acc = vec::MedianAcc::new();
|
||||
|
||||
assert_eq!(acc.get_median(), None);
|
||||
acc.push(7);
|
||||
|
@ -30,9 +31,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));
|
|||
|
||||
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
|
||||
|
||||
## no_std
|
||||
|
||||
Example with [smallvec](https://crates.io/crates/smallvec) (requires the `smallvec` feature):
|
||||
|
||||
```rust
|
||||
use median_accumulator::*;
|
||||
|
||||
let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
|
||||
```
|
||||
|
||||
For collections other than `Vec` or `SmallVec`, you must implement the required [cc-traits](https://crates.io/crates/cc-traits) traits and `InsertIndex` for your collection type.
|
||||
|
||||
## License
|
||||
|
||||
CopyLeft 2022 Pascal Engélibert
|
||||
CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ fn compare_crates(c: &mut Criterion) {
|
|||
&samples,
|
||||
|b, _i| {
|
||||
b.iter(|| {
|
||||
let mut median = median_accumulator::MedianAcc::new();
|
||||
let mut median = median_accumulator::vec::MedianAcc::new();
|
||||
samples.iter().for_each(|s| median.push(*s));
|
||||
median.get_median()
|
||||
})
|
||||
|
@ -35,7 +35,7 @@ fn compare_crates(c: &mut Criterion) {
|
|||
&samples,
|
||||
|b, _i| {
|
||||
b.iter(|| {
|
||||
let mut median = median_accumulator::MedianAcc::new();
|
||||
let mut median = median_accumulator::vec::MedianAcc::new();
|
||||
samples.iter().for_each(|s| median.push(*s));
|
||||
median.get_median()
|
||||
})
|
||||
|
|
87
src/lib.rs
87
src/lib.rs
|
@ -1,7 +1,7 @@
|
|||
//! ```rust
|
||||
//! use median_accumulator::*;
|
||||
//!
|
||||
//! let mut acc = MedianAcc::new();
|
||||
//! let mut acc = vec::MedianAcc::new();
|
||||
//!
|
||||
//! assert_eq!(acc.get_median(), None);
|
||||
//! acc.push(7);
|
||||
|
@ -14,14 +14,29 @@
|
|||
//!
|
||||
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
#![cfg_attr(feature = "nostd", no_std)]
|
||||
|
||||
mod traits;
|
||||
|
||||
pub use traits::*;
|
||||
|
||||
use core::{cmp::Ordering, ops::DerefMut};
|
||||
|
||||
/// Accumulator for computing median
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct MedianAcc<T: Clone + Ord> {
|
||||
samples: Vec<(T, u32)>,
|
||||
pub struct MedianAcc<
|
||||
T: Clone + Ord,
|
||||
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
|
||||
> {
|
||||
samples: V,
|
||||
median_index: Option<usize>,
|
||||
median_subindex: u32,
|
||||
_t: core::marker::PhantomData<T>,
|
||||
}
|
||||
|
||||
/// Convenience alias backed by `Vec`.
///
/// Compiled only when the `nostd` feature is disabled.
#[cfg(not(feature = "nostd"))]
|
||||
pub mod vec {
|
||||
/// [`MedianAcc`](crate::MedianAcc) that stores its samples in a `Vec<(T, u32)>`.
pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
|
||||
}
|
||||
|
||||
/// Computed median
|
||||
|
@ -34,17 +49,41 @@ pub enum MedianResult<T: Clone + Ord> {
|
|||
Two(T, T),
|
||||
}
|
||||
|
||||
impl<T: Clone + Ord> MedianAcc<T> {
|
||||
impl<
|
||||
T: Clone + Ord,
|
||||
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
|
||||
> MedianAcc<T, V>
|
||||
{
|
||||
/// Create an empty accumulator
|
||||
///
|
||||
/// _O(1)_
|
||||
///
|
||||
/// Does not allocate until the first push.
|
||||
pub fn new() -> Self {
|
||||
/// If using `std::vec::Vec`, does not allocate until the first push.
|
||||
pub fn new() -> Self
|
||||
where
|
||||
V: Default,
|
||||
{
|
||||
Self {
|
||||
samples: Vec::new(),
|
||||
samples: Default::default(),
|
||||
median_index: None,
|
||||
median_subindex: 0,
|
||||
_t: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an empty accumulator from an existing (empty) collection
|
||||
///
|
||||
/// _O(1)_
|
||||
///
|
||||
/// Useful when using fixed-length collections or to avoid allocations.
///
/// # Panics
///
/// Panics with "the collection must be empty" if `collection` already contains items.
|
||||
pub fn new_from(collection: V) -> Self {
|
||||
// The median bookkeeping below starts from the "no samples" state,
// so a collection that already holds items is rejected up front.
assert!(collection.is_empty(), "the collection must be empty");
|
||||
|
||||
Self {
|
||||
samples: collection,
|
||||
median_index: None,
|
||||
median_subindex: 0,
|
||||
_t: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,7 +124,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
|||
}
|
||||
}
|
||||
Err(sample_index) => {
|
||||
self.samples.insert(sample_index, (sample, 1));
|
||||
self.samples.insert_index(sample_index, (sample, 1));
|
||||
if *median_index >= sample_index {
|
||||
if self.median_subindex == 0 {
|
||||
self.median_subindex =
|
||||
|
@ -105,7 +144,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
self.samples.push((sample, 1));
|
||||
self.samples.push_back((sample, 1));
|
||||
self.median_index = Some(0);
|
||||
}
|
||||
}
|
||||
|
@ -148,19 +187,22 @@ impl<T: Clone + Ord> MedianAcc<T> {
|
|||
}
|
||||
|
||||
/// Clear the data
|
||||
pub fn clear(&mut self) {
|
||||
pub fn clear(&mut self)
|
||||
where
|
||||
V: cc_traits::Clear,
|
||||
{
|
||||
self.samples.clear();
|
||||
self.median_index = None;
|
||||
self.median_subindex = 0;
|
||||
}
|
||||
|
||||
/// Access the underlying vec
|
||||
/// Access the underlying collection
|
||||
///
|
||||
/// Just in case you need finer allocation management.
|
||||
///
|
||||
/// # Safety
|
||||
/// Leaving the collection in an invalid state may cause invalid results or panics (but no UB).
|
||||
pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> {
|
||||
pub unsafe fn get_samples_mut(&mut self) -> &mut V {
|
||||
&mut self.samples
|
||||
}
|
||||
}
|
||||
|
@ -198,7 +240,24 @@ mod tests {
|
|||
let len: usize = rng.gen_range(0..100);
|
||||
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
|
||||
|
||||
let mut median = MedianAcc::new();
|
||||
let mut median = vec::MedianAcc::new();
|
||||
for sample in samples.iter() {
|
||||
median.push(*sample);
|
||||
}
|
||||
|
||||
assert_eq!(median.get_median(), naive_median(&mut samples));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correctness_smallvec() {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
for _ in 0..100_000 {
|
||||
let len: usize = rng.gen_range(0..64);
|
||||
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
|
||||
|
||||
let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
|
||||
for sample in samples.iter() {
|
||||
median.push(*sample);
|
||||
}
|
||||
|
|
28
src/traits.rs
Normal file
28
src/traits.rs
Normal file
|
@ -0,0 +1,28 @@
|
|||
/// Collection where an item can be inserted at a given index.
///
/// Builds on `cc_traits::Collection`: the inserted element has the
/// collection's `Item` type.
|
||||
pub trait InsertIndex: cc_traits::Collection {
|
||||
/// Value returned by [`insert_index`](InsertIndex::insert_index).
type Output;
|
||||
|
||||
/// Insert `element` into the collection at position `index`.
///
/// NOTE(review): what happens for an out-of-range `index` is not fixed by
/// this trait; it depends on the implementing collection.
fn insert_index(
|
||||
&mut self,
|
||||
index: usize,
|
||||
element: <Self as cc_traits::Collection>::Item,
|
||||
) -> Self::Output;
|
||||
}
|
||||
|
||||
/// `InsertIndex` for the standard `Vec`.
///
/// Compiled only when the `nostd` feature is disabled.
#[cfg(not(feature = "nostd"))]
|
||||
impl<T> InsertIndex for Vec<T> {
|
||||
/// Insertion returns nothing.
type Output = ();
|
||||
|
||||
/// Delegates to `Vec::insert`, and therefore inherits its behavior
/// (including its documented panic when `index > len`).
fn insert_index(&mut self, index: usize, element: T) {
|
||||
self.insert(index, element)
|
||||
}
|
||||
}
|
||||
|
||||
/// `InsertIndex` for `smallvec::SmallVec`.
///
/// Compiled only when the `smallvec` feature is enabled.
#[cfg(feature = "smallvec")]
|
||||
impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
|
||||
/// Insertion returns nothing.
type Output = ();
|
||||
|
||||
/// Delegates to `SmallVec::insert`.
///
/// NOTE(review): out-of-range `index` handling is whatever `SmallVec::insert`
/// does — presumably a panic, as with `Vec::insert`; confirm against the smallvec docs.
fn insert_index(&mut self, index: usize, element: T) {
|
||||
self.insert(index, element)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue