This commit is contained in:
Pascal Engélibert 2024-03-15 20:15:21 +01:00
parent 7db8e9c3ab
commit 7c73d32ffa
5 changed files with 32 additions and 44 deletions

View file

@ -1,28 +1,34 @@
[package] [package]
name = "median-accumulator" name = "median-accumulator"
version = "0.2.0" version = "0.4.0"
edition = "2021" edition = "2021"
authors = ["tuxmain <tuxmain@zettascript.org>"] authors = ["tuxmain <tuxmain@zettascript.org>"]
license = "AGPL-3.0-only" license = "AGPL-3.0-only"
repository = "https://git.txmn.tk/tuxmain/median-accumulator" repository = "https://git.txmn.tk/tuxmain/median-accumulator"
documentation = "https://docs.rs/median-accumulator/" documentation = "https://docs.rs/median-accumulator/"
description = "Simple, fast, space-efficient accumulator for computing median" description = "Simple, fast, space-efficient, generic accumulator for computing median"
categories = ["algorithms", "data-structures", "no-std"] categories = ["algorithms", "data-structures", "no-std"]
keywords = ["median"]
[dependencies] [dependencies]
cc-traits = "1.0.0" cc-traits = { version = "2.0.0", default_features = false }
smallvec = { version = "^1.6", optional = true } smallvec = { version = "^1.6", optional = true }
[features] [features]
nostd = ["cc-traits/nostd"] std = ["cc-traits/alloc", "cc-traits/std"]
smallvec = ["dep:smallvec", "cc-traits/smallvec"] smallvec = ["dep:smallvec", "cc-traits/smallvec"]
default = ["std"]
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] } criterion = { version = "0.5.1", features = ["html_reports"] }
medianheap = "0.3.0" medianheap = "0.4.1"
rand = "0.8.5" rand = "0.8.5"
smallvec = "^1.6" smallvec = "^1.6"
[[bench]] [[bench]]
name = "comparison" name = "comparison"
harness = false harness = false
[package.metadata.docs.rs]
features = ["std"]

View file

@ -7,11 +7,10 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
* **Space-efficient**: `O(D)` space, D being the number of _different_ samples, not the _total_ number of samples * **Space-efficient**: `O(D)` space, D being the number of _different_ samples, not the _total_ number of samples
* **Time-efficient**: push is `O(log(N))` * **Time-efficient**: push is `O(log(N))`
* **Generic**: `T: Clone + Ord` * **Generic**: `T: Clone + Ord`
* **Tested** * **No unsafe**
* **No unsafe**, no deps
* **no_std** (optional): supports generic collections * **no_std** (optional): supports generic collections
Faster than other implementations if there are samples having the same value. If this is not the case for you, you should use another implementation. Faster than other implementations if lots of samples have the same value. If this is not the case for you, you should use another implementation.
## Use ## Use
@ -45,7 +44,7 @@ For other collections than `Vec` or `SmallVec`, you must implement [cc-traits](h
## License ## License
CopyLeft 2022-2023 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/) CopyLeft 2022-2024 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

View file

@ -1,54 +1,34 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use rand::Rng; use rand::Rng;
static ITERS: u32 = 10_000;
fn compare_crates(c: &mut Criterion) { fn compare_crates(c: &mut Criterion) {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let mut group = c.benchmark_group("Comparison"); let mut group = c.benchmark_group("Comparison");
for len in [10, 50, 100, 500, 1000] { for redundancy in [1, 5, 10, 20, 40] {
let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len / 5)).collect(); let samples: Vec<u32> = (0..ITERS)
.map(|_| rng.gen_range(0..ITERS / redundancy))
.collect();
group.bench_with_input( group.bench_with_input(
BenchmarkId::new("median_accumulator 1:5", len), BenchmarkId::new("median_accumulator", redundancy),
&samples, &samples,
|b, _i| { |b, _i| {
b.iter(|| { b.iter(|| {
let mut median = median_accumulator::vec::MedianAcc::new(); let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s)); samples.iter().for_each(|s| median.push(*s));
median.get_median() black_box(median.get_median());
}) })
}, },
); );
group.bench_with_input( group.bench_with_input(
BenchmarkId::new("medianheap 1:5", len), BenchmarkId::new("medianheap", redundancy),
&samples, &samples,
|b, _i| { |b, _i| {
b.iter(|| { b.iter(|| {
let mut median = medianheap::MedianHeap::new(); let mut median = medianheap::MedianHeap::new();
samples.iter().for_each(|s| median.push(*s)); samples.iter().for_each(|s| median.push(*s));
median.median() black_box(median.median());
})
},
);
let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len)).collect();
group.bench_with_input(
BenchmarkId::new("median_accumulator 1:1", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = median_accumulator::vec::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s));
median.get_median()
})
},
);
group.bench_with_input(
BenchmarkId::new("medianheap 1:1", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = medianheap::MedianHeap::new();
samples.iter().for_each(|s| median.push(*s));
median.median()
}) })
}, },
); );

View file

@ -14,7 +14,7 @@
//! //!
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples. //! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
#![cfg_attr(feature = "nostd", no_std)] #![cfg_attr(not(feature = "std"), no_std)]
mod traits; mod traits;
@ -34,7 +34,7 @@ pub struct MedianAcc<
_t: core::marker::PhantomData<T>, _t: core::marker::PhantomData<T>,
} }
#[cfg(not(feature = "nostd"))] #[cfg(feature = "std")]
pub mod vec { pub mod vec {
pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>; pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
} }
@ -213,6 +213,7 @@ mod tests {
use rand::Rng; use rand::Rng;
#[cfg(feature = "std")]
fn naive_median<T: Clone + Ord>(samples: &mut [T]) -> Option<MedianResult<T>> { fn naive_median<T: Clone + Ord>(samples: &mut [T]) -> Option<MedianResult<T>> {
if samples.is_empty() { if samples.is_empty() {
None None
@ -232,6 +233,7 @@ mod tests {
} }
} }
#[cfg(feature = "std")]
#[test] #[test]
fn correctness() { fn correctness() {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
@ -249,6 +251,7 @@ mod tests {
} }
} }
#[cfg(feature = "smallvec")]
#[test] #[test]
fn correctness_smallvec() { fn correctness_smallvec() {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();

View file

@ -9,7 +9,7 @@ pub trait InsertIndex: cc_traits::Collection {
) -> Self::Output; ) -> Self::Output;
} }
#[cfg(not(feature = "nostd"))] #[cfg(feature = "std")]
impl<T> InsertIndex for Vec<T> { impl<T> InsertIndex for Vec<T> {
type Output = (); type Output = ();