v0.3.0

feat: generic collection
2024-03-15 20:15:21 +01:00 · 2023-04-14 12:39:11 +02:00
5 changed files with 151 additions and 56 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,21 +1,34 @@
 [package]
 name = "median-accumulator"
-version = "0.1.0"
+version = "0.4.0"
 edition = "2021"
 authors = ["tuxmain <tuxmain@zettascript.org>"]
 license = "AGPL-3.0-only"
 repository = "https://git.txmn.tk/tuxmain/median-accumulator"
 documentation = "https://docs.rs/median-accumulator/"
-description = "Simple, fast, space-efficient accumulator for computing median"
-categories = ["algorithms"]
+description = "Simple, fast, space-efficient, generic accumulator for computing median"
+categories = ["algorithms", "data-structures", "no-std"]
+keywords = ["median"]

 [dependencies]
+cc-traits = { version = "2.0.0", default_features = false }
+smallvec = { version = "^1.6", optional = true }
+
+[features]
+std = ["cc-traits/alloc", "cc-traits/std"]
+smallvec = ["dep:smallvec", "cc-traits/smallvec"]
+
+default = ["std"]

 [dev-dependencies]
-criterion = { version = "0.4.0", features = ["html_reports"] }
-medianheap = "0.3.0"
+criterion = { version = "0.5.1", features = ["html_reports"] }
+medianheap = "0.4.1"
 rand = "0.8.5"
+smallvec = "^1.6"

 [[bench]]
 name = "comparison"
 harness = false
+
+[package.metadata.docs.rs]
+features = ["std"]
--- a/README.md
+++ b/README.md
@ -7,17 +7,17 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
 * **Space-efficient**: `O(D)` space, D being the number of _different_ samples, not the _total_ number of samples
 * **Time-efficient**: push is `O(log(N))`
 * **Generic**: `T: Clone + Ord`
-* **Tested**
-* **No unsafe**, no deps except `std`
+* **No unsafe**
+* **no_std** (optional): supports generic collections

-Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation.
+Faster than other implementations when many samples share the same value. If that is not the case for your data, you should use another implementation.

 ## Use

 ```rust
 use median_accumulator::*;

-let mut acc = MedianAcc::new();
+let mut acc = vec::MedianAcc::new();

 assert_eq!(acc.get_median(), None);
 acc.push(7);
@ -30,9 +30,21 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));

 If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.

+## no_std
+
+Example with [smallvec](https://crates.io/crates/smallvec) (requires the `smallvec` feature):
+
+```rust
+use median_accumulator::*;
+
+let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
+```
+
+For collections other than `Vec` or `SmallVec`, you must implement the required [cc-traits](https://crates.io/crates/cc-traits) traits and `InsertIndex` for your collection type.
+
 ## License

-CopyLeft 2022 Pascal Engélibert
+CopyLeft 2022-2024 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)

 This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

--- a/benches/comparison.rs
+++ b/benches/comparison.rs
@ -1,54 +1,34 @@
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
 use rand::Rng;

+static ITERS: u32 = 10_000;
+
 fn compare_crates(c: &mut Criterion) {
 	let mut rng = rand::thread_rng();
 	let mut group = c.benchmark_group("Comparison");
-	for len in [10, 50, 100, 500, 1000] {
-		let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len / 5)).collect();
+	for redundancy in [1, 5, 10, 20, 40] {
+		let samples: Vec<u32> = (0..ITERS)
+			.map(|_| rng.gen_range(0..ITERS / redundancy))
+			.collect();
 		group.bench_with_input(
-			BenchmarkId::new("median_accumulator 1:5", len),
+			BenchmarkId::new("median_accumulator", redundancy),
 			&samples,
 			|b, _i| {
 				b.iter(|| {
-					let mut median = median_accumulator::MedianAcc::new();
+					let mut median = median_accumulator::vec::MedianAcc::new();
 					samples.iter().for_each(|s| median.push(*s));
-					median.get_median()
+					black_box(median.get_median());
 				})
 			},
 		);
 		group.bench_with_input(
-			BenchmarkId::new("medianheap 1:5", len),
+			BenchmarkId::new("medianheap", redundancy),
 			&samples,
 			|b, _i| {
 				b.iter(|| {
 					let mut median = medianheap::MedianHeap::new();
 					samples.iter().for_each(|s| median.push(*s));
-					median.median()
-				})
-			},
-		);
-
-		let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len)).collect();
-		group.bench_with_input(
-			BenchmarkId::new("median_accumulator 1:1", len),
-			&samples,
-			|b, _i| {
-				b.iter(|| {
-					let mut median = median_accumulator::MedianAcc::new();
-					samples.iter().for_each(|s| median.push(*s));
-					median.get_median()
-				})
-			},
-		);
-		group.bench_with_input(
-			BenchmarkId::new("medianheap 1:1", len),
-			&samples,
-			|b, _i| {
-				b.iter(|| {
-					let mut median = medianheap::MedianHeap::new();
-					samples.iter().for_each(|s| median.push(*s));
-					median.median()
+					black_box(median.median());
 				})
 			},
 		);
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,7 +1,7 @@
 //! ```rust
 //! use median_accumulator::*;
 //!
-//! let mut acc = MedianAcc::new();
+//! let mut acc = vec::MedianAcc::new();
 //!
 //! assert_eq!(acc.get_median(), None);
 //! acc.push(7);
@ -14,14 +14,29 @@
 //!
 //! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.

-use std::cmp::Ordering;
+#![cfg_attr(not(feature = "std"), no_std)]
+
+mod traits;
+
+pub use traits::*;
+
+use core::{cmp::Ordering, ops::DerefMut};

 /// Accumulator for computing median
 #[derive(Clone, Debug, Default)]
-pub struct MedianAcc<T: Clone + Ord> {
-	samples: Vec<(T, u32)>,
+pub struct MedianAcc<
+	T: Clone + Ord,
+	V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
+> {
+	samples: V,
 	median_index: Option<usize>,
 	median_subindex: u32,
+	_t: core::marker::PhantomData<T>,
+}
+
+#[cfg(feature = "std")]
+pub mod vec {
+	pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
 }

 /// Computed median
@ -34,17 +49,41 @@ pub enum MedianResult<T: Clone + Ord> {
 	Two(T, T),
 }

-impl<T: Clone + Ord> MedianAcc<T> {
+impl<
+		T: Clone + Ord,
+		V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
+	> MedianAcc<T, V>
+{
 	/// Create an empty accumulator
 	///
 	/// _O(1)_
 	///
-	/// Does not allocate until the first push.
-	pub fn new() -> Self {
+	/// If using `std::vec::Vec`, does not allocate until the first push.
+	pub fn new() -> Self
+	where
+		V: Default,
+	{
 		Self {
-			samples: Vec::new(),
+			samples: Default::default(),
 			median_index: None,
 			median_subindex: 0,
+			_t: Default::default(),
+		}
+	}
+
+	/// Create an empty accumulator from an existing (empty) collection
+	///
+	/// _O(1)_
+	///
+	/// Useful when using fixed-length collections or to avoid allocations.
+	pub fn new_from(collection: V) -> Self {
+		assert!(collection.is_empty(), "the collection must be empty");
+
+		Self {
+			samples: collection,
+			median_index: None,
+			median_subindex: 0,
+			_t: Default::default(),
 		}
 	}

@ -85,7 +124,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
 					}
 				}
 				Err(sample_index) => {
-					self.samples.insert(sample_index, (sample, 1));
+					self.samples.insert_index(sample_index, (sample, 1));
 					if *median_index >= sample_index {
 						if self.median_subindex == 0 {
 							self.median_subindex =
@ -105,7 +144,7 @@ impl<T: Clone + Ord> MedianAcc<T> {
 				}
 			}
 		} else {
-			self.samples.push((sample, 1));
+			self.samples.push_back((sample, 1));
 			self.median_index = Some(0);
 		}
 	}
@ -148,19 +187,22 @@ impl<T: Clone + Ord> MedianAcc<T> {
 	}

 	/// Clear the data
-	pub fn clear(&mut self) {
+	pub fn clear(&mut self)
+	where
+		V: cc_traits::Clear,
+	{
 		self.samples.clear();
 		self.median_index = None;
 		self.median_subindex = 0;
 	}

-	/// Access the underlying vec
+	/// Access the underlying collection
 	///
 	/// Just in case you need finer allocation management.
 	///
 	/// # Safety
 	/// Leaving the collection in an invalid state may cause invalid results or a panic (but no UB).
-	pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> {
+	pub unsafe fn get_samples_mut(&mut self) -> &mut V {
 		&mut self.samples
 	}
 }
@ -171,6 +213,7 @@ mod tests {

 	use rand::Rng;

+	#[cfg(feature = "std")]
 	fn naive_median<T: Clone + Ord>(samples: &mut [T]) -> Option<MedianResult<T>> {
 		if samples.is_empty() {
 			None
@ -190,6 +233,7 @@ mod tests {
 		}
 	}

+	#[cfg(feature = "std")]
 	#[test]
 	fn correctness() {
 		let mut rng = rand::thread_rng();
@ -198,7 +242,25 @@ mod tests {
 			let len: usize = rng.gen_range(0..100);
 			let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();

-			let mut median = MedianAcc::new();
+			let mut median = vec::MedianAcc::new();
+			for sample in samples.iter() {
+				median.push(*sample);
+			}
+
+			assert_eq!(median.get_median(), naive_median(&mut samples));
+		}
+	}
+
+	#[cfg(feature = "smallvec")]
+	#[test]
+	fn correctness_smallvec() {
+		let mut rng = rand::thread_rng();
+
+		for _ in 0..100_000 {
+			let len: usize = rng.gen_range(0..64);
+			let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
+
+			let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
 			for sample in samples.iter() {
 				median.push(*sample);
 			}
--- a/src/traits.rs
+++ b/src/traits.rs
@ -0,0 +1,28 @@
+/// Collection where an item can be inserted at a given index.
+pub trait InsertIndex: cc_traits::Collection {
+	type Output;
+
+	fn insert_index(
+		&mut self,
+		index: usize,
+		element: <Self as cc_traits::Collection>::Item,
+	) -> Self::Output;
+}
+
+#[cfg(feature = "std")]
+impl<T> InsertIndex for Vec<T> {
+	type Output = ();
+
+	fn insert_index(&mut self, index: usize, element: T) {
+		self.insert(index, element)
+	}
+}
+
+#[cfg(feature = "smallvec")]
+impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
+	type Output = ();
+
+	fn insert_index(&mut self, index: usize, element: T) {
+		self.insert(index, element)
+	}
+}
Author	SHA1	Message	Date
Pascal Engélibert	7c73d32ffa	v0.3.0	2024-03-15 20:15:21 +01:00
Pascal Engélibert	7db8e9c3ab	feat: generic collection	2023-04-14 12:39:11 +02:00