rustmodel/examples/old/opti.rs

use crate::{model::*, solver::*, utils::*};

use nalgebra::{base::*, ComplexField};
use num_traits::{FromPrimitive, Zero};
use std::marker::PhantomData;

/// Newton iteration for the implicit step `x = x0 + dt * model.f(x)`.
///
/// Starting from `x0`, each pass inverts `I - dt * model.df(x)` and applies the
/// result to the residual `x - x0 - dt * model.f(x)` to obtain a correction.
/// The loop ends after `niters` passes, as soon as the correction norm drops
/// below `tol` (that last correction is not applied), or when the matrix
/// cannot be inverted. The current iterate is returned in every case.
pub fn newton<
	T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
	S: Settings,
	const D: usize,
>(
	model: &impl Model<T, S, D>,
	x0: Vect<T, D>,
	dt: T,
	tol: T,
	niters: usize,
) -> Vect<T, D>
where
	Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
{
	let mut x = x0;

	for _ in 0..niters {
		let system = Mat::<T, D, D>::identity() - model.df(x) * dt;
		let inverse = match system.try_inverse() {
			Some(inverse) => inverse,
			// Singular system: give up and keep the current iterate.
			None => break,
		};
		let residual = x - x0 - model.f(x) * dt;
		let correction = inverse * residual;
		if correction.norm() < tol {
			break;
		}
		x -= correction;
	}
	x
}

/// Slower variant of the Newton iteration that solves a linear system through
/// an LU decomposition instead of inverting the matrix.
///
/// The residual and the matrix are both written with the opposite sign
/// (`x0 + dt * f(x) - x` and `dt * df(x) - I`). The loop stops after `niters`
/// passes, when the correction norm is below `tol`, or when the linear system
/// has no solution.
pub fn _newton_slow<
	T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
	S: Settings,
	const D: usize,
>(
	model: &impl Model<T, S, D>,
	x0: Vect<T, D>,
	dt: T,
	tol: T,
	niters: usize,
) -> Vect<T, D>
where
	Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
{
	let mut x = x0;

	for _ in 0..niters {
		let rhs = model.f(x);
		let jacobian = model.df(x);
		let residual = x0 + rhs * dt - x;
		let system = jacobian * dt - Mat::<T, D, D>::identity();
		match system.lu().solve(&residual) {
			// Converged: the remaining correction is below the tolerance.
			Some(correction) if correction.norm() < tol => break,
			Some(correction) => x -= correction,
			// The system could not be solved: keep the current iterate.
			None => break,
		}
	}
	x
}

/// Pairs a model with the solver used to advance it, so that the model's
/// settings can be calibrated by gradient descent.
///
/// `D` is the dimension of the state vectors handled by the model and solver;
/// `DS` is the dimension of the vector form of the settings.
#[derive(Clone)]
pub struct GradientDescentOptimizer<T, S, M, R, const D: usize, const DS: usize>
where
	S: Settings,
	M: Model<T, S, D> + Clone,
	R: Solver<T, S, M, D>,
{
	/// Model whose settings are being calibrated.
	pub model: M,
	/// Solver applied to the model to produce predictions.
	pub solver: R,
	/// Marker tying the otherwise unused `T` and `S` parameters to the type.
	_ph: PhantomData<(T, S)>,
}

impl<
		T: Copy + ComplexField<RealField = T> + FromPrimitive,
		S: Settings + Clone + Into<Vect<T, DS>> + From<Vect<T, DS>>,
		M: Model<T, S, D> + Clone,
		R: Solver<T, S, M, D>,
		const D: usize,
		const DS: usize,
	> GradientDescentOptimizer<T, S, M, R, D, DS>
where
	Vect<T, DS>: std::ops::DivAssign<T> + std::ops::Mul<T, Output = Vect<T, DS>>,
{
	/// Builds an optimizer from the model to calibrate and the solver that advances it.
	pub fn new(model: M, solver: R) -> Self {
		let _ph = PhantomData;
		Self { model, solver, _ph }
	}

	/// Norm of the gap between the solver output for `x` and the target `y_true`.
	///
	/// This is the quantity the calibration tries to minimize. The solver is
	/// evaluated with the `model` passed in, not necessarily `self.model`.
	pub fn objective(&self, model: &M, x: Vect<T, D>, y_true: Vect<T, D>) -> T {
		let predicted = self.solver.f(model, x);
		let gap = predicted - y_true;
		gap.norm()
	}

	/// Forward finite-difference gradient of the objective with respect to the settings.
	///
	/// Moving the settings in the opposite direction of the returned vector
	/// should bring the solver output for `x` closer to `y_true`.
	///
	/// `ep` is the perturbation added to one setting at a time.
	/// `free_settings` yields the indices of the settings that may change
	/// (use `0..DS` to free all of them); every other entry of the result is zero.
	pub fn objective_gradient(
		&self,
		x: Vect<T, D>,
		y_true: Vect<T, D>,
		ep: T,
		free_settings: impl Iterator<Item = usize>,
	) -> Vect<T, DS> {
		let baseline = self.objective(&self.model, x, y_true);
		// Work on a copy so the optimizer's own model is never modified.
		let mut probe = self.model.clone();
		let reference: Vect<T, DS> = probe.get_settings().clone().into();
		let mut gradient = Vect::<T, DS>::zero();
		for idx in free_settings {
			// Perturb a single setting, starting again from the reference values.
			let mut perturbed = reference;
			perturbed[idx] += ep;
			*probe.get_settings_mut() = perturbed.into();
			gradient[idx] = (self.objective(&probe, x, y_true) - baseline) / ep;
		}
		gradient
	}

	/// Mean of [`Self::objective_gradient`] over every pair of consecutive samples.
	///
	/// Each sample is used as the input state and the sample that follows it as
	/// the target. `ep` and `free_settings` are forwarded unchanged.
	///
	/// Returns the zero vector when `ylist_true` holds fewer than two samples:
	/// there is no pair to evaluate, and computing `len() - 1` or dividing by a
	/// zero count would otherwise underflow or produce NaN.
	///
	/// # Panics
	///
	/// Panics if the number of pairs cannot be converted to `T`.
	pub fn objective_gradient_batch(
		&self,
		ylist_true: &[Vect<T, D>],
		ep: T,
		free_settings: impl Iterator<Item = usize> + Clone,
	) -> Vect<T, DS> {
		let mut ds_batch = Vect::<T, DS>::zero();

		let npairs = ylist_true.len().saturating_sub(1);
		if npairs == 0 {
			return ds_batch;
		}
		let nsamples = T::from_usize(npairs).expect("pair count must be representable in T");

		for pair in ylist_true.windows(2) {
			let ds = self.objective_gradient(pair[0], pair[1], ep, free_settings.clone());
			ds_batch += ds;
		}
		ds_batch / nsamples
	}

	/// Calibrate settings using batch GD
	///
	/// Runs `niters` updates. Each update subtracts `rate` times the mean
	/// gradient over all sample pairs from the settings of `self.model`.
	pub fn calibrate_batch(
		&mut self,
		ylist_true: &[Vect<T, D>],
		ep: T,
		rate: T,
		niters: usize,
		free_settings: impl Iterator<Item = usize> + Clone,
	) {
		for _ in 0..niters {
			let mut updated: Vect<T, DS> = self.model.get_settings().clone().into();

			// Mean of the objective gradient over every sample pair.
			let mean_gradient =
				self.objective_gradient_batch(ylist_true, ep, free_settings.clone());

			updated -= mean_gradient * rate;
			*self.model.get_settings_mut() = updated.into();
		}
	}

	/// Calibrate settings using stochastic GD
	///
	/// Runs `niters` passes over the samples. Within a pass the settings of
	/// `self.model` are updated after every pair of consecutive samples, using
	/// the gradient of that pair alone scaled by `rate`.
	pub fn calibrate_stochastic(
		&mut self,
		ylist_true: &[Vect<T, D>],
		ep: T,
		rate: T,
		niters: usize,
		free_settings: impl Iterator<Item = usize> + Clone,
	) {
		for _ in 0..niters {
			// `pair[0]` is the input state, `pair[1]` the state expected after one step.
			for pair in ylist_true.windows(2) {
				let gradient =
					self.objective_gradient(pair[0], pair[1], ep, free_settings.clone());

				let mut updated: Vect<T, DS> = self.model.get_settings().clone().into();
				updated -= gradient * rate;
				*self.model.get_settings_mut() = updated.into();
			}
		}
	}

	/// Calibrate settings using batch GD and record path and error
	///
	/// The first entry of each list describes the settings before any update.
	/// As soon as an update increases the mean objective, it is undone and the
	/// calibration stops.
	///
	/// Returns (path, error)
	pub fn calibrate_batch_record(
		&mut self,
		ylist_true: &[Vect<T, D>],
		ep: T,
		rate: T,
		niters: usize,
		free_settings: impl Iterator<Item = usize> + Clone,
	) -> (Vec<Vect<T, DS>>, Vec<T>)
	where
		T: PartialOrd,
	{
		let mut path = Vec::with_capacity(niters + 1);
		let mut errorlist = Vec::with_capacity(niters + 1);
		path.push(self.model.get_settings().clone().into());
		errorlist.push(self.objective_batch(&self.model, ylist_true));

		for _ in 0..niters {
			// Mean of the objective gradient over every sample pair.
			let mean_gradient =
				self.objective_gradient_batch(ylist_true, ep, free_settings.clone());

			let mut candidate: Vect<T, DS> = self.model.get_settings().clone().into();
			candidate -= mean_gradient * rate;
			*self.model.get_settings_mut() = candidate.into();

			let error = self.objective_batch(&self.model, ylist_true);
			if error > *errorlist.last().unwrap() {
				// The step made things worse: restore the last recorded settings and stop.
				*self.model.get_settings_mut() = (*path.last().unwrap()).into();
				break;
			}
			path.push(candidate);
			errorlist.push(error);
		}
		(path, errorlist)
	}

	/// Calibrate settings using stochastic GD and record path and error
	///
	/// The settings are updated after every pair of consecutive samples, for
	/// `niters` passes over the samples. An update that increases the mean
	/// objective is undone and not recorded; the calibration then carries on
	/// with the next pair. The first entry of each list describes the settings
	/// before any update.
	///
	/// Returns (path, error)
	pub fn calibrate_stochastic_record(
		&mut self,
		ylist_true: &[Vect<T, D>],
		ep: T,
		rate: T,
		niters: usize,
		free_settings: impl Iterator<Item = usize> + Clone,
	) -> (Vec<Vect<T, DS>>, Vec<T>)
	where
		T: PartialOrd,
	{
		// At most one recorded update per sample pair and per pass, plus the
		// initial entry. `saturating_sub` keeps an empty sample list from underflowing.
		let capacity = niters * ylist_true.len().saturating_sub(1) + 1;
		let mut path = Vec::with_capacity(capacity);
		path.push(self.model.get_settings().clone().into());
		let mut errorlist = Vec::with_capacity(capacity);
		errorlist.push(self.objective_batch(&self.model, ylist_true));

		for _ in 0..niters {
			for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
				let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());

				let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
				s -= ds * rate;
				*self.model.get_settings_mut() = s.into();

				let error = self.objective_batch(&self.model, ylist_true);
				if error > *errorlist.last().unwrap() {
					// The step made things worse: restore the last recorded settings
					// and move on to the next pair.
					*self.model.get_settings_mut() = (*path.last().unwrap()).into();
					continue;
				}
				path.push(s);
				errorlist.push(error);
			}
		}
		(path, errorlist)
	}

	/// Mean of the objective function on all the samples
	///
	/// Each sample is used as the input state and the sample that follows it as
	/// the target, so `ylist_true` yields `len - 1` pairs.
	///
	/// Returns zero when `ylist_true` holds fewer than two samples: there is no
	/// pair to evaluate, and computing `len() - 1` or dividing by a zero count
	/// would otherwise underflow or produce NaN.
	///
	/// # Panics
	///
	/// Panics if the number of pairs cannot be converted to `T`.
	pub fn objective_batch(&self, model: &M, ylist_true: &[Vect<T, D>]) -> T {
		let mut obj_batch = T::zero();

		let npairs = ylist_true.len().saturating_sub(1);
		if npairs == 0 {
			return obj_batch;
		}
		let nsamples = T::from_usize(npairs).expect("pair count must be representable in T");

		for pair in ylist_true.windows(2) {
			obj_batch += self.objective(model, pair[0], pair[1]);
		}
		obj_batch / nsamples
	}
}

#[cfg(test)]
mod test {
	use super::*;
	use nalgebra::vector;
	use rand::Rng;

	/// Checks that the finite-difference gradient settles as the step `ep` is halved.
	///
	/// For random settings and states, the gradient is recomputed with
	/// `ep0 / 2^i` and the distance between successive estimates is expected to
	/// shrink, with a small budget of tolerated increases ("spikes").
	///
	/// This test can fail, but should succeed most of the time
	#[test]
	fn test_objective_gradient_convergence() {
		let mut rng = rand::thread_rng();

		let ep0 = 0.01;
		let nep = 20;
		let ntests = 1000;

		let mut fail_spikes = 0;
		let mut fail_d = 0;

		for _ in 0..ntests {
			let model = sir::Sir {
				s: sir::SirSettings {
					beta: rng.gen(),
					gamma: rng.gen(),
					pop: 1.0,
				},
			};
			let solver = ImplicitEulerSolver {
				dt: 0.1,
				tol: 0.000001,
				niters: 100,
			};
			let optimizer = GradientDescentOptimizer::new(model, solver);
			let x = rng.gen();
			let y_true = rng.gen();

			let mut g = optimizer.objective_gradient(x, y_true, ep0, 0..2);
			let mut d = f64::MAX;
			let mut spikes = 5;
			// The literal carries an explicit type: a method cannot be called on
			// a float literal whose type is still ambiguous.
			for ep in (1..nep).map(|i| ep0 / 2.0_f64.powi(i)) {
				let ng = optimizer.objective_gradient(x, y_true, ep, 0..2);
				let nd = (ng - g).norm();
				if nd.is_zero() {
					// Two successive estimates are identical: record the zero
					// distance so this case is not counted as a failure below.
					d = nd;
					break;
				}
				// Allow obj' having a local minimum between s and s+ep
				if nd >= d {
					if spikes == 0 {
						fail_spikes += 1;
						break;
					}
					spikes -= 1;
				}
				g = ng;
				d = nd;
			}
			// d should be very small
			if d > 10.0 * ep0 / 2.0_f64.powi(nep - 1) {
				fail_d += 1;
			}
		}

		let prop_fail_spikes = fail_spikes as f64 / ntests as f64;
		let prop_fail_d = fail_d as f64 / ntests as f64;
		println!("Fail spikes: {} %", prop_fail_spikes * 100.0);
		println!("Fail d: {} %", prop_fail_d * 100.0);
		assert!(prop_fail_spikes < 0.015);
		assert!(prop_fail_d < 0.0015);
	}

	// TODO re-enable once this check is confirmed to pass reliably.
	// The comparison now uses the optimizer's objective (one solver step), which
	// is the function the gradient is computed for, instead of `model.f`.
	//#[test]
	fn _test_objective_gradient_direction() {
		let mut rng = rand::thread_rng();

		let niters: usize = 1000;
		let x0 = vector![0.99, 0.01];
		let dt = 0.1;

		// Generate "true" data
		let settings_true = sir::SirSettings {
			beta: rng.gen(),
			gamma: rng.gen(),
			pop: 1.0,
		};
		let model = sir::Sir { s: settings_true };
		let solver = ImplicitEulerSolver {
			dt,
			tol: 0.000001,
			niters: 100,
		};
		let mut optimizer = GradientDescentOptimizer::new(model, solver);
		let mut xlist_true = Vec::with_capacity(niters + 1);
		xlist_true.push(x0);
		let mut x = x0;
		for _ in 0..niters {
			x = optimizer.solver.f(&optimizer.model, x);
			xlist_true.push(x);
		}

		// Start with random settings
		*optimizer.model.get_settings_mut() = sir::SirSettings {
			beta: rng.gen(),
			gamma: rng.gen(),
			pop: 1.0,
		};

		// Compute descent direction
		let dir = optimizer.objective_gradient(x0, xlist_true[1], 0.0000001, 0..2);

		// Check that this direction leads to smaller error, measured with the
		// same objective the gradient was derived from.
		let s: Vect<f64, 3> = optimizer.model.get_settings().clone().into();
		let error = optimizer.objective(&optimizer.model, x0, xlist_true[1]);
		*optimizer.model.get_settings_mut() = (s - dir).into(); // Apply direction
		let error_new = optimizer.objective(&optimizer.model, x0, xlist_true[1]);
		assert!(error >= error_new);
	}
}