// rustmodel/examples/old/opti.rs (Rust, 400 lines, 10 KiB)

use crate::{model::*, solver::*, utils::*};
use nalgebra::{base::*, ComplexField};
use num_traits::{FromPrimitive, Zero};
use std::marker::PhantomData;
/// Newton iteration for the equation `x = x0 + dt * f(x)`.
///
/// Starting from `x0`, each iteration inverts `I - dt * df(x)` and applies it
/// to the residual `x - x0 - dt * f(x)` to obtain a correction that is then
/// subtracted from the current estimate.
///
/// The loop ends early when the correction norm drops below `tol` (that last
/// correction is *not* applied) or when the matrix cannot be inverted.
/// At most `niters` iterations are performed; the current estimate is
/// returned in every case.
pub fn newton<
    T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
    S: Settings,
    const D: usize,
>(
    model: &impl Model<T, S, D>,
    x0: Vect<T, D>,
    dt: T,
    tol: T,
    niters: usize,
) -> Vect<T, D>
where
    Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
{
    let mut estimate = x0;
    for _ in 0..niters {
        let jacobian = Mat::<T, D, D>::identity() - model.df(estimate) * dt;
        // The residual is only evaluated once the Jacobian is known to be invertible.
        let correction = match jacobian.try_inverse() {
            Some(inverse) => inverse * (estimate - x0 - model.f(estimate) * dt),
            None => break,
        };
        if correction.norm() < tol {
            break;
        }
        estimate -= correction;
    }
    estimate
}
/// Slower variant of the Newton iteration that solves a linear system.
///
/// Instead of inverting the matrix, each iteration LU-decomposes
/// `dt * df(x) - I` and solves it against `x0 + dt * f(x) - x`; the solution
/// is subtracted from the current estimate.
///
/// The loop ends early when the solution norm drops below `tol` (that last
/// correction is *not* applied) or when the system cannot be solved.
/// At most `niters` iterations are performed; the current estimate is
/// returned in every case.
pub fn _newton_slow<
    T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
    S: Settings,
    const D: usize,
>(
    model: &impl Model<T, S, D>,
    x0: Vect<T, D>,
    dt: T,
    tol: T,
    niters: usize,
) -> Vect<T, D>
where
    Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
{
    let mut estimate = x0;
    for _ in 0..niters {
        let rate = model.f(estimate);
        let jacobian = model.df(estimate);
        let rhs = x0 + rate * dt - estimate;
        let system = jacobian * dt - Mat::<T, D, D>::identity();
        match system.lu().solve(&rhs) {
            // Converged: the remaining correction is below the tolerance.
            Some(correction) if correction.norm() < tol => break,
            Some(correction) => estimate -= correction,
            // Singular system: give up and return the current estimate.
            None => break,
        }
    }
    estimate
}
/// Gradient-descent calibrator for the settings of a model.
///
/// Bundles a model with the solver used to evaluate it. `D` is the dimension
/// of the state vectors and `DS` the dimension of the settings once converted
/// to a vector.
#[derive(Clone)]
pub struct GradientDescentOptimizer<T, S, M, R, const D: usize, const DS: usize>
where
    S: Settings,
    M: Model<T, S, D> + Clone,
    R: Solver<T, S, M, D>,
{
    /// Model whose settings are being calibrated.
    pub model: M,
    /// Solver used to map a state to the next one when evaluating the objective.
    pub solver: R,
    /// Ties the otherwise unused `T` and `S` parameters to the struct.
    _ph: PhantomData<(T, S)>,
}
impl<
T: Copy + ComplexField<RealField = T> + FromPrimitive,
S: Settings + Clone + Into<Vect<T, DS>> + From<Vect<T, DS>>,
M: Model<T, S, D> + Clone,
R: Solver<T, S, M, D>,
const D: usize,
const DS: usize,
> GradientDescentOptimizer<T, S, M, R, D, DS>
where
Vect<T, DS>: std::ops::DivAssign<T> + std::ops::Mul<T, Output = Vect<T, DS>>,
{
pub fn new(model: M, solver: R) -> Self {
Self {
model,
solver,
_ph: PhantomData {},
}
}
/// Distance between f(x) and y_true, that we want to minimize
pub fn objective(&self, model: &M, x: Vect<T, D>, y_true: Vect<T, D>) -> T {
(self.solver.f(model, x) - y_true).norm()
}
/// Return gradient of the objective function
/// (opposite direction for Settings in order to make f(x) closer to y_true)
///
/// `free_settings` gives the indices of the settings we can change.
/// For example, if all the settings can change, set it to `0..DS`.
pub fn objective_gradient(
&self,
x: Vect<T, D>,
y_true: Vect<T, D>,
ep: T,
free_settings: impl Iterator<Item = usize>,
) -> Vect<T, DS> {
let diff = self.objective(&self.model, x, y_true);
let mut model = self.model.clone();
let s: Vect<T, DS> = model.get_settings().clone().into();
let mut si = s;
let mut ds = Vect::<T, DS>::zero();
for i in free_settings {
si[i] += ep;
*model.get_settings_mut() = si.into();
ds[i] = (self.objective(&model, x, y_true) - diff) / ep;
si[i] = s[i];
}
ds
}
pub fn objective_gradient_batch(
&self,
ylist_true: &[Vect<T, D>],
ep: T,
free_settings: impl Iterator<Item = usize> + Clone,
) -> Vect<T, DS> {
let nsamples = T::from_usize(ylist_true.len() - 1).unwrap();
let mut ds_batch = Vect::<T, DS>::zero();
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
ds_batch += ds;
}
ds_batch / nsamples
}
/// Calibrate settings using batch GD
pub fn calibrate_batch(
&mut self,
ylist_true: &[Vect<T, D>],
ep: T,
rate: T,
niters: usize,
free_settings: impl Iterator<Item = usize> + Clone,
) {
for _ in 0..niters {
let ds_batch = self.objective_gradient_batch(ylist_true, ep, free_settings.clone());
// ds_batch is the mean of the gradient of the objective function for each sample
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
s -= ds_batch * rate;
*self.model.get_settings_mut() = s.into();
}
}
/// Calibrate settings using stochastic GD
pub fn calibrate_stochastic(
&mut self,
ylist_true: &[Vect<T, D>],
ep: T,
rate: T,
niters: usize,
free_settings: impl Iterator<Item = usize> + Clone,
) {
for _ in 0..niters {
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
s -= ds * rate;
*self.model.get_settings_mut() = s.into();
}
}
}
/// Calibrate settings using batch GD and record path and error
///
/// Returns (path, error)
pub fn calibrate_batch_record(
&mut self,
ylist_true: &[Vect<T, D>],
ep: T,
rate: T,
niters: usize,
free_settings: impl Iterator<Item = usize> + Clone,
) -> (Vec<Vect<T, DS>>, Vec<T>)
where
T: PartialOrd,
{
let mut path = Vec::with_capacity(niters + 1);
path.push(self.model.get_settings().clone().into());
let mut errorlist = Vec::with_capacity(niters + 1);
errorlist.push(self.objective_batch(&self.model, ylist_true));
for _ in 0..niters {
let ds_batch = self.objective_gradient_batch(ylist_true, ep, free_settings.clone());
// ds_batch is the mean of the gradient of the objective function for each sample
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
s -= ds_batch * rate;
path.push(s);
*self.model.get_settings_mut() = s.into();
let error = self.objective_batch(&self.model, ylist_true);
if error > *errorlist.last().unwrap() {
path.pop();
*self.model.get_settings_mut() = (*path.last().unwrap()).into();
break;
}
errorlist.push(error);
}
(path, errorlist)
}
/// Calibrate settings using stochastic GD and record path and error
///
/// Returns (path, error)
pub fn calibrate_stochastic_record(
&mut self,
ylist_true: &[Vect<T, D>],
ep: T,
rate: T,
niters: usize,
free_settings: impl Iterator<Item = usize> + Clone,
) -> (Vec<Vect<T, DS>>, Vec<T>)
where
T: PartialOrd,
{
let mut path = Vec::with_capacity(niters * (niters - 1) + 1);
path.push(self.model.get_settings().clone().into());
let mut errorlist = Vec::with_capacity(niters * (niters - 1) + 1);
errorlist.push(self.objective_batch(&self.model, ylist_true));
for _ in 0..niters {
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
s -= ds * rate;
path.push(s);
*self.model.get_settings_mut() = s.into();
let error = self.objective_batch(&self.model, ylist_true);
if error > *errorlist.last().unwrap() {
path.pop();
*self.model.get_settings_mut() = (*path.last().unwrap()).into();
continue;
}
errorlist.push(error);
}
}
(path, errorlist)
}
/// Mean of the objective function on all the samples
pub fn objective_batch(&self, model: &M, ylist_true: &[Vect<T, D>]) -> T {
let nsamples = T::from_usize(ylist_true.len() - 1).unwrap();
let mut obj_batch = T::zero();
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
obj_batch += self.objective(model, *x, *y_true);
}
obj_batch / nsamples
}
}
#[cfg(test)]
mod test {
    use super::*;
    use nalgebra::vector;
    use rand::Rng;

    /// Checks that the finite-difference gradient settles as the step `ep` is
    /// halved repeatedly.
    ///
    /// This test can fail, but should succeed most of the time: inputs are
    /// random and only the proportion of failing draws is asserted.
    #[test]
    fn test_objective_gradient_convergence() {
        let mut rng = rand::thread_rng();
        let ep0 = 0.01;
        let nep = 20;
        let ntests = 1000;
        let mut fail_spikes = 0;
        let mut fail_d = 0;
        for _ in 0..ntests {
            let model = sir::Sir {
                s: sir::SirSettings {
                    beta: rng.gen(),
                    gamma: rng.gen(),
                    pop: 1.0,
                },
            };
            let solver = ImplicitEulerSolver {
                dt: 0.1,
                tol: 0.000001,
                niters: 100,
            };
            let optimizer = GradientDescentOptimizer::new(model, solver);
            let x = rng.gen();
            let y_true = rng.gen();
            let mut g = optimizer.objective_gradient(x, y_true, ep0, 0..2);
            let mut d = f64::MAX;
            // Number of times the successive difference is allowed to grow.
            let mut spikes = 5;
            // The literal needs an explicit type: a method call on a bare `2.0` is ambiguous.
            for ep in (1..nep).map(|i| ep0 / 2.0_f64.powi(i)) {
                let ng = optimizer.objective_gradient(x, y_true, ep, 0..2);
                let nd = (ng - g).norm();
                if nd.is_zero() {
                    break;
                }
                // Allow obj' having a local minimum between s and s+ep
                if nd >= d {
                    if spikes == 0 {
                        fail_spikes += 1;
                        break;
                    }
                    spikes -= 1;
                }
                g = ng;
                d = nd;
            }
            // d should be very small
            if d > 10.0 * ep0 / 2.0_f64.powi(nep - 1) {
                fail_d += 1;
            }
        }
        let prop_fail_spikes = fail_spikes as f64 / ntests as f64;
        let prop_fail_d = fail_d as f64 / ntests as f64;
        println!("Fail spikes: {} %", prop_fail_spikes * 100.0);
        println!("Fail d: {} %", prop_fail_d * 100.0);
        assert!(prop_fail_spikes < 0.015);
        assert!(prop_fail_d < 0.0015);
    }

    // TODO fix
    // NOTE(review): the error is now measured with the optimizer's own objective
    // (solver output vs. target) instead of comparing `model.f` with a state.
    // The test is kept disabled: a full, unscaled gradient step may still
    // overshoot, so the final assertion is not guaranteed to hold.
    //#[test]
    fn _test_objective_gradient_direction() {
        let mut rng = rand::thread_rng();
        let niters: usize = 1000;
        let x0 = vector![0.99, 0.01];
        let dt = 0.1;
        // Generate "true" data
        let settings_true = sir::SirSettings {
            beta: rng.gen(),
            gamma: rng.gen(),
            pop: 1.0,
        };
        let model = sir::Sir {
            s: dbg!(settings_true),
        };
        let solver = ImplicitEulerSolver {
            dt,
            tol: 0.000001,
            niters: 100,
        };
        let mut optimizer = GradientDescentOptimizer::new(model, solver);
        let mut xlist_true = Vec::with_capacity(niters + 1);
        xlist_true.push(x0);
        let mut x = x0;
        for _ in 0..niters {
            x = optimizer.solver.f(&optimizer.model, x);
            xlist_true.push(x);
        }
        // Start with random settings
        *optimizer.model.get_settings_mut() = dbg!(sir::SirSettings {
            beta: rng.gen(),
            gamma: rng.gen(),
            pop: 1.0,
        });
        // Compute descent direction
        let dir = dbg!(optimizer.objective_gradient(x0, xlist_true[1], 0.0000001, 0..2));
        // Check that this direction leads to smaller error
        let s: Vect<f64, 3> = optimizer.model.get_settings().clone().into();
        let err_before = optimizer.objective(&optimizer.model, x0, xlist_true[1]);
        *optimizer.model.get_settings_mut() = (s - dir).into(); // Apply direction
        let err_after = optimizer.objective(&optimizer.model, x0, xlist_true[1]);
        assert!(err_before >= err_after);
    }
}