400 lines
10 KiB
Rust
400 lines
10 KiB
Rust
use crate::{model::*, solver::*, utils::*};
|
|
|
|
use nalgebra::{base::*, ComplexField};
|
|
use num_traits::{FromPrimitive, Zero};
|
|
use std::marker::PhantomData;
|
|
|
|
pub fn newton<
|
|
T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
|
|
S: Settings,
|
|
const D: usize,
|
|
>(
|
|
model: &impl Model<T, S, D>,
|
|
x0: Vect<T, D>,
|
|
dt: T,
|
|
tol: T,
|
|
niters: usize,
|
|
) -> Vect<T, D>
|
|
where
|
|
Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
|
|
{
|
|
let mut x = x0;
|
|
|
|
for _ in 0..niters {
|
|
if let Some(m) = (Mat::<T, D, D>::identity() - model.df(x) * dt).try_inverse() {
|
|
let dx = m * (x - x0 - model.f(x) * dt);
|
|
if dx.norm() < tol {
|
|
break;
|
|
}
|
|
x -= dx;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
x
|
|
}
|
|
|
|
/// Slower version using a linear system.
|
|
pub fn _newton_slow<
|
|
T: Copy + Scalar + ComplexField<RealField = T> + PartialOrd,
|
|
S: Settings,
|
|
const D: usize,
|
|
>(
|
|
model: &impl Model<T, S, D>,
|
|
x0: Vect<T, D>,
|
|
dt: T,
|
|
tol: T,
|
|
niters: usize,
|
|
) -> Vect<T, D>
|
|
where
|
|
Const<D>: ToTypenum + DimMin<Const<D>, Output = Const<D>>,
|
|
{
|
|
let mut x = x0;
|
|
|
|
for _ in 0..niters {
|
|
let fi = model.f(x);
|
|
let dfi = model.df(x);
|
|
let g = x0 + fi * dt - x;
|
|
let dgdx = dfi * dt - Mat::<T, D, D>::identity();
|
|
if let Some(dx) = dgdx.lu().solve(&g) {
|
|
if dx.norm() < tol {
|
|
break;
|
|
}
|
|
x -= dx;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
x
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct GradientDescentOptimizer<
|
|
T,
|
|
S: Settings,
|
|
M: Model<T, S, D> + Clone,
|
|
R: Solver<T, S, M, D>,
|
|
const D: usize,
|
|
const DS: usize,
|
|
> {
|
|
pub model: M,
|
|
pub solver: R,
|
|
_ph: PhantomData<(T, S)>,
|
|
}
|
|
|
|
impl<
|
|
T: Copy + ComplexField<RealField = T> + FromPrimitive,
|
|
S: Settings + Clone + Into<Vect<T, DS>> + From<Vect<T, DS>>,
|
|
M: Model<T, S, D> + Clone,
|
|
R: Solver<T, S, M, D>,
|
|
const D: usize,
|
|
const DS: usize,
|
|
> GradientDescentOptimizer<T, S, M, R, D, DS>
|
|
where
|
|
Vect<T, DS>: std::ops::DivAssign<T> + std::ops::Mul<T, Output = Vect<T, DS>>,
|
|
{
|
|
pub fn new(model: M, solver: R) -> Self {
|
|
Self {
|
|
model,
|
|
solver,
|
|
_ph: PhantomData {},
|
|
}
|
|
}
|
|
|
|
/// Distance between f(x) and y_true, that we want to minimize
|
|
pub fn objective(&self, model: &M, x: Vect<T, D>, y_true: Vect<T, D>) -> T {
|
|
(self.solver.f(model, x) - y_true).norm()
|
|
}
|
|
|
|
/// Return gradient of the objective function
|
|
/// (opposite direction for Settings in order to make f(x) closer to y_true)
|
|
///
|
|
/// `free_settings` gives the indices of the settings we can change.
|
|
/// For example, if all the settings can change, set it to `0..DS`.
|
|
pub fn objective_gradient(
|
|
&self,
|
|
x: Vect<T, D>,
|
|
y_true: Vect<T, D>,
|
|
ep: T,
|
|
free_settings: impl Iterator<Item = usize>,
|
|
) -> Vect<T, DS> {
|
|
let diff = self.objective(&self.model, x, y_true);
|
|
let mut model = self.model.clone();
|
|
let s: Vect<T, DS> = model.get_settings().clone().into();
|
|
let mut si = s;
|
|
let mut ds = Vect::<T, DS>::zero();
|
|
for i in free_settings {
|
|
si[i] += ep;
|
|
*model.get_settings_mut() = si.into();
|
|
ds[i] = (self.objective(&model, x, y_true) - diff) / ep;
|
|
si[i] = s[i];
|
|
}
|
|
ds
|
|
}
|
|
|
|
pub fn objective_gradient_batch(
|
|
&self,
|
|
ylist_true: &[Vect<T, D>],
|
|
ep: T,
|
|
free_settings: impl Iterator<Item = usize> + Clone,
|
|
) -> Vect<T, DS> {
|
|
let nsamples = T::from_usize(ylist_true.len() - 1).unwrap();
|
|
|
|
let mut ds_batch = Vect::<T, DS>::zero();
|
|
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
|
|
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
|
|
ds_batch += ds;
|
|
}
|
|
ds_batch / nsamples
|
|
}
|
|
|
|
/// Calibrate settings using batch GD
|
|
pub fn calibrate_batch(
|
|
&mut self,
|
|
ylist_true: &[Vect<T, D>],
|
|
ep: T,
|
|
rate: T,
|
|
niters: usize,
|
|
free_settings: impl Iterator<Item = usize> + Clone,
|
|
) {
|
|
for _ in 0..niters {
|
|
let ds_batch = self.objective_gradient_batch(ylist_true, ep, free_settings.clone());
|
|
|
|
// ds_batch is the mean of the gradient of the objective function for each sample
|
|
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
|
|
s -= ds_batch * rate;
|
|
*self.model.get_settings_mut() = s.into();
|
|
}
|
|
}
|
|
|
|
/// Calibrate settings using stochastic GD
|
|
pub fn calibrate_stochastic(
|
|
&mut self,
|
|
ylist_true: &[Vect<T, D>],
|
|
ep: T,
|
|
rate: T,
|
|
niters: usize,
|
|
free_settings: impl Iterator<Item = usize> + Clone,
|
|
) {
|
|
for _ in 0..niters {
|
|
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
|
|
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
|
|
|
|
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
|
|
s -= ds * rate;
|
|
*self.model.get_settings_mut() = s.into();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Calibrate settings using batch GD and record path and error
|
|
///
|
|
/// Returns (path, error)
|
|
pub fn calibrate_batch_record(
|
|
&mut self,
|
|
ylist_true: &[Vect<T, D>],
|
|
ep: T,
|
|
rate: T,
|
|
niters: usize,
|
|
free_settings: impl Iterator<Item = usize> + Clone,
|
|
) -> (Vec<Vect<T, DS>>, Vec<T>)
|
|
where
|
|
T: PartialOrd,
|
|
{
|
|
let mut path = Vec::with_capacity(niters + 1);
|
|
path.push(self.model.get_settings().clone().into());
|
|
let mut errorlist = Vec::with_capacity(niters + 1);
|
|
errorlist.push(self.objective_batch(&self.model, ylist_true));
|
|
|
|
for _ in 0..niters {
|
|
let ds_batch = self.objective_gradient_batch(ylist_true, ep, free_settings.clone());
|
|
|
|
// ds_batch is the mean of the gradient of the objective function for each sample
|
|
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
|
|
s -= ds_batch * rate;
|
|
path.push(s);
|
|
*self.model.get_settings_mut() = s.into();
|
|
|
|
let error = self.objective_batch(&self.model, ylist_true);
|
|
if error > *errorlist.last().unwrap() {
|
|
path.pop();
|
|
*self.model.get_settings_mut() = (*path.last().unwrap()).into();
|
|
break;
|
|
}
|
|
errorlist.push(error);
|
|
}
|
|
(path, errorlist)
|
|
}
|
|
|
|
/// Calibrate settings using stochastic GD and record path and error
|
|
///
|
|
/// Returns (path, error)
|
|
pub fn calibrate_stochastic_record(
|
|
&mut self,
|
|
ylist_true: &[Vect<T, D>],
|
|
ep: T,
|
|
rate: T,
|
|
niters: usize,
|
|
free_settings: impl Iterator<Item = usize> + Clone,
|
|
) -> (Vec<Vect<T, DS>>, Vec<T>)
|
|
where
|
|
T: PartialOrd,
|
|
{
|
|
let mut path = Vec::with_capacity(niters * (niters - 1) + 1);
|
|
path.push(self.model.get_settings().clone().into());
|
|
let mut errorlist = Vec::with_capacity(niters * (niters - 1) + 1);
|
|
errorlist.push(self.objective_batch(&self.model, ylist_true));
|
|
|
|
for _ in 0..niters {
|
|
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
|
|
let ds = self.objective_gradient(*x, *y_true, ep, free_settings.clone());
|
|
|
|
let mut s: Vect<T, DS> = self.model.get_settings().clone().into();
|
|
s -= ds * rate;
|
|
path.push(s);
|
|
*self.model.get_settings_mut() = s.into();
|
|
|
|
let error = self.objective_batch(&self.model, ylist_true);
|
|
if error > *errorlist.last().unwrap() {
|
|
path.pop();
|
|
*self.model.get_settings_mut() = (*path.last().unwrap()).into();
|
|
continue;
|
|
}
|
|
errorlist.push(error);
|
|
}
|
|
}
|
|
(path, errorlist)
|
|
}
|
|
|
|
/// Mean of the objective function on all the samples
|
|
pub fn objective_batch(&self, model: &M, ylist_true: &[Vect<T, D>]) -> T {
|
|
let nsamples = T::from_usize(ylist_true.len() - 1).unwrap();
|
|
|
|
let mut obj_batch = T::zero();
|
|
for (x, y_true) in ylist_true.iter().zip(ylist_true.iter().skip(1)) {
|
|
obj_batch += self.objective(model, *x, *y_true);
|
|
}
|
|
obj_batch / nsamples
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;
    use nalgebra::vector;
    use rand::Rng;

    /// This test can fail, but should succeed most of the time
    ///
    /// For random SIR settings and random states, the finite-difference gradient
    /// is computed for a sequence of steps `ep0 / 2^i`. The distance between two
    /// successive gradients is expected to shrink; a limited number of increases
    /// ("spikes") is tolerated per sample. The proportions of failing samples are
    /// asserted at the end.
    #[test]
    fn test_objective_gradient_convergence() {
        let mut rng = rand::thread_rng();

        let ep0 = 0.01;
        let nep = 20;
        let ntests = 1000;

        let mut fail_spikes = 0;
        let mut fail_d = 0;

        for _ in 0..ntests {
            let model = sir::Sir {
                s: sir::SirSettings {
                    beta: rng.gen(),
                    gamma: rng.gen(),
                    pop: 1.0,
                },
            };
            let solver = ImplicitEulerSolver {
                dt: 0.1,
                tol: 0.000001,
                niters: 100,
            };
            let optimizer = GradientDescentOptimizer::new(model, solver);
            let x = rng.gen();
            let y_true = rng.gen();

            let mut g = optimizer.objective_gradient(x, y_true, ep0, 0..2);
            let mut d = f64::MAX;
            let mut spikes = 5;
            // The finite-difference step is halved at every iteration.
            for ep in (1..nep).map(|i| ep0 / 2.0.powi(i)) {
                let ng = optimizer.objective_gradient(x, y_true, ep, 0..2);
                let nd = (ng - g).norm();
                if nd.is_zero() {
                    break;
                }
                // Allow obj' having a local minimum between s and s+ep
                if nd >= d {
                    if spikes == 0 {
                        fail_spikes += 1;
                        break;
                    }
                    spikes -= 1;
                }
                g = ng;
                d = nd;
            }
            // d should be very small
            if d > 10.0 * ep0 / 2.0.powi(nep - 1) {
                fail_d += 1;
            }
        }

        let prop_fail_spikes = fail_spikes as f64 / ntests as f64;
        let prop_fail_d = fail_d as f64 / ntests as f64;
        println!("Fail spikes: {} %", prop_fail_spikes * 100.0);
        println!("Fail d: {} %", prop_fail_d * 100.0);
        assert!(prop_fail_spikes < 0.015);
        assert!(prop_fail_d < 0.0015);
    }

    // TODO fix
    // NOTE(review): the final assertion compares `model.f(x0)` with the recorded
    // next state, whereas `objective_gradient` differentiates the distance between
    // `solver.f(model, x)` and the target. That mismatch, or the unscaled step
    // `s - dir`, may be why this check fails; to be confirmed before re-enabling.
    //#[test]
    fn _test_objective_gradient_direction() {
        let mut rng = rand::thread_rng();

        let niters: usize = 1000;
        let x0 = vector![0.99, 0.01];
        let dt = 0.1;

        // Generate "true" data
        let settings_true = sir::SirSettings {
            beta: rng.gen(),
            gamma: rng.gen(),
            pop: 1.0,
        };
        let model = sir::Sir {
            s: dbg!(settings_true),
        };
        let solver = ImplicitEulerSolver {
            dt,
            tol: 0.000001,
            niters: 100,
        };
        let mut optimizer = GradientDescentOptimizer::new(model, solver);
        let mut xlist_true = Vec::with_capacity(niters + 1);
        xlist_true.push(x0);
        let mut x = x0;
        for _ in 0..niters {
            x = optimizer.solver.f(&optimizer.model, x);
            xlist_true.push(x);
        }

        // Start with random settings
        *optimizer.model.get_settings_mut() = dbg!(sir::SirSettings {
            beta: rng.gen(),
            gamma: rng.gen(),
            pop: 1.0,
        });

        // Compute descent direction
        let dir = dbg!(optimizer.objective_gradient(x0, xlist_true[1], 0.0000001, 0..2));

        // Check that this direction leads to smaller error
        let s: Vect<f64, 3> = optimizer.model.get_settings().clone().into();
        let y = optimizer.model.f(x0);
        *optimizer.model.get_settings_mut() = (s - dir).into(); // Apply direction
        let y_new = optimizer.model.f(x0);
        assert!((y - xlist_true[1]).norm() >= (y_new - xlist_true[1]).norm());
    }
}
|