Setup code architecture, some basic traits and types, and steepest descent algorithm

master
Alex Selimov 1 week ago
parent d998a034a3
commit f9bc5adb71

@@ -0,0 +1,12 @@
pub enum LineSearch {
    ConstAlpha { learning_rate: f64 },
}

impl LineSearch {
    pub fn get_learning_rate(&self) -> f64 {
        match self {
            LineSearch::ConstAlpha { learning_rate } => *learning_rate,
        }
    }
}
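
A minimal usage sketch (not part of the commit) of the constant-alpha strategy, assuming the enum stays a plain value type:

let line_search = LineSearch::ConstAlpha { learning_rate: 0.1 };
// A constant-alpha line search returns the same fixed step size on every call
assert_eq!(line_search.get_learning_rate(), 0.1);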

@@ -0,0 +1,2 @@
pub mod line_search;
pub mod steepest_descent;

@@ -0,0 +1,79 @@
use crate::{
    minimize::{Direction, ExitCondition, OptimizationResult},
    objective_function::ObjectiveFun,
    traits::XVar,
};

use super::line_search::LineSearch;
/// Steepest descent optimizer. Each iteration steps every variable along its gradient, scaled by
/// the line search learning rate and by `direction` (-1.0 to minimize, +1.0 to maximize), and
/// stops once the change in the objective falls below `tolerance` or `max_iters` is reached.
pub fn steepest_descent<T: XVar<E> + Clone, E>(
    fun: &dyn ObjectiveFun<T, E>,
    x0: &[T],
    max_iters: usize,
    tolerance: f64,
    line_search: &LineSearch,
    direction: f64,
) -> OptimizationResult<T> {
    // Make a mutable copy of x0 to work with
    let mut xs = Vec::new();
    xs.extend_from_slice(x0);

    // Perform the iteration
    let mut f_iminus1 = f64::INFINITY;
    let mut f = 0.0;
    let mut i = 0;
    for _ in 0..max_iters {
        let primes = fun.prime(&xs);
        xs.iter_mut().zip(primes.iter()).for_each(|(x, prime)| {
            *x = x.update(direction * line_search.get_learning_rate(), prime)
        });
        f = fun.eval(&xs);
        if (f - f_iminus1).abs() < tolerance {
            break;
        } else {
            f_iminus1 = f;
        }
        i += 1;
    }

    let exit_con = if i == max_iters {
        ExitCondition::MaxIter
    } else {
        ExitCondition::Converged
    };
    OptimizationResult {
        best_xs: xs,
        best_fun_val: f,
        exit_con,
        iters: i,
    }
}
#[cfg(test)]
mod test {
    use crate::objective_function::Fun;

    use super::*;

    #[test]
    pub fn simple_steepest_descent_test() {
        let fun = Box::new(|xs: &[f64]| xs.iter().fold(0.0, |acc, x| acc + x.powi(2)));
        let prime = Box::new(|xs: &[f64]| xs.iter().map(|x| 2.0 * x).collect::<Vec<f64>>());
        let obj = Fun::new(fun, prime);
        let line_search = LineSearch::ConstAlpha {
            learning_rate: 0.25,
        };
        let res = steepest_descent(&obj, &[20.0], 1000, 1e-12, &line_search, -1.0);
        if let ExitCondition::MaxIter = res.exit_con {
            panic!("Failed to converge to minimum");
        }
        println!(
            "{:?} on iteration {}\n{}",
            res.best_xs, res.iters, res.best_fun_val
        );
        assert!(res.best_fun_val < 1e-8);
    }
}
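
The test above exercises a single variable; the same call works unchanged for several variables. A hedged sketch (not part of the commit) minimizing f(x, y) = (x - 1)^2 + (y + 2)^2 with its analytical gradient:

let fun = Box::new(|xs: &[f64]| (xs[0] - 1.0).powi(2) + (xs[1] + 2.0).powi(2));
let prime = Box::new(|xs: &[f64]| vec![2.0 * (xs[0] - 1.0), 2.0 * (xs[1] + 2.0)]);
let obj = Fun::new(fun, prime);
let line_search = LineSearch::ConstAlpha { learning_rate: 0.1 };
// direction = -1.0 steps against the gradient, i.e. minimization
let res = steepest_descent(&obj, &[0.0, 0.0], 10_000, 1e-12, &line_search, -1.0);
// res.best_xs should end up close to [1.0, -2.0]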

@@ -0,0 +1,5 @@
pub mod gradient_descent;
pub mod heuristics;
pub mod minimize;
pub mod objective_function;
pub mod traits;

@@ -0,0 +1,28 @@
/// Result enum describing the exit condition for an optimization call
pub enum ExitCondition {
    /// Optimization has converged to user specified tolerance
    Converged,
    /// Optimization has exceeded user specified max iteration count
    MaxIter,
}

/// Whether the objective function should be minimized or maximized
pub enum Direction {
    Minimize,
    Maximize,
}

impl Direction {
    /// Sign applied to the gradient step: -1.0 for minimization, +1.0 for maximization
    pub fn factor(&self) -> f64 {
        match self {
            Direction::Minimize => -1.0,
            Direction::Maximize => 1.0,
        }
    }
}

/// Struct holding the results for a minimization call
pub struct OptimizationResult<T> {
    pub best_xs: Vec<T>,
    pub best_fun_val: f64,
    pub exit_con: ExitCondition,
    pub iters: usize,
}
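
The direction argument of steepest_descent above is currently a bare f64; a sketch of how Direction::factor is presumably meant to supply it (this wiring is an assumption, with obj, x0, and line_search defined as in the earlier test):

let direction = Direction::Minimize.factor(); // -1.0: step against the gradient
let res = steepest_descent(&obj, &x0, 1000, 1e-12, &line_search, direction);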

@@ -0,0 +1,84 @@
use crate::traits::XVar;

/// Trait that should be implemented for objects that will be minimized
pub trait ObjectiveFun<T: XVar<E> + Clone, E> {
    /// Return the objective function value at a specified coordinate
    fn eval(&self, xs: &[T]) -> f64;

    /// Return the gradients of the objective function value for specified coordinates
    fn prime(&self, xs: &[T]) -> Vec<E>;
}
/// Enum allowing for selection of the style of numerical differentiation
pub enum DiffStyle {
    ForwardDifference,
    BackwardDifference,
    CentralDifference,
}

/// Struct that wraps a lambda and provides a numerical derivative for it, for use in gradient
/// descent algorithms
pub struct FunWithNumericalDiff {
    function: Box<dyn Fn(&[f64]) -> f64>,
    dx: f64,
    style: DiffStyle,
}
impl ObjectiveFun<f64, f64> for FunWithNumericalDiff {
    fn eval(&self, xs: &[f64]) -> f64 {
        (self.function)(xs)
    }

    fn prime(&self, xs: &[f64]) -> Vec<f64> {
        let mut xs_local = Vec::new();
        xs_local.extend_from_slice(xs);
        // The boxed closure borrows `self` and `xs`, so it needs a non-'static object lifetime
        let f: Box<dyn FnMut((usize, &f64)) -> f64 + '_> = match self.style {
            DiffStyle::ForwardDifference => Box::new(move |(i, x)| -> f64 {
                xs_local[i] = x + self.dx;
                let xprime = ((self.function)(&xs_local) - (self.function)(xs)) / (self.dx);
                xs_local[i] = *x;
                xprime
            }),
            DiffStyle::BackwardDifference => Box::new(move |(i, x)| -> f64 {
                xs_local[i] = x - self.dx;
                let xprime = ((self.function)(xs) - (self.function)(&xs_local)) / (self.dx);
                xs_local[i] = *x;
                xprime
            }),
            DiffStyle::CentralDifference => Box::new(move |(i, x)| -> f64 {
                xs_local[i] = x - (0.5 * self.dx);
                let f1 = (self.function)(&xs_local);
                xs_local[i] = x + (0.5 * self.dx);
                let f2 = (self.function)(&xs_local);
                xs_local[i] = *x;
                (f2 - f1) / self.dx
            }),
        };
        xs.iter().enumerate().map(f).collect()
    }
}
/// Struct that wraps two lambdas, one providing the objective function evaluation and the other
/// providing the gradient value
pub struct Fun<T: XVar<E>, E> {
    function: Box<dyn Fn(&[T]) -> f64>,
    prime: Box<dyn Fn(&[T]) -> Vec<E>>,
}

/// Simple type alias to remove the generics (f64 variables and gradients)
pub type F64Fun = Fun<f64, f64>;

impl<T: XVar<E>, E> ObjectiveFun<T, E> for Fun<T, E> {
    fn eval(&self, xs: &[T]) -> f64 {
        (self.function)(xs)
    }

    fn prime(&self, xs: &[T]) -> Vec<E> {
        (self.prime)(xs)
    }
}

impl<T: XVar<E>, E> Fun<T, E> {
    pub fn new(function: Box<dyn Fn(&[T]) -> f64>, prime: Box<dyn Fn(&[T]) -> Vec<E>>) -> Self {
        Fun { function, prime }
    }
}
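
FunWithNumericalDiff does not yet have a constructor, so the following sketch uses a struct literal, which only works from inside this module (or after a new function is added); those details are assumptions. It checks the forward-difference gradient of f(x) = x^2 against the analytical value 2x:

let f = FunWithNumericalDiff {
    function: Box::new(|xs: &[f64]| xs.iter().fold(0.0, |acc, x| acc + x.powi(2))),
    dx: 1e-6,
    style: DiffStyle::ForwardDifference,
};
// Forward difference of x^2 at x = 3.0 should be close to 6.0, up to O(dx) error
let grad = f.prime(&[3.0]);
assert!((grad[0] - 6.0).abs() < 1e-4);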

@@ -0,0 +1,10 @@
/// Trait implemented by the variable type used in an optimization. `update` returns a new value
/// stepped by `alpha` along the gradient-like term `prime`.
pub trait XVar<E>: Clone {
    fn update(&self, alpha: f64, prime: &E) -> Self;
}

/// Implementation of XVar for an f64 type
impl XVar<f64> for f64 {
    fn update(&self, alpha: f64, prime: &f64) -> Self {
        self + alpha * prime
    }
}
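
A sketch (not in this commit) of implementing XVar for a hypothetical two-component point, with the gradient carried in a separate struct, to show why the variable type and the gradient type E are distinct parameters:

#[derive(Clone, Debug)]
struct Point2 {
    x: f64,
    y: f64,
}

struct Grad2 {
    dx: f64,
    dy: f64,
}

impl XVar<Grad2> for Point2 {
    fn update(&self, alpha: f64, prime: &Grad2) -> Self {
        // Step each coordinate along its own gradient component
        Point2 {
            x: self.x + alpha * prime.dx,
            y: self.y + alpha * prime.dy,
        }
    }
}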