Source code for jenn.core.cost

"""Cost Function.
=================

This module contains class and methods to efficiently 
compute the neural net cost function used for training. 
It is a modified version of the Least Squared Estimator (LSE), 
augmented with a penalty function for regularization and another 
term which accounts for Jacobian prediction error. See
`paper`_ for details and notation. 
"""  # noqa W291

from typing import List, Union

import numpy as np

from .data import Dataset
from .parameters import Parameters


class SquaredLoss:
    r"""Least Squares Estimator.

    :param Y_true: training data outputs
        :math:`Y \in \mathbb{R}^{n_y \times m}`
    :param Y_weights: weights by which to prioritize data points (optional)
    """

    def __init__(
        self, Y_true: np.ndarray, Y_weights: Union[np.ndarray, float] = 1.0
    ) -> None:
        self.Y_true = Y_true
        self.Y_error = np.zeros(Y_true.shape)  # preallocate to save resources
        self.Y_weights = np.ones(Y_true.shape) * Y_weights
        self.n_y, self.m = Y_true.shape

    def evaluate(self, Y_pred: np.ndarray) -> np.float64:
        r"""Compute least squares estimator of the states in place.

        :param Y_pred: predicted outputs
            :math:`A^{[L]} \in \mathbb{R}^{n_y \times m}`
        """
        self.Y_error[:, :] = Y_pred - self.Y_true
        self.Y_error *= np.sqrt(self.Y_weights)
        cost = 0.0
        for j in range(self.n_y):
            cost += np.dot(self.Y_error[j], self.Y_error[j].T)
        return np.float64(cost)
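
A minimal usage sketch (the array shapes and variable names here are hypothetical, chosen only for illustration):

    import numpy as np

    # 2 outputs, 5 training points (hypothetical shapes)
    Y_true = np.random.rand(2, 5)
    Y_pred = np.random.rand(2, 5)

    loss = SquaredLoss(Y_true)                     # uniform weights
    weighted = SquaredLoss(Y_true, Y_weights=2.0)  # prioritize all points equally, twice as much

    assert np.isclose(loss.evaluate(Y_pred), np.sum((Y_pred - Y_true) ** 2))
    assert np.isclose(weighted.evaluate(Y_pred), 2.0 * np.sum((Y_pred - Y_true) ** 2))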

class GradientEnhancement:
    r"""Least Squares Estimator for partials.

    :param J_true: training data Jacobian
        :math:`Y^{\prime} \in \mathbb{R}^{n_y \times n_x \times m}`
    :param J_weights: weights by which to prioritize partials (optional)
    """

    def __init__(
        self, J_true: np.ndarray, J_weights: Union[np.ndarray, float] = 1.0
    ) -> None:
        self.J_true = J_true
        self.J_error = np.zeros(J_true.shape)  # preallocate to save resources
        self.J_weights = np.ones(J_true.shape) * J_weights
        self.n_y, self.n_x, self.m = J_true.shape

    def evaluate(self, J_pred: np.ndarray) -> np.float64:
        r"""Compute least squares estimator for the partials.

        :param J_pred: predicted Jacobian
            :math:`A^{\prime[L]} \in \mathbb{R}^{n_y \times n_x \times m}`
        """
        self.J_error[:, :, :] = J_pred - self.J_true
        self.J_error *= np.sqrt(self.J_weights)  # apply weights once, as in SquaredLoss
        cost = 0.0
        for k in range(self.n_y):
            for j in range(self.n_x):
                dot_product = np.dot(self.J_error[k, j], self.J_error[k, j].T)
                cost += np.squeeze(dot_product)
        return np.float64(cost)
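
The same kind of sketch for the Jacobian term (hypothetical shapes; with uniform weights the result reduces to a plain sum of squared errors over all partials):

    import numpy as np

    # 1 output, 2 inputs, 4 training points (hypothetical shapes)
    J_true = np.random.rand(1, 2, 4)
    J_pred = np.random.rand(1, 2, 4)

    ge = GradientEnhancement(J_true)  # uniform J_weights
    assert np.isclose(ge.evaluate(J_pred), np.sum((J_pred - J_true) ** 2))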

class Regularization:
    """Compute regularization penalty."""

    def __init__(self, weights: List[np.ndarray], lambd: float = 0.0) -> None:
        r"""Compute L2 norm penalty.

        :param weights: neural parameters
            :math:`W^{[l]} \in \mathbb{R}^{n^{[l]} \times n^{[l-1]}}`
            associated with each layer
        :param lambd: regularization coefficient
            :math:`\lambda \in \mathbb{R}` (hyperparameter to be tuned)
        """
        self.weights = weights
        self.lambd = lambd

    def evaluate(self) -> float:
        r"""Compute L2 norm penalty."""
        penalty = 0.0
        if self.lambd > 0.0:
            for W in self.weights:
                penalty += np.sum(np.square(W)).squeeze()
            return self.lambd * penalty
        return 0.0
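
A small sketch of the penalty, using hypothetical weight matrices for a 2-3-1 network:

    import numpy as np

    W = [np.ones((3, 2)), np.ones((1, 3))]  # hypothetical layer weights

    assert Regularization(W).evaluate() == 0.0                          # lambd defaults to 0
    assert np.isclose(Regularization(W, lambd=0.1).evaluate(), 0.1 * (6 + 3))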

class Cost:
    r"""Neural Network cost function.

    :param data: Dataset object containing training data (and associated metadata)
    :param parameters: object containing neural net parameters
        (and associated metadata) for each layer
    :param lambd: regularization coefficient to avoid overfitting
    """

    def __init__(
        self,
        data: Dataset,
        parameters: Parameters,
        lambd: float = 0.0,
    ) -> None:
        self.data = data
        self.parameters = parameters
        self.squared_loss = SquaredLoss(data.Y, data.Y_weights)
        self.regularization = Regularization(parameters.W, lambd)
        if data.J is not None:
            self.gradient_enhancement = GradientEnhancement(data.J, data.J_weights)

    def evaluate(
        self, Y_pred: np.ndarray, J_pred: Union[np.ndarray, None] = None
    ) -> np.float64:
        r"""Evaluate cost function.

        :param Y_pred: predicted outputs
            :math:`A^{[L]} \in \mathbb{R}^{n_y \times m}`
        :param J_pred: predicted Jacobian
            :math:`A^{\prime[L]} \in \mathbb{R}^{n_y \times n_x \times m}`
        """
        cost = self.squared_loss.evaluate(Y_pred)
        if J_pred is not None and hasattr(self, "gradient_enhancement"):
            cost += self.gradient_enhancement.evaluate(J_pred)
        cost += self.regularization.evaluate()
        cost *= 0.5 / self.data.m
        return cost
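
``Cost`` itself expects jenn's ``Dataset`` and ``Parameters`` objects, whose constructors are not shown in this module. As a sketch, the same total can be reproduced directly from the three components above (shapes and names hypothetical, mirroring the scaling applied in ``Cost.evaluate``):

    import numpy as np

    m = 4  # hypothetical number of training points
    Y_true, Y_pred = np.random.rand(2, m), np.random.rand(2, m)
    J_true, J_pred = np.random.rand(2, 3, m), np.random.rand(2, 3, m)
    W = [np.random.rand(3, 3), np.random.rand(2, 3)]  # hypothetical layer weights
    lambd = 0.01

    total = (
        SquaredLoss(Y_true).evaluate(Y_pred)
        + GradientEnhancement(J_true).evaluate(J_pred)
        + Regularization(W, lambd).evaluate()
    ) * 0.5 / m  # same 0.5 / m scaling applied by Cost.evaluate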