% Upper-case    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
% Lower-case    a b c d e f g h i j k l m n o p q r s t u v w x y z
% Digits        0 1 2 3 4 5 6 7 8 9
% Exclamation   !  Double quote  "  Hash (number) #
% Dollar        $  Percent       %  Ampersand     &
% Acute accent  '  Left paren    (  Right paren   )
% Asterisk      *  Plus          +  Comma         ,
% Minus         -  Point         .  Solidus       /
% Colon         :  Semicolon     ;  Less than     <
% Equals        =  Greater than  >  Question mark ?
% At            @  Left bracket  [  Backslash     \
% Right bracket ]  Circumflex    ^  Underscore    _
% Grave accent  `  Left brace    {  Vertical bar  |
% Right brace   }  Tilde         ~
% ---------------------------------------------------------------------|
% --------------------------- 72 characters ---------------------------|
% ---------------------------------------------------------------------|
%
% Optimal Foraging Theory Revisited: Appendix. Lagrange Multiplier
% Method
% (note: this material not included in final version of document)
%
% (c) Copyright 2007 by Theodore P. Pavlic
%

\chapter{Lagrange Multiplier Method}
\label{app:lagrange}

In this \appname{}, we discuss a well-known method for optimizing a
real-valued function of multiple real variables subject to equality
and inequality constraints. This is a summary of the discussion of
\emph{Lagrange multiplier theory} by \citet{Bertsekas95}.

First, take $n \in \N$, set $\set{D} \subseteq \R^n$, and function
%
\begin{equation*}
f: \set{D} \mapsto \R
\end{equation*}
%
This is the function we wish to \emph{minimize}. The solution to this
problem will be a vector from $\set{D}$ that we will call a
\emph{local minimum of $f$ subject to constraints}. Now, take
$m \in \W$ and a family $(h_i)_{i=1}^m$ where
%
\begin{equation*}
h_i: \set{D} \mapsto \R
\end{equation*}
%
is a function called an \emph{equality constraint} for all
$i \in \{1,2,\dots,m\}$. The local minimum of $f$ will be a vector
$\v{x} \in \set{D}$ such that $h_i(\v{x})=0$ for all
$i \in \{1,2,\dots,m\}$. Finally, take $r \in \W$ and a family
$(g_j)_{j=1}^r$ where
%
\begin{equation*}
g_j: \set{D} \mapsto \R
\end{equation*}
%
is a function called an \emph{inequality constraint} for all
$j \in \{1,2,\dots,r\}$. The local minimum of $f$ will be a vector
$\v{x} \in \set{D}$ such that $g_j(\v{x}) \leq 0$ for all
$j \in \{1,2,\dots,r\}$. In summary, the problem is to
%
\begin{align*}
\text{minimize} &\enskip f(\v{x})\\
\text{subject to} &\enskip h_i(\v{x}) = 0 \text{ for all } i \in \{1,2,\dots,m\}\\
&\enskip g_j(\v{x}) \leq 0 \text{ for all } j \in \{1,2,\dots,r\}
\end{align*}
%
To change this to a maximization problem, define a function
$f^\star: \set{D} \mapsto \R$ with $f^\star(\v{x})=-f(\v{x})$ for all
$\v{x} \in \set{D}$ and minimize $f^\star$ instead of $f$; a
\emph{local maximum} of $f$ will be a local minimum of $f^\star$.
Thus, we will only handle the minimization problem here.

For the rest of this \appname{}, assume the above problem. Note that
it is assumed that $f$, $g$, and $h$ are all continuously
differentiable functions. At some points, it may also be assumed that
they are twice continuously differentiable functions. Also note that
while we assume this type of differentiability holds for all points
in $\R^n$, it is only necessary that it holds in some open set around
the minimum of $f$; we make the stronger assumption for notational
simplicity.

\section{Terminology}

A number of terms and constructs need to be defined before giving a
solution to the minimization problem. A small example, introduced
below, is carried through the rest of this \appname{} as a concrete
illustration.
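For concreteness, consider the following small problem (an
illustration added for this summary; it does not appear in
\citet{Bertsekas95}). Take $n=2$, $\set{D} = \R^2$, $m=r=1$, and
%
\begin{align*}
\text{minimize} &\enskip f(\v{x}) = x_1^2 + x_2^2\\
\text{subject to} &\enskip h_1(\v{x}) = x_1 + x_2 - 1 = 0\\
&\enskip g_1(\v{x}) = -x_1 \leq 0
\end{align*}
%
That is, find the point of the line $x_1 + x_2 = 1$ with
$x_1 \geq 0$ that is closest to the origin. A solution is derived in
\longref{app:math_lagrange_necessary}.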
%
\begin{description}

\item\emph{Feasible Set:} For this problem, the \emph{feasible set}
of points is denoted $\set{F}$ and defined by
%
\begin{equation*}
\set{F} \triangleq \{ \v{x} \in \set{D} : h_i(\v{x}) = 0,
g_j(\v{x})\leq 0, i \in \{1,2,\dots,m\}, j \in \{1,2,\dots,r\} \}
\end{equation*}
%
That is, the set $\set{F}$ represents all possible candidates for a
minimum of $f$. Any point $\v{x} \in \set{F}$ is called a
\emph{feasible point} or a \emph{feasible vector}. For all intents
and purposes, the function $f$ need only be defined on the set
$\set{F}$. That is, $\set{F}$ can be viewed as the domain of interest
for function $f$. Thus, minimizing $f$ subject to $g$ and $h$ is
equivalent to minimizing the restriction of $f$ to $\set{F}$ (\ie,
$f|_\set{F}$). Therefore, consider the following statements.
%
\begin{itemize}

\item If $\set{F}$ is a nonempty compact set then, by the extreme
value theorem, $f|_\set{F}$ attains its greatest lower bound.

\item If $\set{F}$ is a convex set and $f$ is a convex function
\emph{over $\set{F}$} then $f|_\set{F}$ is a convex function.

\item Assume that all of the following are the case.
%
\begin{enumerate}[(i)]
\item $\set{D}$ is a convex set.
\item $f$ and each $g_j$ are convex functions over $\set{D}$.
\item Each $h_i$ is \emph{affine} (\ie, for each
$i \in \{1,2,\dots,m\}$, there exist some $\v{a}_i \in \R^n$ and
$b_i \in \R$ such that $h_i(\v{x}) = \v{a}_i^\T \v{x} + b_i$ for all
$\v{x} \in \set{D}$).
\end{enumerate}
%
In this case, $\set{F}$ is a convex set.

\end{itemize}
%
\item\emph{Local Minimum:} To say that $\v{x}^* \in \set{F}$ is a
\emph{local minimum} of $f$ subject to $g$ and $h$ (\ie, of $f$ over
$\set{F}$) means that there exists some $\varepsilon \in \R_{>0}$
such that
%
\begin{equation}
f(\v{x}^*) \leq f(\v{y}) \text{ for all }
\v{y} \in \set{F} \setdiff \{\v{x}^*\} \text{ with }
\|\v{x}^*-\v{y}\|_2 < \varepsilon
\label{eq:lagrange_minimum}
\end{equation}
%
A point $\v{x}^* \in \set{F}$ is a \emph{strict local minimum} if
\longref{eq:lagrange_minimum} is true when the $\leq$ relation is
replaced by $<$. Note that we will often omit reference to $g$, $h$,
and $\set{F}$ when discussing minima of $f$; however, it is implied
that $f$ is being minimized over $\set{F}$ rather than over its
entire domain.
%
\item\emph{Global Minimum:} To say that $\v{x}^* \in \set{F}$ is a
\emph{global minimum} of $f$ (subject to $g$ and $h$) means that
%
\begin{equation}
f(\v{x}^*) \leq f(\v{y}) \text{ for all }
\v{y} \in \set{F} \setdiff \{\v{x}^*\}
\label{eq:lagrange_global_minimum}
\end{equation}
%
Clearly, all global minima are local minima. A point
$\v{x}^* \in \set{F}$ is \emph{the strict global minimum} if
\longref{eq:lagrange_global_minimum} is true when the $\leq$ relation
is replaced by $<$. Note the following statements.
%
\begin{itemize}

\item By the extreme value theorem, if $\set{F}$ is a nonempty
compact set then a feasible point exists that is a global minimum of
$f$.

\item By the discussion in \longref{app:math_euclidean_convexity}, if
$\set{F}$ is a convex set and $f$ is a convex function over $\set{F}$
then every local minimum of $f$ is a global minimum of $f$.

\item By the discussion in \longref{app:math_euclidean_convexity}, if
$\set{F}$ is a convex set and $f$ is a \emph{strictly} convex
function over $\set{F}$ then there exists at most one global minimum
of $f$.

\item If $\set{F}$ is a nonempty convex compact set and $f$ is a
strictly convex function over $\set{F}$ then a feasible point exists
that is the single global minimum of $f$.
\end{itemize}
%
\item\emph{Active Inequality Constraints:} For any point
$\v{x} \in \set{F}$, define the \emph{active inequality constraints}
$\set{A}(\v{x})$ by
%
\begin{equation*}
\set{A}(\v{x}) \triangleq \{ j \in \{1,2,\dots,r\} :
g_j(\v{x}) = 0 \}
\end{equation*}
%
In other words, for any $\v{x} \in \set{F}$, $\set{A}(\v{x})$
represents the set of indices of the inequality constraints that hold
with equality at the point $\v{x}$ (\ie, the constraints on whose
boundary of feasibility $\v{x}$ sits). Note that if
$\set{A}(\v{x}) = \emptyset$ for all $\v{x} \in \set{F}$ then this
minimization problem is not affected by the inequality constraints.
Thus, this set really represents the inequality constraints that
alter the solution from the one that would exist without inequality
constraints.

\item\emph{Equality Constraint Gradients:} For each
$\v{x} \in \set{F}$ and $i \in \{1,2,\dots,m\}$, the gradient
$\nabla_\v{x} h_i(\v{x})$ is called an \emph{equality constraint
gradient} at $\v{x}$.

\item\emph{Inequality Constraint Gradients:} For each
$\v{x} \in \set{F}$ and $j \in \set{A}(\v{x})$, the gradient
$\nabla_\v{x} g_j(\v{x})$ is called an \emph{(active) inequality
constraint gradient} at $\v{x}$. Notice the use of the set of active
inequality constraints (\ie, $\set{A}(\v{x})$ for each
$\v{x} \in \set{F}$).

\item\emph{Linear Independence:} Take $q \in \N$, vector
$\v{a} \in \R^q$, and family $(\v{x}^i)_{i=1}^q$ where
$\v{x}^i \in \R^n$ for all $i \in \{1,2,\dots,q\}$. Recall that
$\v{a}=0$ is equivalent to the statement that
$a_1=a_2=\cdots=a_q=0$. The vectors in $(\v{x}^i)_{i=1}^q$ are said
to be \emph{linearly independent} if $\v{a}=0$ is the \emph{only}
vector for which
%
\begin{equation*}
a_1 \v{x}^1 + a_2 \v{x}^2 + a_3 \v{x}^3 + \cdots + a_q \v{x}^q = 0
\end{equation*}
%
\item\emph{Regular Vectors:} A point $\v{x} \in \set{F}$ is called a
\emph{regular point} or a \emph{regular vector} if the single family
consisting of all of the equality constraint gradients and all
\emph{active} inequality constraint gradients at $\v{x}$ is linearly
independent. In other words, the gradients of all active constraints
(both inequality and equality) need to be linearly independent at the
point.

\item\emph{(First-Order) Feasible Variations:} For any point
$\v{x}^* \in \set{D}$, the \emph{(first-order) feasible variations}
at $\v{x}^*$ are denoted $\set{V}(\v{x}^*)$ and defined by
%
\begin{align*}
\set{V}(\v{x}^*) \triangleq \{ \v{\Delta} \in \R^n :
(\nabla_\v{x} h_i(\v{x}^*))^\T \v{\Delta} = 0,
i \in \{1,2,\dots,m\} \}\\
\mathrel{\phantom{\triangleq}} {}\cap \{ \v{\Delta} \in \R^n :
(\nabla_\v{x} g_j(\v{x}^*))^\T \v{\Delta} = 0,
j \in \set{A}(\v{x}^*) \}
\end{align*}
%
For any point $\v{x}^* \in \set{D}$, a direction
$\v{\Delta} \in \set{V}(\v{x}^*)$ is roughly such that there exists
some small $\delta \in \R_{>0}$ with
$\v{x}^* + \delta \v{\Delta} \in \set{F}$. Thus, the first-order
feasible variations are roughly the directions in Euclidean space
along which feasible vectors can move and still stay feasible.

\item\emph{The Lagrangian:} The \emph{Lagrangian function}
$L: \set{D} \times \R^m \times \R^r \mapsto \R$ is defined by
%
\begin{equation*}
L(\v{x},\v{\lambda},\v{\mu}) \triangleq f(\v{x})
+ \sum\limits_{i=1}^m \lambda_i h_i(\v{x})
+ \sum\limits_{j=1}^r \mu_j g_j(\v{x})
\end{equation*}
%
This function \emph{adjoins} the constraints to the function to be
minimized so that minimization of $L$ corresponds to minimization of
$f$ subject to its constraints.
The coefficients represented by the elements of $\v{\lambda}$ and
$\v{\mu}$ are known as \emph{Lagrange multipliers}, and so
$\v{\lambda}$ and $\v{\mu}$ are known as \emph{Lagrange multiplier
vectors}.

\end{description}

\section{Necessary Conditions}
\label{app:math_lagrange_necessary}

The following are typically known as
\emph{\acro{KKT}{Karush-Kuhn-Tucker} conditions} or
\emph{Kuhn-Tucker conditions}. They represent the conditions that are
necessary for a (regular) point to be a minimum.

\paragraph{First-Order Necessary Conditions:} Assume that
$\v{x}^* \in \set{F}$ is a local minimum of $f$ subject to the
constraints from $g$ and $h$ and assume that $\v{x}^*$ is regular. As
\citet{Bertsekas95} notes, this regularity condition is not necessary
in a class of problems with \emph{linear constraints}; however, we
leave out that interesting case for brevity. Given these assumptions,
there must exist unique Lagrange multiplier vectors
$\v{\lambda}^* \in \R^m$ and $\v{\mu}^* \in \R^r$ defined by
%
\begin{equation*}
\v{\lambda}^* \triangleq
\begin{bmatrix}
\lambda^*_1 \\ \lambda^*_2 \\ \vdots \\ \lambda^*_m
\end{bmatrix}
\quad \text{ and } \quad
\v{\mu}^* \triangleq
\begin{bmatrix}
\mu^*_1 \\ \mu^*_2 \\ \vdots \\ \mu^*_r
\end{bmatrix}
\end{equation*}
%
such that
%
\begin{equation}
\nabla_\v{x} L( \v{x}^*, \v{\lambda}^*, \v{\mu}^* ) = 0
\label{eq:lagrange_if}
\end{equation}
%
where
%
\begin{equation*}
\mu^*_j \geq 0 \text{ for all } j \in \set{A}(\v{x}^*)
\end{equation*}
%
and
%
\begin{equation*}
\mu^*_j = 0 \text{ for all } j \notin \set{A}(\v{x}^*)
\end{equation*}
%
This is known as the \emph{first-order necessary condition} for any
point to be a local minimum of $f$ subject to the constraints from
$g$ and $h$. That is, if $\v{x}^*$ is a point such that
\longref{eq:lagrange_if} is not satisfied then $\v{x}^*$ cannot be a
local minimum of $f$ subject to $g$ and $h$. Note that not all points
that satisfy \longref{eq:lagrange_if} will be local minima of $f$
subject to $g$ and $h$; however, if it is also the case that
$\set{F}$ is a convex set and $f$ is a convex function over $\set{F}$
then \longref{eq:lagrange_if} is also a sufficient condition for a
minimum. That is, for the convex case, any feasible point $\v{x}^*$
that satisfies \longref{eq:lagrange_if} will be a local minimum of
$f$ subject to $g$ and $h$. We discuss this convex case further in
\longref{app:math_lagrange_convex}.

\paragraph{Second-Order Necessary Conditions:} Assume that the point
$\v{x}^* \in \set{F}$ is a local minimum of $f$ subject to the
constraints from $g$ and $h$ and assume that $\v{x}^*$ is regular.
Take $\v{\lambda}^* \in \R^m$ and $\v{\mu}^* \in \R^r$ to be the
Lagrange multiplier vectors for which the first-order necessary
conditions hold at $\v{x}^*$. If, in addition, $f$, $g$, and $h$ are
twice continuously differentiable then
%
\begin{equation*}
\v{\Delta}^\T \nabla^2_{\v{x}\v{x}}
L(\v{x}^*,\v{\lambda}^*,\v{\mu}^*) \v{\Delta} \geq 0
\text{ for all } \v{\Delta} \in \set{V}( \v{x}^* )
\end{equation*}
%
This is known as the \emph{second-order necessary condition} for any
point to be a local minimum of $f$ subject to the constraints from
$g$ and $h$. That is, any point $\v{x}^*$ that does not satisfy this
condition cannot be a local minimum of $f$ subject to $g$ and $h$.
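Returning to the example problem introduced at the start of this
\appname{} (again, an added illustration rather than part of the
summarized theory), the Lagrangian is
%
\begin{equation*}
L(\v{x},\v{\lambda},\v{\mu})
= x_1^2 + x_2^2 + \lambda_1 ( x_1 + x_2 - 1 ) + \mu_1 ( -x_1 )
\end{equation*}
%
so that \longref{eq:lagrange_if} requires
$2 x_1^* + \lambda_1^* - \mu_1^* = 0$ and
$2 x_2^* + \lambda_1^* = 0$. Guessing that $g_1$ is inactive at the
minimum forces $\mu_1^* = 0$, and combining the two gradient
conditions with $h_1(\v{x}^*) = 0$ yields $\v{x}^* = (1/2,1/2)$ and
$\lambda_1^* = -1$. Indeed, $g_1(\v{x}^*) = -1/2 < 0$, so the guess
is consistent, and $\v{x}^*$ is regular because the single gradient
$\nabla_\v{x} h_1(\v{x}^*) = (1,1)^\T$ is nonzero. The second-order
necessary condition also holds because
$\nabla^2_{\v{x}\v{x}} L(\v{x}^*,\v{\lambda}^*,\v{\mu}^*) = 2I$,
which is positive definite.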
\paragraph{Local Minimum Candidates:} Take set
$\set{C} \subseteq \set{F}$ defined by
%
\begin{equation*}
\set{C} \triangleq \{ \v{x}^* \in \set{F} : \v{x}^* \text{ satisfies
\longref{eq:lagrange_if} for some } \v{\lambda}^* \in
\R^m,\v{\mu}^* \in \R^r \}
\end{equation*}
%
The set $\set{C}$ is a set of candidates for a local minimum of $f$
subject to $g$ and $h$. That is, it is impossible for any regular
point that is not an element of $\set{C}$ to be a local minimum of
$f$ subject to $g$ and $h$. Of course, when $f$, $g$, and $h$ are
twice continuously differentiable, the size of this candidate set can
be further reduced by the second-order necessary condition. If a
(regular) local minimum exists, it must be an element of $\set{C}$.
Now assume that a global minimum is \emph{known} to exist (\eg,
$\set{F}$ is a compact set). In this case, it must be an element of
$\set{C}$. That is, if a global minimum is known to exist, a point
$\v{x}^*$ such that
%
\begin{equation*}
f(\v{x}^*) = \min\{ f(\v{x}) : \v{x} \in \set{C} \}
\end{equation*}
%
will be a global minimum of $f$ subject to $g$ and $h$. Therefore, if
$\set{C}$ is a finite set and a global minimum is known to exist, a
simple strategy for finding a global minimum is to evaluate $f$ at
every point of $\set{C}$ and take a point that results in the least
$f$.

\section{Sufficiency Conditions}

\longrefs{app:math_lagrange_sufficient} and
\shortref{app:math_lagrange_sufficient_alt} give different sets of
conditions that are sufficient to call a point a strict local
minimum. These conditions are similar to the \ac{KKT} conditions for
a minimum given in \longref{app:math_lagrange_necessary}; however,
there are some important differences. In
\longref{app:math_lagrange_convex}, we discuss how convexity allows
further conclusions to be made about points which satisfy these
sufficiency conditions.

\subsection{Standard Sufficiency Conditions}
\label{app:math_lagrange_sufficient}

Assume that $f$, $g$, and $h$ are all twice continuously
differentiable. Take $\v{x}^* \in \R^n$, $\v{\lambda}^* \in \R^m$,
and $\v{\mu}^* \in \R^r$ such that
%
\begin{enumerate}[(i)]

\item $\nabla_\v{x} L( \v{x}^*, \v{\lambda}^*, \v{\mu}^* ) = 0$

\item $h_i(\v{x}^*) = 0$ for all $i \in \{1,2,\dots,m\}$

\item $g_j(\v{x}^*) \leq 0$ for all $j \in \{1,2,\dots,r\}$

\item $\mu_j^* > 0$ for all $j \in \set{A}(\v{x}^*)$
\label{item:lagrange_sufficient_strict_mu}

\item $\mu_j^* = 0$ for all $j \notin \set{A}(\v{x}^*)$

\item $\v{\Delta}^\T \nabla^2_{\v{x}\v{x}}
L(\v{x}^*,\v{\lambda}^*,\v{\mu}^*) \v{\Delta} > 0$ for all
$\v{\Delta} \in \set{V}(\v{x}^*) \setdiff \{0\}$
\label{item:lagrange_sufficient_hessian}

\end{enumerate}
%
In this case, $\v{x}^*$ must be a strict local minimum of $f$ with
respect to constraints from $g$ and $h$. Notice that properties
(\shortref{item:lagrange_sufficient_strict_mu}) and
(\shortref{item:lagrange_sufficient_hessian}) use a \emph{strict}
inequality. Also notice that property
(\shortref{item:lagrange_sufficient_hessian}) excludes $0$ from the
set of feasible variations. These are the classical
\emph{second-order sufficiency conditions} for a minimum of $f$
subject to the constraints from $g$ and $h$.

\subsection{Alternate Sufficiency Conditions}
\label{app:math_lagrange_sufficient_alt}

Assume that $f$, $g$, and $h$ are all twice continuously
differentiable.
Take $\v{x}^* \in \R^n$, $\v{\lambda}^* \in \R^m$, and
$\v{\mu}^* \in \R^r$ such that
%
\begin{enumerate}[(i)]

\item $\nabla_\v{x} L( \v{x}^*, \v{\lambda}^*, \v{\mu}^* ) = 0$

\item $h_i(\v{x}^*) = 0$ for all $i \in \{1,2,\dots,m\}$

\item $g_j(\v{x}^*) \leq 0$ for all $j \in \{1,2,\dots,r\}$

\item $\mu_j^* \geq 0$ for all $j \in \set{A}(\v{x}^*)$
\label{item:lagrange_sufficient_alt_mu}

\item $\mu_j^* = 0$ for all $j \notin \set{A}(\v{x}^*)$

\item $\v{\Delta}^\T \nabla^2_{\v{x}\v{x}}
L(\v{x}^*,\v{\lambda}^*,\v{\mu}^*) \v{\Delta} > 0$ for all
$\v{\Delta} \in \set{V}_2(\v{x}^*)$ where
%
\begin{align*}
\set{V}_2(\v{x}^*) &\triangleq \{ \v{\Delta} \in \R^n :
(\nabla_\v{x} h_i(\v{x}^*))^\T \v{\Delta} = 0,
i \in \{1,2,\dots,m\} \}\\
&\mathrel{\phantom{\triangleq}} {}\cap \{ \v{\Delta} \in \R^n :
(\nabla_\v{x} g_j(\v{x}^*))^\T \v{\Delta} = 0, \mu_j^* > 0,
j \in \set{A}(\v{x}^*) \}\\
&\mathrel{\phantom{\triangleq}} {}\cap \{ \v{\Delta} \in \R^n :
(\nabla_\v{x} g_j(\v{x}^*))^\T \v{\Delta} \leq 0, \mu_j^* = 0,
j \in \set{A}(\v{x}^*) \}\\
&\mathrel{\phantom{\triangleq}} {}\cap \{ \v{\Delta} \in \R^n :
\v{\Delta} \neq 0 \}
\end{align*}
\label{item:lagrange_sufficient_alt_hessian}

\end{enumerate}
%
In this case, $\v{x}^*$ must be a strict local minimum of $f$ with
respect to constraints from $g$ and $h$. These are alternate
second-order sufficiency conditions for a minimum of $f$ subject to
constraints from $g$ and $h$. Notice that property
(\shortref{item:lagrange_sufficient_alt_mu}) does \emph{not} require
a strict inequality; because of this, property
(\shortref{item:lagrange_sufficient_alt_hessian}) is a stronger
condition than the analogous condition from
\longref{app:math_lagrange_sufficient}.

\section{Lagrange Multipliers and Convexity}
\label{app:math_lagrange_convex}

Recall the discussion of convexity in Euclidean spaces from
\longref{app:math_euclidean_convexity}. Under convexity conditions,
finding local and global minima of $f$ subject to $g$ and $h$ is
greatly simplified.

\subsection{Convex Sets and Convex Functions}

Recall the definition of a convex set and a convex function
introduced in \longref{app:math_euclidean_convexity}.
%
\begin{description}

\item\emph{Convex Set:} To say that $\set{F}$ is a \emph{convex set}
means that for all $\v{x},\v{y} \in \set{F}$ with $\v{x} \neq \v{y}$,
%
\begin{equation*}
t \v{x} + (1-t) \v{y} \in \set{F}
\end{equation*}
%
for all $t \in (0,1)$. For example, any Cartesian product of $n$
intervals of $\R$ is a convex subset of $\R^n$.

\item\emph{Convex Function over a Convex Set:} Assume that $\set{F}$
is a convex set. To say that function $f$ is \emph{convex over
$\set{F}$} means that for all $\v{x},\v{y} \in \set{F}$ with
$\v{x} \neq \v{y}$,
%
\begin{equation*}
f( t \v{x} + (1-t) \v{y} ) \leq t f(\v{x}) + (1-t) f(\v{y})
\end{equation*}
%
for all $t \in (0,1)$.

\item\emph{Strictly Convex Function over a Convex Set:} Assume that
$\set{F}$ is a convex set. To say that function $f$ is \emph{strictly
convex over $\set{F}$} means that for all $\v{x},\v{y} \in \set{F}$
with $\v{x} \neq \v{y}$,
%
\begin{equation*}
f( t \v{x} + (1-t) \v{y} ) < t f(\v{x}) + (1-t) f(\v{y})
\end{equation*}
%
for all $t \in (0,1)$.

\end{description}
%
Now assume that $f$ is twice continuously differentiable and
$\set{F}$ is a convex set. If it is the case that
%
\begin{equation*}
\v{\Delta}^\T \nabla^2_{\v{x}\v{x}} f(\v{x}) \v{\Delta} \geq 0
\text{ for all } \v{\Delta} \in \R^n \text{ and } \v{x} \in \set{F}
\end{equation*}
%
then $f$ is convex over convex set $\set{F}$.
Additionally, if it is the case that
%
\begin{equation*}
\v{\Delta}^\T \nabla^2_{\v{x}\v{x}} f(\v{x}) \v{\Delta} > 0
\text{ for all } \v{\Delta} \in \R^n \setdiff \{0\} \text{ and }
\v{x} \in \set{F}
\end{equation*}
%
then $f$ is strictly convex over convex set $\set{F}$. These are
sufficient conditions for convexity and strict convexity of functions
over convex sets.

\subsection{Convex Feasible Set}

Assume that $\set{F}$ is a convex set (\eg, because the sufficient
conditions for convexity of $\set{F}$ given above hold). Assume that
$\v{x}^* \in \set{F}$ is a local minimum of $f$ subject to $g$ and
$h$. It must be the case that
%
\begin{equation}
( \nabla_{\v{x}} f(\v{x}^*) )^\T (\v{x} - \v{x}^*) \geq 0
\label{eq:lagrange_convex_if}
\end{equation}
%
for all $\v{x} \in \set{F}$. That is, this is a necessary condition
for any minimum of $f$ subject to $g$ and $h$. If there is a point
$\v{x}^* \in \set{F}$ such that \longref{eq:lagrange_convex_if} fails
for some $\v{x} \in \set{F}$ then $\v{x}^*$ cannot be a minimum of
$f$ subject to $g$ and $h$. In fact, for the case where $\set{F}$ is
convex, \longref{eq:lagrange_convex_if} is equivalent to the
condition in \longref{eq:lagrange_if}.

\subsection{Convex Function over Convex Set}

Assume that $\set{F}$ is a convex set. Also assume that $f$ is convex
over $\set{F}$. Consider the following consequences of the convexity
of $f$ over convex set $\set{F}$.
%
\begin{description}

\item\emph{Necessary Conditions Become Sufficient:} In this case, the
necessary conditions in \longrefs{eq:lagrange_if} and
\shortref{eq:lagrange_convex_if} also become sufficient conditions.
In other words,
%
\begin{itemize}

\item A point $\v{x}^* \in \set{F}$ is a local minimum of $f$ subject
to $g$ and $h$ if and only if \longref{eq:lagrange_if} holds.

\item A point $\v{x}^* \in \set{F}$ is a local minimum of $f$ subject
to $g$ and $h$ if and only if \longref{eq:lagrange_convex_if} holds.

\end{itemize}
%
That is, for this case of a convex function over a convex feasible
set, the following are equivalent.
%
\begin{enumerate}[(i)]

\item A point $\v{x}^* \in \set{F}$ is a local minimum of $f$ subject
to $g$ and $h$.

\item A point $\v{x}^* \in \set{F}$ satisfies
\longref{eq:lagrange_if}.

\item A point $\v{x}^* \in \set{F}$ satisfies
\longref{eq:lagrange_convex_if}.

\end{enumerate}
%
\item\emph{Local Minima Become Global Minima:} Now assume that
$\v{x}^*$ is known to be a local minimum of $f$ subject to $g$ and
$h$ (\eg, any of the sufficiency conditions given above are met).
Because $f$ is convex over the convex set $\set{F}$, the following
are true.
%
\begin{itemize}

\item The point $\v{x}^*$ is a global minimum of $f$ subject to $g$
and $h$.

\item If $\v{x}^*$ is a strict local minimum of $f$ subject to $g$
and $h$ then $\v{x}^*$ is a strict global minimum of $f$ subject to
$g$ and $h$. In fact, it can be shown that $\v{x}^*$ is the unique
(strict) global minimum of $f$ subject to $g$ and $h$.

\item If $f$ is strictly convex over convex set $\set{F}$ then
$\v{x}^*$ must be the unique (strict) global minimum of $f$ subject
to $g$ and $h$.

\end{itemize}
%
\end{description}
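These convexity results close the example problem introduced at the
start of this \appname{}. There, $h_1$ is affine, $g_1$ is convex
(indeed, linear), and $\set{D} = \R^2$ is convex, so $\set{F}$ is a
convex set; moreover, $\nabla^2_{\v{x}\v{x}} f(\v{x}) = 2I$ is
positive definite everywhere, so $f$ is strictly convex over
$\set{F}$. Therefore, the candidate $\v{x}^* = (1/2,1/2)$ found from
the first-order necessary conditions is the unique global minimum,
with $f(\v{x}^*) = 1/2$. As a purely numerical cross-check (again an
added illustration, not part of the summarized theory), the following
sketch uses the \texttt{minimize} routine from the SciPy library,
assuming SciPy is installed; the \texttt{SLSQP} method expects
inequality constraints in the form $\tilde{g}(\v{x}) \geq 0$, so
$g_1(\v{x}) = -x_1 \leq 0$ is passed as $x_1 \geq 0$.
%
\begin{verbatim}
# Numerical cross-check of the worked example (illustration only).
import numpy as np
from scipy.optimize import minimize

f = lambda x: x[0]**2 + x[1]**2            # objective
constraints = (
    {'type': 'eq',   'fun': lambda x: x[0] + x[1] - 1.0},  # h_1 = 0
    {'type': 'ineq', 'fun': lambda x: x[0]},  # -g_1 >= 0, i.e., g_1 <= 0
)
result = minimize(f, x0=np.array([5.0, -3.0]), method='SLSQP',
                  constraints=constraints)
print(result.x)    # approximately [0.5, 0.5]
print(result.fun)  # approximately 0.5
\end{verbatim}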