\documentclass[11pt]{amsart} \pdfoutput=1 %\usepackage[text={420pt,660pt},centering]{geometry} \usepackage{amsmath} % I added this package to use \tag{}. \allowdisplaybreaks % allow a single display equation to naturally break across two pages \usepackage[foot]{amsaddr} \usepackage{color} \usepackage{appendix} \usepackage{cancel} \usepackage{esint,amssymb} \usepackage{graphicx} \usepackage{MnSymbol} \usepackage{mathtools} %\usepackage{stmaryrd} % for the integer intervals \usepackage{xcolor} \usepackage{microtype} \newcommand\hmmax{0} \newcommand\bmmax{0} \usepackage{bm} \usepackage{dsfont} \usepackage{mathrsfs} \usepackage{enumitem} %\usepackage[notcite,notref,color]{showkeys} %comment this to remove the labels next to each \label{...} on the pdf; need to place this after xcolor/color \usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue,pagebackref=false]{hyperref} \usepackage{orcidlink} %\usepackage{refcheck} %\usepackage[normalem]{ulem} %\usepackage{booktabs} % nice tables %\setcounter{tocdepth}{2} \definecolor{darkgreen}{rgb}{0,0.5,0} \definecolor{darkblue}{rgb}{0,0,0.7} \definecolor{darkred}{rgb}{0.9,0.1,0.1} \newcommand{\hbccomment}[1]{\marginpar{\raggedright\scriptsize{\textcolor{orange}{#1}}}} \newcommand{\hbc}[1]{{\color{orange}{#1}}} \newcommand{\viccomment}[1]{\marginpar{\raggedright\scriptsize{\textcolor{darkblue}{#1}}}} \newcommand{\vic}[1]{{\color{darkblue}{#1}}} \newcommand{\rmk}[1]{{\color{cyan}{#1}}} \newtheorem{proposition}{Proposition} \newtheorem{theorem}[proposition]{Theorem} \newtheorem{lemma}[proposition]{Lemma} \newtheorem{corollary}[proposition]{Corollary} \newtheorem{claim}[proposition]{Claim} \newtheorem{conjecture}[proposition]{Conjecture} \theoremstyle{remark} \newtheorem{remark}[proposition]{Remark} \newtheorem{example}[proposition]{Example} \theoremstyle{definition} \newtheorem{definition}[proposition]{Definition} \newtheorem{question}{Question} \newtheorem{hypothesis}[proposition]{Hypothesis} \numberwithin{equation}{section} \numberwithin{proposition}{section} \numberwithin{figure}{section} \numberwithin{table}{section} %\numberwithin{theorem}{document} \let\oldH=\H \newcommand{\Z}{\mathbb{Z}} \newcommand{\N}{\mathbb{N}} \newcommand{\Q}{\mathbb{Q}} \newcommand{\R}{\mathbb{R}} \newcommand{\Zd}{{\mathbb{Z}^d}} \newcommand{\Rd}{{\mathbb{R}^d}} \newcommand{\E}{\mathbb{E}} \renewcommand{\P}{\mathbb{P}} \newcommand{\EE}{\mathbf{E}} \newcommand{\PP}{\mathbf{P}} \newcommand{\F}{\mathcal{F}} \renewcommand{\le}{\leqslant} \renewcommand{\ge}{\geqslant} \renewcommand{\leq}{\leqslant} \renewcommand{\geq}{\geqslant} \newcommand{\ls}{\lesssim} \renewcommand{\subset}{\subseteq} \renewcommand{\bar}{\overline} \renewcommand{\tilde}{\widetilde} \newcommand{\td}{\widetilde} %\renewcommand{\hat}{\widehat} %\newcommand{\un}{\underline} \newcommand{\Ll}{\left} \newcommand{\Rr}{\right} \renewcommand{\d}{\mathrm{d}} \newcommand{\dr}{\partial} \renewcommand{\fint}{\strokedint} \newcommand{\1}{\mathbf{1}} \newcommand{\tsp}{\intercal} \newcommand{\mcl}{\mathcal} \newcommand{\msf}{\mathsf} \newcommand{\mfk}{\mathfrak} \newcommand{\msc}{\mathscr} \newcommand{\mbf}{\mathbf} \newcommand{\al}{\alpha} \newcommand{\be}{\beta} \newcommand{\ga}{\gamma} \newcommand{\de}{\delta} \newcommand{\ep}{\varepsilon} \newcommand{\eps}{\varepsilon} \newcommand{\si}{\sigma} %%%%%%%% [Begin] customized symbols %%%%%%%% \newcommand{\la}{\left\langle} \newcommand{\ra}{\right\rangle} \renewcommand{\ae}{{a.e.\ }} \newcommand{\Id}{\mathsf{Id}} \newcommand{\id}{\mathsf{id}} \newcommand{\D}{{\mathrm{D}}} \renewcommand{\S}{\mathrm{S}} \newcommand{\ellipt}{\mathsf{Ellipt}} %%%%%%%% [End] customized symbols %%%%%%%% \DeclareMathOperator{\dist}{dist} \DeclareMathOperator{\diam}{diam} \DeclareMathOperator{\tr}{tr} \DeclareMathOperator{\supp}{supp} \DeclareMathOperator{\sign}{sign} %\newcommand{\var}{\mathbb{V}\mathrm{ar}} \newcommand{\upa}{\uparrow} \newcommand{\cF}{\mathcal{F}} \newcommand{\bx}{{\mathbf{x}}} \newcommand{\bff}{{\mathbf{f}}} \newcommand{\cav}{\circ} \newcommand{\fR}{\mathfrak{R}} \newcommand{\scdot}{\,\cdot\,} % \cdot with space as placeholder for arguments in functions or measures. \newcommand{\sfr}{{\mathsf{r}}} \newcommand{\sfp}{{\mathsf{p}}} \newcommand{\sfq}{{\mathsf{q}}} \newcommand{\sfs}{{\mathsf{s}}} \newcommand{\sS}{\mathscr{S}} \newcommand{\s}{{\mathsf{s}}} \newenvironment{e}{\begin{equation}}{\end{equation}\ignorespacesafterend} \newenvironment{e*}{\begin{equation*}}{\end{equation*}\ignorespacesafterend} %\newcommand{\la}{\left\langle} %\newcommand{\ra}{\right\rangle} %\newcommand{\prog}{\mathsf{Prog}} %\newcommand{\mart}{\mathsf{Mart}} %\newcommand{\bprog}{\mathbf{Prog}} %\newcommand{\bmart}{\mathbf{Mart}} %% The averaged sum notation %\newcommand{\avsum}{\mathop{\mathpalette\avsuminner\relax}\displaylimits} %\makeatletter %\newcommand\avsuminner[2]{% % {\sbox0{$\m@th#1\sum$}% % \vphantom{\usebox0}% % \ooalign{% % \hidewidth % \smash{\rule[.5ex]{1.5ex}{.8pt} \relax}% % \hidewidth\cr % $\m@th#1\sum$\cr % }% % }% %} %\makeatother % % % % % \begin{document} \author{Hong-Bin Chen\,\orcidlink{0000-0001-6412-0800}} \address[Hong-Bin Chen]{NYU-ECNU Institute of Mathematical Sciences, NYU Shanghai, China} \email{\href{mailto:hongbin.chen@nyu.edu}{hongbin.chen@nyu.edu}} \author{Victor Issa\,\orcidlink{0009-0009-1304-046X}} \address[Victor Issa]{Department of Mathematics, ENS Lyon, France} \email{\href{mailto:victor.issa@ens-lyon.fr}{victor.issa@ens-lyon.fr}} \keywords{} \subjclass[2010]{} \title[Vector spin glasses with Mattis interaction]{Vector spin glasses\\ with Mattis interaction II:\\ non-convex high-temperature models} \begin{abstract} % In this work, we study mean-field glass models with a general Mattis interaction term, without assuming that the spin glass part of the model is convex. We derive a variational formula for the free energy that is valid at high temperature. When there is no Mattis interaction, this variational formula simplifies and we can observe that it is equal to the viscosity solution of some Hamilton--Jacobi equation. In particular, this shows that in the high-temperature regime the (enriched) free energy of non-convex spin glasses converges to the limit predicted in \cite[Conjecture~2.6]{mourrat2019parisi}. This paper constitutes the second part of a two-paper series devoted to the systematic study of vector spin glass models whose energy function involves a spin glass part and a general Mattis interaction part. In this paper, we focus on models whose spin glass part \emph{does not} satisfy the usual convexity assumption. In this case, the Parisi formula breaks down, and there are no known methods to fully identify the limit free energy. It was suggested in \cite{mourrat2019parisi} that the limit free energy may be described using the unique solution of a partial differential equation of Hamilton--Jacobi type. In the present paper, we prove the validity of this conjecture in the high-temperature regime and provide an explicit representation for the free energy in terms of critical points. Using the duality between the free energy and large deviation principles, one can then easily deduce from the previous result a large deviation principle for the mean magnetization as well as a representation for the free energy of spin glass models with additional Mattis interaction at high temperature. In the companion paper~\cite{MattisI}, we establish similar results at all temperatures for models whose spin glass part is assumed to satisfy the usual convexity assumption. \bigskip \noindent \textsc{Keywords and phrases: Disordered systems, Large deviation principles, Spin glasses, Hamilton--Jacobi equations.} \medskip \noindent \textsc{MSC 2020: 82B44, % Disordered systems 60F10, % Large deviations, 35F21.% HJ } % 35D40 Viscosity solutions to PDEs % 60K35 Interacting random processes; statistical mechanics type models; percolation theory % 82B44 Disordered systems % 82D30 Statistical mechanics of random media, disordered materials (including liquid crystals and spin glasses) \end{abstract} \maketitle %\newpage %\setcounter{tocdepth}{1} \thispagestyle{empty} { \hypersetup{linkcolor=black} \setcounter{tocdepth}{1} \tableofcontents } %\newpage %\pagenumbering{arabic} \section{Introduction} \subsection{Preamble} This paper is the second paper of a two-part series in which we investigate general mean-field spin glass models with Mattis interaction. In this paper, we focus on models whose spin glass part does not satisfy the usual convexity assumption.\footnote{By the usual convexity assumption, we mean the assumption that the function $\xi: \R^{D \times D} \to \R$ defined below in \eqref{e.cov}, is convex on the set of symmetric positive semi-definite matrices. For the model \eqref{e.simple}, we have $D = 2$ and $\xi(a) = a_{11}a_{22}.$} An important model in this class is a specific disordered variant of the so-called Restricted Boltzmann Machine, which can be naturally mapped to a bipartite Sherrington-Kirkpatrick spin glass featuring Mattis interactions. This model consists of a system of $\pm 1$ spins interacting through the following energy function % \begin{equation} \label{e.simple} E_N(\sigma) = \frac{\beta}{\sqrt{N}} \sum_{i,j = 1}^N W_{ij} \sigma_{1,i} \sigma_{2,j} + \sum_{i = 1}^N \chi^1_i \sigma_{1,i} + \sum_{i = 1}^N \chi^2_i \sigma_{2,i}, \end{equation} % where $\sigma =(\sigma_1,\sigma_2) \in \{-1,1\}^N \times \{-1,1\}^N$ is the bipartite spin configuration, and the couplings $(W_{ij})_{1 \leq i,j \leq N}$ are independent centered Gaussian random variables with variance $1$, representing the quenched random weights of the network. Furthermore, the vectors $(\chi_i^1)_{1 \leq i \leq N}$ and $(\chi^2_i)_{1 \leq i \leq N}$ are i.i.d.\ centered $\pm 1$ Bernoulli random variables that define the quenched Mattis interactions, acting as planted patterns for the two respective layers. Restricted Boltzmann Machines are significant because they provide a simple yet powerful framework for unsupervised learning. In this analogy, the spins correspond to binary units, the couplings to weights, and learning amounts to shaping an energy landscape with many metastable states \cite{tubiana2018RBM,smolensky1986harmonytheory}. This connection has enabled fruitful exchanges between machine learning and spin glass theory, with tools from statistical mechanics helping to analyze restricted Boltzmann machines' phase behavior, expressivity, and learning dynamics \cite{barra2018rbm,franz1997phase}. Some of this work has been produced by John Hopfield and Geoffrey Hinton, to whom the Nobel committee awarded the 2024 Physics prize, citing in particular their foundational works on these models and their underlying physical analogies \cite{ackley1985learning,hinton2002training,hopfield1982collective,hopfield1984graded}. In this series of papers~\cite{MattisI,MattisII}, our interest is twofold. First, we identify the large $N$ limit of the free energy defined by % \begin{equation} \label{e.F_Nbasic} f_N = -\frac{1}{N} \log \left( \frac{1}{4^N} \sum_{\sigma \in \{-1,1\}^N \times \{-1,1\}^N} e^{E_N(\sigma)} \right), \end{equation} % under the assumption that $\beta$ is small. Second, assuming the same condition on $\beta$, we establish large deviation principles (see Definition~\ref{d.ldp}) for the $\R^2$-valued mean magnetization \begin{align*} m_N= (m_{N,1},m_{N,2}) = \Ll(\frac{1}{N} \sum_{i = 1}^N \chi^1_i \sigma_{1,i},\quad \frac{1}{N} \sum_{i = 1}^N \chi^2_i \sigma_{2,i}\Rr) \end{align*} under the Gibbs measure $\la \cdot \ra_N$ defined by % \begin{equation*} \la g(\sigma) \ra_N \propto \frac{1}{4^N} \sum_{\sigma \in \{-1,1\}^N \times \{-1,1\}^N } g(\sigma) e^{E_N(\sigma)}. \end{equation*} % For models without Mattis interaction that satisfy the usual convexity assumption, the free energy can be computed using the Parisi formula \cite{pan.potts,pan.vec}. For models such as \eqref{e.simple}, computing the limit free energy, even in the absence of Mattis interaction, is a difficult problem that has so far not been resolved. Under the assumption that $\beta$ is small enough, this problem becomes significantly easier. For certain models it has been shown that in this regime, the system exhibits a property known as \emph{replica symmetry} \cite{dey2021msk,wu2024thouless}. In this case, the limit free energy can be linked to the solution of a finite dimensional partial differential equation \cite{guerra2001sum,barra2,abarra,barramulti,barra2014quantum,barra2008mean,genovese2009mechanical}. In an attempt to tackle the computation of the free energy outside of the replica symmetric phase (i.e. at high $\beta$), a new approach has been put forward relying on an infinite dimensional partial differential equation \cite{HJbook,mourrat2019parisi,mourrat2020nonconvex,mourrat2020free}. In this setting, one introduces an enriched version of the model depending on an additional parameter $q$ which can be encoded as a càdlàg function $[0,1) \to \R_+^2$ with increasing coordinates. Here, and throughout, we write $ \R_+=[0,\infty)$. Conjecturally, the large $N$ limit of the free energy of this enriched model is the unique solution of a partial differential equation of Hamilton--Jacobi type with time parameter $t = \beta^2/2$ and space parameter $q$. When one chooses $q$ to be the constant path taking the value $0 \in \R^2_+$, the free energy of the enriched model is equal to the free energy of the original model, and the system is replica symmetric in the low $\beta$ regime. But, when $q$ is chosen to be a path with \emph{strictly} increasing coordinates, the system corresponding to the enriched model is not replica symmetric, even in the low $\beta$ regime\footnote{The system exhibits infinitely many steps of replica symmetry breaking in the small $\beta$ regime in this case, see Remark~\ref{r.rsb}.}. Therefore, the verification of this conjectural link between the free energy and the Hamilton--Jacobi equation is highly non-trivial, even in the low $\beta$ regime. We dive deeper into this issue of replica symmetry breaking versus replica symmetry in Section~\ref{ss.proj}. Our main results are stated for very general models in Theorem~\ref{t.free_energy} and Theorem~\ref{t.ldp}. The first main contribution of this paper is a verification, in the low $\beta$ regime, of this link between the free energy and the solution of an infinite dimensional PDE for a very general class of non-convex spin glass models. This yields an implicit representation for the free energy of enriched versions of those models. When the enrichment parameter is set to $0$, this result reduces to a statement about the free energy of a replica symmetric system, but otherwise this is not the case. From this, using well-known tools from large deviation theory, we will identify the limit free energy of models with Mattis interaction and establish a large deviation principle for the mean magnetization, still in the low $\beta$ regime. The application of our main results to the model in~\eqref{e.simple} can be summarized as follows. We let $\eta$ be a standard real-valued Gaussian random variable, and $\chi$ be a centered $\pm 1$ Bernoulli random variable. For $y \in \R^2_+$ and $x \in \R^2$, we define % \begin{align*} \phi(y;x) = \sum_{i=1}^2 \Ll( - \E \log \cosh(\sqrt{2y_i} \eta + x_i \chi) + y_i\Rr). \end{align*} %\begin{e} \label{e.def.vartheta} % \vartheta(y_0,x_0) = y_0 - \E \log \cosh(\sqrt{2y_0} \eta + x_0 \chi). %\end{e} % For some $\beta_\star$ specified in the theorem below, we will show in Proposition~\ref{p.f-d-proj} that, for every $x\in\R^2$, the partial differential equation % \begin{align*} \begin{cases} \partial_t g(t,y;x) - \partial_{y_1}g(t,y;x) \partial_{y_2}g(t,y;x)=0 \\ %\quad &\text{for } (t,y)\in(0,\frac{\beta^2_\star}{2})\times \R_+^2, \\ g(0,y;x) = \phi(y;x) \end{cases} \end{align*} % admits a unique differentiable solution $g(\cdot,\scdot;x)$ on $[0,\beta^2_\star/2)\times\R^2_+$. % There exists $\beta_\star \geq \frac{1}{2\sqrt{2}}$, such that for every $x \in \R^2$ and $\beta < \beta_\star$, the following system of equations % % % \begin{e} \label{e.system} % \begin{cases} % z_1 = \partial_y \vartheta \left( \frac{\beta^2}{2} z_2;x_1 \right) \\ % z_2 = \partial_y \vartheta\left( \frac{\beta^2}{2}z_1;x_2\right), % \end{cases} % \end{e} % % % with unknown $z \in \R^2_+$ has a unique solution. % Here we state a specialized version of our main results for the model defined by \eqref{e.simple}. Here $\mcl Q(1)^2$ denotes the set of functions $q : [0,1) \to \R^2_+$ with increasing coordinates, $\hat 0 \in \mcl Q(1)^2$ is the constant path taking the value $0$ and the function $\psi = \psi(q;x) : \mcl Q(1)^2 \times \R^2 \to \R$ appearing in the statement is given below in \eqref{e.psi(q;x)=}. We again stress that even though the following specialized statement concerns replica symmetric systems, this will not be the case for the general versions. \begin{theorem}\label{t.parisi+ldp.basic} There exists $\beta_\star \geq \frac{1}{2\sqrt{2}}$ such that for every $\beta < \beta_\star$ and almost every realization of $(W_{i,j})_{i,j}$, $(\chi^1_i)_i$ and $(\chi^2_i)_i$, the following holds. We have \begin{e} \label{e.limf_nbasic} \lim_{N \to +\infty} f_N = \inf_{m \in \R^2}\sup_{x \in \R^2}\Ll\{g\Ll(\tfrac{\beta^2}{2},0;x\Rr)+m\cdot x-(m_1+m_2)\Rr\}- \tfrac{\beta^2}{2}, \end{e} % and the $\R^2$-valued mean magnetization $m_N$ satisfies a large deviation principle under $\langle \cdot \rangle_N$ with rate function $J$ defined for $m\in\R^2$ by \begin{e*} J(m) = -(m_1+m_2) + g^*\left( \tfrac{\beta^2}{2}, 0;m\right) + \sup_{m' \in \R^2} \left\{m'_1+m'_2 - g^*\left( \tfrac{\beta^2}{2}, 0;m'\right) \right\}, \end{e*} % where $ g^*\left( \tfrac{\beta^2}{2},0;m\right) = \sup_{x \in \R^2} \left\{x\cdot m + g\left( \tfrac{\beta^2}{2},0;x\right)\right\}.$ \end{theorem} % \begin{theorem}\label{t.parisi+ldp.basic} % For every $x' \in \R^2$, let $f(\cdot,\scdot;x') : \R_+ \times \mcl Q^2(1) \to \R$ denote the unique solution of the partial differential equation % % % \begin{e} % \begin{cases} % \partial_t f - \int \partial_{q_1} f \partial_{q_2} f = 0 \text{ on } (0,+\infty) \times \mcl Q^2(1) \\ % f(0,\scdot;x') = \psi(\cdot,x'). % \end{cases} % \end{e} % % % There exists $\beta_\star \geq \frac{1}{2\sqrt{2}}$ such that for all $\beta \leq \beta_\star$, almost surely over the randomness of $(W_{i,j})_{i,j}$, $(\chi_i)_i$ and $(\chi^2_i)_i$, we have % % % \begin{e*} % \lim_{N \to +\infty} f_N = \inf_{m \in \R^2}\sup_{x' \in \R^2}\Ll\{f\left( \frac{\beta^2}{2},\hat 0;x'\right)+m\cdot x'-(m_1+m_2)\Rr\}. % \end{e*} % % % Furthermore, still almost surely over the randomness of $(W_{i,j})_{i,j}$, $(\chi_i)_i$ and $(\chi^2_i)_i$, the $\R^2$-valued mean magnetization $m_N = (m_{N,1},m_{N,2})$ satisfies a large deviation principle under $\langle \cdot \rangle_N$ with rate function % % % \begin{e*} % J(m) = -(m_1+m_2) + f^*\left( \frac{\beta^2}{2},\hat 0;m\right) + \sup_{m' \in \R^2} \left\{(m'_1+m'_2 - f^*\left( \frac{\beta^2}{2},\hat 0;m'\right) \right\}, % \end{e*} % % % where we have defined $f^*\left( \frac{\beta^2}{2},\hat 0;m\right) = \sup_{x \in \R^2} \left\{x\cdot m + f\left( \frac{\beta^2}{2},\hat 0;x\right)\right\}$. % \end{theorem} %The notation for the function $f^*$ used to define the rate function $J$ is perhaps surprising. This curious choice is explained by the fact that the quantity $\vartheta\left( \frac{\beta^2}{2} z_2,x_1 \right) + \vartheta \left( \frac{\beta^2}{2} z_1,x_2 \right)$ is equal to $f\left( \frac{\beta^2}{2},\hat 0;x\right)$, where $f(\cdot,\cdot,;x)$ is the solution of a Hamilton-Jacobi equation and $\hat{0}$ is the constant path $[0,1) \to S^D_+$ taking the value $0 \in S^D_+$. We explain this further in Remark~\ref{r.simplef}. \begin{remark} \label{r.RBM_fixed_point} We mention that the quantity $g\Ll(\tfrac{\beta^2}{2},0;x\Rr)$ can be understood in more familiar terms using a critical point representation. For $t < \beta_*^2/2$, there is a unique solution $z^x =(z^x_1,z^x_2) \in\R^2_+$ to the fixed point equation % \begin{align}\label{e.RBM_fixed_point} z=\nabla_y\phi(t(z_2,z_1);x)\quad\text{for }z\in\R^2_+, \end{align} % and we have \begin{align}\label{e.RBM_crit_point} g\Ll(t,0;x\Rr)=\phi\Ll(t(z^x_2,z^x_1);x\Rr)-tz^x_1z^x_2. \end{align} In addition, we also have that $\nabla_y g(t,0;x)= z^x$. \qed \end{remark} % We refer to~\eqref{e.RBM_crit_point} as a critical-point representation since $z^x$ as a solution of~\eqref{e.RBM_fixed_point} is a critical point of the function on the right-hand side of~\eqref{e.RBM_crit_point}. Also, we do not expect that the lower bound $\frac{1}{2\sqrt{2}}$ for $\beta_\star$ is sharp. To the best of our knowledge, with the presence of Mattis interaction, identifying the limit free energy at high temperature without additional assumptions on replica symmetry has not been achieved. However, much progress has been made in related spin glass systems and neural networks. For two-layer bipartite models, such as the analogical neural network and standard Restricted Boltzmann Machines, the replica-symmetric (RS) ansatz has been derived using interpolation and self-averaging properties \cite{barra2010rsapprox, agliari2019free}. These methods have been extended to multi-layer geometries like Deep Boltzmann Machines. In this context, the RS ansatz can be bounded by Sherrington-Kirkpatrick models in the annealed region \cite{alberici2020annealing} and expressed through minimax formulas \cite{genovese2022minimax}. Other deep architectures rely on transport equations to handle RS and one-step replica symmetry breaking (1RSB) \cite{agliari2021transport}, while exact solutions without explicit RS assumptions are limited to specific regimes like the Nishimori line \cite{alberici2021solution}. Furthermore, the existence of the infinite-volume thermodynamic limit for the free energy in mean field neural networks has been rigorously established by assuming the measure-concentration of order parameters \cite{agliari2024thermodynamic}. In short, computing the limit free energy in all these works requires either imposing a replica-symmetric assumption (or limited symmetry-breaking) or restricting the analysis to specific regimes where such symmetry naturally holds. For models satisfying the usual convexity assumption, computing the free energy in the absence of Mattis interaction is a classical problem, and it is now well known that the limit of the free energy can be understood as the supremum of an explicit functional, which is the celebrated Parisi formula \cite{parisi1979infinite,gue03,Tpaper}. For those models, the free energy can also be studied from the perspective of Hamilton--Jacobi equations, which leads to a more systematic and general treatment. The recent developments regarding this are summarized in~\cite{mourrat2025spin}. On the other hand, models with Mattis interaction have not yet received a similar systematic treatment. The identification of their free energy relies on model-dependent proofs. Furthermore, large deviation principles for the mean magnetization have traditionally been established by computing a constrained version of the free energy % \begin{equation*} f_N(A) = -\frac{1}{N} \log \left( \frac{1}{4^N} \sum_{\sigma \in A} e^{E_N(\sigma)} \right), \end{equation*} % which leads to technical proofs \cite{guionnet2025estimating,franz2020largedeviations,camilli2022mismatch,pan.vec,chen2014mixedSK}. Similar techniques have also been employed to prove large deviation principles in the context of classical spin glass models \cite{jagannath2018spectralgap,jagannath2020tensorpca}, see also \cite{ko2020multiplespherical}. For spin glass models with additional Mattis interaction, there are no known general results that allow for a systematic identification of the free energy nor the verification of large deviation principles for the mean magnetization. In this series of papers, we resolve this problem and establish systematic results using a new approach that relies on the computation of the following object % \begin{equation*} f_N^G = -\frac{1}{N} \log \left( \frac{1}{2^N} \sum_{\sigma \in \{-1,1\}^N \times \{-1,1\}^N} \exp\left(\frac{\beta}{\sqrt{N}} \sum_{i,j = 1}^N W_{ij} \sigma_{1,i} \sigma_{2,j} + NG(m_N) \right) \right). \end{equation*} % The main difference of our approach is that the continuous function $G : \R^2 \to \R$ encoding the Mattis interaction is now treated as a \emph{parameter of the model} instead of being fixed. Furthermore, as opposed to the traditional approach, we do not impose constraints on the spin configuration $\sigma$. This allows for smoother, shorter, and less technical computations that one can carry out in a model-independent way. %We will see in detail below that the computation of $\lim_{N \to +\infty} f_N^G$ can be done systematically for a very large class of spin glass models. This, in turn, leads to the identification of the free energy as defined in \eqref{e.F_Nbasic} and suffices for the verification of a large deviation principle for the mean magnetization. To compute $\lim_{N \to +\infty} f_N^G$, we can proceed as follows. We let $\langle \cdot \rangle^G_N$ denote the Gibbs measure associated with $f_N^G$. We start with the case $G(m) = x \cdot m$. Under this assumption, the sum structure of % \begin{e*} N G(m_N) = x_1 \sum_{i = 1}^N \chi_i^1 \sigma_{1,i} + x_2 \sum_{i = 1}^N \chi_i^2 \sigma_{2,i} \end{e*} % makes the computation of $\lim_{N \to +\infty} f_N^{m \mapsto x\cdot m}$ extremely similar to the computation of $\lim_{N \to +\infty} f_N^{0}$. We show that in this case the free energy can be computed, at least in the small $\beta$ region, using a critical point representation for $\lim_{N \to +\infty} f_N^{m \mapsto x\cdot m}$ in the style of Remark~\ref{r.RBM_fixed_point}. This representation can then be connected to the aforementioned Hamilton--Jacobi equation. % \footnote{To be precise, if one is only interested in representations for the non-enriched free energy in the low beta regime, like in \eqref{e.limf_nbasic} with $g$ understood as being defined like in \eqref{e.RBM_crit_point} for example, then it is possible to proceed without using the Hamilton--Jacobi approach. We provide this simplified version of the argument in Section~\rmk{s.simplified} and refer to Remark~\ref{r.simplified} for further discussion on this topic.} Then, as a direct application of the Gärtner–Ellis theorem, we deduce from the knowledge of $\lim_{N \to +\infty} f_N^{m \mapsto x\cdot m}$ for all $x \in \R^2$, a large deviation principle for $m_N$ under $\langle \cdot \rangle_N^0$ with an explicit rate function $I$. Varadhan's lemma then implies that for every continuous function $G$, % \begin{e*} \lim_{N \to +\infty} \frac{1}{N} \log \left\langle e^{N G(m_N) } \right\rangle_N^{0} = \sup_{m} \left\{ G(m) - I(m) \right\}. \end{e*} % Observing that $\frac{1}{N} \log \left\langle e^{N G(m_N) } \right\rangle_N^{0} = f_N^G - f_N^0$ it follows that $\lim_{N \to +\infty} f_N^G$ can be computed. Finally, as an application of Bryc’s inverse Varadhan lemma, we deduce from the previous display that $m_N$ satisfies a large deviation principle under $\langle \cdot \rangle_N^G$ with an explicit rate function $I^G$. Finally, using $G(m) = m_1 +m_2$ we obtain Theorem~\ref{t.parisi+ldp.basic}. Note that here we have used crucially that the previous display holds \emph{for all} continuous functions $G$. \subsection{Setting} \label{ss.setting} In this section, we define the class of models that we treat. For $m,n\in\N$, we denote by $\R^{m\times n}$ the space of all $m\times n$ real matrices. For any $a=(a_{ij})_{1\leq i\leq m,\, 1\leq j\leq n}\in \R^{m\times n}$, we denote its $j$-th column vector as $a_{\bullet j}=(a_{ij})_{1\leq i\leq m}$ and its $i$-th row vector as $a_{i\bullet}=(a_{ij})_{1\leq j\leq n}$. If not specified, a vector is understood to be a column vector. For a matrix or vector $a$, we denote by $a^\intercal$ its transpose. For $a,b\in \R^{m\times n}$, we write $a\cdot b= \sum_{ij}a_{ij}b_{ij}$, $|a|=\sqrt{a\cdot a}$, and similarly for vectors. %More generally, for any $k \in \N$ and $a,b\in \R^k$, we write %\begin{align}\label{e.dot_product} % a\cdot b=\sum_{i= 1}^k a_ib_i \qquad\text{and}\qquad |a|=\sqrt{a\cdot a}. %\end{align} For $n\in \N$, let $\S^n$ be the linear space of $n\times n$ real symmetric matrices %For $a,b\in\S^n$, viewing them as elements in $\R^{n\times n}$, we write $a\cdot b= \sum_{ij}a_{ij}b_{ij}$ and $|a|=\sqrt{a\cdot a}$. and also let $\S^n_+$ (resp.\ $\S^n_{++}$) be the subset consisting of positive semi-definite (resp.\ definite) matrices. For $a,b \in \S^n$, we write $a\geq b$ provided $a-b\in\S^n_+$, which gives a natural partial order on $\S^n$. We start by defining the spin glass part of the model. Let $D\in\N$ be the dimension of a single spin. Let $P_1$ be a finite measure supported on the unit ball $\Ll\{\tau\in\R^D:\:|\tau|\leq1\Rr\}$ of $\R^D$ and we assume that the linear span of vectors in $\supp P_1$ is $\R^D$. For each $N\in\N$, we denote by $\sigma =(\sigma_{\bullet i})_{i=1}^N =(\sigma_{di})_{1\leq d\leq D, \, 1\leq i\leq N}\in\R^{D\times N}$ the spin configuration consisting of $N$ spins $\sigma_{\bullet i} \in \R^D$ understood as column vectors. We view $\sigma$ as a $D\times N$ matrix. We sample each spin independently from $P_1$, and thus the distribution of $\sigma$ is given by % \begin{e*} \d P_1^{\otimes N}(\sigma)= \otimes_{i=1}^N \d P_1( \sigma_{\bullet i}). \end{e*} % We let $\xi:\R^{D\times D}\to \R$ be a smooth function that can be expressed as a power series. For each $N$, we consider a centered Gaussian field $(H_N(\sigma))_{\sigma\in\R^{D\times N}}$ with covariance structure given by % \begin{e} \label{e.cov} \E \Ll[H_N(\sigma)H_N(\sigma')\Rr] = N \xi \Ll( \tfrac{\sigma\sigma'^\intercal}{N}\Rr),\qquad\forall \sigma,\sigma'\in \R^{D\times N}. \end{e} % The Hamiltonian $H_N : \R^{D \times N} \to \R$ is the spin glass part of the model. For convenience we will also use the function $\theta:\R^{D\times D}\to\R$ defined by % \begin{align}\label{e.theta=} \theta (a) = a\cdot\nabla\xi(a)-\xi(a),\quad\forall a\in\R^{D\times D}. \end{align} We now describe the Mattis interaction part of the model. Let $L\in\N$ and let $(\chi_i)_{i\in\N}$ be i.i.d.\ $\R^L$-valued random vectors, which is particularly independent from $H_N(\sigma)$. Let $d\in\N$ and $h:\R^D\times \R^L \to \R^d$ be a bounded\footnote{Throughout it will be enough to assume that $h$ is bounded on the support of the finite measure $P_1 \otimes \text{Law}(\chi_1)$. } measurable function. For each $i\in\N$, we view $h(\sigma_{\bullet i},\chi_i)$ as a generalized spin and define the mean magnetization \begin{align}\label{e.m_N=} m_N = \frac{1}{N}\sum_{i=1}^N h(\sigma_{\bullet i},\chi_i). \end{align} Let $G:\R^d\to\R$ be a continuous function, and we add to the system the Curie--Weiss-type interaction $NG\Ll(m_N\Rr)$. At this point, we have described the spin glass part and the Mattis interaction part of our energy function. As explained in the introduction, recently a new approach has been put forward to describe the free energy of spin glass models (without Mattis interaction). In this approach, one needs to enrich the model by adding several new parameters. To define the enriched version of the model, we need to add external fields parameterized by increasing paths and driven by Poisson--Dirichlet cascades. Let $\mcl Q_\infty$ be the collection of bounded increasing càdlàg paths $p:[0,1) \to \S^D_+$, where the monotonicity is understood as $p(s)-p(s')\in\S^D_+$ for $s>s'$. Throughout given $p \in \mcl Q_\infty$ we will define % \begin{e*} p(1) = \lim_{s \to 1-} p(s), \end{e*} which is well-defined by monotonicity of $p$. %For $\sfp\in[0,\infty]$, we write $\mcl Q_\sfp = \mcl Q\cap L^\sfp([0,1);\S^D)$. The monotonicity also allows us to define $q(1)=\lim_{s\uparrow1}q(s)$ for every $q\in\mcl Q_\infty$. Throughout, let $\fR$ be the Ruelle probability cascade (RPC) with overlap uniformly distributed over $[0,1]$ (see \cite[Theorem~2.17]{pan}). Precisely, $\fR$ is a random probability measure on the unit sphere of a fixed separable Hilbert space (the exact form of the space is not important), with the inner product denoted by $\alpha\wedge\alpha'$. Let $\alpha$ and $\alpha'$ be independent samples from $\fR$. Then, the law of $\alpha\wedge\alpha'$ under $\E \fR^{\otimes 2}$ is the uniform distribution over $[0,1]$, where $\E$ integrates the randomness of $\fR$. This overlap distribution uniquely determines $\fR$ (see~\cite[Theorem~2.13]{pan}). Almost surely, the support of $\fR$ is ultrametric in the induced topology. For rigorous definitions and basic properties, we refer to \cite[Chapter 2]{pan} (also see \cite[Chapter 5]{HJbook}). We also refer to~\cite[Section~4]{chenmourrat2023cavity} for the construction and properties of $\fR$ useful in this work. For $q\in\mcl Q_\infty$ and almost every realization of $\fR$, let $(w^q(\alpha))_{\alpha\in\supp\fR}$ be the $\R^D$-valued centered Gaussian process with covariance \begin{align}\label{e.Ew^qw^q=} \E\Ll[ w^q (\alpha)w^q(\alpha')^\intercal\Rr] = q(\alpha\wedge\alpha'). \end{align} Its existence and properties are given in~\cite[Section~4]{chenmourrat2023cavity}. Conditioned on $\fR$, let $(w^q_i)_{i\in\N}$ be i.i.d.\ copies of $w^q$ and also independent of other randomness. For $N\in \N$, $q\in \mcl Q_\infty$, $t \geq 0$, the full energy function of the system will be chosen to be \begin{align} \label{e.H^t,q;s,x_N(sigma,alpha,chi)=} \begin{split} H^G_{N;t,q}(\sigma,\alpha) &= N G(m_N) + \sqrt{2t} H_N(\sigma) - Nt \xi\Ll(\tfrac{\sigma\sigma^\intercal}{N}\Rr) \\ &+ \sum_{i=1}^N w^q_i(\alpha)\cdot \sigma_{\bullet i} -\tfrac{1}{2} q(1)\cdot\sigma\sigma^\intercal. \end{split} \end{align} % We are interested in the infinite-volume limit of the associated free energy, defined as % \begin{align} \label{e.F_N(t,q;s,x)=} F_N^G(t,q) = - \frac{1}{N} \log \iint \exp\Ll(H^G_{N;t,q}(\sigma,\alpha) \Rr) \d P_1^{\otimes N}(\sigma) \d \mfk R(\alpha). \end{align} % We also let $\bar F_N^G(t,q)= \E F_N^G(t,q) = \E_{H_N,\,(w^q_i)_i,\,(\chi_i)_i,\,\fR} F_N^G(t,q)$. In the following, if not specified, $\E$ always averages over the displayed randomness. %In this paper, we choose to define the free energy as $F_N = - \frac{1}{N} \log(\dots)$ while in the companion paper~\cite{MattisI}, we use the other sign convention and define $\msc F_N = + \frac{1}{N} \log(\dots)$. In an attempt to minimize confusion, we use plain capital letters to denote the former and calligraphic letters to denote the latter. The reason for this discrepancy is that two different types of Hamilton--Jacobi equations appear in this series. First, there is the equation used to deal with the Mattis interaction $G(m_N)$ (see \textcolor{ref} in the companion paper~\cite{MattisI}), and then there is the equation used to deal with the spin glass part (see \eqref{e.hj}). Unfortunately, to have nice expressions, those equation each require a different sign convention for the free energy. As is explained in full detail in Section~\ref{ss.usual}, usual models such as \eqref{e.simple} can be obtained as special cases of the Hamiltonian $H_{N,t,q}^G$. Briefly, to do so one can set $t = \beta^2/2$ and $q = 0$ and make a wise choice for $G$ in order to absorb $- \tfrac{N \beta^2}{2}\xi\Ll(\tfrac{\sigma\sigma^\intercal}{N}\Rr)$ in the Mattis interaction. Here, we have chosen to introduce the correction terms $Nt \xi\Ll(\sigma\sigma^\intercal/N\Rr)$ and $\frac{1}{2}q(1)\cdot\sigma\sigma^\intercal$ to simplify the forthcoming variational formulas. As explained in Section~\ref{ss.usual}, they are easily removed. Also, notice that those terms are one-half of the variance of $ \sqrt{2t}H_N(\sigma)$ and $\sum_{i=1}^Nw^q_i(\alpha)\cdot \sigma_{\bullet i}$, respectively. So they correspond to drifts in exponential martingales. The large deviation principle we prove holds almost surely with respect to the randomness of $(H_N)_{N \geq 1}$, $(\chi_i)_{i \in \N}$, and $\mfk R$. Therefore, throughout we will assume that those random variables are defined on a common probability space $\Omega$ to have a convenient way of stating those results. Let $x \in \R^d$. When $G(m) = m \cdot x$, at $t = 0$ we have $\bar F^G_N(0,q) =\bar F_1^G(0,q)$ (see~\cite[Proposition~3.2]{chenmourrat2023cavity}). In this case, we set $\psi(q;x)=\bar F_1^{m \mapsto m \cdot x}(0,q)$ which can be expressed as \begin{align} \begin{split} \psi(q;x) = -\E\log\iint\exp\Ll(\sqrt{2}w^q(\alpha)\cdot \tau-q(1)\cdot\tau\tau^\intercal+x\cdot h(\tau,\chi_1)\Rr) \\ \d P_1(\tau)\d\mathfrak{R}(\alpha). \end{split} \label{e.psi(q;x)=} \end{align} % The Gibbs measure associated with $\psi$ in~\eqref{e.psi(q;x)=} is given by % \begin{align}\label{e.<>_R,q,x} % \la \cdot\ra_{\fR,q,x} \propto \exp\Ll(\sqrt{2}w^q(\alpha)\cdot\tau -q(1)\cdot \tau\tau^\intercal + x\cdot h(\tau,\chi_1)\Rr) \d \mu(\tau) \d \fR(\alpha). % \end{align} % \subsection{Setting old} % We describe a mean-field vector spin glass with an additional Curie--Weiss-type interaction. % First of all, since we are working with vector-valued spins whose overlaps are matrix-valued, we introduce relevant notations. % For $m,n\in\N$, we denote by $\R^{m\times n}$ the space of all $m\times n$ real matrices. % For any $a=(a_{ij})_{1\leq i\leq m,\, 1\leq j\leq n}\in \R^{m\times n}$, we denote its $j$-th column vector as $a_{\bullet j}=(a_{ij})_{1\leq i\leq m}$ and its $i$-th row vector as $a_{i\bullet}=(a_{ij})_{1\leq j\leq n}$. If not specified, a vector is understood to be a column vector. For a matrix or vector $a$, we denote by $a^\intercal$ its transpose. % For $a,b\in \R^{m\times n}$, we write $a\cdot b= \sum_{ij}a_{ij}b_{ij}$, $|a|=\sqrt{a\cdot a}$, and similarly for vectors. More generally, for any finite set $I$ and $a,b\in \R^I$, we write % \begin{align}\label{e.dot_product} % a\cdot b=\sum_{i\in I}a_ib_i; \qquad |a|=\sqrt{a\cdot a}. % \end{align} % For $n\in \N$, let $\S^n$ be the linear space of $n\times n$ real symmetric matrices. For $a,b\in\S^n$, viewing them as elements in $\R^{n\times n}$, we write $a\cdot b= \sum_{ij}a_{ij}b_{ij}$ and $|a|=\sqrt{a\cdot a}$. Let $\S^n_+$ (resp.\ $\S^n_{++}$) be the subset consisting of positive semi-definite (resp.\ definite) matrices. For $a,b \in \S^n$, we write $a\geq b$ provided $a-b\in\S^n_+$, which gives a natural partial order on $\S^n$. % We start with the spin glass part. Let $D\in\N$ be the dimension of a single spin. Let $\mu$ be a finite measure supported on the unit ball $\Ll\{\tau\in\R^D:\:|\tau|\leq1\Rr\}$ of $\R^D$ and we assume that % \begin{align}\label{e.mu_span} % \text{the linear span of vectors in $\supp\mu$ is $\R^D$.} % \end{align} % For each $N\in\N$, we denote by $\sigma =(\sigma_{\bullet i})_{i=1}^N =(\sigma_{di})_{1\leq d\leq D, \, 1\leq i\leq N}\in\R^{D\times N}$ the spin configuration consisting of $N$ $\R^D$-valued spins $\sigma_{\bullet i}$ as column vectors. We view $\sigma$ as a $D\times N$ matrix. We sample each spin independent from $\mu$ and thus the distribution of $\sigma$ is given by $ \d\mu^{\otimes N}(\sigma)= \otimes_{i=1}^N \d\mu( \sigma_{\bullet i})$. % For each $N$, assume that there is a centered Gaussian field $(H_N(\sigma))_{\sigma\in\R^{D\times N}}$ with covariance structure given by % \begin{align}\label{e.H_N(sigma)=} % \E \Ll[H_N(\sigma)H_N(\sigma')\Rr] = N \xi \Ll(N^{-1} \sigma\sigma'^\intercal\Rr),\qquad\forall \sigma,\sigma'\in \R^{D\times N} % \end{align} % where $\xi:\R^{D\times D}\to \R$ is a smooth function but not necessarily convex. % Then, we describe the Curie--Weiss-type interaction with additional randomness. % Let $L\in\N$ and let $(\chi_i)_{i\in\N}$ be i.i.d.\ $\R^L$-valued random vectors, which is particularly independent from $H_N(\sigma)$. % Let $d\in\N$ and $h:\R^D\times \R^L \to \R^d$ be a bounded measurable function. % For each $i\in\N$, we view $h(\sigma_{\bullet i},\chi_i)$ as a generalized spin and define the mean magnetization % \begin{align}\label{e.m_N=} % m_N = \frac{1}{N}\sum_{i=1}^N h(\sigma_i,\chi_i). % \end{align} % Let $G:\R^d\to\R$ be a locally Lipschitz function and we add to the system the Curie--Weiss-type interaction $NG\Ll(m_N\Rr)$. % Let $\beta\geq0$ be the inverse temperature. % We are interested in the infinite-volume limit of the free energy defined as % \begin{align}\label{e.F_N(beta)} % F_N(\beta) = - \frac{1}{N}\E_{H_N,\,(\chi_i)_i} \log \int \exp\Ll( \beta H_N(\sigma) + NG\Ll(m_N\Rr) \Rr) \d P_1^{\otimes N}(\sigma) % \end{align} % where $\E_{H_N,\,(\chi_i)_i}$ averages over the randomness in $H_N(\sigma)$ and that of $(\chi_i)_i$. % One can also add an inverse temperature parameter in front of $G$ but we choose to absorb it into $G$. % Given $\xi$ in~\eqref{e.H_N(sigma)=}, let $\theta:\R^{D\times D}\to\R$ be defined by % \begin{align}\label{e.theta=} % \theta(a) = a\cdot \nabla\xi(a)-\xi(a),\quad\forall a\in\R^{D\times D}. % \end{align} % \subsection{Enriched spin glass models with Mattis interaction} \label{ss.enriched} % We start with the setting of enriched models. We need to add external fields parameterized by increasing paths and driven by Poisson--Dirichlet cascades. % Let $\mcl Q$ be the collection of increasing càdlàg paths $p:[0,1) \to \S^D_+$, where the monotonicity is understood as $p(s)-p(s')\in\S^D_+$ for $s>s'$. % For $\sfp\in[0,\infty]$, we write $\mcl Q_\sfp = \mcl Q\cap L^\sfp([0,1);\S^D)$. % The monotonicity also allows us to define $q(1)=\lim_{s\uparrow1}q(s)$ for every $q\in\mcl Q_\infty$. % Throughout, let $\fR$ be the Ruelle probability cascade (RPC) with overlap uniformly distributed over $[0,1]$ (see \cite[Theorem~2.17]{pan}). Precisely, $\fR$ is a random probability measure on the unit sphere of a fixed separable Hilbert space (the exact form of the space is not important), with the inner product denoted by $\alpha\wedge\alpha'$. Let $\alpha$ and $\alpha'$ be independent samples from $\fR$. Then, the law of $\alpha\wedge\alpha'$ under $\E \fR^{\otimes 2}$ is the uniform distribution over $[0,1]$, where $\E$ integrates the randomness of $\fR$. % This overlap distribution uniquely determines $\fR$ (see~\cite[Theorem~2.13]{pan}). % Almost surely, the support of $\fR$ is ultrametric in the induced topology. For rigorous definitions and basic properties, we refer to \cite[Chapter2]{pan} (also see \cite[Chapter5]{HJbook}). % We also refer to~\cite[Section~4]{chenmourrat2023cavity} for the construction and properties of $\fR$ useful in this work. % For $q\in\mcl Q_\infty$ and almost every realization of $\fR$, let $(w^q(\alpha))_{\alpha\in\supp\fR}$ be the $\R^D$-valued centered Gaussian process with covariance % \begin{align}\label{e.Ew^qw^q=} % \E\Ll[ w^q (\alpha)w^q(\alpha')^\intercal\Rr] = q(\alpha\wedge\alpha'). % \end{align} % Its existence and properties are given in~\cite[Section~4]{chenmourrat2023cavity}. % Conditioned on $\fR$, let $(w^q_i)_{i\in\N}$ be i.i.d.\ copies of $w^q$ and also independent of other randomness. % For $N\in \N$, $(t,q)\in \R_+\times \mcl Q_\infty$, $(s,x)\in\R_+\times\R^d$, we consider the Hamiltonian % \begin{align} % H^{t,q;s,x}_N(\sigma,\alpha) = \sqrt{2t}H_N(\sigma) - tN\xi\Ll(\tfrac{\sigma\sigma^\intercal}{N}\Rr) + \sqrt{2}\sum_{i=1}^N w^q_i(\alpha)\cdot \sigma_{\bullet i} - q(1)\cdot\sigma\sigma^\intercal \notag % \\ % + sNG\Ll(m_N\Rr) + N x\cdot m_N .\label{e.H^t,q;s,x_N(sigma,alpha,chi)=} % \end{align} % Here, notice that $tN\xi\Ll(\sigma\sigma^\intercal/N\Rr)$ and $q(1)\cdot\sigma\sigma^\intercal$ are one half of the variance of $\sqrt{2t}H_N(\sigma)$ and $\sqrt{2}\sum_{i=1}^Nw^q_i(\alpha)\cdot \sigma_{\bullet i}$, respectively. They resemble the drift terms in exponential martingales and make our analysis easier. We will remove them afterwards. The enriched free energy is defined as % \begin{align}\label{e.F_N(t,q;s,x)=} % \begin{split} % F_N(t,q;s,x)&=-\frac{1}{N}\log \iint \exp\Ll(H^{t,q;s,x}_N(\sigma,\alpha)\Rr) \d \mu^{\otimes N}(\sigma)\d\mathfrak{R}(\alpha), % \\ % \bar F_N(t,q;s,x) &= \E F_N(t,q;s,x) = \E_{H_N,\,(w^q_i)_i,\,(\chi_i)_i,\,\fR} F_N(t,q;s,x). % \end{split} % \end{align} %and $\Ll\|\nabla \xi\lfloor_{B}\Rr\|_{\mathrm{Lip}(B;\R^{D\times D})}$ is the Lipschitz coefficient for $\nabla\xi:B\to \R^{D\times D}$. \subsection{Main results} Throughout, given two metric spaces $\mcl X$ and $\mcl Y$, we use $\|\scdot\|_{\mathrm{Lip}(\mcl X;\mcl Y)}$ to indicate the Lipschitz coefficient of functions from $\mcl X$ to $\mcl Y$. We let $\nabla \xi\lfloor_B$ be the restriction of $\nabla \xi$ to $B$ where \begin{align}\label{e.B=} B=\Ll\{a\in\R^{D\times D}:\: |a|\leq1\Rr\}. \end{align} % We let $t_\star \in (0,+\infty)$ be the positive real number defined by \begin{align}\label{e.t_star=} t^{-1}_\star = \sup_{x'\in\R^d} \Ll\|\nabla_q \psi(\scdot;x')\Rr\|_{\mathrm{Lip}(\mcl Q_\infty;\mcl Q_\infty)}\Ll\|\nabla \xi\lfloor_{B}\Rr\|_{\mathrm{Lip}(B;\R^{D\times D})}. \end{align} % \begin{remark}[Lower bound for $t_\star$] %\label{r.ont*} Below, see \eqref{e.p.psi_smooth_2} in Proposition~\ref{p.psi_smooth}, we justify that for every $x' \in \R^d$, the function $\psi(\scdot;x')$ is differentiable on $\mcl Q_\infty$ and that its gradient $\nabla_q \psi(\scdot;x') : \mcl Q_\infty \to \mcl Q_\infty$ is a Lipschitz function with Lipschitz constant $\leq 16$. This yields the following lower bound % \begin{align}\label{e.t_star>} t_\star \geq \Ll(16\Ll\|\nabla \xi\lfloor_{B}\Rr\|_{\mathrm{Lip}(B;\R^{D\times D})}\Rr)^{-1} > 0. \end{align} We note that this bound is very loose and is unlikely to be optimal. \qed \end{remark} % For every $x \in \R^d$, let $f=f(\cdot,\scdot;x) : \R_+ \times \mcl Q_2 \to \R$ denote the unique solution\footnote{Below in Section~\ref{s.short_time} we give a precise meaning to this partial differential equation and explain in what sense it has a unique solution.} of % \begin{e} \label{e.hj_intro} \begin{cases} \partial_t f - \int \xi(\nabla f) = 0 \text{ on } (0,+\infty) \times \mcl Q_2 \\ f(0,\scdot;x) = \psi(\scdot;x). \end{cases} \end{e} % We refer the reader to the discussion at the beginning of Section~\ref{s.short_time} for more details on the precise meaning of \eqref{e.hj_intro} and the associated notion of viscosity solution. %\begin{remark}[Alternative definition for $f$] \label{r.def.f} % Fix $x \in \R^d$ and let $t_\star < t$ and $q \in \mcl Q_2$. For every $p \in \mcl Q_\infty$ we define % % \begin{e} \label{e.P_t,q(p;x)=} % \msc P_{t,q}(p;x) = \psi(q +t \nabla \xi(p);x) - t \int_0^1 \theta(p(u)) \d u. % \end{e} % % The functional $\msc P_{t,q}(\scdot;x)$ is the functional used to write the Parisi formula for convex models (see~\cite{MattisI}). In Proposition~\ref{p.f=UZ} we show that % % \begin{e} \label{e.visco=crit_parisi} % f(t,q) = \msc P_{t,q}(p^x;x), % \end{e} % % where $p^x \in \mcl Q_\infty$ is the unique solution of the fixed point equation % % \begin{e} \label{e.fixed_point} % p = \nabla_q \psi(q+t \nabla \xi(p);x). % \end{e} % % This fixed point equation can be understood as the critical point condition for the functional $\msc P_{t,q}(\scdot;x)$ (at least when $\nabla^2 \xi$ is non-singular). We refer the reader to the discussion at the beginning of Section~\ref{s.short_time} for more details on the precise meaning of \eqref{e.hj_intro} and the associated notion of viscosity solution. We also mention that representations similar to \eqref{e.visco=crit_parisi} have been obtained for the mutual information in the sparse stochastic block model (with no Mattis interaction)~\cite[Proposition~1.3]{dominguez2024critical}. \qed %\end{remark} % \begin{theorem}[Limit free energy] \label{t.free_energy} Let $t < t_\star$ and $q \in \mcl Q_\infty$. For every continuous function $G : \R^d \to \R$, we have \begin{e} \label{e.free_energy} \lim_{N \to +\infty} \bar F_N^G(t,q) = \inf_{m\in\R^d}\sup_{x\in\R^d}\Ll\{f(t,q;x)+m\cdot x-G(m)\Rr\}. \end{e} Moreover, for every $x\in\R^d$, there is a unique solution $p^x\in\mcl Q_\infty$ of the fixed point equation \begin{e} \label{e.fixed_point} p = \nabla_q \psi(q+t \nabla \xi(p);x) \quad\text{for $p\in \mcl Q_\infty$}, \end{e} and we have \begin{e} \label{e.visco=crit_parisi} \begin{split} f(t,q;x ) = \psi(q +t \nabla \xi(p^x);x) - t \int_0^1 \theta(p^x(u)) \d u \quad\text{and}\quad \nabla_qf(t,q;x) = p^x. \end{split} \end{e} \end{theorem} % The convergence in~\eqref{e.free_energy} also happens almost surely due to the following. \begin{remark}[Concentration]\label{r.concentration} The random variable $F_N^G(t,q)$ concentrates around its expectation $ \E F_N^G(t,q)$ in the large $N$ limit. Indeed, this can be deduced using the standard Gaussian concentration result (e.g.\ \cite[Theorem~1.2]{pan}) for the Gaussian randomness in $F_N^G(t,q)$ together with the Efron--Stein inequality and the boundedness of the function $h(\cdot,\cdot)$ for random variables $(\chi_i)_i$. %Therefore, to describe the almost sure limit of $F_N^G(t,q)$ it suffices to describe the limit of its expectation, which is a deterministic quantity. \qed \end{remark} The fixed point equation~\eqref{e.fixed_point} can be understood as the critical point condition for the functional on the right-hand side of the first equation in~\eqref{e.visco=crit_parisi}. One can interpret the unique $p^x$ solution of \eqref{e.fixed_point} as the Parisi measure, by identifying $p^x$ with the law of $p^x(U)$ with $U$ the uniform random variable over $[0,1]$. We also mention that representations similar to \eqref{e.visco=crit_parisi} have been obtained for the mutual information in the sparse stochastic block model (with no Mattis interaction)~\cite[Proposition~1.3]{dominguez2024critical}. Note that for every $x \in \R^d$, $t < t_\star$ and $q \in \mcl Q_\infty$ we have % \begin{e} \label{e.def.phi} f(t,q;x) = \lim_{N \to +\infty} F_N^{m \mapsto x \cdot m}(t,q). \end{e} % %\begin{remark}[Comparison with a previous result] \label{r.simplified} %By an argument similar to that used for~\cite[Proposition~1.3]{dominguez2024critical} in the setting of the sparse stochastic block model, one can show that~\eqref{e.def.phi} holds for $t_Ntqsx=} \la\cdot\ra_{N;t,q}^G \quad\propto\quad \exp\Ll(H^G_{N;t,q}(\sigma,\alpha)\Rr)\d P_1^{\otimes N}(\sigma)\mathfrak{R}(\alpha). \end{align} % For brevity, we also denote by $ \la\cdot\ra_{N;t,q}^G$ its tensorization. Recall the definition of $f(t,q,\cdot)$ in \eqref{e.def.phi}. We let $ f^*(t,q;\cdot)$ denote the convex dual of $-f(t,q;\cdot)$, namely, % \begin{e}\label{e.f^*=} f^*(t,q;m)= \sup_{x\in\R^d} \{x \cdot m + f(t,q;x)\}, \quad \forall m \in \R^d. \end{e} % Recall that the random variables $(H^G_{N;t,q})_{N \geq 1}$ and $(\chi_i)_{i \in \N}$ are defined on a common probability space $\Omega$. For a random variable $X : \Omega \to \mfk X$, we let $X^\omega \in \mfk X$ denote the realization of the random variable $X$ on the event $\omega \in \Omega$. Recall the mean magnetization $m_N$ defined in \eqref{e.m_N=}. Our next main result is a (quenched) large deviation principle for the random variables $m_N$ under $\la\cdot\ra^{G}_{N;t,q}$. \begin{theorem}[Large deviations of $m_N$] \label{t.ldp} Let $t < t_\star$ and $q \in \mcl Q_\infty$. There is a full-measure subset $\Omega' \subset \Omega$ such that the following holds. For every $\omega \in \Omega'$ and every continuous function $G : \R^d \to \R$, the random variable $m_N$ satisfies a large deviation principle under $\langle \cdot \rangle_{N;t,q}^{G,\omega}$ with rate function $I^G_{t,q}$ defined by % \begin{e}\label{e.I^G=} I^G_{t,q}(m)= -G(m) + f^*(t,q;m) + \sup_{m'} \left\{ G(m') - f^*(t,q;m') \right\},\, \forall \, m \in \R^d. \end{e} \end{theorem} %The setting of multi-species models is slightly different but requires more definitions. Hence, we postpone to Appendix~\ref{s.multi-sp} the description of the multi-species setting and the adaption of the main results (Theorems~\ref{t-ms} and~\ref{t.so-ms}). %The replica method in physics suggests computing the free energy via the following identity %% %\begin{e} % \E \log Z = \lim_{n \to 0} \frac{1}{n} \log \E Z^n. %\end{e} %% %To describe this method, we will assume that $D = 1$. The idea is to first compute $\E Z_N^n$ where $n \geq 1$ is an \emph{integer} and %% %\begin{e*} % Z_N = \int e^{H_{N;t,0}^0(\sigma)} \d P_N(\sigma) . %\end{e*} %% %The quantity $\E Z_N^n$ can be expressed as an integral over $n$ spin configurations $\sigma^1,\dots,\sigma^n \in \R^{}$. From this, it is possible to obtain a formula for $\lim_{N \to +\infty} -\frac{1}{Nn} \log \E Z_N^n$ involving a maximization scheme over $n \times n$ matrices. The $n \times n$ matrix appearing in this computation corresponds to the large $N$ limit of the matrix whose coefficients are $\sigma^k \cdot \sigma^l/N$. Under additional assumptions, it is possible to solve this maximization problem explicitly. For example, one can assume a priori that the maximum is reached by a matrix that is invariant under permutation of the indices. Under this assumption, one obtains a formula for $\lim_{N \to +\infty} -\frac{1}{Nn} \log \E Z_N^n$ whose right-hand side involves an explicit (say polynomial) function of $n$. The meaning of the expression on the right-hand side naturally extends to any $n \in (0,+\infty)$ and one declares that its limit as $n \to 0$ is $\lim_{N \to +\infty} F_N^0(t,q)$. Of course, since $n \times n$ matrices are defined when $n \in \N$ and taking the limit $n \to 0$ requires eventually that $n \in (0,1)$, this computation has no rigorous mathematical grounding. %For the SK model (i.e. $\xi(x)= x^2$), an attempt was made to compute the free energy using this method and relying on the replica symmetry assumption (i.e. that the maximum is reached by a matrix that is invariant under permutation) \cite{sherrington1975solvable}. Surprisingly, this yielded a plausible result, but, as the authors pointed out, their solution was only valid when the variable $t$ was chosen to be small enough. In an effort to resolve this issue, the replica symmetry breaking scheme was devised \cite{parisi1980sequence}. This scheme prescribes a recursive procedure for defining larger and larger spaces of matrices where to search for maximizers. Instead of positing that the maximum is reached by a permutation invariant matrix, one assumes that the indices $a \in \{1,\dots,n\}$ are separated into $K+1$ groups and that the maximizers are contained in the set of matrices that are invariant by group-preserving permutation of indices. When there is only one group ($K = 0$), we recover the replica symmetric (RS) ansatz. When one needs to use $K \geq 1$ to obtain the right expression for the free energy, the system is said to exhibit $K$ steps or replica symmetry breaking ($K$-RSB). For the SK model, it was observed that to obtain a solution that matches the computer simulations for all $t$, one needs to use a $K$-RSB ansatz with $K \to +\infty$ \cite{parisi1980order,parisi1983order}. Because of this, the SK model is said to exhibit infinitely many steps or replica symmetry breaking ($\infty$-RSB). This observation led to the discovery of the Parisi formula \cite{parisi1979infinite}. %Let us now come back to our original setting where $D \in \N$ is arbitrary. In its modern form, the Parisi formula can be understood as the statement that, when $\xi$ is convex on $S^D_+$, the large $N$ limit of $F_N^0(t,q)$ coincides with the supremum over $p \in \mcl Q_\infty$ of $\msc P_{t,q}(\cdot,0)$ \cite{chenmourrat2023cavity}. In many cases, it is known that this supremum is attained at a unique $p^*$ \cite{auffinger2015parisi,chen2025uniqueness,chen2025convexstructure}. When $\xi$ is not assumed to be convex on $S^D_+$, it has recently been proven that if $F^0_N(t,q)$ converges, then its limit is among the critical values of a functional $\mcl J_{t,q} : \mcl Q_2 \times \mcl Q_2 \to \R$. The functional $\mcl J_{t,q}$ is related to $\msc P_{t,q}(\cdot,0)$ via %% %\begin{e} % \msc P_{t,q}(p;0) = \mcl J_{t,q}(q+t \nabla \xi(p),p). %\end{e} %% %In both cases, the path $p$ that matches $\msc P_{t,q}(p;0)$ with the free energy is linked to the limit in law of the overlap. More precisely, given two independent random variables $\sigma^1,\sigma^2$ drawn from the Gibbs measure $\langle \cdot \rangle^0_{N;t,q}$, we have that the limit in law as $N \to +\infty$ of $\sigma^1 \cdot \sigma^2/N$ is the law of $p(U)$ where $U$ is a uniform random variable in $[0,1]$ \cite[Theorem~1.4]{chenmourrat2023cavity}. Thanks to this, the replica picture can be connected to the rigorous framework. In particular, the number of steps of replica symmetry breaking can be understood as the number of points in the image of $p$ minus $1$. For $y\in\S^D$, we define \begin{align}\label{e.haty=} \hat y(s) : [0,1) \to S^D \end{align} to be the constant path with value $y$. For $\kappa\in L^1$, we define % \begin{e} \check{\kappa}=(\kappa)^\vee =\int_0^1\kappa(r)\d r \in\S^D. \end{e} % When the path $q$ is chosen to be $\hat y$ and particularly $\hat 0$, we show that the representation for the free energy derived in Theorem~\ref{t.free_energy} reduces to a replica symmetric formula (Theorem~\ref{t.rs}). %This result should be understood as saying that the path $p^x$ such that $f(t,\hat{0};x)= \msc P_{t,\hat{0}}(p^x;x)$ is a constant path. This is not surprising and corresponds to the traditional picture of replica symmetry at low $t$ as previously mentioned. %We also show that when the path $q$ is chosen to be strictly increasing, then the image of $p^x$ is an uncountable subset of $S^D_+$ (Theorem~\ref{t.rsb}). This is in sharp contrast with what is expected of a disordered system at high temperature, as this observation suggests that in this regime the system is $\infty$-RSB. This surprise is explained by the fact that the Poisson-Dirichlet cascade we use to enrich $H_N = H_N(\sigma)$ into $H_{N;t,q}^0 = H_{N;t,q}^0(\sigma,\alpha)$ encodes an ultrametric structure which is itself $\infty$-RSB when $q$ is strictly increasing. For every $y\in \S^D_+$ and $x \in \R^d$, we set \begin{align}\label{e.phi(y,x)=} \phi(y;x)=-\E\log\int \exp\Ll(\sqrt{2y}\eta\cdot\tau-y\cdot\tau\tau^\intercal + x \cdot h(\tau,\chi_1) \Rr)\d P_1(\tau), \end{align} where $\eta$ is an independent standard $\R^D$-valued Gaussian vector (i.e.\ $\E\eta\eta^\intercal$ is the $D\times D$-identity matrix) and $\E$ averages over both $\eta$ and $\chi_1$. We denote the derivative in the first variable of $\phi(\scdot;x)$ as $\nabla_y \phi(\scdot;x)$. \begin{theorem}[Replica symmetric form]\label{t.rs} Let $x\in\R^d$. Define $g(\scdot,\scdot;x):[0,t_\star)\times \S^D_+\to\R$ by % \begin{e} g(t,y;x)=f(t,\hat y;x). \end{e} % The function $g(\scdot,\scdot;x)$ is differentiable on $[0,t_\star)\times \S^D_+$ and satisfies \begin{align}%\label{e.hj_g-p.f-d-proj} \begin{cases} \partial_t g(t,y;x) - \xi\big(\nabla_y g(t,y;x)\big)=0 &\quad \text{ for } (t,y)\in(0,t_\star)\times \S^D_+, \\ g(0,y;x) = \phi(y;x) &\quad \text{ for } y \in \S^D_+. \end{cases} \end{align} % Moreover, at every $(t,y)\in (0,t_\star)\times \S^D_+$, there is a unique solution $z\in\S^D_+$ to the fixed point equation \begin{align}\label{e.t.fixed_pt_eqn} z' = \nabla_y \phi(t\nabla\xi(z');x) \quad\text{for }z'\in\S^D_+, \end{align} and we have % \begin{e}\label{e.t.fixed_pt_eqn2} g(t,y;x) = \phi(y+t\nabla\xi(z);x) - t \theta(z)\quad\text{and}\quad \nabla_y g(t,y;x) = z. \end{e} \end{theorem} As before, the fixed point equation~\eqref{e.t.fixed_pt_eqn} can be understood as the critical point condition for the right-hand side of the first equation in~\eqref{e.t.fixed_pt_eqn2}. \begin{remark}[Gaussian external field]\label{r.Gauss_ext_field} When $q=\hat y$, we can interpret the model as a non-enriched model with a Gaussian external field. Recall the covariance structure of $w^q(\alpha)$ from~\eqref{e.Ew^qw^q=}. When $q=\hat y$ for some $y\in\S^D_+$, we have \begin{align}\label{e.w^haty=sqrtyeta} w^{\hat y}(\alpha) \stackrel{\d}{=} \sqrt{y}\,\eta, \end{align} where $\eta$ is an independent standard $\R^D$-valued Gaussian vector. Therefore, inside $H^G_{N;t,q}(\sigma,\alpha)$, we have \begin{align} \sum_{i=1}^N w^{\hat y}_i(\alpha)\cdot \sigma_{\bullet i} \stackrel{\d}{=} \sqrt{y}\sum_{i=1}^N \eta_i \cdot \sigma_{\bullet i}, \end{align} where $(\eta_i)$ consists of i.i.d.\ standard $\R^D$-valued Gaussian vectors. Since this quantity no longer depends on $\alpha$, we can rewrite $H^G_{N;t,\hat y}(\sigma,\alpha)$ simply as $H^G_{N;t,y}(\sigma)$, where $y$ determines the Gaussian external field $\sqrt{y}\sum_{i=1}^N \eta_i \cdot \sigma_{\bullet i}$. In view of~\eqref{e.psi(q;x)=} and~\eqref{e.phi(y,x)=}, we can use~\eqref{e.w^haty=sqrtyeta} to see that for every $y\in \S^D_+$ and $x \in\R^d$, \begin{align}\label{e.phi=psi} \phi(y;x)=\psi\Ll(\hat y;x\Rr). \qed \end{align} \end{remark} \begin{remark}[Replica symmetric formula for the limit free energy] At $q=\hat y$ for $y\in\S^D_+$ which includes the non-enriched case $q=\hat 0$, we can thus replace $f(t,q;x)$ appearing in Theorems~\ref{t.free_energy} and~\ref{t.ldp} by the replica-symmetric counterpart $g(t,y;x)$. Recall that we interpret $p^x$ appearing in Theorem~\ref{t.free_energy} as the Parisi measure. Then, in this case, $p^x$ corresponds to $\widehat{z^x}$, where $z^x$ is the unique solution of~\eqref{e.t.fixed_pt_eqn} at $x$. Hence, indeed the system is replica-symmetric. \qed \end{remark} When $q$ is not a constant path, then $p^x$ will not be constant. Here, the symmetry breaking is not intrinsic to system but rather caused by $q$. \begin{remark}[Replica symmetry breaking induced by $q$]\label{r.rsb} Fix $x \in \R^d$. Let $(t,q) \in [0,t_\star) \times \mcl Q_2$ and assume that $q$ as a path has a jump at $s\in(0,1)$. The map $\nabla_q \psi(\scdot;x)$ preserves jumps (see Lemma~\ref{l.detect_jump}). Therefore, the unique solution $p^x \in \mcl Q_\infty$ in Theorem~\ref{t.free_energy} has also a jump at $s$. In particular, if $q$ is strictly increasing on $[0,1)$, then so is $p^x$. \qed \end{remark} %Fix $x \in \R^d$. Let $(t,q) \in [0,t_\star) \times \mcl Q_2$, assume that $q$ is strictly increasing, that is for all $0 < s < s' < 1$ we have $q(s) \neq q(s')$. The map $\nabla_q \psi(\scdot;x)$ sends strictly increasing paths to strictly increasing paths (see Lemma~\ref{l.detect_jump}). Therefore, the unique solution $p \in \mcl Q_\infty$ to the fixed point equation $p = \nabla_q \psi(q + t \nabla \xi(p);x)$ satisfies for all $0 < s < s' < 1$, $p(s) \neq p(s')$. In particular, the image of $p$ is an uncountable subset of $S^D_+$. % \begin{proof}[Proof of Theorem~\ref{t.rsb}] % Let $q' = q + t \nabla \xi(p)$. Since $q$ is strictly increasing and $\nabla \xi(p)$ is increasing, we have that $q'$ is strictly increasing. Furthermore, we have that $p = \nabla \psi(q';x)$, so according to it follows that $p$ is strictly increasing. % \end{proof} Our main results are stated for models based on vector spin glasses. The same conclusions can be deduced for the following models as well. \begin{remark}[Multi-type and multi-species models] \label{r.multi-sp} Further simplification can be made if $\xi$ only depends on the diagonal of the matrix. We call such a model multi-type. In this setting, only the diagonal in the self-overlap matrix matters, and thus the free energy can be encoded with paths taking values in $\R^D_+$ rather than $S^D_+$. A multi-type model is a special case of multi-species models. In Appendix~\ref{s.multi-sp}, we describe how our main results can be adapted to multi-species models. We note that passing back and forth between vector models and multi-species models is largely a technical matter and does not introduce significant new challenges. Essentially all the results mentioned so far on the free energy of vector spin glasses remain valid in this setting~\cite{barcon,pan.multi}. \qed %Hence, the assertion here follows from Theorems~\ref{t-ms} and~\ref{t.so-ms} which are adaptions of the above theorems to the multi-species setting. \qed \end{remark} \subsection{Recovering concrete models from the general setting} \label{ss.usual} The main results of this paper are stated for very general models as introduced in Section~\ref{ss.setting}. Let us explain how to recover results about more usual models, like \eqref{e.simple}, from those. Given $h:\R^D\times \R^L\to\R^d$ in~\eqref{e.F_N(t,q;s,x)=}, we define $\tilde h:\R^D\times \R^L\to \R^d\times \S^D$ through \begin{e} \tilde h(\tau,\chi) = \Ll(h(\tau,\chi),\ \tau\tau^\intercal\Rr). \end{e} Henceforth, we isometrically identify $\R^d\times \S^D$ with $\R^{\tilde d}$ for some $\tilde d \in\N$. Given $G:\R^d\to\R$ in~\eqref{e.F_N(t,q;s,x)=}, we introduce $\tilde G:\R^d\times \S^D\to\R$ given by % \begin{e} \tilde G(m,r) =G(m) + \frac{\beta^2}{2}\xi(r). \end{e} % Then, we consider $H_{N;t,q}^G$ in~\eqref{e.H^t,q;s,x_N(sigma,alpha,chi)=} with $G,h,d$, therein substituted with $\tilde G,\tilde h,\tilde d$ introduced above and with $t=\beta^2/2$ and $q=\hat 0$ (constant path taking the value $0$), we denote the resulting Hamiltonian $\tilde H_N$. Observe that we have % \begin{align} \tilde H_N(\sigma) &= \beta H_N(\sigma) + N \tilde G \left( \frac{1}{N} \sum_{i = 1}^N \tilde h(\sigma_i, \chi_i) \right) - \frac{N \beta^2}{2} \xi \left( \frac{\sigma \sigma^\intercal}{N}\right) \\ &= \beta H_N(\sigma) + N G \left( \frac{1}{N} \sum_{i = 1}^N h(\sigma_{\bullet i}, \chi_i) \right). \end{align} % In particular, observe that the model described in \eqref{e.simple} is recovered by choosing $D=2$, $d=2$, $L = 2$, $h(\sigma,\chi) = (\text{sgn}(\sigma_1 \chi_1),\text{sgn}(\sigma_2 \chi_2))$, $G(m) = m_1+m_2$, $\xi(a) = a_{11}a_{22}$, and $P_1$ is the uniform measure on $\{-1,1\}^2$. \begin{remark}[About Theorem~\ref{t.parisi+ldp.basic}] %\label{r.path_values} Observe that thanks to the specialization of the general models from Section~\ref{ss.setting} to \eqref{e.simple} given in this section, Theorem~\ref{t.parisi+ldp.basic} can (almost) be recovered from Theorem~\ref{t.free_energy}, Theorem~\ref{t.ldp} and Theorem~\ref{t.rs}. The small difference being that in those results the paths in $\mcl Q$ are $S^2_+$ valued, while in Theorem~\ref{t.parisi+ldp.basic} the expression for the limit free energy comes from constant $\R^2_+$ valued paths. This difference originates from the fact that the model described in \eqref{e.simple} is a multi-type model in the sense defined in Remark~\ref{r.multi-sp}. Indeed, $\xi(a) = a_{11}a_{22}$ does not depend on the whole input matrix but only on its diagonal elements. To recover Theorem~\ref{t.parisi+ldp.basic} as stated, one can use the version of the main results stated in Appendix~\ref{s.multi-sp} instead. The lower bound $\beta_\star \geq \frac{1}{2\sqrt{2}}$ comes from setting $\beta_\star = \sqrt{2 t_\star}$ and the lower bound \eqref{e.t_star>} for $t_\star$.\qed \end{remark} % \begin{remark} \label{r.simplef} % The solution $f(\cdot,\cdot,;)$ of \eqref{e.hj_intro} is characterized by \eqref{e.visco=crit_parisi}. For the model \eqref{e.simple}, this representation can be simplified using Theorem~\ref{t.rs}. In particular, when $G(m) = x \cdot m$, we have in this case % % % \begin{e} % f(t,\hat{0};x) = \vartheta(tz_2,x_1) + \vartheta(tz_1,x_2), % \end{e} % % % where the function $\vartheta$ is defined in \eqref{e.def.vartheta} and where $z \in \R^2_+$ is the unique solution of the following system of equations % % % \begin{e} % \begin{cases} % z_1 = \partial_y \vartheta(tz_2;x_1) \\ % z_2 = \partial_y \vartheta(tz_1;x_2). % \end{cases} % \end{e} % \qed % \end{remark} % Lastly, we mention that the following model is covered here, which serves as our main motivation. % \begin{example}[Restricted Boltzmann Machine] % For simplicity, we assume that the visible layer and the hidden layer are of the same size. The general case is a two-species model covered in Appendix~\ref{s.multi-sp}. % Let $D=2$, $L=d=1$, $\xi(r_1,r_2) = r_1r_2$, $h(\sigma_{\bullet i},\chi_i) = \sigma_{1i}\chi_i$, and $G(r)= \beta r^2/2$. % We interpret the row vectors $\sigma_{1\bullet}$ and $\sigma_{2\bullet}$ as visible and hidden layers, respectively. % We assume that each $\sigma_{di}$ takes value in $[-1,1]$ and that $\chi_i$ is bounded. % The entire Hamiltonian (terms inside $\exp(\cdots)$ in~\eqref{e.F_N(t,q;s,x)=}) is % \begin{align*} % \beta\sum_{i,j=1}^N\Ll(\frac{1}{\sqrt{N}} g_{ij}\sigma_{1i}\sigma_{2j} + \frac{1}{2N} \Ll(\sigma_{1i}\chi_i\Rr)\Ll(\sigma_{1j}\chi_j\Rr) \Rr) % \end{align*} % where $(g_{ij})$ are i.i.d.\ standard Gaussians independent from $(\chi_i)$. % The first term is the Hamiltonian in a bipartite spin glass model and the second is a Mattis interaction. \qed % \end{example} % When $\sigma_{2i}$ and $\chi_i$ are Gaussian which technically goes beyond the current setting, this model is known to be equivalent to the Hopfield model. Deep Boltzmann Machines with more layers are covered by the multi-species setting in Appendix~\ref{s.multi-sp}. \subsection{Organization of the paper} In Section~\ref{s.psi_reg}, we establish some regularities of the function $\psi$ that are useful later on; these results are gathered in Proposition~\ref{p.psi_smooth}. %In Section~\ref{s.lip diff on H} we prove a general version of the following statement: ``if a function is differentiable on an interval with a point removed and its derivative admits a limit at the removed point, then the function is differentiable at this point''. In Section~\ref{s.short_time}, we show that the solution of \eqref{e.hj_intro} is differentiable on the time interval $[0,t_\star)$. Finally, in Section~\ref{s.proof_main_results}, we prove our main results. We start with a proof of \eqref{e.def.phi}, this is the key aspect of our argument, and it relies on the content of all the previous sections, see Corollary~\ref{c.limF_N=f}. We also crucially verify, thanks to \eqref{e.def.phi}, that $x \mapsto \lim_{N \to +\infty} F_N^{x \mapsto x \cdot m}(t,q)$ is continuously differentiable in $x$, see Lemma~\ref{l.varphi}. Once those two key results are proven, we can deduce Theorem~\ref{t.free_energy} and Theorem~\ref{t.ldp} using classical results from large deviation theory. \subsection{Acknowledgements} We would like to warmly thank Jean-Christophe Mourrat for many useful inputs and interesting discussions during the conception and writing of this paper. HBC acknowledges funding from the NYU Shanghai Start-Up Fund and support from the NYU–ECNU Institute of Mathematical Sciences at NYU Shanghai. \section{Regularity of \texorpdfstring{$\psi$}{psi}} \label{s.psi_reg} \begin{definition}[Fréchet differentiability]%\label{d.frechet} % Let $h : (0,+\infty) \times \mcl Q_{\infty,\uparrow} \to \R$ and $(t,q) \in (0,+\infty) \times \mcl Q_{\infty,\uparrow}$. We say that $h$ is Fréchet differentiable at $(t,q)$ when there exists a unique $(a,y) \in \R \times L^2$ such that the following holds as $(t',q') \to (t,q)$ in $(0,+\infty) \times \mcl Q_{\infty,\uparrow}$, % % % \begin{e*} % h(t',q') = h(t,q) + \langle y, q'-q \rangle_{L^2} + a(t'-t) +o(|q'-q|_{L^2}+|t'-t|). % \end{e*} % % % In this case, we call $(a,y)$ the Fréchet derivative of $h$ at $(t,q)$ and denote it $(\partial_t h(t,q),\nabla h(t,q))$. Let $(E,\,|\,\cdot\,|_E)$ be a Banach space and denote by $\langle \,\cdot\,,\, \cdot\, \rangle_E$ the canonical pairing between $E$ and $E^*$. Let $G$ be a subset of $E$ and let $q \in G$. A function $g : G \to \R$ is \textbf{Fréchet differentiable} at $q \in G$ if there exists a unique $y^* \in E^*$ such that the following holds: % \begin{e*} \lim_{\substack{q' \to q \\ q' \in G}} \frac{g(q') - g(q) - \langle y^*, q'-q \rangle_E}{|q'-q|_E} = 0. \end{e*} % In such a case, we call $y^*$ the \textbf{Fréchet derivative} of $g$ at $q$, and we denote it by $\nabla g(q)$ or $\nabla_q g(q)$. More generally, let $(E',|\scdot|_{E'})$ be another Banach space. Let $G, G'$ be subsets of $E$ and $E'$, respectively, and let $q\in G$. A function $g:G\to G'$ is \textbf{Fréchet differentiable} at $q\in G$ if there exists a unique bounded operator $A:E\to E'$ such that \begin{e*} \lim_{\substack{q' \to q \\ q' \in G}} \frac{\Ll|g(q') - g(q) - A(q'-q)\Rr|_{E'}}{|q'-q|_E} = 0. \end{e*} Again, in such a case, we denote $A$ as $\nabla g(q)$ or $\nabla_q g(q)$. \end{definition} We will use the operator norm on the set of bounded operators $A : E \to E'$, defined as % \begin{e} \label{e.def.opnorm} \| A \|_{\mathrm{Op}(E,E')} = \sup_{q \in E \setminus \{0\}} \frac{|Aq|_{E'}}{|q|_E}. \end{e} \begin{proposition}[Regularity of $\psi$]\label{p.psi_smooth} %\leavevmode The following holds, where we write $L^\sfp=L^\sfp([0,1);\S^D)$ for $\sfp\in[1,\infty]$. \begin{enumerate} \item \label{i.p.psi_smooth(1)} The function $\psi$ can be extended to $\mcl Q_1\times \R^d$ and satisfies, for any $q,q'\in\mcl Q_1$ and $x,x'\in\R^d$, \begin{align}\label{e.psi_Lip} \Ll|\psi(q;x) - \psi(q';x')\Rr|\leq |q-q'|_{L^1} + |h|_{L^\infty} |x-x'|. \end{align} \item \label{i.p.psi_smooth(2)} For each $x\in\R^d$, the restriction of the function $\psi(\scdot;x)$ to $\mcl Q_2$ is Fréchet differentiable everywhere. The derivative $\nabla_q\psi(\scdot;x)$ can be extended to $\mcl Q_1$ and satisfies, for any $q,q'\in\mcl Q_1$, $x,x'\in\R^d$, and $\sfr\in[1,+\infty]$, \begin{gather} \nabla_q \psi(q;x)\in\mcl Q,\quad \Ll|\nabla_q\psi(q;x)\Rr|_{L^\infty}\leq 1,\label{e.p.psi_smooth_1} \\ \Ll|\psi(q';x)-\psi(q;x)-\la q'-q,\,\nabla_q\psi(q;x)\ra_{L^2}\Rr|\leq 8|q-q'|_{L^2}^2,\label{e.frechet_bound} \\ \Ll|\nabla_q \psi(q;x) - \nabla_q \psi(q';x')\Rr|_{L^\sfr}\leq 16|q-q'|_{L^r}+ 4|h|_{L^\infty}|x-x'|.\label{e.p.psi_smooth_2} \end{gather} \item \label{i.p.psi_smooth(3)} For each $q\in\mcl Q_1$, the function $\psi(q;\scdot)$ is differentiable everywhere on $\R^d$ and its derivative satisfies, for $q,q'\in\mcl Q_1$, $x,x'\in\R^d$, \begin{gather} |\nabla_x\psi(q;x)|\leq |h|_{L^\infty} ,\label{e.smooth(3).1} \\ \Ll|\psi(q;x')-\psi(q;x)-(x'-x)\cdot\nabla_x\psi(q;x)\Rr|\leq |h|_{L^\infty}^2|x-x'|^2,\label{e.smooth(3).1.5} \\ \Ll|\nabla_x \psi(q;x) - \nabla_x \psi(q';x')\Rr|\leq 8|h|_{L^\infty}|q-q'|_{L^1}+ 2|h|^2_{L^\infty}|x-x'|.\label{e.smooth(3).2} \end{gather} \item \label{i.p.psi_smooth(4)} For each $x\in\R^d$, the function $\nabla_q\psi(\scdot;x) :\: \mcl Q_{2\sfs} \to L^\sfs$ is Fréchet differentiable everywhere for every $\sfs \in[1,+\infty]$ and satisfies, for $q,q'\in\mcl Q_{2\sfs}$, \begin{align}\label{e.psi_smooth(4).0} \Ll| \nabla_q\psi(q';x)-\nabla_q\psi(q;x) - \nabla^2_q\psi(q;x)(q'-q)\Rr|_{L^\sfs}\leq C|q-q'|_{L^{2\sfs}}^2/2, \end{align} for some absolute constant $C>0$ independent of $x$ and $\sfs$, where $\nabla_q^2\psi(q;x)$ is viewed as a bounded linear operator from $L^{2\sfs}$ to $L^\sfs$ (but actually enjoys better regularity as in~\eqref{e.psi_smooth(4).1}). The derivative $\nabla^2_q\psi(\scdot;x)$ can be extended to $\mcl Q_1$ and satisfies, for every $q,q'\in\mcl Q_1$ and every $\sfp,\sfq\in[1,+\infty]$ satisfying $\frac{1}{\sfp}+\frac{1}{\sfq}\leq 1$, \begin{gather} \Ll\|\nabla^2_q\psi(q;x)\Rr\|_{\mathrm{Op}(L^\sfp;L^\sfp)} \leq 16, \label{e.psi_smooth(4).1} \\ \Ll\|\nabla^2_q\psi(q;x)-\nabla^2_q\psi(q';x)\Rr\|_{\mathrm{Op}(L^\sfp; L^\frac{\sfp\sfq}{\sfp+\sfq})} \leq C\Ll|q-q'\Rr|_{L^\sfq},\label{e.psi_smooth(4).2} \end{gather} for the same constant $C$ in~\eqref{e.psi_smooth(4).0}. \end{enumerate} \end{proposition} One useful case of~\eqref{e.psi_smooth(4).2} is when $\sfp=\sfq=2\sfs$ for some $\sfs\in[1,+\infty]$ and thus $\frac{\sfp\sfq}{\sfp+\sfq}=\sfs$. \begin{proof}[Proof of Proposition~\ref{p.psi_smooth}] The argument is based on computing directional derivatives in $q$ and $x$. The computation is straightforward for the latter, and we explain the former. Fix any $q,q'\in\mcl Q_\sfs$ and let $\lambda\in[0,1]$. We want to compute the derivative in $\lambda$ along $\lambda\mapsto q+\lambda(q'-q)$. The key observation is that $w^{q+\lambda(q'-q)}(\alpha)$ inside $\psi(q+\lambda(q'-q))$ (see~\eqref{e.psi(q;x)=}) has the same law as $\sqrt{1-\lambda}w^q(\alpha) + \sqrt{\lambda}w^{q'}(\alpha)$ where $w^q(\alpha)$ and $w^{q'}(\alpha)$ are independent. Hence, we can carry out the computation in terms of $\sqrt{1-\lambda}w^q(\alpha) + \sqrt{\lambda}w^{q'}(\alpha)$. The recipe is as follows: first, we differentiate a quantity in $\lambda$ to get an expression involving $w^q(\alpha)$ and $w^{q'}(\alpha)$, and then we apply the Gaussian integration by parts~\cite[Theorem~4.6]{HJbook} to them to get the final expression. Since these procedures are standard, we omit the intermediate steps and refer to the computations in the proof of~\cite[Proposition~5.1]{chenmourrat2023cavity} as an example. Throughout the proof, we use the following notation. For $\kappa\in L^\infty([0,1];\S^D)$ and $\ell,\ell'\in\N$, we write \begin{align*} R^{\ell,\ell'}_\kappa = \kappa\Ll(\alpha^\ell\wedge\alpha^{\ell'}\Rr)\cdot \tau^\ell(\tau^{\ell'})^\intercal. \end{align*} where $(\tau^\ell)_{\ell\in\N}$ are independent samples from some Gibbs measure that will be clear from the context. For each $a\in \R^d$ and $\ell\in\N$, we write \begin{align*} h^\ell = h\Ll(\tau^\ell,\chi_1\Rr) \qquad \text{and}\qquad h^\ell_a = a\cdot h\Ll(\tau^\ell,\chi_1\Rr). \end{align*} In the following, let $\lambda\in[0,1]$. Recall the definition of the Gibbs measure $\langle \cdot \rangle_{N;t,q}^G$ in \eqref{e.<>_Ntqsx=}. For $q,q'\in \mcl Q_\infty$ and $x,x'\in\R^d$, we write \begin{align*} \la\scdot\ra_\lambda =\la\scdot\ra_{1;0,q+\lambda(q'-q)}^{m \mapsto m \cdot(x+\lambda(x'-x))}, \end{align*} % \begin{align*} % \la\scdot\ra_0 =\la\scdot\ra_{\fR,q,\,x}\qquad \text{and}\qquad \la\scdot\ra_\lambda =\la\scdot\ra_{\fR,q+\lambda(q'-q),\,x+\lambda(x'-x)} % \end{align*} where it is also possible that we take $q=q'$ or $x=x'$, which will be clear from the context. Since $\alpha^\ell\wedge\alpha^{\ell'}$ distributes uniformly over $[0,1]$ under $\E\la\scdot\ra_\lambda$ for $\ell\neq\ell'$ and $\Ll|\tau\tau^\intercal\Rr|\leq1$ due to the support of $\tau$ on the unit ball in $\R^D$, we often use the the following estimates: \begin{align}\label{e.HolderE<>} \E\la\Ll| R_{\kappa}^{\ell,\ell'}\Rr|\ra_\lambda \leq |\kappa|_{L^1}\qquad \text{and}\qquad \E\la\Ll| R_{\kappa}^{\ell,\ell'}R_{\eta}^{\ell'',\ell'''}\Rr|\ra_\lambda \leq |\kappa|_{L^\sfp}|\eta|_{L^\sfq}, \end{align} where $\ell\neq \ell'$, $\ell''\neq\ell'''$, $\kappa,\eta\in L^\infty([0,1];\S^D)$, and $\frac{1}{\sfp}+\frac{1}{\sfq}=1$. The last inequality follows from the H\"older's inequality applied to the measure $\E\la\scdot\ra_\lambda$. Later, we will also use the generalization of the last one to three components. We also mention that the correction term $-q(1)\cdot\tau\tau^\intercal$ in~\eqref{e.psi(q;x)=} becomes $-(q+\lambda(q'-q))(1)\cdot\tau\tau^\intercal$ when we compute the direction derivative. The presence of this term will cancel all $R^{\ell,\ell}_\kappa$ and thus in the final expression we only have terms $R^{\ell,\ell'}_\kappa$ with $\ell\neq\ell'$. This is an important reason for the usefulness of the correction. Lastly, slightly abusing the notation, we denote by $\la\scdot,\,\scdot\ra_{L^2}$ not only the inner product in $L^2([0,1];\S^D)$ but also the natural paring in $L^\sfp([0,1];\S^D)$ and its dual. We also use the shorthand $L^\sfp=L^\sfp([0,1];\S^D)$. The only exception is that $L^\infty$ in $|h|_{L^\infty}$ means $L^\infty(\R^D\times \R^L;\R^d)$. With the above setup, we are ready to prove the lemma part by part. \textit{Part~\eqref{i.p.psi_smooth(1)}.} Recall that $\psi$ is initially defined on $\mcl Q_\infty\times \R^d$ as in~\eqref{e.psi(q;x)=}. For $q,q'\in \mcl Q_\infty$ and $x,x'\in\R^d$, we can compute \begin{align}\label{e.d/dlambdapsi()=E-E} \frac{\d}{\d\lambda}\psi\Ll(q+\lambda(q'-q);x+\lambda(x'-x)\Rr) = \E \la R^{1,2}_{q'-q}\ra_\lambda - \E \la h^1_{x'-x} \ra_\lambda. \end{align} Using~\eqref{e.HolderE<>}, the last term is bounded by $|q'-q|_{L^1}+ |h|_{L^\infty}$. Therefore, we get that~\eqref{e.psi_Lip} holds for $q,q'\in \mcl Q_\infty$ and $x,x'\in\R^d$. By this continuity, we can extend the definition of $\psi$ to $\mcl Q_1\times \R^d$ and accordingly extend~\eqref{e.psi_Lip} as desired. \textit{Part~\eqref{i.p.psi_smooth(2)}.} For each $q\in\mcl Q_\infty$ and $x\in\R^d$, we define $\nabla_q\psi(q;x)\in L^2$ as the element determined through \begin{align}\label{e.nabla_qpsi(q;x)=} \la \kappa,\, \nabla_q\psi(q;x)\ra_{L^2} = \E \la R^{1,2}_\kappa\ra_0,\quad\forall \kappa \in L^\infty. \end{align} Indeed, setting $x=x'$ in~\eqref{e.d/dlambdapsi()=E-E}, we get \begin{align}\label{e.d/dlambdapsi=<>} \frac{\d}{\d\lambda}\psi(q+\lambda(q'-q);x)= \la q'-q, \nabla_q\psi(q;x)\ra_{L^2},\qquad\forall q,q'\in \mcl Q_\infty,\, x\in\R^d. \end{align} This suggests that $\nabla_q\psi(q;x)$ is the correct derivative, which we will justify later after gathering enough properties. Since $\la R^{1,2}_{\kappa}\ra_\lambda \leq |\kappa|_{L^1}$ due to~\eqref{e.HolderE<>} and the density of $L^\infty$ in $L^2$, we get from~\eqref{e.nabla_qpsi(q;x)=} that \begin{align}\label{e.|nablapsi|<1} \Ll|\nabla_q\psi(q;x)\Rr|_{L^\infty} \leq 1,\qquad\forall q \in\mcl Q_\infty,\, x\in\R^d. \end{align} For $\kappa\in L^\infty$, $q,q'\in\mcl Q_\infty$, and $x\in\R^d$, we can compute \begin{align}\label{e.d/dlambda=} \frac{\d}{\d\lambda}\la \kappa,\, \nabla_q\psi(q+\lambda(q'-q);x)\ra_{L^2}=2\E\la R^{1,2}_\kappa\Ll(R^{1,2}_{q'-q}-4R^{1,3}_{q'-q}+3R^{3,4}_{q'-q}\Rr)\ra_{\lambda}. \end{align} By~\eqref{e.HolderE<>}, the right-hand side is bounded by $16|\kappa|_{L^{\sfr^*}}|q-q'|_{L^\sfr}$ with $\frac{1}{\sfr^*}+\frac{1}{\sfr}=1$. Integrating in $\lambda$ and using the density of $L^\infty$ in $L^{\sfr^*}$, we get \begin{align}\label{e.|nablapsi-nablapsi|<16|q-q'|} \Ll|\nabla_q\psi(q;x) - \nabla_q\psi(q';x)\Rr|_{L^\sfr}\leq 16|q-q'|_{L^\sfr},\qquad\forall q,q'\in\mcl Q_\infty,\, x\in\R^d. \end{align} Setting $\sfr=1$ and using~\eqref{e.|nablapsi|<1}, we extend $\nabla\psi(q,x)$ by continuity to $q\in\mcl Q_1$ and achieve the $L^\infty$-bound in~\eqref{e.p.psi_smooth_1}. First property in~\eqref{e.p.psi_smooth_1} is proved in~\cite[Corollary~5.2]{chenmourrat2023cavity}. Setting $\kappa=q'-q$ in~\eqref{e.d/dlambda=} and $\sfr=2$ in the bound mentioned below~\eqref{e.d/dlambda=} and comparing with~\eqref{e.d/dlambdapsi=<>}, we get $\Ll|\frac{\d^2}{\d\lambda^2}\psi(q+\lambda(q'-q);x)\Rr|\leq 16|q-q'|_{L^2}$. This, along with~\eqref{e.d/dlambdapsi=<>} and Taylor's expansion, gives that for any fixed $x\in\R^d$, \begin{align}\label{e.|...|<8|q-q'|^2} \Ll|\psi(q';x)-\psi(q;x)-\la q'-q,\,\nabla_q\psi(q;x)\ra_{L^2}\Rr|\leq 8|q-q'|^2_{L^2},\quad \forall q,q'\in\mcl Q_\infty. \end{align} Since we have shown that $\psi(\scdot;x)$ and $\nabla_q\psi(\scdot;x)$ can be extended continuously to $\mcl Q_1$ and that the latter is also bounded, we can now extend~\eqref{e.|...|<8|q-q'|^2} to $q,q'\in\mcl Q_2$. This proves that $\psi(\scdot;x)$ is Fréchet differentiable everywhere on $\mcl Q_2$ satisfying~\eqref{e.frechet_bound} and that $\nabla_q\psi(q;x)$ defined via~\eqref{e.nabla_qpsi(q;x)=} is its derivative. To complete this part, it remains to show~\eqref{e.p.psi_smooth_2}. For $\kappa\in L^\infty$, $q\in\mcl Q_\infty$ and $x,x'\in\R^d$, we compute from~\eqref{e.nabla_qpsi(q;x)=} that \begin{align*} \frac{\d}{\d\lambda}\la \kappa,\, \nabla_q \psi(q; x+\lambda(x'-x))\ra_{L^2}= \E \la R^{1,2}_\kappa\Ll(h^1_{x'-x}+ h^1_{x'-x}- 2h^1_{x'-x}\Rr) \ra_\lambda. \end{align*} Using~\eqref{e.HolderE<>} to bound the right-hand side by $4|h|_{L^\infty}|\kappa|_{L^1}|x-x'|$ and using the density of $L^\infty$ in $L^1$, we get \begin{align*} \Ll|\nabla_q \psi(q;x)-\nabla_q \psi(q;x')\Rr|\leq 4|\kappa|_{L^1}|x-x'|,\qquad\forall q\in\mcl Q_\infty,\ x,x'\in\R^d. \end{align*} As before, we can extend this relation to $q\in\mcl Q_1$, which along with~\eqref{e.|nablapsi-nablapsi|<16|q-q'|} extended to $\mcl Q_1$ yields~\eqref{e.p.psi_smooth_2}. \textit{Part~\eqref{i.p.psi_smooth(3)}.} We can easily compute \begin{align*} \nabla_x\psi(q;x) = - \E \la h^1\ra_0,\quad \Ll|\nabla_x\psi(q;x)\Rr|\leq |h|_{L^\infty},\qquad\forall q\in\mcl Q_\infty,\, x\in\R^d. \end{align*} For $q,q'\in \mcl Q_\infty$ and $x\in\R^d$, we can then compute \begin{align*} \frac{\d}{\d\lambda}\nabla_x \psi(q+\lambda(q'-q);x) = 4 \E \la h^1\Ll(R^{1,2}_{q'-q}-R^{2,3}_{q'-q}\Rr) \ra_\lambda. \end{align*} Using~\eqref{e.HolderE<>} to bound the right-hand side, we get \begin{align}\label{e.|nabla_xpsi-nabla_xpsi|<8|h||q-q'|} \Ll|\nabla_x \psi(q;x)-\nabla_x \psi(q';x)\Rr|\leq 8|h|_{L^\infty}|q-q'|_{L^1} \qquad \forall q\in\mcl Q_\infty,\, x\in\R^d. \end{align} Therefore, we can extend $\nabla_x\psi(q;x)$ by continuity to $q\in\mcl Q_1$ and get the bound in~\eqref{e.smooth(3).1}. Notice that we have yet to verify that the extended $\nabla_x\psi(q;x)$ is indeed the derivative at $q\in\mcl Q_1$. To do so, we start by computing, for $q\in\mcl Q_\infty$ and $x,x'\in\R^d$, \begin{align}\label{e.d/dlambdanabla_xpsi=-E} \frac{\d}{\d\lambda}\nabla_x\psi(q;x+\lambda(x'-x)) = - \E \la h^1\Ll(h^1_{x'-x}-h^2_{x'-x}\Rr) \ra_\lambda, \end{align} which implies \begin{align*} \Ll|\nabla_x\psi(q;x)-\nabla_x\psi(q;x')\Rr| \leq 2|h|_{L^\infty}^2|x-x'|,\quad\forall q\in\mcl Q_\infty,\ x,x'\in\R^d. \end{align*} Again, \eqref{e.|nabla_xpsi-nabla_xpsi|<8|h||q-q'|} allows us to extend this bound to $q\in \mcl Q_1$, which along with the extension of~\eqref{e.|nabla_xpsi-nabla_xpsi|<8|h||q-q'|} to $\mcl Q_1$ yields~\eqref{e.smooth(3).2}. Multiply both sides in~\eqref{e.d/dlambdanabla_xpsi=-E} by $x'-x$, we can get $\Ll|\frac{\d}{\d\lambda}\psi(q;x+\lambda(x'-x))\Rr|\leq 2|h|_{L^\infty}^2|x-x'|^2$, which by Taylor's expansion gives that, uniformly in $x,x'\in\R^d$, \begin{align*} \Ll|\psi(q;x')-\psi(q;x)-(x'-x)\cdot\nabla_x\psi(q;x)\Rr|\leq |h|_{L^\infty}^2|x-x'|^2,\quad \forall q \in\mcl Q_\infty. \end{align*} As before, we can extend this by continuity to $q\in\mcl Q_1$. This verifies that $\nabla\psi(q;\scdot)$ is differentiable at $q\in\mcl Q_1$, \eqref{e.smooth(3).1.5}, and $\nabla_x\psi(q;x)$ is indeed the derivative. \textit{Part~\eqref{i.p.psi_smooth(4)}.} We fix any $x\in\R^d$ and write $\psi=\psi(\scdot;x)$. For any $\sfp\in[0,+\infty]$ and $q\in\mcl Q_\infty$, we define $\nabla^2_q\psi(q)$ as a linear operator from $L^\sfp$ to $L^\sfp$ by setting \begin{align}\label{e.nabla^2psi=} \la \kappa,\, \nabla^2_q \psi(q)\eta\ra_{L^2} = 2\E\la R^{1,2}_\kappa\Ll(R^{1,2}_\eta-4R^{1,3}_\eta+3R^{3,4}_\eta\Rr)\ra_0,\quad\forall\kappa,\eta\in L^\infty. \end{align} Notice that this definition can be made simultaneously for all $\sfp$. Using~\eqref{e.HolderE<>}, we can bound the right-hand side by $16|\kappa|_{L^{\sfp^*}}|\eta|_{L^\sfp}$ with $\frac{1}{\sfp^*}+\frac{1}{\sfp}=1$, which implies \begin{align}\label{e.|nabla^2psi|<16} \Ll\|\nabla^2_q\psi(q)\Rr\|_{\mathrm{Op}(L^\sfp;L^\sfp)} \leq 16,\qquad\forall q \in \mcl Q_\infty. \end{align} Comparing with~\eqref{e.d/dlambda=}, we get, for $q,q'\in\mcl Q_\infty$ and $\kappa \in L^\infty$, \begin{align}\label{e.part(4)_1} \frac{\d}{\d\lambda}\la \kappa,\, \nabla_q\psi(q+\lambda(q'-q))\ra_{L^2}= \la \kappa,\, \nabla^2_q \psi(q+\lambda(q'-q))(q'-q)\ra_{L^2}. \end{align} We will extend the definition of $\nabla^2_q\psi$ to $\mcl Q_1$ and verify that it is indeed a Fréchet derivative. But first, we need to derive some estimates. For $\kappa,\eta\in L^\infty$ and $q,q'\in\mcl Q_\infty$, we can compute from~\eqref{e.nabla^2psi=} that \begin{align}\label{e.part(4).2} \frac{\d}{\d\lambda}\la \kappa,\, \nabla^2_q\psi(q+\lambda(q'-q))\eta\ra_{L^2} = \E\la R^{1,2}_\kappa\Ll(R^{1,2}_\eta-4R^{1,3}_\eta+3R^{3,4}_\eta\Rr)\mcl P\Ll(R_{q'-q}\Rr)\ra_\lambda, \end{align} where $\mcl P$ is a homogeneous polynomial of degree one and depends on finitely many entries in $R_{q'-q} = \big(R^{\ell,\ell'}_{q'-q}\big)_{\ell\neq \ell'}$. Using an extension of the estimate in~\eqref{e.HolderE<>}, we can bound the right-hand side in~\eqref{e.part(4).2} by $C|\kappa|_{L^\sfr}|\eta|_{L^\sfp}|q-q'|_{L^\sfq}$ for some absolute constant $C>0$, where $\frac{1}{\sfr}+\frac{1}{\sfp}+\frac{1}{\sfq}=1$. Notice that the conjugate of $\sfr$ is $\sfr^*= \frac{\sfp\sfq}{\sfp+\sfq}$. From this, we can get \begin{align}\label{e.|nabla^2psi-nabla^2psi|_Op<|q-q'|} \Ll\|\nabla^2_q\psi(q)-\nabla^2_q\psi(q')\Rr\|_{\mathrm{Op}(L^\sfp; L^\frac{\sfp\sfq}{\sfp+\sfq})}\leq C\Ll|q-q'\Rr|_{L^\sfq},\quad\forall q,q'\in\mcl Q_\infty. \end{align} In this relation, setting $\sfr=\sfp=\infty$ and $\sfq=1$, we can thus extend by continuity the definition of $\nabla^2_q\psi(q)$ to every $q\in\mcl Q_1$ as a bounded linear operator from $L^\infty$ to $L^1$. Then, we want to accordingly extend~\eqref{e.|nabla^2psi|<16}. Let $q\in \mcl Q_1$ and let $(q_n)_{n\in\N}$ be a sequence in $\mcl Q_\infty$ convergent to $q$ in $L^1$. Then, for any $\kappa,\eta\in L^\infty$, we have \begin{align*} \la \kappa,\, \nabla^2_q\psi(q)\eta\ra_{L^2} \leq \la \kappa,\, \nabla^2_q\psi(q_n)\eta\ra_{L^2} + \la \kappa,\, \Ll(\nabla^2_q\psi(q)-\nabla^2_q\psi(q_n)\Rr)\eta\ra_{L^2}. \end{align*} By~\eqref{e.|nabla^2psi-nabla^2psi|_Op<|q-q'|} with $\sfr=\sfp=\infty$ and $\sfq=1$, the last term vanishes as $n\to\infty$. Taking this limit and using~\eqref{e.|nabla^2psi|<16} for $q_n$, we can obtain from the above display that~\eqref{e.|nabla^2psi|<16} holds for $q\in\mcl Q_1$, which gives~\eqref{e.psi_smooth(4).1} as desired. Therefore, for every $q\in\mcl Q_1$, $\nabla^2_q\psi(q)$ is also a bounded operator from $L^\sfp\to L^\sfp$. Due to $\sfp\geq \frac{\sfp\sfq}{\sfp+\sfq}$, it is also an operator from $L^\sfp\to L^\frac{\sfp\sfq}{\sfp+\sfq}$. Therefore, we can extend~\eqref{e.|nabla^2psi-nabla^2psi|_Op<|q-q'|} by continuity to $q,q'\in \mcl Q_1$, which is exactly~\eqref{e.psi_smooth(4).2}. Lastly, we verify that $\nabla^2_q\psi(q)$ indeed defines a Fréchet derivative. Setting $\eta=q'-q$ in~\eqref{e.part(4).2}, comparing it with~\eqref{e.part(4)_1}, and using the estimate mentioned below~\eqref{e.part(4).2} with $\sfr=\frac{\sfs}{\sfs-1}$ and $\sfp=\sfq=2\sfs$, we get, for $\kappa\in L^\infty$ and $q,q'\in\mcl Q_\infty$, \begin{align*} \Ll|\frac{\d^2}{\d\lambda^2}\la \kappa,\, \nabla_q\psi(q+\lambda(q'-q))\ra_{L^2}\Rr|\leq C|\kappa|_{L^\frac{\sfs}{\sfs-1}} |q-q'|_{L^{2\sfs}}^2. \end{align*} Using this, Taylor's expansion, and~\eqref{e.part(4)_1}, we get \begin{align*} \Ll|\la\kappa,\, \nabla_q\psi(q')-\nabla_q\psi(q) - \nabla^2_q\psi(q)(q'-q)\ra_{L^2}\Rr|\leq C|\kappa|_{L^\frac{\sfs}{\sfs-1}} |q-q'|_{L^{2\sfs}}^2/2. \end{align*} and thus \begin{align*} \Ll| \nabla_q\psi(q')-\nabla_q\psi(q) - \nabla^2_q\psi(q)(q'-q)\Rr|_{L^\sfs}\leq C|q-q'|_{L^{2\sfs}}^2/2,\quad\forall q,q'\in\mcl Q_\infty. \end{align*} We can extend this to $q,q'\in\mcl Q_{2\sfs}$ by using~\eqref{e.p.psi_smooth_2} (with $\sfr =\sfs$), \eqref{e.psi_smooth(4).1} (with $\sfp=\sfs$), and~\eqref{e.psi_smooth(4).2} (with $\sfp=\sfq=2\sfs$). This implies the statement on the Fréchet differentiability of this part and~\eqref{e.psi_smooth(4).0}, completing the proof. \end{proof} We also record a simple result based on Proposition~\ref{p.psi_smooth}. Define the set \begin{align}\label{e.Q_infty,<1} \mcl Q_{\infty, \leq 1} = \Ll\{p\in\mcl Q_\infty:\: |p|_{L^\infty}\leq 1\Rr\}. \end{align} For every $t\geq0$, $p ,q \in \mcl Q_\infty$, and $x\in\R^d$, we define % \begin{e} \label{e.P_t,q(p;x)=} \msc P_{t,q}(p;x) = \psi(q +t \nabla \xi(p);x) - t \int_0^1 \theta(p(u)) \d u. \end{e} %Recall the definition of the function $\msc P_{t,q}(\scdot;x):\mcl Q_{\infty, \leq 1} \to \R$ in \eqref{e.P_t,q(p;x)=}. \begin{lemma}\label{l.sP(p^x)_critical} Let $t\in\R_+$ and $q\in\mcl Q_2$. There is a constant $C > 0$ such that, for every $x \in \R^d$, if $p^x \in \mcl Q_{\infty,\leq 1}$ satisfies \begin{align}\label{e.p^x=nablapsi} p^x = \nabla_q \psi\Ll(q+t\nabla \xi(p^x);x\Rr), \end{align} then we have \begin{align}\label{e.l.sP(p^x)_critical} \Ll|\msc P_{t,q}(p;x) - \msc P_{t,q}(p^x;x)\Rr| \leq C|p-p^x|_{L^\infty}^2,\quad\forall p \in \mcl Q_{\infty,\leq 1}. \end{align} \end{lemma} \begin{proof} For brevity of notation, we write $\psi(\scdot) = \psi(\scdot;x)$. Let $p\in\mcl Q_{\infty,\leq 1}$ and let $B$ be given as in~\eqref{e.B=}. Then, as paths, both $p$ and $p^x$ takes value in $B$. Let $C_1=8t^2\Ll\|\nabla \xi\lfloor_{B}\Rr\|_{\mathrm{Lip}(B;\R^{D\times D})}$, using~\eqref{e.p^x=nablapsi} and the Fréchet differentiability of $\psi$ as in~\eqref{e.frechet_bound}, we have \begin{align*} \Ll|\psi(q+t \nabla\xi(p))-\psi(q+t \nabla\xi(p^x))-\la p^x , t\nabla\xi(p)-t\nabla\xi(p^x)\ra_{L^2}\Rr| \\ \leq 8\Ll|t\nabla\xi(p)-t\nabla\xi(p^x)\Rr|^2_{L^2} \leq C_1|p-p^x|^2_{L^\infty}. \end{align*} Recall $\theta$ from~\eqref{e.theta=}. For each $a \in\R^{D\times D}$, we interpret $\nabla^2 \xi(a)$ as the linear operator on $\R^{D\times D}$ and thus we can interpret $\nabla \theta(a) = a\cdot\nabla^2\xi(a)$ as the linear operator $\R^{D\times D}$ defined by $a'\mapsto a\cdot\nabla^2\xi(a)a'$. Using the smoothness of $\theta$ over $B$, for some constant $C_2$ only depending on $\xi$, we also have \begin{align*} \Ll|t \int_0^1\theta(p) -t \int_0^1\theta(p^x) - \la p^x , t\nabla^2\xi(p^x)(p-p^x) \ra_{L^2}\Rr| \leq tC_2\Ll|p-p^x\Rr|^2_{L^\infty}, \end{align*} where $\nabla^2\xi(p^x)(p-p^x)$ is interpreted pointwise as the path $[0,1]\ni s\mapsto \nabla^2\xi(p^x(s))(p(s)-p^x(s))$. Similarly, for some constant $C_3$ only depending on $\xi$, we have \begin{align*} \Ll|\la p^x , t\nabla\xi(p)-t\nabla\xi(p^x)\ra_{L^2} - \la p^x , t\nabla^2\xi(p^x)(p-p^x) \ra_{L^2} \Rr|\leq t C_3\Ll|p-p^x\Rr|^2_{L^\infty}. \end{align*} Combining the above three displays, we get~\eqref{e.l.sP(p^x)_critical}. \end{proof} \section{Short-time smooth solution of the Hamilton--Jacobi equation} \label{s.short_time} We fix $x \in \R^d$. In this section, we are interested in the partial differential equation % \begin{align}\label{e.hj} \begin{cases} \partial_t f - \int \xi(\nabla_q f) =0 \text{ on } (0,+\infty) \times \mcl Q_2 \\ f(0,\cdot) = \psi(\scdot;x). \end{cases} \end{align} % Equation \eqref{e.hj} is a so-called Hamilton--Jacobi equation. First, we explain the exact meaning of the term $\int \xi(\nabla_q f)$. Here, $\nabla_q f$ is path-valued. More precisely, $\nabla_q f(t,q)$ is equal to some path $p:[0,1)\to\S^D$. The nonlinearity $\int \xi(\nabla_q f(t,q))$ is understood as $\int_0^1\xi(p(s))\d s$. We define $X(\cdot,\scdot;x) : \R_+\times \mcl Q_\infty \to L^\infty$ by % \begin{align}\label{e.X(t,q)=} X(t,q;x) = q - t\nabla\xi(\nabla_q\psi(q;x)). \end{align} % In view of Proposition~\ref{p.psi_smooth}~\eqref{i.p.psi_smooth(2)}, the definition of $X(\cdot,\scdot;x)$ can be extended to $\R_+\times \mcl Q_1$. However, we restrict the domain to $\R_+\times \mcl Q_\infty$ for better regularity. The family curves $t \mapsto X(t,q;x)$ parametrized by $q$ ($x$ is fixed) can be understood as the characteristic lines of equation \eqref{e.hj}. More precisely, it can be checked that if $f$ is a smooth solution of \eqref{e.hj}, then % \begin{e} \frac{\d}{\d t} \nabla_q f(t,X(t,q;x);x) = 0. \end{e} % Thanks to this, one can verify by differentiation that we must have % \begin{e} \label{e.f along char} f(t,X(t,q;x);x) = \msc P_{t,q}(p;x), \end{e} % where $p = \nabla_q f(t,X(t,q;x);x) = \nabla_q \psi(q;x)$ and $\msc P_{t,q}$ is the functional defined in \eqref{e.P_t,q(p;x)=}. If the map $q \mapsto X(t,q;x)$ is a bijection for every $t > 0$, then \eqref{e.f along char} uniquely defines a function $f$ and it is possible to verify that $f(\cdot,\scdot;x)$ is a solution of \eqref{e.hj}. However, at least in finite dimensional spaces, the characteristic lines can cross. So, the function $q \mapsto X(t,q;x)$ is sometimes non-injective. This means that the recipe given by \eqref{e.f along char} to define a differentiable solution may not work. Even worse, Hamilton--Jacobi equations may sometimes not have any differentiable solutions \cite[Section~3.2.5.c Example 6]{evans}. To restore the existence and uniqueness of solutions, one needs to appeal to the notion of viscosity solution \cite[Section~10]{evans}. In this setting, the notion of solution is weakened in order to also allow non-differentiable functions to solve the equation. This theory can be extended to infinite dimensional spaces, and it can be shown that \eqref{e.hj} also admits a unique viscosity solution \cite{chen2022hamilton}. The goal of this section is to show that on the time interval $[0,t_\star)$, the characteristic lines do not cross. This allows us to define via \eqref{e.f along char} a differentiable solution $f(\cdot,\scdot;x)$ of \eqref{e.hj} on the interval $[0,t_\star)$. In addition, by the uniqueness of the viscosity solution, the function $f(\cdot,\scdot;x)$ we build must coincide with the viscosity solution. In the end, the differentiability of the viscosity solution on the interval $[0,t_\star)$ is going to be one of the key ingredients in the verification of \eqref{e.def.phi}. Recall the definition of the operator norm in \eqref{e.def.opnorm}. We recall that to improve readability, we sometimes abbreviate $\nabla_q$ to $\nabla$. \begin{lemma}\label{l.invert_grad_X} Fix any $x\in\R^d$. The function $X(\cdot,\scdot;x) :\R_+\times \mcl Q_\infty\to L^\infty$ is Fréchet differentiable. At each $(t,q) \in \R_+\times \mcl Q_\infty$, the derivative $\nabla_q X(t,q;x)$ is a bounded linear operator from $L^\infty$ to $L^\infty$ with expression \begin{align}\label{e.nablaX(t,q)=1-...} \nabla X(t,q;x) = \mathbf{I}-t\nabla^2\xi(\nabla\psi(q;x))\nabla^2 \psi(q;x), \end{align} where $\mathbf{I}:L^\infty\to L^\infty$ is the identity operator. Moreover, we have \begin{align}\label{e.|I-nablaX|_Op<} \Ll\|\mathbf{I}-\nabla X(t,q;x)\Rr\|_{\mathrm{Op}(L^\infty; L^\infty)}\leq t/t_\star. \end{align} As a consequence, if $t0$ such that whenever $|q'-q|_{L^\infty}\leq \eps$, we have $\Ll|\nabla^2\psi(q)(q'-q) + p_\mathsf{error}\Rr|_{L^\infty}\leq 1$. In other words, $\nabla^2\psi(q)(q'-q) + p_\mathsf{error}$ is a path taking values in $B$. Hence, using the above two displays, we have that, when $|q'-q|_{L^\infty}\leq \eps$, $C_1 = \Ll\|\nabla^3\xi\lfloor_{B}\Rr\|_\infty = \Ll\|\nabla^2\xi\lfloor_{B}\Rr\|_\mathrm{Lip}$, we have, for $a,a' \in B$, \begin{e*} \nabla \xi(a') =\nabla \xi(a) + \nabla^2\xi(a)(a'-a) + \mathsf{error}_a(a'-a), \end{e*} where $|\mathsf{error}_a(a'-a)|\leq C_1|a'-a|^2$. By~\eqref{e.p.psi_smooth_1}, $\nabla\psi(q)$ and $\nabla\psi(q')$ are paths taking values in $B$. Hence, using the above two displays, we have that, % \begin{align*} \nabla\xi\Ll(\nabla\psi(q')\Rr) &= \nabla\xi\Ll(\nabla\psi(q)\Rr) + \nabla^2\xi\Ll(\nabla\psi(q')\Rr)\nabla^2\psi(q)(q'-q) \\ &+ \nabla^2\xi\Ll(\nabla\psi(q')\Rr)p_\mathsf{error}+\mathsf{error}_{\nabla\psi(q)}\Ll(\nabla \psi(q') - \nabla \psi(q)\Rr). \end{align*} Again by~\eqref{e.psi_smooth(4).1}, the last two terms are bounded in the $L^\infty$-norm by $|q'-q|_{L^\infty}^2$. This shows the Fréchet differentiability of the nonlinear part in $X(t,\scdot)$ (see~\eqref{e.X(t,q)=}), which implies the differentiability of $X(t,\scdot)$ and~\eqref{e.nablaX(t,q)=1-...}. Next, we verify~\eqref{e.|I-nablaX|_Op<}. Let $\kappa\in L^\infty$ and recall the interpretation of~\eqref{e.interp_nablaX(t,q)=1-...}. Then, we can compute \begin{align} \label{e.|nabla^2xi(nablapsi)nabla^2psi<} \Ll|\nabla^2\xi\Ll(\nabla\psi(q')\Rr)\nabla^2\psi(q)\kappa\Rr|_{L^\infty} \stackrel{\eqref{e.p.psi_smooth_1}}{\leq} \sup_{a\in B}\Ll\|\nabla^2\xi(a)\Rr\|_{\mathrm{Op}(\R^{D\times D}; \R^{D\times D})}\Ll|\nabla^2\psi(q)\kappa\Rr|_{L^\infty} \notag \\ \stackrel{\eqref{e.psi_smooth(4).1}}{\leq}\sup_{a\in B}\Ll\|\nabla^2\xi(a)\Rr\|_{\mathrm{Op}(\R^{D\times D}; \R^{D\times D})}\Ll\|\nabla^2\psi(q)\Rr\|_{\mathrm{Op}(L^\infty; L^\infty)} |\kappa|_{L^\infty} \\ \leq t^{-1}_\star|\kappa|_{L^\infty}. \notag \end{align} % The last inequality in the display follows from \eqref{e.t_star=} once we have shown the following inequality % \begin{e}\label{e.|nabla^2psi|<|nablapsi|_Lip} \Ll\|\nabla^2\psi(q)\Rr\|_{\mathrm{Op}(L^\infty; L^\infty)} \leq \|\nabla\psi\|_\mathrm{Lip(\mcl Q_\infty,\mcl Q_\infty)}. \end{e} % % % We now prove \eqref{e.|nabla^2psi|<|nablapsi|_Lip}. Recall $\id$ is the path $s\mapsto s\Id$ and $\Id \in S^D_+$ is the $D \times D$ identity matrix. For each $n\in\N$, let $q_n = q + n^{-1}\id$. % Then, for each $n\in\N$ and any $\kappa\in L^\infty$, we have $q_n+\eps\kappa\in \mcl Q_\infty$ for sufficiently small $\eps$ (depending on $n$). % Using the differentiability of $\psi$ in Proposition~\ref{p.psi_smooth}~\eqref{i.p.psi_smooth(4)} (with $\sfs=\infty$), for any $\kappa,\kappa'\in L^\infty$, we have % \begin{align*} % \la \kappa', \nabla^2\psi(q_n)\kappa\ra_{L^2} = \lim_{\eps\to0}\la \kappa', \nabla\psi(q_n+\eps \kappa)-\nabla\psi(q_n)\ra_{L^2} % \end{align*} % which implies % \begin{align} \label{a.bizzare} % \Ll|\la \kappa', \nabla^2\psi(q_n)\kappa\ra_{L^2}\Rr|&\leq |\kappa|_{L^2}\Ll\|\nabla\psi\Rr\|_{\mathrm{Lip}(\mcl Q_2,\mcl Q_2)}|\kappa'|_{L^2} % \\ % &\leq|\kappa|_{L^\infty}\Ll\|\nabla\psi\Rr\|_{\mathrm{Lip}(\mcl Q_2,\mcl Q_2)}|\kappa'|_{L^\infty}. % \end{align} % This gives~\eqref{e.|nabla^2psi|<|nablapsi|_Lip} at $q_n\in\mcl Q_{\infty,\upa}$. Using ~\eqref{e.psi_smooth(4).2} from Proposition~\ref{p.psi_smooth} (with $\sfp=\sfq=\infty$) and the convergence of $q_n$ to $q$ in $L^\infty$, we can obtain~\eqref{e.|nabla^2psi|<|nablapsi|_Lip} at $q$. % By duality between the $L^\infty$ and $L^1$ norm, the previous display is equivalent to % \begin{e} \label{e.replacement} \Ll|\la \kappa', \nabla^2\psi(q_n)\kappa\ra_{L^2}\Rr| \leq |\kappa'|_{L^1} \|\nabla\psi\|_\mathrm{Lip(\mcl Q_\infty,\mcl Q_\infty)} |\kappa|_{L^\infty}, \quad \forall (\kappa,\kappa') \in L^\infty \times L^1. \end{e} % Let us prove this inequality. Recall $\id$ is the path $s\mapsto s\Id$ and $\Id \in S^D_+$ is the $D \times D$ identity matrix. Let $\kappa : [0,1] \to S^D$, and $\kappa \in L^\infty$. For now we assume that $\kappa$ is Lipschitz. For each $n\in\N$, let $q_n = q + n^{-1}\id$. Then, for each $n\in\N$ and any we have $q_n+\eps\kappa\in \mcl Q_\infty$ for sufficiently small $\eps$ (depending on $n$ and $\kappa$). Using the differentiability of $\psi$ in Proposition~\ref{p.psi_smooth}~\eqref{i.p.psi_smooth(4)} (with $\sfs=\infty$), for any $\kappa,\kappa'\in L^\infty$, we have \begin{align*} \la \kappa', \nabla^2\psi(q_n)\kappa\ra_{L^2} = \lim_{\eps\to0}\la \kappa', \frac{\nabla\psi(q_n+\eps \kappa)-\nabla\psi(q_n)}{\eps}\ra_{L^2}. \end{align*} This implies, \begin{align}\label{e.||0$ there is a constant $C>0$ such that for every $N\in\N$, every $x\in\R^d$ and every $q\in\mcl Q_1$ we have for all $t,t' \in [c,+\infty)$ and $\lambda\in[0,1]$, \begin{align*} (1-\lambda)\bar F_N(t)+ \lambda\bar F_N(t')- \bar F_N((1-\lambda)t+\lambda t'), \leq C\lambda(1-\lambda)(t-t')^2 \end{align*} where we used the shorthand $\bar F_N(\cdot) = \bar F^{m \mapsto x \cdot m}_N(\cdot,q)$. \end{lemma} \begin{proof} Recall the Hamiltonian $H^G_{N;t,q}(\sigma,\alpha)$ from~\eqref{e.H^t,q;s,x_N(sigma,alpha,chi)=}. For each $s\geq 0$, we set $H_s(\sigma,\alpha) = H^{m \mapsto x \cdot m}_{N;s^2,q}(\sigma,\alpha)$ and define \begin{align*} G(s) = - \frac{1}{N} \E\log \iint \exp\Ll(H_s(\sigma,\alpha) \Rr) \d P_1^{\otimes N}(\sigma) \d \mfk R(\alpha). \end{align*} In view of~\eqref{e.F_N(t,q;s,x)=}, we have $\bar F_N(t) = G(\sqrt{t})$. This implies, % \begin{e*} \bar F_N''(t) = \frac{G''(\sqrt{t})}{2 \sqrt{t}} - \frac{G'(\sqrt{t})}{t^\frac{3}{2}}. \end{e*} % The local semi-concavity of $\bar F_N$ can be reformulated as $\bar F_N''$ being upper-bounded on $[c,+\infty)$. The function $G'$ is bounded, this can be easily shown using Gaussian integration by parts. Hence, it suffices to show that the $G''$ is upper-bounded. Let $R=\lambda (1-\lambda)(s-s')^2 N\xi(\sigma\sigma^\intercal /N)$. Using the definition of $G$, we have \begin{align*} &(1-\lambda)G(s) + \lambda G(s') = -\frac{1}{N} \E\log \Ll(\cdots\Rr)^{1-\lambda}(\cdots)^\lambda \\ &\stackrel{\text{Jensen}}{\leq}-\frac{1}{N}\E\log \iint e^{(1-\lambda) H_s(\sigma,\alpha) + \lambda H_{s'}(\sigma,\alpha)} \d P_1^{\otimes N}(\sigma) \d \mfk R(\alpha) \\ &= -\frac{1}{N}\E\log \iint e^{H_{(1-\lambda)s + \lambda s'}(\sigma,\alpha) - R} \d P_1^{\otimes N}(\sigma) \d \mfk R(\alpha). \end{align*} Setting $M= \max_{a\in\S^D_+:\:|a|\leq 1}|\xi(a)|$, we have $R \leq \lambda(1-\lambda)(s-s')^2 N M$ and thus \begin{align*} (1-\lambda)G(s) + \lambda G(s') \leq G((1-\lambda)s + \lambda s') + \lambda(1-\lambda)(s-s')^2M. \end{align*} Setting $\lambda = \frac{1}{2}$ and $s'=s+\eps$ and sending $\eps\to0$ in this display, we get $G''(s)\leq 2M$, completing the proof as explained before. \end{proof} \begin{proof}[Proof of Proposition~\ref{p.id_limF}] Since $x$ is fixed, we omit it from the notation and we make the dependence of the fixed point $p$ in $(t,q)$ explicit by writing it $p^{t,q}$. The Arzelà--Ascoli theorem gives a subsequence along which the free energy converges to some limit $f:\R_+\times \mcl Q_2\to\R$. We recall from~\cite[Proposition~5.3]{chenmourrat2023cavity} that $f$ is Gateaux differentiable jointly in $t$ and $q$ on a dense subset of $\R_+\times \mcl Q_2$ (in the $L^2$-topology). We denote this set of Gateaux differentiable point as $G$. For each $(t,q)\in G$, \cite[Proposition~7.2]{chenmourrat2023cavity} gives $\partial_t f(t,q) = \int_0^1\xi(p'^{t,q})$ for some $p'^{t,q}\in\mcl Q_{\infty,\leq 1}$ that satisfies the fixed point equation $p'=\nabla_q\psi(q+t\nabla(p'))$. Lemma~\ref{l.unique crit point} implies $p'^{t,q}=p^{t,q}$ whenever $t0$ such that for sufficiently small $s$, \begin{align*} s^{-1}\Ll(f(t+s,q) - f(t,q)\Rr) &\leq s^{-1} \Ll(f(t_s+s,q_s) - f(t_s,q_s)\Rr) + C_1s \\ &\leq \int_0^1\xi(p^{t_s,q_s}) + C_2s, \end{align*} where we used the local semi-concavity of $f$ inherited from that enjoyed by the free energy as given in Lemma~\ref{l.semi-concave_in_t} below. For the converse bound, we choose $(t'_s,q'_s)$ satisfying $(t'_s+s,q'_s)\in G$ and $|t-t'_s|+|q-q'_s|_{L^2}\leq s^2$. By similar arguments, we get that there exists some absolute constants $C_3, C_4>0$, such that for sufficiently small $s$, \begin{align*} s^{-1}\Ll(f(t+s,q) - f(t,q)\Rr) &\geq s^{-1} \Ll(f(t'_s+s,q'_s) - f(t'_s,q'_s)\Rr) - C_3s \\ &\geq \int_0^1\xi(p^{t'_s+s,q'_s})- C_4s. \end{align*} By a similar argument as in~\eqref{e.t_star_used_here_2}, we can see that the map $[0,t_\star)\times \mcl Q_2\ni (t,q)\mapsto p^{t,q}$ is continuous in $L^\infty$. Therefore, sending $s\to0$ in the above two displays, we obtain the differentiability of $f(\cdot,q)$ and \begin{align*} \partial_t f(t,q) = \int_0^1\xi(p^{t,q}),\quad\forall (t,q)\in (0,t_\star)\times\mcl Q_2. \end{align*} Hence, for every $(t,q)\in (0,t_\star)\times\mcl Q_2$, we conclude \begin{align*} f(t,q) =\psi(q)+ \int_0^t\Big(\int_0^1\xi(p^{t,q})\Big) \d t. \end{align*} This uniquely characterizes $f$, and proves that it is independent of the chosen subsequence. Therefore, we conclude that $\bar F_N$ converges at every $(t,q)\in (0,t_\star)\times\mcl Q_2$. By~\cite[Proposition~7.3]{chenmourrat2023cavity}, the limit at $(t,q)$ is equal to $\msc P_{t,q}(p;x)$ for some $p\in\mcl Q_{\infty,\leq 1}$ satisfying the fixed point equation $p=\nabla_q\psi(q+t\nabla(p');x)$. Again, Lemma~\ref{l.unique crit point} ensures that $p=p_{q,t}$, completing the proof. \end{proof} \begin{corollary}\label{c.limF_N=f} Fix any $x\in\R^d$. Then, for every $(t,q) \in [0,t_\star)\times \mcl Q_\infty$, we have \begin{align}%\label{e.p.limF_N=f} \lim_{N\to\infty} \bar F^{m \mapsto x \cdot m}_N(t,q) = f(t,q;x), \end{align} where $f(\scdot,\scdot;x)$ is the function constructed in Proposition~\ref{p.f=UZ}. \end{corollary} \begin{proof} This follows from Proposition~\ref{p.id_limF} and Proposition~\ref{p.f=UZ}. \end{proof} \begin{lemma}\label{l.varphi} Fix any $(t,q)\in[0,t_\star)\times \mcl Q_\infty$ the function $x \mapsto f(t,q;x)$ is Lipschitz, concave, and continuously differentiable. \end{lemma} %In the convex case~\eqref{e.condition_convex}, the lemma can be proved by modifying the existing argument for~\cite[Lemma~2.2]{chen2024conventional} which replies on the availability of the Parisi formula. In the non-convex case~\eqref{e.condition_non-convex}, need to argue more. \begin{proof} Since we have $f(t,q;x) = \lim_{N \to +\infty} \bar F^{m \mapsto x \cdot m}_N(t,q)$ for every $x\in\R^d$ due to Corollary~\ref{c.limF_N=f}, the concavity and Lipschitzness of $f(t,q;\cdot)$ follows from that of $x \mapsto \bar{F}^{m \mapsto x \cdot m}_N(t,q)$, which are evident from its definition in~\eqref{e.F_N(t,q;s,x)=} and the boundedness of $m_N$ (see~\eqref{e.m_N=}). %the Lipschitzness and convexity of $\varphi$ follow from those of $\bar\cF_N$ proved in Lemma~\ref{l.cF_N_properties}. It remains to prove the differentiability. Once this is done, the continuity of the derivative follows directly since $f(t,q;\cdot)$ is concave. %Let $f(t,q;x)$ be the limit of $\bar F_N(t,q;0,x)$ and we have $f(t,q;x) =\varphi(x)$. Recall $\msc P_{t,q}$ defined in~\eqref{e.P_t,q(p;x)=}. Since $t$ and $q$ are fixed, we write $\msc P(\scdot;x)= \msc P_{t,q}(\scdot;x)$ for brevity. By Proposition~\ref{p.f=UZ}, $\nabla_q f(t,q;x)$ exists for every $x\in \R^d$ and we denote it by $p^x = \nabla_q f(t,q;x)$. By~\eqref{e.p.psi_smooth_1}, we have $p^x\in\mcl Q_{\infty,\leq1}$ defined in~\eqref{e.Q_infty,<1}. By~\eqref{e.fixed_pt_for_f} and~\eqref{e.fixed_pt_for_f_2} from the same proposition, we have \begin{align*} f(t,q;x) = \msc P(p^x;x)\quad\text{and}\quad p^x = \nabla_q \psi\Ll(q+t\nabla\xi (p^x);x \Rr),\quad\forall x\in\R^d. \end{align*} Using the latter relation, for $x,x'\in\R^d$ we can compute \begin{align} |p^x - p^{x'}|_{L^{\infty}}\leq \Ll|\nabla_q \psi\Ll(q+t\nabla\xi (p^x);x \Rr)-\nabla_q \psi\Ll(q+t\nabla\xi (p^{x'});x \Rr)\Rr|_{L^\infty} \notag \\ + \Ll|\nabla_q \psi\Ll(q+t\nabla\xi (p^{x'});x \Rr)-\nabla_q \psi\Ll(q+t\nabla\xi (p^{x'});x' \Rr)\Rr|_{L^\infty} \notag \\ \stackrel{\eqref{e.t_star=}\eqref{e.p.psi_smooth_2}}{\leq} t t_\star^{-1} |p^x - p^{x'}|_{L^\infty} + 4|h|_{L^\infty}|x-x'|. \label{e.t_star_used_here_2} \end{align} Due to $t_Ntqsx=} and recall that given a random variable $X : \Omega \to \mfk X$ and $\omega \in \Omega$ we let $X^\omega \in \mfk X$ denote the realization of the random variable $X$ on the event $\omega$. Also, recall the definition of $f^*(t,q;m)$ from~\eqref{e.f^*=}. \begin{lemma} \label{l.ldp_G=0} Let $(t,q) \in [0,t_\star) \times \mcl Q_\infty$. There exists a full-measure subset $\Omega' \subset \Omega$ such that, for every $\omega \in \Omega'$, $m_N$ under $\langle \cdot \rangle_{N;t,q}^{0,\omega}$ satisfies a large deviation principle with rate function $I_{t,q}$ defined by % \begin{e}\label{e.I_t,q=f^*-f} I_{t,q}(m) = f^*(t,q;m) - f(t,q;0), \qquad \forall m \, \in \R^d. \end{e} \end{lemma} \begin{proof} Using Corollary~\ref{c.limF_N=f} and the concentration of free energy as in Remark~\ref{r.concentration}, we can find a full-measure subset $\Omega' \subset \Omega$ such that such that, for every $\omega\in\Omega'$ and every $y\in \Q^d$, % \begin{equation*} \lim_{N\to\infty}F^{m \mapsto y \cdot m, \omega}_N(t,q)= f(t,q;y). \end{equation*} % Henceforth, we fix $\omega\in\Omega'$ and define for each $y\in\R^d$, % \begin{e*} \Lambda_{N;t,q}(y)= \frac{1}{N}\log \la e^{Ny\cdot m_N}\ra^{0,\omega}_{N;t,q}. \end{e*} % Notice that $\Lambda_{N;t,q}(y) = -F_N^{m \mapsto y \cdot m,\omega}(t,q) + F_N^{0,\omega}(t,q)$. Hence, we have for every $y\in \Q^d$ % \begin{e}\label{e.limLambda_N=} \lim_{N\to\infty}\Lambda_{N;t,q}(y)= -f(t,q;y)+f(t,q;0). \end{e} % Since $\Lambda_{N;t,q}$ is Lipschitz uniformly in $N$ and $f(t,q;\cdot)$ is also Lipschitz, the previous display in fact holds for every $y \in \R^d$. Finally, since by Lemma~\ref{l.varphi}, $f(t,q;\cdot)$ is differentiable everywhere, the Gärtner--Ellis theorem (e.g., see~\cite[Theorem~2.3.6]{dembo2009large}) guarantees that $m_N$ satisfies a large deviation principle under $\la\cdot\ra^{0,\omega}_{N;t,q}$ with rate function % \begin{e*} I_{t,q}(m) = \sup_{y \in \R^d} \left\{ y \cdot m - \lim_{N \to +\infty} \Lambda_{N;t,q}(y) \right\} \stackrel{\eqref{e.limLambda_N=}\eqref{e.f^*=}}{=} f^*(t,q;m) - f(t,q;0). \end{e*} \end{proof} \begin{lemma} \label{l.log} Let $(t,q) \in [0,t_\star) \times \mcl Q_\infty$. For every continuous function $G : \R^d \to \R$ and every $\omega \in \Omega'$ (as given in Lemma~\ref{l.ldp_G=0}), we have % \begin{e} \lim_{N \to +\infty} \frac{1}{N} \log \la e^{NG(m_N)} \ra_{N;t,q}^{0,\omega} = \sup_{m \in \R^d} \left\{ G(m) - I_{t,q}(m) \right\}. \end{e} \end{lemma} \begin{proof} Since $m_N$ is bounded uniformly in $N$, so is $G(m_N)$, which allows us to apply Varadhan's lemma (e.g., see~\cite[Theorem~4.3.1]{dembo2009large}) to deduce the desired result from the large deviation principle in Lemma~\ref{l.ldp_G=0}. \end{proof} \begin{proof}[Proof of Theorem~\ref{t.free_energy}] Let $\Omega'$ be given as in Lemma~\ref{l.ldp_G=0}. Notice that, for every $\omega\in\Omega'$, % \begin{e*} \frac{1}{N}\log\la e^{NG(m_N)}\ra^{0,\omega}_{N;t,q}= -F^{G,\omega}_N(t,q) + F^{0,\omega}_N(t,q). \end{e*} % Using Lemma~\ref{l.log}, \eqref{e.I_t,q=f^*-f}, and $\lim_{N\to\infty} F^{0,\omega}_N(t,q)=f(t,q;y)$ due to Corollary~\ref{c.limF_N=f}, we get that for every $\omega \in \Omega'$ % \begin{e*} \lim_{N\to\infty} F^{G,\omega}_N = \inf_{m \in \R^d} \left\{ -G(m) + f^*(t,q;m) \right\}. \end{e*} % Finally, using the definition of $f^*(t,q;m)$ in~\eqref{e.f^*=}, we observe that the right-hand side in the previous display is equal to the right-hand side in~\eqref{e.free_energy}. Then, the desired result follows from this and the concentration of free energy as in Remark~\ref{r.concentration}. \end{proof} \begin{proof}[Proof of Theorem~\ref{t.ldp}] Let $\Omega'$ be given as in Lemma~\ref{l.ldp_G=0}. Fix any continuous function $G: \R^d \to \R$ and recall $I^G_{t,q}$ from~\eqref{e.I^G=}. Let $\Gamma:\R^d\to\R$ be any bounded continuous function. Since we have, % \begin{e*} \frac{1}{N}\log\la e^{N\Gamma(m_N)}\ra^{G,\omega}_{N;t,q} = \frac{1}{N}\log\la e^{N(G+\Gamma)(m_N)}\ra^{0,\omega}_{N;t,q} - \frac{1}{N} \log\la e^{NG(m_N)}\ra^{0,\omega}_{N;t,q}. \end{e*} % It follows from Lemma~\ref{l.log} along with~\eqref{e.I_t,q=f^*-f} that for every $\omega \in \Omega'$, % \begin{align*} \lim_{N\to\infty}\frac{1}{N}\log\la e^{N\Gamma(m_N)}\ra^{G,\omega}_{N;t,q} &=\sup\Ll\{G+ \Gamma-I_{t,q} \Rr\} - \sup\Ll\{G-I_{t,q} \Rr\} \\ & \stackrel{\eqref{e.I^G=}\eqref{e.I_t,q=f^*-f}}{=} \sup \{\Gamma - I^G_{t,q}\}. \end{align*} % Since this holds for any bounded continuous $\Gamma$, we can apply a variant of Bryc's inverse Varadhan lemma (e.g., see the ``$\Longleftarrow$'' direction in~\cite[Theorem~4.4.13]{dembo2009large}) which guarantees that $m_N$ satisfies a large deviation principle under $\la\cdot\ra^{G,\omega}_{N;t,q}$ with rate function $I^G_{t,q}$. \end{proof} %\subsection{The free energy with general Mattis interaction} \subsection{Replica symmetric form} \label{ss.proj} Recall from~\eqref{e.haty=} the notation of a constant path $\hat y$ given any $y\in\S^D$. In this section, we also write $(y)^\wedge = \hat y$. For $\kappa\in L^1$, we define $\check{\kappa}=(\kappa)^\vee =\int_0^1\kappa(r)\d r \in\S^D$. Then, for every $y\in \S^D$ and $\kappa\in L^1$, we have the following simple identities: \begin{align}\label{e.proj_adjoint} y\cdot \check{\kappa} = \la \hat y, \kappa\ra_{L^2}\quad\text{and}\quad \Ll(\hat y\Rr)^\vee = y. \end{align} Recall $\psi(\scdot,\scdot)$ in~\eqref{e.psi(q;x)=} and $\phi(\scdot,\scdot)$ in~\eqref{e.phi(y,x)=}. Also, recall the relation $\phi(y;x)=\psi\Ll(\hat y;x\Rr)$ from~\eqref{e.phi=psi}. To prove Theorem~\ref{t.rs}, we show in Proposition~\ref{p.f-d-proj} that for every $x \in \R^D$, the function $(t,y) \mapsto f(t,\hat{y};x)$ satisfies a \emph{finite} dimensional Hamilton--Jacobi on $[0,t_\star) \times \S^D_+$. To do so, we need to recall a modified version of~\cite[Lemma~5.12]{chen2024simultaneous}. This lemma relies on the assumption that the support of $P_1$ spans $\R^D$. \begin{lemma}[\cite{chen2024simultaneous}]\label{l.detect_jump} Fix any $x\in\R^d$. Let $q\in\mcl Q_\infty$ and set $p=\nabla_q\psi(q;x)$. For $0-ms} \la\cdot\ra_{N;t,q}^G \quad\propto\quad \exp\Ll(H^G_{N;t,q}(\sigma,\alpha)\Rr)\d P_N(\sigma)\mathfrak{R}(\alpha). \end{align*} We denote by $\Omega$ the underlying probability space for $H^G_{N;t,q}$ and $\mfk R$, and write $\la\cdot\ra_{N;t,q}^{G,\omega}$ as a realization of $\la\cdot\ra_{N;t,q}^G$ for any $\omega\in\Omega$. % \begin{theorem}[Large deviations of $m_N$] \label{t.ldp-ms} Assume~\eqref{e.limlambda_N=lambda_infty}. Let $t < t_\star$ and $q \in \mcl Q_\infty^\sS$. There is a full measure subset $\Omega' \subset \Omega$ such that the following holds. For every $\omega\in\Omega'$ and every continuous function $G:\prod_{\s\in\sS}\R^{d_\s}\to\R$, the random variable $m_N$ satisfies a large deviation principle under $\la\cdot\ra_{N;t,q}^{G,\omega}$ with rate function $I^G_{t,q}$ defined by % \begin{e*} I_{t,q}^G(m) = -G(m) + f^*(t,q;m) + \sup_{m'} \left\{ G(m') - f(t,q;m') \right\} ,\ \forall m \, \in \prod_{\s\in\sS}\R^{d_\s}, \end{e*} % where $f^*(t,q;m) = \sup_x \left\{ x \cdot m + f(t,q;x)\right\}$ with $f$ given in Theorem~\ref{t.free_energy-ms}. \end{theorem} Given $y\in\R^\sS_+$, we define $\hat y$ similar as in~\eqref{e.haty=} to be a constant path. For $y=(y_\s)_{\s\in\sS}\in\R^\sS_+$, $x=(x_\s)_{\s\in\sS}\in\prod_{\s\in\sS}\R^{d_\s}$, and $\lambda\in\blacktriangle_\infty$, we set \begin{align*} &\phi_\s(y_\s;x_\s) = -\E\log\int\exp\Ll(\sqrt{2 y_\s}\eta_\s\tau-y_\s\tau^2+x_\s\cdot h_\s(\tau,\chi_{\s,1})\Rr) \d P_s(\tau), \end{align*} where $\eta_\s$ is an independent standard Gaussian random variable, and we also set \begin{align*}%\label{e.psi(q;x)=-ms} \phi_\lambda(y;x) = \sum_{\s\in\sS}\lambda_\s \phi_\s(y_\s;x_\s). \end{align*} By a similar argument as in Remark~\ref{r.Gauss_ext_field}, we have $\phi_\lambda(y;x) = \psi_\lambda(\hat y;x)$. \begin{theorem}[Replica symmetric form]\label{t.rs-ms} Assume~\eqref{e.limlambda_N=lambda_infty}. Let $x\in\prod_{\s\in\sS}\R^{d_\s}$. Define $g(\scdot,\scdot;x):[0,t_\star)\times \R^\sS_+\to\R$ by % \begin{e*} g(t,y;x)=f(t,\hat y;x), \end{e*} with $f$ given in Theorem~\ref{t.free_energy-ms}. % The function $g(\scdot,\scdot;x)$ is differentiable on $[0,t_\star)\times \R^\sS_+$ and satisfies \begin{align*}%\label{e.hj_g-p.f-d-proj} \begin{cases} \partial_t g(t,y;x) - \xi\big(\nabla_y g(t,y;x)\big)=0 &\quad \text{ for } (t,y)\in(0,t_\star)\times \R^\sS_+, \\ g(0,y;x) = \phi_{\lambda_\infty}(y;x) &\quad \text{ for } y \in \R^\sS_+. \end{cases} \end{align*} % Moreover, at every $(t,y)\in (0,t_\star)\times \R^\sS_+$, there is a unique solution $z\in\R^\sS_+$ to the fixed point equation \begin{align*} z' = \nabla_y \phi_{\lambda_\infty}(t\nabla\xi(z');x) \quad\text{for }z'\in\R^\sS_+, \end{align*} and we have % \begin{e*} g(t,y;x) = \phi_{\lambda_\infty}(y+t\nabla\xi(z);x) - t \theta(z)\quad\text{and}\quad \nabla_y g(t,y;x) = z. \end{e*} \end{theorem} % \begin{theorem}\label{t-ms} % There is $\beta_\star\in(0,+\infty)$ satisfying % \begin{align}\label{e.beta_star-ms} % \beta_\star\geq\Ll(8\big\|\nabla\xi\lfloor_{[-1,1]^\sS}\big\|_{\mathrm{Lip}([-1,1]^\sS;\R^\sS)}\Rr)^{-\frac{1}{2}} % \end{align} % such that the following holds for every $\beta\in[0,\beta_\star)$. % Suppose $\lim_{N\to\infty}\lambda_N = \lambda_\infty$ for some $\lambda_\infty \in \blacktriangle_\infty$. % For every $x\in\prod_{\s\in\sS}\Ll(\R^{d_\s}\times \R\Rr)$, % there is a unique $z^x\in\R^\sS$ satisfying % \begin{align}\label{e.t.fixed_pt_eqn-ms} % z^x=\nabla_y\phi_{\lambda_\infty}\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr) % \end{align} % with $\phi_{\lambda_\infty}(\scdot;\scdot)$ given in~\eqref{e.phi(y,x)=-mp}. % We have % \begin{align}\label{e.t-ms} % \begin{split} % &\lim_{N\to\infty} \bar F_N(\beta) % \\ % &= \inf_{x'}\sup^x\Ll\{\phi_{\lambda_\infty}\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr)-\tfrac{\beta^2}{2}\theta(z^x)+ x\cdot x' - G(x'_{\bullet 1})- \tfrac{\beta^2}{2}\xi(x'_{\bullet 2})\Rr\} % \end{split} % \end{align} % with optimizations over $x,x'\in\prod_{\s\in\sS}\Ll(\R^{d_\s}\times \R\Rr)$, where $x'_{\bullet 1}=(x'_{\s,1})\in \prod_{\s\in\sS}\R^{d_\s}$ and $x'_{\bullet 2}=(x'_{\s,2})\in \R^\sS$. % \end{theorem} % For every $\s\in\sS$, $y_\s\in \R_+$, and $x_\s\in \R$, we set % \begin{align*} % \phi^\circ_\s(y_\s;x_\s)=-\E\log\int \exp\Ll(\sqrt{2y_\s}\eta_\s\tau-y_\s\tau^2 + x_\s\tau^2\Rr)\d P_1(\tau). % \end{align*} % For $\lambda\in \triangle$, $y_\s\in \R_+^\sS$, and $x_\s\in \R^\sS$, we take % \begin{align}\label{e.phi^circ(y,x)=-ms} % \phi^\circ_\lambda(y;x) = \sum_{\s\in\sS}\lambda_\s\phi^\circ_\s(y_\s;x_\s). % \end{align} % \begin{theorem}\label{t.so-ms} % Assume that, for every $\s\in\sS$, $h_\s:\R\to\R$ (with $L_\s=0$ and $d_\s=1$) is deterministic and is given by $h_\s(r)=r^2$ for every $r\in\R$ and that $G$ is defined on $\R^\sS$. % There is $\beta_\star\in(0,+\infty)$ satisfying~\eqref{e.beta_star-ms} such that the following holds for every $\beta\in[0,\beta_\star)$. % Suppose $\lim_{N\to\infty}\lambda_N = \lambda_\infty$ for some $\lambda_\infty \in \blacktriangle_\infty$. % For every $x\in\R^\sS$, % there is a unique $z^x\in\R_+^\sS$ satisfying % \begin{align*} % z^x=\nabla_y\phi^\circ_{\lambda_\infty}\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr) % \end{align*} % with $\phi^\circ_{\lambda_\infty}(\scdot;\scdot)$ given in~\eqref{e.phi^circ(y,x)=-ms}. % We have % \begin{align*} % \begin{split} % &\lim_{N\to\infty} \bar F_N(\beta) % \\ % &= \inf_{x'}\sup^x\Ll\{\phi^\circ\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr)-\tfrac{\beta^2}{2}\theta(z^x)+ x\cdot x' - G(x')- \tfrac{\beta^2}{2}\xi(x')\Rr\} % \end{split} % \end{align*} % where the optimizations are over $x,x'\in\R^\sS$. % \end{theorem} \subsection{Recipe for adaptation} Instead of rewriting everything in detail, we want to avoid repetition and give a recipe for adapting existing results to the multi-species setting. We mention the difference in initial conditions. In the vector spin glass setting, we simply have $\bar F^{x \mapsto x \cdot m}_N(0,q)=\psi(q;x)$ (see the line above~\eqref{e.psi(q;x)=}). Here, we have (see~\cite[Lemma~4.11]{multi-sp}) $\bar F^{x \mapsto x \cdot m}_N(0,q)=\psi_{\lambda_N}(q;x)$ and thus, under the assumption that $\lambda_N$ converges to $\lambda_\infty$, % \begin{e*} \lim_{N\to\infty} \bar F^{x \mapsto x \cdot m}_N(0,q) = \psi_{\lambda_\infty}(q;x). \end{e*} % In the current setting, $\psi_\s$ has the same form as $\psi$ in~\eqref{e.psi(q;x)=} in the vector spin glass. So, each $\psi_\s$ satisfies the properties in Proposition~\ref{p.psi_smooth} with $\mcl Q_\sfp$ therein replaced by $\mcl Q_\sfp(1)$. To transfer these properties to $\psi_{\lambda_\infty}$, we first recall the norms on $\mcl Q^\sS_\sfp$. We view $\mcl Q^\sS_\sfp$ as embedded into $L^\sfp([0,1);\R^\sS)$, equiped with the norm \begin{align*} |\kappa|_{L^\sfp} = \Big(\int_0^1\Big(\sum_{\s\in\sS}\kappa_\s(r)^2\Big)^\frac{\sfp}{2}\d r\Big)^\frac{1}{\sfp}. \end{align*} Observe that at $\sfp=2$, we have $|\kappa|_{L^2}^2=\sum_{\s\in\sS}|\kappa_\s|_{L^2}^2$. % Then, since $\nabla_q \psi_{\lambda_\infty}(q;x) = \big(\lambda_{\infty,\s}\nabla_{q_\s}\psi_\s(q_\s;x_\s)\big)_{\s\in\sS}$ and $\lambda_{\infty,\s}\in[0,1]$, we have % \begin{align*} % \Ll\|\nabla_q \psi_{\lambda_\infty}(\scdot;x)\Rr\|_{\mathrm{Lip}(\mcl Q^\sS_2,\mcl Q^\sS_2)} \leq \max_{\s\in\sS}\Ll\|\nabla_{q_\s} \psi_\s(\scdot;x_\s)\Rr\|_{\mathrm{Lip}(\mcl Q_2(1),\mcl Q_2(1))}\stackrel{\eqref{e.p.psi_smooth_2}}{\leq} 16. % \end{align*} % In the current multi-species setting, we define $t_\star$ by % \begin{align}\label{e.t_star=-ms} % t^{-1}_\star = \sup^x \Ll\|\nabla_q \psi_{\lambda_\infty}(\scdot;x)\Rr\|_{\mathrm{Lip}(\mcl Q^\sS_2,\mcl Q^\sS_2)}\Ll\|\nabla \xi\lfloor_{[-1,1]^\sS}\Rr\|_{\mathrm{Lip}([-1,1]^\sS;\R^\sS)} % \end{align} % where the supremum is taken over $x \in \prod_{\s\in\sS}\R^{d_\s}$. Then the above two displays imply the lower bound % \begin{align}\label{e.t_star>-mp} % t_\star \geq \Ll(16\Ll\|\nabla \xi\lfloor_{[-1,1]^\sS}\Rr\|_{\mathrm{Lip}([-1,1]^\sS;\R^\sS)}\Rr)^{-1}>0. % \end{align} One can verify that Proposition~\ref{p.f=UZ}, Corollary~\ref{c.limF_N=f}, and Lemma~\ref{l.varphi} hold with $t_\star$ defined in~\eqref{e.t_star=-ms}. Indeed, checking the proofs, one can see that the definition of $t_\star$ (previously in~\eqref{e.t_star=}) are used in~\eqref{e.|nabla^2xi(nablapsi)nabla^2psi<}, the line below \eqref{e.q=X(t,Z(t,q))=Z...}, \eqref{e.pf.l.unique crit point}, %\eqref{e.t_star_used_here}, and \eqref{e.t_star_used_here_2}. Then, one can verify that the definition in~\eqref{e.t_star=-ms} still works in all these cases. To restate and reprove these four propositions, one needs to replace $\mcl Q_\sfp$, $\psi$ (see~\eqref{e.psi(q;x)=}), $\S^D_+$, and $\R^d$ therein by $\mcl Q^\sS_\sfp$, $\psi_{\lambda_\infty}$, $\R^\sS_+$, and $\prod_{\s\in\sS}\R^{d_\s}$, respectively. Cited results from~\cite{chenmourrat2023cavity} for vector spin glasses can be replaced by counterparts in~\cite{multi-sp} for multi-species spin glasses. Once those three results are proven in the setting of multi-species models, one can straightforwardly use the same arguments as in the rest of Section~\ref{s.proof_main_results} to finish the proofs of Theorems~\ref{t.free_energy-ms}, \ref{t.ldp-ms}, and~\ref{t.rs-ms}. %\section{Simple argument for identifying the limit free energy with linear Mattis interaction} \label{s.simplified} %In Corollary~\ref{c.limF_N=f}, we have proved Theorem~\ref{t.free_energy} in the special case $G(m)=x\cdot m$. In turn, this proposition serves as a cornerstone for deriving the full version of Theorem~\ref{t.free_energy} as well as Theorem~\ref{t.ldp}. As discussed in the introduction, the proof of Corollary~\ref{c.limF_N=f} relies on the so-called Hamilton--Jacobi approach. This perspective provides a conceptual viewpoint on the results we obtain. In particular, we verify that, as proposed in~\cite{mourrat2019parisi}, the viscosity solution of~\eqref{e.hj} is indeed the correct object to describe the limit free energy, at least in the high-temperature regime. From this, one deduces the critical-point representation for the free energy~\eqref{e.visco=crit_parisi}. We stress that, although not proven so far, it is widely expected that the statement of Corollary~\ref{c.limF_N=f} should hold for every $t\ge0$, and not only for $tt_\star$, the fixed-point equation~\eqref{e.fixed_point} may admit multiple solutions, and there is currently no rigorous method to select the correct fixed point “by hand”. % Let us explain that Propositions~\ref{p.f=UZ}, \ref{p.f-d-proj}, and \ref{p.limF_N=f}, and Lemma~\ref{l.varphi} hold with $t_\star$ defined in~\eqref{e.t_star=-ms}. Indeed, checking the proofs, one can see that the definition of $t_\star$ (previously in~\eqref{e.t_star=}) are used in~\eqref{e.|nabla^2xi(nablapsi)nabla^2psi<}, the line below \eqref{e.q=X(t,Z(t,q))=Z...}, \eqref{e.pf.l.unique crit point}, \eqref{e.t_star_used_here}, and \eqref{e.t_star_used_here_2}. Then, one can verify that the definition in~\eqref{e.t_star=-ms} still works in all these cases. To restate and reprove these four propositions, one need to replace $\mcl Q_\sfp$, $\psi$ (see~\eqref{e.psi(q;x)=}), $\S^D_+$, and $\R^d$ therein by $\mcl Q^\sS_\sfp$, $\psi_{\lambda_\infty}$, $\R^\sS_+$, and $\prod_{\s\in\sS}\R^{d_\s}$, respectively. Cited results from~\cite{chenmourrat2023cavity} for vector spin glasses can be replaced by counterparts in~\cite{multi-sp} for multi-species spin glasses. % From these, we can also obtain an adapted version of Theorem~\ref{t.gen} on the limit of $\bar F_N$ defined in~\eqref{e.F_N(t,q;s,x)=-ms}. The statement involves the aforementioned replacements. Using this theorem, we can follow the same steps in Section~\ref{s.proofs_main_results} to prove Theorems~\ref{t-ms} and~\ref{t.so-ms}. % \section{This subsection contains a special case which we do not use} % For every $y\in \S^D_+$ and $x=(x_1,x_2)\in\R^d\times \S^D$, we set % \begin{align}\label{e.phi(y,x)= bis} % \phi(y;x)=-\E\log\int \exp\Ll(\sqrt{2y}\eta\cdot\tau-y\cdot\tau\tau^\intercal + x_1\cdot h(\tau,\chi_1)+x_2\cdot\tau\tau^\intercal\Rr)\d P_1(\tau) % \end{align} % where $\eta$ is an independent standard $\R^D$-valued Gaussian vector (i.e.\ $\E\eta\eta^\intercal$ is the $D\times D$-identity matrix) and $\E$ averages over both $\eta$ and $\chi_1$. We denote the derivative in the first variable of $\phi(\scdot;x)$ as $\nabla_y \phi(\scdot;x)$. % \begin{theorem}\label{tbis} % There is $\beta_\star\in(0,+\infty)$ satisfying % \begin{align}\label{e.beta_star} % \beta_\star\geq\Ll(8\big\|\nabla\xi\lfloor_B\big\|_{\mathrm{Lip}(B;\R^{D\times D})}\Rr)^{-\frac{1}{2}} % \end{align} % such that the following holds for every $\beta\in[0,\beta_\star)$. For every $x\in\R^d\times \S^D$, % there is a unique $z^x\in\S^D_+$ satisfying % \begin{align}\label{e.t.fixed_pt_eqn bis} % z^x=\nabla_y\phi\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr) % \end{align} % with $\phi(\scdot;\scdot)$ given in~\eqref{e.phi(y,x)=}. % We have % \begin{align}\label{e.t bis} % \lim_{N\to\infty} F^G_N(\frac{\beta^2}{2},0) = \inf_{x'}\sup^x\Ll\{\phi\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr)-\tfrac{\beta^2}{2}\theta(z^x)+ x\cdot x' - G(x'_1)- \tfrac{\beta^2}{2}\xi(x'_2)\Rr\} % \end{align} % where the optimizations are over $x,x'\in\R^d\times \S^D$. % \end{theorem} % We mention that there is some flexibility in terms of formulae. % We can view the mean magnetization $\frac{1}{N}\sum_{i=1}^N h(\sigma_{\bullet i},\chi_i)$ as an extra order parameter in addition to the existing spin glass order parameter, i.e., the overlap. Aside from these two, another natural order parameter is the self-overlap $N^{-1}\sigma\sigma^\intercal$. % The terms $x_2\cdot\sigma\sigma^\intercal$ in $\phi(y;x)$ and $\frac{\beta^2}{2}\xi(x'_2)$ are needed specifically for the self-overlap. In special cases where the self-overlap is the order parameter other than the overlap, the formula can be simplified. For every $y\in \S^D_+$ and $x\in \S^D$, we set % \begin{align}\label{e.phi^circ(y,x)=} % \phi^\circ(y;x)=-\E\log\int \exp\Ll(\sqrt{2y}\eta\cdot\tau-y\cdot\tau\tau^\intercal + x\cdot\tau\tau^\intercal\Rr)\d P_1(\tau) % \end{align} % where $\eta$ is an independent standard $\R^D$-valued Gaussian vector. % \begin{theorem}[Special case]\label{t.so} % %\leavevmode % Assume that $h:\R^D\to\S^D$ (with $L=0$ and $\R^d$ identified with $\S^D$ for some $d$) is deterministic and is given by $h(v)=vv^\intercal$ for every $v\in\R^D$ and that $G$ is defined on $\S^D$. % There is $\beta_\star\in(0,+\infty)$ satisfying~\eqref{e.beta_star} such that the following holds for every $\beta\in[0,\beta_\star)$. For every $x\in\S^D$, % there is a unique $z^x\in\S^D_+$ satisfying % \begin{align*} % z^x=\nabla_y\phi^\circ\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr) % \end{align*} % with $\phi^\circ(\scdot;\scdot)$ given in~\eqref{e.phi^circ(y,x)=}. % We have % \begin{align*} % \begin{split} % &\lim_{N\to\infty} F_N(\beta) % = \inf_{x'}\sup^x\Ll\{\phi^\circ\Ll(\tfrac{\beta^2}{2}\nabla\xi(z^x);x\Rr)-\tfrac{\beta^2}{2}\theta(z^x)+ x\cdot x' - G(x')- \tfrac{\beta^2}{2}\xi(x')\Rr\} % \end{split} % \end{align*} % where the optimizations are over $x,x'\in\S^D$. % \end{theorem} % More generally, if $\sigma_{\bullet i}\sigma_{\bullet i}^\intercal$ is part of $h(\sigma_{\bullet i},\chi_i)$, then similar simplifications can also be made. We omit the formulation of this as it can be straightforwardly obtained by modifying the proofs of these theorems in Section~\ref{s.proof_main_results}. Notice that when $G=0$, the setup corresponds to the standard vector spin glass model with only the spin-glass term. % \begin{proof}[Proof of Theorem~\ref{tbis}] % For $t_\star$ given in~\eqref{e.t_star=}, we take $\beta_\star = \sqrt{2t_\star}$. Then, the lower bound in~\eqref{e.beta_star} follows from~\eqref{e.t_star>}. % Given $h:\R^D\times \R^L\to\R^d$ in~\eqref{e.F_N(t,q;s,x)=}, we define $\tilde h:\R^D\times \R^L\to \R^d\times \S^D$ through % \begin{align*} % \tilde h(\tau,\chi_i) = \Ll(h(\tau,\chi_i),\ \tau\tau^\intercal\Rr). % \end{align*} % Henceforth, we isometrically identify $\R^d\times \S^D$ with $\R^{\tilde d}$ for some $\tilde d \in\N$ and we view any $x\in \R^d\times \S^D$ as an element in $\R^{\tilde d}$. % Given $G:\R^d\to\R$ in~\eqref{e.F_N(t,q;s,x)=}, we introduce $\tilde G:\R^d\times \S^D\to\R$ given by $\tilde G(x) =G(x_1) + \frac{\beta^2}{2}\xi(x_2)$ for $x=(x_1,x_2)\in \R^d\times \S^D$. % Then, we consider $\bar F_N(t,q;s,x)$ in~\eqref{e.F_N(t,q;s,x)=} with $G,h,d$, therein substituted with $\tilde G,\tilde h,\tilde d$ introduced above. % Comparing~\eqref{e.F_N(t,q;s,x)=} with the definition of $F_N(\beta)$ in~\eqref{e.F_N(t,q;s,x)=}, we have $F_N(\beta)= \bar F_N\Ll(\tfrac{\beta^2}{2},0;1,0\Rr)$. Thus, applying Theorem~\ref{t.free_energy}, we get % \begin{align*} % \lim_{N\to\infty} F_N(\beta) % = \inf_{x'}\sup^x\Ll\{f\Ll(\tfrac{\beta^2}{2},0;x\Rr)+x\cdot x'-s\tilde G(x')\Rr\} % \end{align*} % with optimizations over $x,x'\in\R^{\tilde d}\simeq \R^d\times \S^D$. % By Proposition~\ref{p.f-d-proj}, $f\Ll(\tfrac{\beta^2}{2},0;x\Rr)$ coincides with $g\Ll(\tfrac{\beta^2}{2},0;x\Rr)$ defined therein. The last part in Proposition~\ref{p.f-d-proj} ensures the existence and uniqueness of the solution $z^x$ to~\eqref{e.t.fixed_pt_eqn}. By the second relation in~\eqref{e.fixed_pt_eqn_fd}, we have that $g(t,y;x) = \phi(y+t\nabla\xi(z^x);x)-t\theta(z^x)$. Inserting this and the expression of $\tilde G$ to the above display, we get~\eqref{e.t}. % \end{proof} % \begin{proof}[Proof of Theorem~\ref{t.so}] % We modify the previous proof. % Let $\beta_\star$ be as in the above proof. Let $\tilde h=h$ be deterministic and map $u\in\R^D$ to $uu^\intercal\in\S^D$. Let $\tilde d$ be such that $\S^D$ is isometric to $\R^{\tilde d}$. Take $\tilde G:\S^D\to\R$ by $\tilde G(x) = G(x)+ \frac{\beta^2}{2}\xi(x)$ for $x\in\S^D$. Then, we consider $\bar F_N(t,q;s,x)$ in~\eqref{e.F_N(t,q;s,x)=} with $G,h,d$, therein substituted with $\tilde G,\tilde h,\tilde d$ introduced above. We can follow the same steps in the proof of Theorem~\ref{t} to get the desired result (with $x,x',x''\in\R^d\times \S^D$ and $\phi(\scdot;\scdot)$ replaced by $x,x',x''\in\R^d\times \S^D$ and $\phi^\circ(\scdot;\scdot)$ in~\eqref{e.phi^circ(y,x)=}). % \end{proof} % \section*{-not used- Adding Curie--Weiss-type interactions} \label{s.addingcw} % Recall $F_N(t,q;s,x)$ and $\bar F_N(t,q;s,x)$ from~\eqref{e.F_N(t,q;s,x)=}, $\la\cdot\ra_{N;t,q;s,x}$ from~\eqref{e.<>_Ntqsx=}, and $\psi(q;x)$ from~\eqref{e.psi(q;x)=}. % For brevity, we often use the following notation: % \begin{align}\label{e.notation_cF(s,x)} % \begin{split} % &\text{when $(t,q)$ is fixed,\quad we write}\quad % \begin{cases} % \cF_N(s,x) = -F_N(t,q;s,x) % \\ % \bar\cF_N(s,x) = -\bar F_N(t,q;s,x) % \\ % \la\cdot\ra_{N;s,x} = \la\cdot\ra_{N;t,q;s,x} % \end{cases}, % \\ % &\text{and} \quad \varphi(x) = \lim_{N\to\infty}\bar \cF_N(0,x) \qquad \text{whenever the limit exists.} % \end{split} % \end{align} % %In some places, we will assume either one of the following conditions: % %\begin{align} % % &\text{$\xi$ is convex on $\S^D_+$, \quad $t\in\R_+$, \quad and $q\in\mcl Q_\infty$;} \label{e.condition_convex} % % \\ % % &\text{$\xi$ is not convex on $\S^D_+$, \quad $t0$ such that % \begin{align}\label{e.|m|<} % \sup_{N\in\N} \Ll|m_N\Rr|0$ such that, everywhere on $(0,+\infty)\times \R^d$ and for every $N$, % \begin{align}\label{e.approx_hj} % \Ll| \partial_s \bar\cF_N - G\Ll(\nabla \bar\cF_N\Rr)\Rr| \leq C\Ll(N^{-1} \Delta \bar\cF_N\Rr)^\frac{1}{2} + C\E \Ll|\nabla\cF_N -\nabla \bar\cF_N\Rr|. % \end{align} % \end{itemize} % \end{lemma} % \begin{proof} % For brevity, we write $m=m_N$ and $\la\cdot\ra=\la\cdot\ra_{N;s,x}$ where $N, s,x$ will be clear from the context. % At every $(s,x) \in (0,+\infty)\times \R^d$, using~\eqref{e.F_N(t,q;s,x)=}, we can compute % \begin{align}\label{e.1st_der_FN} % \partial_s \bar\cF_N =\E\la G(m)\ra, \qquad \nabla \cF_N = \la m \ra, \qquad \nabla \bar\cF_N = \E \la m \ra % \end{align} % and, for any $(r,y)\in\R\times \R^d$, % \begin{align}\label{e.2nd_der_FN} % \frac{\d^2}{\d \eps^2}\cF_N(s+\eps r, x+\eps y)\, \Big|_{\eps = 0} = N \la \Ll(r G(m)+y\cdot m\Rr)^2 - \la r G(m)+y\cdot m \ra^2 \ra\geq 0. % \end{align} % Since $|m|$ is bounded (as in~\eqref{e.|m|<}) and $G$ is locally Lipschitz, we deduce from~\eqref{e.1st_der_FN} that $\bar\cF_N$ is Lipschitz in both variables with $\sup_{N}\| \bar\cF_N\|_\mathrm{Lip}<\infty$. % Since it is easy to verify $\frac{\d^2}{\d \eps^2}\bar\cF_N(s+\eps r, x+\eps y) = \E\frac{\d^2}{\d \eps^2}\cF_N(s+\eps r, x+\eps y)$, we obtain from~\eqref{e.2nd_der_FN} the convexity of $\bar\cF_N$. Setting $r=0$ in~\eqref{e.2nd_der_FN}, we can get % \begin{align*} % \Delta \bar\cF_N = N \E \la | m|^2 - |\la m\ra |^2 \ra = N \E \la |m - \la m\ra |^2 \ra. % \end{align*} % Also, from~\eqref{e.1st_der_FN}, we have % \begin{align*} % \Ll|\partial_s \bar\cF_N - G(\nabla\bar\cF_N)\Rr| % &= \Ll|\E\la G(m)\ra - G\Ll(\E\la m\ra\Rr)\Rr| % \\ % &\leq \Ll|\E\la G(m)\ra - \E G(\la m\ra) \Rr| + \Ll|\E G(\la m\ra) - G\Ll(\E\la m\ra\Rr)\Rr| % \\ % &\leq C\E \la \Ll|m - \la m\ra \Rr|\ra + C\E \Ll|\la m\ra - \E \la m\ra\Rr| % \end{align*} % for some constant $C$ due to the local Lipschitzness of $G$; % and we have % \begin{align*} % \E \Ll|\nabla \cF_N - \nabla \bar\cF_N\Rr| = \E \Ll|\la m\ra -\E \la m\ra\Rr|. % \end{align*} % The above three displays together yield~\eqref{e.approx_hj}. % \end{proof} % An important condition on $\varphi$ in~\eqref{e.notation_cF(s,x)} to ensure is the following: % \begin{align}\label{e.cond_varphi} % \text{$\varphi$ exists and is Lipschitz, convex, and continuously differentiable.} % \end{align} % We separate out this because the convergence result Proposition~\ref{p.FN_cvg_f}, to be proved, depends on $t