\documentclass[JEP,XML,SOM,Unicode,published]{cedram}
\datereceived{2022-04-20}
\dateaccepted{2022-12-20}
\dateepreuves{2023-03-09}

\usepackage[scr=rsfs,cal=euler]{mathalfa}
% `enumeratei': an enumerate variant whose first-level labels are lowercase
% roman numerals and whose second-level labels are arabic numbers.
% The \bgroup/\egroup pair keeps the \theenumi/\theenumii redefinitions local.
\newenvironment{enumeratei}
{\bgroup\def\theenumi{\roman{enumi}}\def\theenumii{\arabic{enumii}}\begin{enumerate}}
{\end{enumerate}\egroup}

% Machinery to typeset the letter `l' as \ell in math mode throughout the
% paper.  Mathcode "716C is TeX's default for `l' (class 7, math-italic
% family, slot "6C); "7160 points at slot "60 of the same family, which
% holds the \ell glyph.
\newcommand{\Changel}{\mathcode`l="7160}
\newcommand{\Changelback}{\mathcode`l="716C}
% \NoChangel{\cmd}: save \cmd under the name old\cmd, then redefine \cmd so
% that while it executes the mathcode of `l' is restored to the default
% "716C (so operator names such as \log are typeset from the usual slot),
% and is switched back to "7160 afterwards (this assumes \Changel is in
% force globally).  The space after the final "7160 terminates TeX's number
% scan.  \newcommand accepts #1 because it was just \let to \relax.
\newcommand{\NoChangel}[1]{%
\expandafter\let\csname old\string#1\endcsname=#1
\let#1=\relax
\newcommand{#1}{\mathcode`l="716C\csname old\string#1\endcsname\mathcode`l="7160 }%
}

% Protect all operator names containing `l' from the global l -> \ell switch.
\NoChangel{\log}
\NoChangel{\ln}
\NoChangel{\lim}
\NoChangel{\limsup}
\NoChangel{\liminf}
\NoChangel{\varlimsup}
\NoChangel{\varliminf}
\NoChangel{\varinjlim}
\NoChangel{\varprojlim}

% \mto: \longmapsto in display style, plain \mapsto in text/script styles.
\newcommand\mto{\mathchoice{\longmapsto}{\mapsto}{\mapsto}{\mapsto}}
% Slanted-fraction helpers built on \sfrac:
% \psfrac{a}{b} = (a)/b, \spfrac{a}{b} = a/(b), \Psfrac{a}{b} = (a/b).
\newcommand{\psfrac}[2]{\sfrac{(#1)}{#2}}
\newcommand{\spfrac}[2]{\sfrac{#1}{(#2)}}
\newcommand{\Psfrac}[2]{(\sfrac{#1}{#2})}

% Notation shortcuts: \epsilon always prints as \varepsilon, \sep is the
% set-builder separator \mid, \ts abbreviates \textstyle.
\let\epsilon\varepsilon
\let\sep\mid
\let\ts\textstyle

% "and" with surrounding quads, for use inside display math.
\newcommand{\qand}{\quad\text{and}\quad}
% Use bbm's blackboard-bold font for \mathds (e.g. the indicator \mathds{1}).
\usepackage{bbm}\let\mathds\mathbbm

% Theorem-like environments: all share a single counter, numbered within
% sections.  Remarks use the upright "definition" style but the same counter.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{question}[theorem]{Question}

\theoremstyle{definition}
\newtheorem{remark}[theorem]{Remark}

% Blackboard-bold shorthands: expectation, integers, reals, probability,
% naturals.
\newcommand\E{\mathbb{E}}
\newcommand\Z{\mathbb{Z}}
\newcommand\R{\mathbb{R}}
\newcommand\p{\mathbb{P}}
\newcommand\N{\mathbb{N}}

% Upright operator names: topological interior, isometry group, Lipschitz
% maps, support of a measure/function.
\DeclareMathOperator\inte{int}
\DeclareMathOperator\Isom{Isom}
\DeclareMathOperator\Lip{Lip}
\DeclareMathOperator\supp{supp}

\datepublished{2023-03-20}
\begin{document}
\frontmatter

\title[Second order expansion of the rate function at the drift]{Random walks on hyperbolic spaces:\\ second order expansion of the rate function at the drift}

\author[\initial{R.} \lastname{Aoun}]{\firstname{Richard} \lastname{Aoun}}
\address{University Gustave Eiffel, Champs-sur-Marne\\
5 boulevard Descartes, Champs-sur-Marne, 77454 Marne-la-Vallée Cedex 2, France}
\email{richard.aoun@univ-eiffel.fr}

\author[\initial{P.} \lastname{Mathieu}]{\firstname{Pierre} \lastname{Mathieu}}
\address{Aix-Marseille Université, CNRS, Centrale Marseille, I2M, UMR 7373\\
13453 Marseille, France}
\email{pierre.mathieu@univ-amu.fr}

\author[\initial{C.} \lastname{Sert}]{\firstname{Cagri} \lastname{Sert}}
\address{Institut für Mathematik, Universität Zürich\\
190, Winterthurerstrasse, 8057 Zürich, Switzerland}
\email{cagri.sert@math.uzh.ch}

\thanks{The third author is supported by SNF Ambizione grant 193481}

\begin{abstract}
Let $(X,d)$ be a separable geodesic Gromov-hyperbolic space, $o \in X$ a basepoint and $\mu$ a countably supported non-elementary probability measure on $\mathrm{Isom}(X)$. Denote by $z_n$ the random walk on $X$ driven by the probability measure $\mu$. Supposing that $\mu$ has a finite exponential moment, we give a second-order Taylor expansion of the large deviation rate function of the sequence $\frac{1}{n}d(z_n,o)$ and show that the corresponding coefficient is expressed by the variance in the central limit theorem satisfied by the sequence $d(z_n,o)$. This provides a positive answer to a question raised in \cite{BMSS}. The proof relies on the study of the Laplace transform of $d(z_n,o)$ at the origin using a martingale decomposition first introduced by Benoist--Quint together with an exponential submartingale transform and large deviation estimates for the quadratic variation process of certain martingales.
\end{abstract}

\subjclass{20F67, 60G50, 60G42, 60F10, 60F05}

\keywords{Random walks, hyperbolic spaces, martingales, large deviations, central limit theorem}

\altkeywords{Marches aléatoires, espaces hyperboliques, martingales, grandes déviations, théorème central limite}

\alttitle{Marches aléatoires sur les espaces hyperboliques: dérivée seconde en la vitesse de fuite de la fonction de taux des grandes déviations}

\begin{altabstract}
Soit $(X,d)$ un espace Gromov-hyperbolique, géodésique et séparable, $o \in X$ un point base et~$\mu$ une mesure de probabilité non élémentaire et à support dénombrable sur le groupe $\mathrm{Isom}(X)$ des isométries de $X$. Notons par $z_n$ la marche aléatoire sur $X$ induite par $\mu$. 
Sous l'hypothèse de moment exponentiel fini de $\mu$, nous donnons un développement de Taylor d'ordre $2$ de la fonction de taux des grandes déviations de la suite de variables aléatoires $\frac{1}{n}d(z_n,o)$ et exprimons la dérivée seconde en la vitesse de fuite en fonction de la variance dans le théorème central limite que vérifie la suite $d(z_n,o)$. Cela répond par l'affirmative à une question posée dans \cite{BMSS}. La preuve s'appuie sur l'étude de la transformée de Laplace de $d(z_n,o)$ en zéro en utilisant une approximation par une martingale introduite pour la première fois par Benoist-Quint, combinée avec une transformée exponentielle de martingales et des estimées de grandes déviations pour le crochet de certaines martingales.
\end{altabstract}
\maketitle

\tableofcontents
\mainmatter

\section{Introduction}
Let $(X,d)$ be a separable geodesic Gromov-hyperbolic space,
$G=\Isom(X)$, and $o \in X$ a base point of $X$. A probability measure $\mu$ on $G$ defines a random walk on the group $G$ and subsequently on the metric space $X$ in the following way. Let $(X_i)_{i\in \N}$ be a sequence of i.i.d.~random variables on $G$ with distribution $\mu$. We let $L_n=X_n \dots X_1$ denote the successive positions of the random walk on $G$. The process $(z_n)_{n \in \N}$ on $X$ defined by $z_n=L_n \cdot o$ constitutes a Markov chain on $X$ that we shall refer to as a random walk on $X$. To avoid measurability issues, we will always suppose that the probability measure $\mu$ is countably supported.

Thanks to the subadditive ergodic theorem, under a finite first moment assumption, we have the following law of large numbers
\begin{equation}\label{eq.lln}
\frac{1}{n}\, d(z_n,o) \xrightarrow[n \to \infty]{\textup{a.s.}} \ell_\mu,
\end{equation}
where $\ell_\mu \in [0,\infty)$ is a constant called the drift of the random walk. There has recently been substantial interest in the finer study of asymptotic properties of random walks on Gromov-hyperbolic spaces. This recent progress shows that the resemblance between the asymptotic behaviour of random walk displacement and classical sums of i.i.d.~real random variables goes far beyond the law of large numbers \eqref{eq.lln}: a central limit theorem (CLT) with the optimal finite second moment assumption is proved by Benoist--Quint \cite{BQ.hyperbolic} (see also Horbez \cite{horbez}) improving previous more restrictive versions by Ledrappier \cite{ledrappier.lecturenotes} and Björklund \cite{bjorklund} --- an alternative proof of the CLT was later given by Mathieu--Sisto \cite{mathieu-sisto} and in a more restrictive setting by Gouëzel \cite{gouezel.gap}. These show that for a non-elementary probability measure $\mu$ with finite second moment (see below for the definitions), we have
\begin{equation}\label{eq.clt}
\frac{1}{\sqrt{n}}\,(d(z_n,o) - n \ell_\mu) \xrightarrow[n \to \infty]{\textup{law}} \mathcal{N}(0,\sigma_{\mu}^2).
\end{equation}
The analogue of Cramér's theorem on large deviation principles was recently proved by Boulanger--Mathieu--Sert--Sisto \cite{BMSS} (see also Gouëzel \cite{gouezel.first.moment}): they showed that for a non-elementary probability measure with a finite exponential moment, the sequence $\frac{1}{n}d(z_n,o)$ satisfies a large deviation principle (LDP) with a proper convex rate function $I:[0,\infty) \to [0,\infty]$ vanishing only at the drift $\ell_\mu$: for every (measurable) subset $R$ of $[0,\infty)$, we have
\begin{equation}\label{eq.ldp}
\begin{split}
-\inf_{\alpha \in \inte(R)} I(\alpha) &\leq \liminf_{n \to \infty} \frac{1}{n}\ln \mathbb{P}\Bigl(\frac{1}{n}d(z_n,o) \in R\Bigr)\\
&\leq \limsup_{n \to \infty} \frac{1}{n}\ln \mathbb{P}\Bigl(\frac{1}{n}d(z_n,o) \in R\Bigr) \leq -\inf_{\alpha \in \overline{R}} I(\alpha),
\end{split}
\end{equation}
where $\inte(R)$ denotes the interior and $\overline{R}$ the closure of $R$.

Furthermore, concentration inequalities reminiscent of Hoeffding inequalities were recently shown by Aoun--Sert \cite{aoun-sert} and a local limit theorem for random walks on Gromov-hyperbolic groups was proved by Gouëzel \cite{gouezel.local}.

However, establishing these results analogous to the classical setting of sums of i.i.d.~real random variables involves overcoming serious issues by use of various approaches and techniques. Apart from mostly geometric approaches such as the ones used in \cite{BMSS,gouezel.first.moment,mathieu-sisto}, two classical methods are present --- say in aforementioned different proofs of the CLT. These are Nagaev's analytic method \cite{nagaev} and Gordin--Lif\v{s}ic's martingale method \cite{martingale.method}.

Nagaev's method can be seen as a version of the classical Fourier--Laplace transform and it relies on techniques of analytic perturbation theory, and in general, yields sharper estimates. However, implementing it requires proving a certain spectral gap result for a Markov operator acting on an appropriate boundary space. Although this is by-now standard, say, on classical hyperbolic spaces or on free groups, it is not well-developed in the generality of spaces, namely (not necessarily proper) geodesic Gromov-hyperbolic spaces that we shall be working with in this article. A thorough study of the analytical method in the case of Gromov-hyperbolic groups was done by Gouëzel \cite[Prop.\,3.6, \S 5]{gouezel.gap}.

We will extensively use the martingale approach --- developed in this setting by Benoist--Quint \cite{BQ.CLT, BQ.hyperbolic} and adapted to greater generality by Horbez \cite{horbez} and Aoun--Sert~\cite{aoun-sert} --- to tackle the analytic problem of giving a second-order expansion of the limit Laplace transform of the sequence $d(z_n,o)$ (or by convex duality, of its large deviation rate function in \eqref{eq.ldp}) and relating it to the variance in the central limit theorem \eqref{eq.clt}. Similar results are known to hold in settings where spectral methods are available. We now expound on these notions and precisely state the main result of this note.

A geodesic metric space $(X,d)$ is said to be Gromov-hyperbolic if there exists $\delta>0$ such that for every $x,y,z,o \in X$, we have $(x|y)_o \geq (x|z)_o \wedge (z|y)_o -\delta$, where $(.|.)_.$ is the Gromov-product given by $(x|y)_o=\frac{1}{2}(d(x,o)+d(y,o)-d(x,y))$. A probability measure $\mu$ on $\Isom(X)$ is called \textit{non-elementary} if its support $S$ generates a semigroup that contains two independent loxodromic elements (see Section \ref{sec.large.deviation}). For such a measure~$\mu$ and $n \in \N$, we denote by $\mu^{\ast n}$ its $n^\mathrm{th}$ convolution which is the law of the random variable $L_n$.

Given a probability measure $\mu$, the limit Laplace transform of the sequence $\frac{1}{n}d(z_n,o)$ is the function $\Lambda:\R \to (-\infty,\infty]$ defined by\vspace*{-3pt}\enlargethispage{.5\baselineskip}
\begin{equation}\label{eq.defn.laplace}
\Lambda(\lambda)=\lim_{n \to \infty} \frac{1}{n} \log \mathbb{E}[e^{\lambda d(z_n,o)}].
\end{equation}
Note that, since the increments are i.i.d.\ and $G$ acts by isometries on $X$, subadditivity implies that the limit in \eqref{eq.defn.laplace} exists. Under a finite super-exponential moment assumption, the Fenchel--Legendre transform of $\Lambda$ is the rate function $I$ of the large deviation principle satisfied by $\frac{1}{n}d(z_n, o)$ (see \cite[Lem.\,C.2]{BMSS}).
Using this and \cite[Th.\,1.1]{BMSS}, one can deduce that the derivative of $\Lambda$ at 0 is equal to the drift $\ell_{\mu}$.

The goal of this note is to prove the following result which answers part of \hbox{\cite[Quest.\,C.1]{BMSS}} and which says that the convex function $\Lambda$ has a second order Taylor expan\-sion at $0$ with second derivative equal to the variance in the central limit theorem:
\begin{theorem}\label{thm.main}
Let $(X,d)$ be a separable geodesic Gromov-hyperbolic space and $\mu$ a non-elementary probability measure on $\Isom(X)$. Suppose that $\mu$ has a finite exponential moment, \ie for some $\alpha>0$, $\int{e^{\alpha \,d(g\cdot o, o)} d\mu(g)}<+\infty$. Then, we have\vspace*{-3pt}
\[
\lim_{\lambda\to 0} \frac{\Lambda(\lambda)-\lambda \ell_\mu}{\lambda^2}=\frac{\sigma_\mu^2}{2}.
\]
\end{theorem}

The proof uses extensively the martingale approach developed in this context by Benoist--Quint \cite{BQ.CLT,BQ.hyperbolic}. The martingale decomposition proved in these works allows us to reduce the study of $\Lambda$ near zero to the study of the limit Laplace transform of a martingale induced by an i.i.d.\ random walk on the group $\Isom(X)$. Once this reduction is done, the proof is divided into two parts: proving the lower bound, \ie $\lim_{\lambda\to 0} \psfrac{\Lambda(\lambda)-\lambda \ell_\mu}{\lambda^2}\geq \sfrac{\sigma_\mu^2}{2}$ and the upper bound, \ie $\lim_{\lambda\to 0} \psfrac{\Lambda(\lambda)-\lambda \ell_\mu}{\lambda^2}\leq \sfrac{\sigma_\mu^2}{2}$.
The proof of the lower bound is based on a new exponential submartingale transform that we establish in Proposition \ref{prop.submart.trans}. The latter extends a classical result of Freedman \cite{Freedman} to the case of martingales with unbounded differences. The proof of the upper bound
uses ideas from martingale concentration inequalities. Another important tool is large deviation estimates for the quadratic variation of our martingales.

\skpt
\begin{remark}
\begin{enumerate}
\item
(\textit{Busemann cocycle}) A general version of Theorem \ref{thm.main} will be proved in Theorem~\ref{thm.main.tech} where the displacement $d(z_n, o)$ is replaced with the Busemann cocycle $\sigma(L_n, x)$ of $L_n$ based at any point of $x$ in the horofunction compactification of $X$. See also Question \ref{question} for an ensuing problem.

\item
(\textit{Translation distance}) Thanks to \cite[Th.\,1.3]{BMSS}, when $\mu$ has bounded support, one can replace $d(z_n,o)$ by $\tau(L_n)$ in \eqref{eq.defn.laplace}, where $\tau(.)$ denotes the translation distance given for $g \in \Isom(X)$ by $\tau(g)=\lim_{n \to \infty}\frac{1}{n}d(g^n \cdot o,o)$.

\item
(\textit{Positivity of $\sigma_\mu$}) By an argument of Benoist--Quint \cite{BQ.hyperbolic}, it follows from the expression of $\sigma_\mu^2$ (see \eqref{def.variance}) that $\sigma_\mu>0$ if and only if $\mu$ is non-arithmetic (see Remark~\ref{rk.sigma.positive}).
\end{enumerate}
\end{remark}

Using the convexity of the rate function proved in \cite{BMSS} and standard results from convex analysis, we deduce
\begin{corollary}[About the rate function]\label{corol.rate.function}
Keep the assumptions of Theorem \ref{thm.main} and let $I$ be the rate function \eqref{eq.ldp}. Then, we have\vspace*{-3pt}
\[
\lim_{\lambda \to 0}\frac{I(\ell_\mu+\lambda)}{\lambda^2}=\frac{1}{2\sigma_\mu^2}.
\]
\end{corollary}

Finally, we note that our results are also valid for the right random walk $R_n=X_1\ldots X_n$ since for every $n \in \N$, $L_n$ and $R_n$ have the same distribution.

\begin{remark}[Role of hyperbolicity and possible extensions]
The key ingredients of the proof of our main result in which hyperbolicity plays a role are Lemma \ref{lemma.martingale.dec} (in combination with the solution of the cohomological equation \eqref{cocycle.decomposition}) and Lemma \ref{lemma.unif.punctual} (a~qualitative form of which is also sufficient for the purposes). It may therefore be possible to establish this connection between large deviations and the central limit theorem in similar geometric settings (see for example \cite{horbez}, the recent monograph~\cite{choi} and the articles \cite{choi1,choi2}).
\end{remark}

The paper is organized as follows. In Section \ref{sec.martingale}, we recall some preliminaries on submartingales and prove an exponential transform for submartingales. In Section~\ref{sec.hyperbolic}, we~recall basic definitions about Gromov-hyperbolic spaces and metric compactifications as well as results from the theory of random walks on hyperbolic spaces. In~particular, we recall that $d(z_n, o) -n\ell_{\mu}$ is at bounded distance from a martingale and prove a large deviation estimate for the predictable quadratic variation of the latter. In Section \ref{sec.proof} we prove Theorem \ref{thm.main} in its general form Theorem \ref{thm.main.tech}, by~treating separately the lower bound (Section \ref{subsec.lower.bound}) and the upper bound (Section \ref{subsec.upper.bound}). In~Section~\ref{subsec.corol}, we~deduce Corollary \ref{corol.rate.function}, and finally, discuss some ensuing questions in Section~\ref{subsec.questions}.

\subsubsection*{Acknowledgements}
The authors would like to thank the anonymous referee for the careful reading of our paper and useful suggestions.

\section{Preliminaries on martingales}\label{sec.martingale}

In this section, we recall some preliminaries from the theory of martingales and prove a result about exponential martingale transforms that will play a crucial role in the proof of our main theorem.

Let us first fix our notation. We shall denote by $\mathcal{F}=(\mathcal{F}_n)_{n\in \N}$ an increasing sequence of $\sigma$-algebras (a filtration) on a fixed standard probability space $\Omega$. Usually, we will consider the filtration to be fixed and omit it from the notation. The notation $M=(M_n)_{n \in \N}$ will be reserved for an adapted sequence of random variables that form either a martingale or submartingale. Denoting by $\Delta M$ the sequence of differences given by $\Delta_n M:=M_n-M_{n-1}$, we recall that $M$ being a submartingale means that for every $n\in\nobreak \N$, $M_n$ is $\mathcal{F}_n$-measurable, integrable, and it satisfies \hbox{$\mathbb{E}[\Delta_n M | \mathcal{F}_{n-1}] \geq 0$}. In the sequel, unless otherwise stated, we take $M_0=0$ a.s. The predictable quadratic variation (or conditional quadratic variation) of the submartingale $M_n$ is denoted by $\langle M\rangle_n:= \sum_{i=1}^n \mathbb{E}[(\Delta_i M)^2|\mathcal{F}_{i-1}]$. Given a positive constant $a>0$, we denote
\begin{equation}\label{transform.G}
G_n^a=\sum_{i=1}^n \mathbb{E}[(\Delta_i M)^2 1_{|\Delta_i M| \leq a}| \mathcal{F}_{i-1}]-a \sum_{i=1}^n |\Delta_i M| 1_{|\Delta_i M| \geq a}.\end{equation} Finally, the following special function defined on $\R$ will play a significant role: $\mathfrak{f}(\lambda)=e^{-\lambda} -1 +\lambda$.\vspace*{-3pt}

We start by recalling Freedman's submartingale transform whose statement and proof strategy will be used in our generalization below.

\skpt
\begin{proposition}[{\cite[Cor.\,1.4(b), (3.7) \& (3.9)]{Freedman}}]\label{prop.freedman.submartingale}
\begin{enumerate}
\item
Let $X$ be an integrable random variable with $\mathbb{E}(X)=0$ (\resp $\mathbb{E}[X] \geq 0$) and $X\geq -1$ (\resp $|X| \leq 1$) a.s. Then, for every $\lambda \geq 0$, we have
\[
\mathbb{E}[e^{\lambda X}] \geq e^{\mathfrak{f}(\lambda) \mathrm{Var}(X)}.
\]
\item
Let $(M_n)_{n\in \N}$ be a submartingale and such that for every $1 \leq n \in \N$, $|\Delta_n M|\leq 1$ almost surely. Then for every $\lambda \geq 0$, the sequence of random variables
\[
\left(\exp\left(\lambda M_n - \mathfrak{f}(\lambda) \langle M\rangle_n\right)\right)_{n \in \N}
\]
is a submartingale with respect to the same filtration.
\end{enumerate}
\end{proposition}

We note that the second statement above is a consequence of the first one.
The following result provides a generalization of Proposition \ref{prop.freedman.submartingale} to submartingales with increments possessing a finite exponential moment.

\begin{proposition}[Submartingale transform]\label{prop.submart.trans}
Let $(M_n)_{n\in \N}$ be a submartingale. Suppose that there exists a constant $\alpha>0$ such that for every $n \in \N$, we have $\mathbb{E}[e^{\alpha \sum_{k=1}^n |\Delta_k M|}]<\infty$. Then, given any $a>0$, for every $\lambda >0$ small enough, the sequence of random variables
\[
\Bigl(\exp\Bigl(\lambda M_n - \frac{\mathfrak{f}(\lambda a)}{a^2}G_n^a\Bigr)\Bigr)_{n \in \N}
\]
is a submartingale with respect to the same filtration.
\end{proposition}

This is an extension (to unbounded differences) of Freedman's submartingale transform in his seminal work \cite{Freedman}. Indeed, if the difference sequence $\Delta_n M$ satisfies $|\Delta_n M| \leq 1$ a.s., the transform in the previous result boils down to Proposition \ref{prop.freedman.submartingale}. On the other hand, it applies, for instance, when there exists a constant $\alpha>0$ such that for every $n\in \N$, $\mathbb{E}[e^{\alpha |\Delta_n M|} | \mathcal{F}_{n-1}]<+\infty$.
This will be the case in our application.
The counterparts of Proposition \ref{prop.submart.trans} for supermartingale transforms were obtained by Dzhaparidze--van Zanten \cite{DZ} (see also Fan--Grama--Liu \cite{fan.grama.liu}).

\begin{proof} Let $a>0$. By the finite exponential moment hypothesis on $(M_n)_{n \in \N}$, it is clear that for every $\lambda>0$ small enough and for every $n \in \N$, $\exp(\lambda M_n - \Psfrac{\mathfrak{f}\,(\lambda a)}{a^2}G_n^a)$ is $\mathcal{F}_n$-measurable and integrable. Therefore, by expanding the conditional expectation, one sees that it is enough to show the following: for any integrable random variable~$X$ with $\E[X]\geq 0$, for any $\lambda>0$,
\begin{equation}\label{inequality.super}
\E\Bigl[\exp\Bigl(\lambda X + \frac{\mathfrak{f}(\lambda a)}{ a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr]
\geq e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \E[X^2 \mathds{1}_{|X| \leq a}]}.\end{equation}
Denote by $\nu$ the distribution of $X$.
\begin{itemize}
\item Case 1: $\E[X]=0$ and $\nu$ is supported on two points $-c$ and $d$ with $c,d>0$ and both $c,d \leq a$. Let $r:=\max\{c,d\}$.
Since $\E[X]= 0$ and
$X\geq -r$ almost surely, part~(1) of Proposition \ref{prop.freedman.submartingale} (applied to $\sfrac{X}{r}$ and to $\lambda r$) yields
\[
\E\Bigl[\exp\Bigl(\lambda X + \frac{\mathfrak{f}(\lambda a)}{ a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr]= \E[\exp(\lambda X)]\geq e^{\Psfrac{\mathfrak{f}(\lambda r)}{r^2} \E[X^2]}.
\]
Since the function $x\mto \sfrac{\mathfrak{f}(x)}{x^2}$ is decreasing on $\R$ and since $r \leq a$, we deduce that $\sfrac{\mathfrak{f}(\lambda r)}{r^2}\geq \sfrac{\mathfrak{f}(\lambda a)}{a^2}$. Therefore
\[
\E\Bigl[\exp\Bigl(\lambda X + \frac{\mathfrak{f}(\lambda a)}{a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr]\geq e^{\Psfrac{\mathfrak{f}(\lambda a) }{a^2} \E[X^2]} = e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \E[X^2 \mathds{1}_{|X|\leq a}]}.
\]

\item Case 2: $\E[X]=0$ and $\nu$ is supported exactly on two points $-c$ and $d$ and we are not in Case 1. By Jensen's inequality, we obtain
\begin{equation}\label{eq.i1}
\begin{aligned}
\E\Bigl[\exp\Bigl(\lambda X +\frac{\mathfrak{f}(\lambda a)}{ a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr] &\geq \exp\Bigl(\lambda \E[X]+ \frac{\mathfrak{f}(\lambda a)}{a}\, \E[|X|\mathds{1}_{|X|\geq a}]\Bigr)\\ &=
\exp\Bigl(\frac{\mathfrak{f}(\lambda a)}{a}\, \E[|X| \mathds{1}_{|X|\geq a}]\Bigr).
\end{aligned}
\end{equation}
If both $c,d\geq a$, then the right hand side of \eqref{inequality.super} is equal to $1$ and hence \eqref{inequality.super} holds in view of \eqref{eq.i1}. So, since we are also not in Case 1, we can suppose that either $c > a$ and $d< a$, or $d > a$ and $c< a$. Let us treat the case $c > a$ and $d < a$. Notice also that since $\E[X]=0$ we have $\p(X=-c)=\spfrac{d}{c+d}$ and $\p(X=d)=\spfrac{c}{c+d}$.
By assumption on $c,d$ and \eqref{eq.i1}, these yield
\begin{equation}\label{inequality.super1}
\E\Bigl[\exp\Bigl(\lambda X +\frac{\mathfrak{f}(\lambda a)}{ a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr]
\geq \exp\Bigl(\frac{\mathfrak{f}(\lambda a)}{a}\, \frac{cd}{c+d}\Bigr).\end{equation}
On the other hand,
\begin{equation}\label{inequality.super2}
\exp \Bigl(\frac{\mathfrak{f}(\lambda a)}{a^2}\, \E[X^2 1_{|X|\leq a}]\Bigr)=
\exp\Bigl(\frac{\mathfrak{f}(\lambda a)}{a^2}\, \frac{d^2c}{c+d}\Bigr).\end{equation}
Since $d\leq a$, \eqref{inequality.super} follows from combining \eqref{inequality.super1} and \eqref{inequality.super2}. The case $d>a$ and $c\leq a$ can be treated similarly.

\item Case 3: $\E[X]\geq 0$ and $\nu$ is supported on two points $\{-c,d\}$ with $c,d>0$. We will study the behavior of the left-hand-side and the right-hand-side of \eqref{inequality.super} when we vary $\nu$ with the condition $\E[X]\geq 0$, while fixing $a,c,d,\lambda$. Since $\nu$ is supported on two points, it is enough to treat the behavior of these quantities when
$\beta:=\p(X=d)$ varies. Observe that since $\E[X]\geq 0$, we have $ \spfrac{c}{c+d}\leq \beta \leq 1$. The function
$\psi_1(\beta):=\E_{\nu}[\exp(\lambda X + \Psfrac{\mathfrak{f}(\lambda a)}{a} |X| \mathds{1}_{|X|\geq a})]$ is affine in $\beta$ while the function $\psi_2(\beta):= e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \E_{\nu}[X^2 \mathds{1}_{|X| \leq a}]}$ is convex in $\beta$ (being of the form $\psi_2(\beta)=e^{L(\beta)}$ with $L$ an affine map). Thus the function $\psi:=\psi_1-\psi_2$ is concave on $[\spfrac{c}{c+d}, 1]$. It suffices then to check that $\psi(\spfrac{c}{c+d})\geq 0$ and that $\psi(1)\geq 0$. The case $\beta=\spfrac{c}{c+d}$ corresponds to the case $\E_{\nu}[X]=0$ and hence, by cases 1 and 2, $\psi(\spfrac{c}{c+d})\geq 0$.
The case $\beta=1$ corresponds to $\nu=\delta_d$. Clearly, $\psi(1)\geq 0$ when $d\geq a$. When $d<a$, the relation $\psi(1)\geq 0$ follows from the facts that the function $x\mto \sfrac{\mathfrak{f}(x)}{x^2}$ is decreasing and that $\mathfrak{f}(x)\leq x$ for every $x\geq 0$. This concludes the proof of \eqref{inequality.super} in this case.

\item Case 4: here we treat the general case (\cf proof of \cite[Prop.\,3.6]{Freedman}). Since $\E[X]\geq 0$, we can find a family $(\nu_{\alpha})_{\alpha \in I}$ of probability measures, each supported on two points $-c_{\alpha}\leq 0$ and $d_{\alpha}>0$ and of expectation $\geq 0$, and a probability measure $\theta$ on $I$ such that $\nu=\int{d\theta(\alpha) \nu_{\alpha}}$. We have
\begin{equation*}
\begin{aligned}
\E\Bigl[\exp\Bigl(\lambda X + \frac{\mathfrak{f}(\lambda a)}{a}\, |X| \mathds{1}_{|X|\geq a}\Bigr)\Bigr]&=\int
\left(\int e^{\lambda x +\Psfrac{\mathfrak{f}(\lambda a)}{a}|x| 1_{|x| \geq a}}d\nu_\alpha(x)\right)d\theta(\alpha) \\
&\geq \int e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \int x^2 1_{|x| \leq a} d\nu_\alpha(x)}d\theta(\alpha) \\
& \geq e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \iint{x^2 \mathds{1}_{|x|\leq a} d\nu_{\alpha}(x) d\theta(\alpha)}}\\
&= e^{\Psfrac{\mathfrak{f}(\lambda a)}{a^2} \mathbb{E}[X^2 1_{|X| \leq a}]},
\end{aligned}
\end{equation*}
where we applied \eqref{inequality.super} for each probability measure $\nu_{\alpha}$ in the second inequality and Jensen in the third inequality.\qedhere
\end{itemize}
\end{proof}

\section{Random walks on hyperbolic spaces}\label{sec.hyperbolic}

\subsection{Preliminaries on hyperbolic spaces}\label{subsec.hyperbolic}
Let us first fix our notation.
Let $(X,d)$ be a geodesic metric space. Fix a base point $o\in X$. Recall that $(X,d)$ is said to be $\delta$-hyperbolic (where $\delta\geq 0$) if
for every $x,y,z,o \in X$,
\begin{equation}\label{eq.defining.eq}
(x|y)_o \geq (x|z)_o \wedge (z|y)_o -\delta,
\end{equation}
where $(.|.)_.$ is the Gromov-product given by $(x|y)_o=\frac{1}{2}(d(x,o)+d(y,o)-d(x,y))$. For simplicity, we will often omit the basepoint $o$ from the notation.
We recall that this category of metric spaces comprises many usual spaces: trees, classical hyperbolic spaces, the fundamental group of compact surfaces of genus $\geq 2$. We recall that the definition of hyperbolicity is equivalent to geodesic triangles being thin. We refer to~\cite{hyperbolic-book} for general properties of these spaces. Denote by $G:=\Isom(X)$ the group of isometries of the metric space $(X,d)$. The displacement of $g\in G$ is by definition
\[
\kappa(g):=d(g\cdot o, o).
\]
An element $\gamma \in G$ is said to be loxodromic if for any $x \in X$, the sequence $(\gamma^nx)_{n \in \Z}$ constitutes a quasi-geodesic (see \cite[Ch.\,3]{hyperbolic-book}). Equivalently, $\gamma$ is loxodromic if and only if it fixes precisely two points $x_\gamma^+,x_\gamma^-$ on the Gromov boundary $\partial X$ of $X$ \cite[Ch.\,9 \& 10]{hyperbolic-book}. Two loxodromic elements $\gamma_1,\gamma_2$ are said to be independent if the sets of fixed points $\{x^+_{\gamma_i} ,x^-_{\gamma_i}\}$ for $i=1,2$ are disjoint. Finally, a set $S$, or equivalently a probability measure with support $S$, is said to be non-elementary if the semigroup generated by~$S$ contains at least two independent loxodromic elements.

Now we recall the definition of the Busemann compactification of $X$ (no need for hyperbolicity in this part). Denote by $\Lip^1(X)$ the set of real-valued Lipschitz functions on $X$ with Lipschitz constant $1$, endowed with the topology of pointwise convergence. Fixing $o \in X$, for $x \in X$, let $h_x \in \Lip_{o}^1(X)$ be the function defined by $h_x(m)=d(x,m)-d(x,o)$, where $\Lip_{o}^1(X)$ is the subspace of $\Lip^1(X)$ consisting of functions $f$ satisfying $f(o)=0$. If $X$ is separable (as we suppose in the sequel), the closure of $\{h_x \sep x \in X\}$ is a compact metrizable subset of $\Lip^1_o(X)$, called the \textit{horofunction compactification} of $X$ (see \eg \cite[Prop.\,3.1]{maher-tiozzo}). It will be denoted as~$\overline{X}^h$. The map $x \mto h_x$ is injective on $X$ (and an embedding when $X$ is a proper metric space) and we usually identify $X$ with its image in $\overline{X}^h$. The \textit{horofunction boundary} of $X$ is defined as $\partial_h X:=\overline{X}^h\setminus X$. The group of isometries $\Isom(X)$ acts on $\overline{X}^h$ by homeomorphisms given, for $g \in \Isom(X)$, $h \in \overline{X}^h$ and $m \in X$, by $(g\cdot h)(m)=h(g^{-1}m)-h(g^{-1}o)$. This extends equivariantly the isometric action of $\Isom(X)$ on~$X$ and the set $\partial_h X \subset \overline{X}^h$ is invariant under $\Isom(X)$. The \textit{Busemann cocycle} \hbox{$\sigma: \Isom(X) \times \overline{X}^h \to \mathbb{R}$} is defined~by\vspace*{-3pt}
\[
\sigma(g,h)=h(g^{-1}o).
\]
Observe that for every $g\in G$ and $x\in \overline{X}^h$,\vspace*{-3pt}
\begin{equation}\label{useful}\sigma(g,o)=\kappa(g)\qand |\sigma(g,x)|\leq \kappa(g).
\end{equation}

Finally, we recall that the Gromov-product can be extended to the whole Busemann compactification by setting $(x|y)_o:=-\min_{z\in X}{\frac{1}{2}(h_x(z)+h_y(z))}$. In particular, one can infer that for $x\in \overline{X}^h$ and\vspace*{-3pt}
$y\in X$, \begin{equation}\label{gromov.extended.space.boundary}(x|y)_o=\frac{1}{2} (d(y,o)-h_x(y)).\end{equation}

\iffalse
The following lemma records some first properties of the function $\Lambda(.)$:
\begin{lemma}
Given a probability measure $\mu$ with finite super exponential moment, the log-Laplace transform $\Lambda(.)$ is a convex function that is finite in an open interval $(-\infty,s_0)$ with $s_0 \in (0,\infty] $ and such that $\Lambda(s)=\infty$ for every $s>s_0$. We have $\Lambda(0)=0$ and $\Lambda$ is differentiable at $0$ with derivative equal to $\ell_{\mu}$.
\end{lemma}

\begin{proof}
On the one hand, since by \cite[Th.\,1.2]{BMSS} the rate function $I$ has a unique zero at the drift $\ell_{\mu}$ and since $\Lambda=I^*$, we deduce by a general result of Legendre-Fenchel transform (see for instance Theorem 5 in \url{https://www.ise.ncsu.edu/fuzzy-neural/wp-content/uploads/sites/9/2019/01/or706-LF-transform-1.pdf}) that $\Lambda$ is differentiable at zero.

On the other hand, by Jensen's inequality and concavity of the logarithm function, the following holds for every $\lambda\in \R$ and $n\in \N$:
$\frac{1}{n}\log \E(e^{\lambda \kappa_n})\geq \lambda \frac{\E(\kappa_n)}{n}$. Letting $n\to +\infty$, we deduce
that $\Lambda(\lambda)\geq \lambda \ell_{\mu}$. Hence $\Lambda'(0^+)\geq \ell_{\mu}$ and $\Lambda'(0^-)\leq \ell_{\mu}$. But we have shown that $\Lambda'(0^+)=\Lambda'(0^-)$. This concludes the proof.
\iffalse
(note that the derivatives at right and left exist by convexity of the function $\Lambda$).

Let us first check that $\Lambda'(0^+)\leq \ell_{\mu}$.
Let then $\lambda>0$. Since $\lambda>0$, by subadditivity the limit in \eqref{Laplace} is an infimum. Thus for every $n\in \N$,
$\Lambda(\lambda)\leq \frac{1}{n} \log \E(e^{\lambda \kappa_n})$.
Using the finite exponential assumption on $\mu$ and the inequality $\exp(x)\leq 1+x+\frac{x^2}{2} e^{|x|}$ true for every $x\in \R$, we get that for every $n\in \N$,
\[
\Lambda(\lambda)\leq \frac{1}{n} \log(1+ \lambda \E(\kappa_n) + \lambda^2 e^{n \lambda C}) \leq \lambda \frac{\E(\kappa_n)}{n} + \frac{\lambda^2 e^{n \lambda C}}{n},
\]
where $C>0$ depends only on $\mu$. Let $\epsilon>0$. By definition of the drift $\ell_{\mu}$, we can find $n_0\in \N$ (independent of $\lambda$) such that $\frac{\E(\kappa_{n_0})}{n_0}\leq \ell_{\mu} + \epsilon$. Thus
\[
\Lambda(\lambda)\leq \lambda (\ell_{\mu}+\epsilon) + \frac{\lambda^2 e^{n_0 \lambda C}}{n_0}.
\]
Letting now $\lambda\to 0$ then $\epsilon\to 0$, we deduce that $\Lambda'(0^+)\leq \ell_{\mu}$ and a fortiori $\Lambda'(0^+)=\ell_{\mu}$ (note that this recovers $\Lambda(0^-)\leq \ell_{\mu}$ by convexity of $\Lambda$).
Note also that since $\Lambda(\lambda)\leq 0 \leq \Lambda(-\lambda)$ for each $\lambda<0$, we have $\Lambda'(0^-)\geq -\ell_{\mu}$.
\fi
\end{proof}

\begin{remark}
With only exponential moment, I am only able to show that
$\Lambda(0^+)=\ell_\mu$. Indeed, again by concavity of the logarithm, we have $\Lambda(0^+)\geq \ell_\mu$ so it is enough to show that $\Lambda(0^+)\leq \ell_\mu$. Let $\lambda>0$.
Since $\lambda>0$, by subadditivity the limit in \eqref{Laplace} is an infimum. Thus for every $n\in \N$,
$\Lambda(\lambda)\leq \frac{1}{n} \log \E(e^{\lambda \kappa_n})$.
Using the finite exponential assumption on $\mu$ and the inequality $\exp(x)\leq 1+x+\frac{x^2}{2} e^{|x|}$ true for every $x\in \R$, we get that for every $n\in \N$,
\[
\Lambda(\lambda)\leq \frac{1}{n} \log(1+ \lambda \E(\kappa_n) + \lambda^2 e^{n \lambda C}) \leq \lambda \frac{\E(\kappa_n)}{n} + \frac{\lambda^2 e^{n \lambda C}}{n},
\]
where $C>0$ depends only on $\mu$.
Thus for every $n\in \N$,
\[
\frac{\Lambda(\lambda)}{\lambda}\leq \frac{\E(\kappa_n)}{n} + \frac{\lambda e^{nC}}{n}.
\]
Fix $n\in \N$. Letting $\lambda\to 0$, we get that
\[
\frac{\Lambda(\lambda)}{\lambda}\leq \frac{\E(\kappa_n)}{n}.
\]
Letting now $n\to +\infty$, we deduce that
\[
\Lambda(0^+)\leq \ell_{\mu},
\]
as desired.
The problem with $\Lambda(0^-)$ is that first it is a supremum and then when I divide by $\lambda$ I obtain a $\leq$. Note also that once we know that $\Lambda(0^+)\geq \ell_{\mu}$, the convexity of $\Lambda$ obliges $\Lambda(0^-)\leq \ell_{\mu}$; which we already know.
\end{remark}
\fi

\subsection{Random walks}
\label{sec.large.deviation}
There are two main goals in this section. The first one (discussed in Section \ref{subsub.mart.dec}) is to recall a martingale decomposition (Lemma \ref{lemma.martingale.dec}) of the Busemann cocycle along non-elementary random walks on Gromov-hyperbolic spaces which is due to Benoist--Quint \cite{BQ.CLT,BQ.hyperbolic} (see also an extension in \cite{horbez}). We will use a slightly more general version of this worked out in \cite{aoun-sert}. The second goal (discussed in Section \ref{subsub.bracket.large.dev}) is to prove Proposition \ref{prop.bracket.large.dev} about large deviations of predictable quadratic variation and its consequence expressed in Corollary \ref{corollary.laplace.bracket}. The latter will be crucial in the proof of our main result.

\subsubsection{Benoist--Quint martingale decomposition}\label{subsub.mart.dec}

Let $\mu$ be a probability measure on the isometry group $G$ of $X$ with countable support. Recall that it is said to have a finite exponential moment (\resp finite second moment) if there exists $\alpha>0$ such that $\int e^{\alpha d(g \cdot o, o)} d\mu(g)<\infty$ (\resp $\int \kappa(g)^2 d\mu(g)<\infty$). Let $L_n=X_n\dots X_1$ be the random walk on $G$ and $\ell_{\mu}$ the drift of the random walk on $X$ defined in \eqref{eq.lln}.
Denote by $\mathcal{F}$ the natural filtration generated by the increments $X_i$'s. Finally, we denote by $P_{\mu}$ the Markov operator on the horofunction compactification $\overline{X}^h$ induced by the random walk on $G$, \ie $P_{\mu}f(x)=\int{f(g\cdot x) d\mu(g)}$ for every bounded measurable function $f$ on $\overline{X}^h$. The starting point of the proof of Theorem \ref{thm.main} is the following.

\begin{lemma}\label{lemma.martingale.dec}
Let $\mu$ be a non-elementary probability measure with finite second moment. Then, for every $x\in \overline{X}^h$, there exists a martingale $M_x=(M_{x,n})_{n\in \N}$ with respect to the filtration $\mathcal{F}$ starting at the origin and such that for every $n\in \N$,\vspace*{-3pt}
\[
\sigma(L_n, x)-n\ell_{\mu} = M_{x,n} + O_{x,n}(1),
\]
where $O_{x,n}(1)$ is a random variable whose absolute value is bounded uniformly in $n\in \N$ and $x\in \overline{X}^h$.
\end{lemma}

\begin{proof}
When $X$ is proper, Benoist--Quint \cite[Prop.\,4.6]{BQ.hyperbolic} showed that there exists a bounded measurable function $\psi$ on the Busemann boundary $\partial_h X$ such that
\[
\psi - P_{\mu} \psi = \int{\sigma(g,x) d\mu(g)} - \ell_\mu.
\]
It was then verified in \cite{horbez} that this solution can be extended to the case when~$X$ is non-proper and also in \cite{aoun-sert} that $\psi$ could be defined on the whole compactification~$\overline{X}^h$ while preserving the boundedness of $\psi$. This is equivalent to finding a cocycle \hbox{$\sigma_0: G\times \overline{X}^h \to \R$} with constant drift equal to $\ell_{\mu}$, \ie 
\[
\int{\sigma_0(g,x) d\mu(g)}=\ell_{\mu}
\]
for every $x\in \overline{X}^h$, such that the following identity holds for every $(g,x)\in G\times \overline{X}^h $:
\begin{equation}\label{cocycle.decomposition}\sigma(g,x) = \sigma_0(g,x) - \psi(g\cdot x)+\psi(x).\end{equation}
Let then
\begin{equation}\label{equation.martingale}M_{x,n}:=\sigma_0(L_n, x)-n\ell_{\mu}.\end{equation}
The constant drift property of $\sigma_0$ implies that $M_x:=(M_{x,n})_{n\in \N}$ is a martingale with respect to the filtration $\mathcal{F}$, which finishes the proof.
\end{proof}

\begin{remark}\label{remark.martingale}
Observe that since $\E(M_{x,n})=\E(M_{x,0})=0$ for every $n\in \N$, we~obtain the existence of some $C\geq 0$ such that for every $n\in \N$ and every $x\in \overline{X}^h$,
\[
n\ell_\mu - C \leq \E[\sigma(L_n, x)] \leq n \ell_\mu+C.
\]
\end{remark}

From now on, for every $x\in \overline{X}^h$ we denote by $M_x=(M_{x,n})_{n\in \N}$ the martingale defined in the proof of Lemma \ref{lemma.martingale.dec}, \ie
\[
M_{x,n}:=\sigma_0(L_n, x)-n\ell_{\mu}.
\]

Many properties of a martingale are encoded in its different notions of quadratic variation. For instance, a martingale whose predictable quadratic variation (see below for the definition) is almost surely bounded satisfies a Bennett--Bernstein concentration result (see \cite{Freedman} for the bounded difference case and \cite{pena,DZ,fan.grama.liu} for the general case). Burkholder inequalities \cite{burkholder} are another instance of the relevance of the quadratic variation in studying martingales.

\subsubsection{Large deviation estimate for predictable quadratic variation of $M_{x,n}$} \label{subsub.bracket.large.dev}
We now proceed with the second goal of Section \ref{sec.large.deviation}, namely proving Proposition \ref{prop.bracket.large.dev} below and deducing Corollary \ref{corollary.laplace.bracket}. We first give some observations and definitions regarding the martingale $(M_{x,n})_{n\in \N}$ introduced in Section \ref{subsub.mart.dec}. The martingale difference of $(M_{x,n})_{n\in \N}$ is
\begin{equation}\label{our.martingale.difference}\Delta_n M_x:=M_{x,n}-M_{x,n-1}=\sigma_0(X_n, Z_{x,n-1})-\ell_\mu,\end{equation}
where $(Z_{x,j}:=L_j\cdot x)$ is the Markov chain on $\overline{X}^h$ induced by the random walk on $G$ and starting at $x$.
We recall that the (predictable) quadratic variation of $\langle M_x\rangle$ is the unique increasing predictable process such that $(M_{x,n}^2-\langle M_x \rangle_n)_{n\in \N}$ is a martingale. We have
\begin{equation}\label{bracket.expression}\langle M_x\rangle_n=
\sum_{j=1}^n{\E((\Delta_j M_{x})^2 | \mathcal{F}_{j-1})}=
\sum_{j=1}^n{\int{(\sigma_0(g,Z_{x,j-1}) -\ell_{\mu})^2 d\mu(g)}}.\end{equation}
We now come to the main result of this section. Its statement contains the expression
\begin{equation}\label{eq.exp.variance}
\sigma_{\mu}^2:=\iint{(\sigma_0(g,x)-\ell_{\mu})^2 d\mu(g) d\nu(x)},
\end{equation}
where $\nu$ is any $\mu$-stationary probability measure on $\overline{X}^h$ --- we will see that the integral does not depend on $\nu$. This constant $\sigma_\mu^2$ is also the variance appearing in the central limit theorem \eqref{eq.clt} (see proof of \cite[Th.\,4.7.b]{BQ.hyperbolic} or \cite[Th.\,1.3]{horbez}).

\begin{proposition}[Large deviation estimates for the quadratic variation]
\label{prop.bracket.large.dev} Let $\mu$ be a non-elementary probability measure with finite second moment. Then, for every $\epsilon>0$,
\[
\limsup_{n\to \infty}\frac{1}{n} \log \sup_{x\in X}\p(| \langle M_x \rangle_n - n \sigma_{\mu}^2 | \geq n \epsilon)<0.
\]
\end{proposition}

To proceed to prove this result, we first observe that we can reformulate the statement as a statement about large deviations for an additive functional of a Markov chain. Indeed, for $x\in \overline{X}^h$, defining
\begin{equation}\label{eq.def.phi}
\phi(x):=\int{(\sigma_0(g,x) -\ell_{\mu})^2 d\mu(g)},
\end{equation}
the expression \eqref{bracket.expression} shows that
\[
\langle M_x \rangle_n=\sum_{j=1}^n{\phi(Z_{x,j-1})}.
\]

Benoist--Quint showed a large deviation estimate for functionals along Markov chains \cite[Prop.\,3.1]{BQ.CLT}, which is a quantitative refinement of Breiman's law of large numbers. In the aforementioned paper, the authors work with continuous functions in the framework of Markov--Feller operators on compact metric spaces. However, in the generality that we work with, we were not able to prove the continuity of $\phi$. Note that by the expression \eqref{cocycle.decomposition} of the cocycle $\sigma_0$, the continuity of $\phi$ would follow from the continuity of the Gromov-product on the Busemann compactification $\overline{X}^h$.
To the best of our knowledge, the latter is known in familiar cases including trees and classical hyperbolic spaces but not in our generality (note that by \cite[\S 10]{miyachi} the Gromov-product on the Busemann compactification of a general metric space $X$ may fail to be continuous even if $X$ is proper and geodesic). To overcome this issue, we will adapt the statement of Benoist--Quint by relaxing the continuity assumption.

\Changel
\begin{proposition}[{\cite[Prop.\,3.1]{BQ.CLT} modified}]\label{proposition.deviations.MC}
Let $(Z_n)_{n\in \N}$ be a Markov chain on a state space $E$, $P$ its Markov operator and $\phi: E\to \R$ a measurable bounded function. Suppose that\vspace*{-3pt}
\begin{equation}\label{uniform}\frac{1}{n}\sum_{j=1}^n{P^j\phi(x)}\to l_{\phi}\in \R,\end{equation} uniformly in $x\in E$. Then the following large deviation estimate holds: for every $\epsilon>0$
\[
\limsup_{n\to +\infty}\frac{1}{n}\log \p_x \bigl(\ts
\sum_{i=1}^n\phi(Z_i) \not\in [nl_{\phi} - n\epsilon , nl_{\phi} +n\epsilon] \bigr)<0,
\]
uniformly in $x\in E$.
\end{proposition}
The proof is an adaptation of Benoist--Quint's proof of \cite[Prop.\,3.1]{BQ.CLT}. We include it for the convenience of the reader.
\begin{proof}
We begin with a general result, which can be seen as a quantitative version of the law of large numbers stated in \cite[Th.\,1.6]{BQ.book}. If $\xi=(\xi_n)_{n\in \N}$ is a sequence of bounded real random variables adapted to a filtration $\mathcal{F}=\{\mathcal{F}_n \sep n\in \N\}$, then for every $\epsilon'>0$ and $n\in \N$,
\begin{equation}\label{inter}
\p \bigl(|\ts\sum_{j=1}^n(\xi_j - \E[\xi_j| \mathcal{F}_{j-1}])| \geq n\epsilon'\bigr)\leq 2 \exp(-n\epsilon'^2/8\|\xi\|_{\infty}^2).\end{equation}
Indeed the sequence $\{\xi_n-\E[\xi_n| \mathcal{F}_{n-1}] \sep n\in \N\}$ is a bounded martingale difference sequence with respect to the filtration $\mathcal{F}$ and hence \eqref{inter} follows from Azuma--Hoeffding's concentration inequality for martingales with bounded differences.

In the second step, we show that for every $m\in \N$, $\frac{1}{n}\sum_{j=1}^n{\phi(Z_j)}$ concentrates around the Cesàro average $\frac{1}{m}\sum_{l=1}^m{P^l \phi(Z_j)}$; more precisely for every $\epsilon'>0$, $n,m\in \N$, and $x \in X$,\vspace*{-3pt}
\begin{multline}\label{toshow}
\p_x\Bigl(\bigl|\ts\sum_{j=1}^{n}
{\bigl[\phi(Z_{j})-\frac{1}{m}\sum_{l=1}^mP^l\phi(Z_{j})}\bigr]\bigr| \geq m n \epsilon'+2m\|\phi\|_{\infty}\Bigr)\\
\leq 2 m^2
\exp(-n\epsilon'^2/8\|\phi\|_{\infty}^2).
\end{multline}
Indeed, let $1\leq l \leq m$. We write\vspace*{-3pt}
\[
\sum_{j=l+1}^{l+n}
{(\phi(Z_{j})-\E_x[\phi(Z_j) | \mathcal{F}_{j-l}])}=
\sum_{k=0}^{l-1} \sum_{j=l+1}^{l+n}{
(\E_x[\phi(Z_j)| \mathcal{F}_{j-k}]-
\E_x[\phi(Z_j)|
\mathcal{F}_{j-k-1}]}),
\]
where $\mathcal{F}$ is the filtration induced by the Markov chain.
For each $k\in \{0,\cdots, l-1\}$, we apply \eqref{inter} with
the sequence
of random variables
$\{\xi_{j,k}=\E_x[\phi(Z_j)| \mathcal{F}_{j-k}] \sep j\in \N\}$ which are adapted to the filtration
$\{\mathcal{F}_{j-k}\sep j\in \N\}$ and bounded by $\|\phi\|_{\infty}$. Combining the resulting $l$ estimates, we obtain that
\[
\p_x\bigl(\ts\sum_{j=l+1}^{l+n}
(\phi(Z_{j})-\E_x[\phi(Z_j)|\mathcal{F}_{j-l}]) \geq l n \epsilon'\bigr)\leq 2 l
\exp(-n\epsilon'^2/8\|\phi\|_{\infty}^2).
\]
Noticing that $\E_x[\phi(Z_j)|\mathcal{F}_{j-l}]=P^l\phi(Z_{j-l})$, the previous estimate gives (after killing the boundary terms using the boundedness of $\phi$) that
\[
\p_x\bigl(\bigl|\ts\sum_{j=1}^{n}
{[\phi(Z_{j})-P^l\phi(Z_{j})]}\bigr| \geq l n \epsilon'+2l\|\phi\|_{\infty}\bigr)\leq 2l
\exp(-n\epsilon'^2/8\|\phi\|_{\infty}^2).
\]
Estimate \eqref{toshow} immediately follows.

Finally, let $\epsilon>0$ be given. The uniform convergence \eqref{uniform} yields an integer $m_0$ such that $\p_x$-almost surely for every $j\in\N$,
\begin{equation}\label{uniform.consequence}\frac{1}{m_0}\sum_{l=1}^{m_0} {P^l\phi(Z_{j})}\in [l_{\phi}-\sfrac{\epsilon}{3}, l_{\phi}+\sfrac{\epsilon}{3}].\end{equation}
Plugging \eqref{uniform.consequence} into \eqref{toshow} with $m=m_0$ and
$\epsilon'=\sfrac{\epsilon}{3m_0}$ gives some constant $C(\epsilon)>0$ such that for every $n\geq \sfrac{6m_0 \|\phi\|_{\infty}}{\epsilon}$,
\[
\p_x \bigl(\ts
\sum_{i=1}^n\phi(Z_i) \not\in [nl_{\phi} - n\epsilon , nl_{\phi} +n\epsilon] \bigr)\leq \exp(-n C(\epsilon)),
\]
concluding the proof.
\end{proof}
\Changelback

\begin{remark}
If $E$ is a compact metric space, $P$ a Markov Feller operator and $\phi$ is a continuous function which has a unique average with respect to stationary measures on $E$, then \eqref{uniform} is fulfilled. As mentioned earlier, this is the case, for instance, for random walks on trees, classical hyperbolic spaces and also for strongly irreducible and proximal random walks on projective spaces (see for instance \cite{BQ.book}).
\end{remark}

We now check that \eqref{uniform} is satisfied for our function $\phi$ defined in \eqref{eq.def.phi} and the Markov operator $P=P_\mu$ of the Markov chain on $\overline{X}^h$ induced by the random walk on~$G$ (see Section \ref{sec.large.deviation}).

\begin{lemma}\label{lemma.unif.conv}
Let $\phi:\overline{X}^h \to \R$ as defined in \eqref{eq.def.phi}. Then the sequence of functions $f_n(x):=\frac{1}{n}\sum_{j=1}^n{P^j_\mu \phi(x)}$ converges uniformly on $\overline{X}^h$ to $\sigma^2_\mu\geq 0$. The limit $\sigma_{\mu}^2$ can be expressed as
\begin{equation}\label{def.variance}\sigma_{\mu}^2:=\iint{(\sigma_0(g,x)-\ell_{\mu})^2 d\mu(g) d\nu(x)},
\end{equation}
where $\nu$ is any $\mu$-stationary measure on $\overline{X}^h$.
\end{lemma}

\begin{remark}\label{rk.sigma.positive}
It follows from \eqref{def.variance} and the argument in the proof of \cite[Th.\,4.7.b]{BQ.hyperbolic} that $\sigma_\mu=0$ if and only if there exists a constant $C>0$ such that for every $n \in \N$ and $g\in \supp(\mu^{\ast n})$, we have $|\kappa(g)-n\ell_{\mu}|\leq C$. It follows that $\sigma_\mu>0$ if and only if $\mu$ is non-arithmetic.
Here, a probability measure $\mu$ on $\Isom(X)$ is said to be non-arithmetic if there exists $n \in \N$ and $g,g' \in \supp(\mu^{\ast n})$ such that $\tau(g) \neq \tau(g')$ where $\tau$ is the translation distance, $\tau(g)=\lim_{n \to \infty}\sfrac{\kappa(g^n)}{n}$.
\end{remark}

The proof of the previous lemma is based on showing that $f_n(x)-f_n(y)$ converges uniformly to zero (see \eqref{expression}), which imposes the limit to be the average $\sigma_{\mu}^2$ as defined in \eqref{def.variance}. To prove this, we express $f_n(x)$ as the variance of $\sfrac{M_{x,n}}{\sqrt{n}}$ (see \eqref{identity1}). Using Burkholder's inequalities, the proof boils down to showing deviation inequalities for $\sigma(L_n,x)-\sigma(L_n,y)$ uniformly in $x,y\in \overline{X}^h$ (see \eqref{eq.botim1}). For the latter fact, we will use the following lemma which is a direct consequence of uniform punctual deviation estimates given in \cite[Prop.\,2.12]{BMSS}.

\begin{lemma}[Uniform punctual deviations]\label{lemma.unif.punctual}
Keep the hypotheses of Proposition \ref{prop.bracket.large.dev}. Then there are constants $C, \beta >0$ such that for any $k \in \N$ and any $x \in \overline{X}^h$, $R>0$, we have
\[
\mathbb{P}(\kappa(L_k)-\sigma(L_k,x) > R) \leq C e^{-\beta R}.
\]
\end{lemma}
\begin{proof}
Notice that for $g\in \Isom(X)$ and $x \in \overline{X}^h$, by \eqref{gromov.extended.space.boundary} we have $\kappa(g)-\nobreak\sigma(g,x)=2(g^{-1}o | x)$. In particular, when $x \in X$, the statement precisely corresponds to \hbox{\cite[Prop.\,2.12]{BMSS}} applied with the image $\check{\mu}$ of $\mu$ by the map $g \mto g^{-1}$ on $\Isom(X)$. To~extend it to $\overline{X}^h$, given $x \in \overline{X}^h$, let $x_n$ be a sequence in $X$ such that $x_n \to x$ in~$\overline{X}^h$. By continuity of $\sigma(g,\cdot)$, we have $\kappa(g)-\sigma(g,x)=\lim_{n \to \infty}2(g^{-1}o|x_n)$. Therefore, given $R>0$,
\begin{equation}\label{eq.tofatou}\ts
\mu^{\ast k}\{g \sep \kappa(g)-\sigma(g,x) >R \}=\check{\mu}^{\ast k} \{g \sep \lim_{n \to \infty}(go|x_n) >R/2\},
\end{equation}
for every $k \in \N$. Denoting by $h_n(\cdot)$, the map $g \mto \mathds{1}_{(go|x_n)>R/2}$, by \eqref{eq.tofatou} we have
\[
\mathbb{P}(\kappa(L_k)-\sigma(L_k,x) > R)=\int \lim_{n \to \infty}h_n(g) d\check{\mu}^{\ast k}(g)=\lim_{n \to \infty} \check{\mu}^{\ast k} \{g\sep (go|x_n) >R/2\},
\]
where we used dominated convergence in the last equality.
Hence the statement follows from \cite[Prop.\,2.12]{BMSS}.
\end{proof}

\begin{proof}[Proof of Lemma \ref{lemma.unif.conv}]
First, we reduce the problem to showing that \begin{equation}\label{uniform.distance}
f_n(x)-f_n(y)\to 0
\end{equation}
uniformly in $x$ and $y$ in $\overline{X}^h$. Indeed, let us assume for a while that \eqref{uniform.distance} holds. Fix any $\mu$-stationary measure $\nu$ on $\overline{X}^h$ (the latter exists by compactness of $\overline{X}^h$). We have for every $n\in \N$,
\begin{equation}\label{expression}f_n(x)=\frac{1}{n}\sum_{j=1}^n{\E[\phi(Z_j)| Z_0=x]}
= \frac{1}{n}\sum_{j=1}^n{\E[\ts\int{(\sigma_0(g, L_{j} \cdot x)-\ell_{\mu})^2 d\mu(g)}]}.
\end{equation}
Since $\nu$ is $\mu^{\ast n}$-stationary for every $n\in \N$, we deduce that for every $n\in \N$,
\[
\int_{\overline{X}^h}{f_n(x) d\nu(x)}=
\iint_{G\times \overline{X}^h}{(\sigma_0(g,x) - \ell_{\mu})^2 d\mu(g) d\nu(x)}=:\sigma^2_{\mu,\nu}.
\]
Let $\epsilon>0$ and $y\in \overline{X}^h$. We can find $n_0$ depending only on $\epsilon$ such that for every $n\geq n_0$ and for every $x\in \overline{X}^h$, $f_n(x)-\epsilon\leq f_n(y)\leq f_n(x)+\epsilon$. Integrating on both sides with respect to $d\nu(x)$, we obtain that $|f_n(y)-\sigma_{\mu,\nu}^2|\leq \epsilon$ for every $n\geq n_0$, concluding the proof of the uniform convergence of the sequence of functions $(f_n)_{n\in \N}$ towards $\sigma_{\mu,\nu}^2$. It also shows that $\sigma^2_{\mu, \nu}$ is independent of the choice of the stationary measure $\nu$.

From now on, we focus on showing the convergence \eqref{uniform.distance} uniformly in $x,y\in \overline{X}^h$. Since $(M_{x,n}^2-\langle M_x\rangle_n)_{n\in \N}$ is a martingale starting at zero, we have that $\E[\langle M_x\rangle_n]=\E[M_{x,n}^2]$ for every $n\in \N$ so that by \eqref{expression}:
\begin{equation}\label{identity1}
f_n(x)=\frac{1}{n}\E[M_{x,n}^2]=\E\Bigl[\left(\sfrac{M_{x,n}}{\sqrt{n}}\right)^2\Bigr].
\end{equation}
Let us check that the sequence $\{(\sfrac{M_{x,n}}{\sqrt{n}})^2 \sep n\in \N,\, x\in \overline{X}^h\}$ is uniformly bounded in $L^p$ for every $p> 1$; and hence in particular uniformly integrable. Indeed,
by Burkholder's inequality (\cite[Th.\,9]{burkholder}), we have
for every $k>2$,
\begin{equation}\label{eq.apply.burkholder}
\E[|M_{x,n}|^k]\leq C_k \E[[M_x]_n^{\sfrac{k}{2}}],
\end{equation}
where $C_k>0$ is a constant depending only on $k$, $[M_x]_n=\sum_{j=1}^n{(\Delta_j M_{x})^2}$ is the quadratic variation of $M_x$.
By Jensen's inequality, we have
\[
[M_x]_n^{\sfrac{k}{2}}\leq n^{\sfrac{k}{2}-1}\sum_{j=1}^n{|\Delta_j M_{x}|^k},
\]
so that
\begin{equation}\label{intt1}
\E[[M_x]_n^{\sfrac{k}{2}}]\leq
n^{\sfrac{k}{2}-1}\sum_{j=1}^n{\E[|\Delta_j M_{x}|^k]}\leq n^{\sfrac{k}{2}}
\sup_{j\in \N}{\E[|\Delta_j M_{x}|^k}].
\end{equation}
Remembering that $\sigma(g,x)=\sigma_0(g,x)-\psi(g\cdot x)+\psi(x)$, $|\sigma(g,x)|\leq \kappa(g)$, and that $\psi$ is bounded on $\overline{X}^h$, we get that for every $n\in \N$,
\begin{equation}\label{intt2}
|\Delta_n M_{x}|=|\sigma_0(X_n, L_{n-1}\cdot x)-\ell_{\mu}|\leq \kappa(X_n) + \ell_{\mu} +2\|\psi\|_{\infty}.
\end{equation}
Since the $X_i$'s have the same distribution, by plugging \eqref{intt2} and
\eqref{intt1} in \eqref{eq.apply.burkholder} we get
\[
\E[|M_{x,n}|^k]\leq C_k n^{\sfrac{k}{2}}\E[|\kappa(X_1) + \ell_{\mu} +2\|\psi\|_{\infty}|^k].
\]
The right-hand side is finite (since $\mu$ has a finite moment of any order $k>2$) and depends neither on $n$ nor on $x$, showing the boundedness in $L^{k/2}$ of $(\sfrac{M_{x,n}}{\sqrt{n}})^2$ uniformly in $n$ and $x$.

Let now $\epsilon>0$. It follows from the uniform integrability of the family $\sfrac{M_{x,n}}{\sqrt{n}}$ that there exists $L(\epsilon)>0$ such that for every $n\in \N$ and $x,y\in \overline{X}^h$ we have
\[
\p\bigl(\max\bigl\{\sfrac{|M_{x,n}|}{\sqrt{n}}, \sfrac{|M_{y,n}|}{\sqrt{n}}\bigr\}>L(\epsilon)\bigr)<\epsilon.
\]
Using Lemma \ref{lemma.unif.punctual} together with the fact that $\sigma$ differs from $\sigma_0$ by a bounded function on $\overline{X}^h$, we obtain some $T(\epsilon)>0$ such that for every $n\in \N$, $x,y\in \overline{X}^h$,
\begin{equation}\label{eq.botim1}
\p(|\sigma_0(L_n, x)-\sigma_0(L_n,y)|>T(\epsilon))\leq \epsilon.
\end{equation}
Hence, if $A_{x,y,n}$ denotes the event
\[
A_{x,y,n}:= \left\{|\sigma_0(L_n, x)-\sigma_0(L_n, y)|>T(\epsilon) \right\} \cup \bigl\{\max\bigl\{\sfrac{|M_{x,n}|}{\sqrt{n}}, \sfrac{|M_{y,n}|}{\sqrt{n}}\bigr\}> L(\epsilon)\bigr\},
\]
we have for every $n\in \N$, $x,y\in \overline{X}^h$ that
$\p(A_{x,y,n})<2\epsilon$. Now we write
\begin{multline*}
|f_n(x)-f_n(y)|\leq \underset{\ts a_{x,y,n}}{\underbrace{ \E\Bigl[\Bigl|\bigl(\sfrac{M_{x,n}}{\sqrt{n}}\bigr)^2 - \bigl(\sfrac{M_{y,n}}{\sqrt{n}}\bigr)^2\Bigr|\mathds{1}_{A_{x,y,n}}\Bigr]}}\\
+ \underset{\ts b_{x,y,n}}{\underbrace{ \E\Bigl[\Bigl|\bigl(\sfrac{M_{x,n}}{\sqrt{n}}\bigr)^2 - \bigl(\sfrac{M_{y,n}}{\sqrt{n}}\bigr)^2\Bigr|\mathds{1}_{A_{x,y,n}^C}\Bigr]}}.
\end{multline*}
Let us estimate $a_{x,y,n}$. By Cauchy--Schwarz inequality, we have for every $n\in \N$,
\[
a_{x,y,n}^2 \leq 2\max_{x,y}{\E\bigl[\bigl(\sfrac{M_{x,n}}{\sqrt{n}}\bigr)^4\bigr]} \,\p(A_{x,y,n})\leq C_4\epsilon,
\]
where $C_4>0$ is a constant independent of $n, x$ and $\epsilon$, guaranteed by the uniform boundedness in $L^4$ shown at the beginning of the proof. Finally, we estimate $b_{x,y,n}$. Since the function $t\mto t^2$ is uniformly continuous on $[-L(\epsilon), L(\epsilon)]$, we can find $\delta(\epsilon)>0$ such that $|t^2-t'^2|<\epsilon$ whenever $|t-t'|<\delta(\epsilon)$ and $\max\{|t|,|t'|\}\leq L(\epsilon)$. Let $n_0(\epsilon)\in \N$ be such that $\sfrac{T(\epsilon)}{\sqrt{n_0(\epsilon)}}<\delta(\epsilon)$. From the definition of the event $A_{x,y,n}^C$, we deduce that for every $n\geq n_0(\epsilon)$, $x,y\in \overline{X}^h$, $b_{x,y,n}<\epsilon$.
Hence for $n\geq n_0(\epsilon)$, $x,y\in \overline{X}^h$
\[
|f_n(x)-f_n(y)|< \sqrt{C_4 \epsilon}+\epsilon,
\]
which finishes the proof of the uniform convergence \eqref{uniform.distance}. \end{proof}

We end this section with the following consequence of Proposition \ref{prop.bracket.large.dev}.
In the statement below, for every $x\in \overline{X}^h$, we use the transform $G_n^a$ introduced in \eqref{transform.G} associated to the martingale $M_x$. To ease the notation, we omit the dependence on $x$ in $G_n^a$.

\begin{corollary}\label{corollary.laplace.bracket}
Suppose $\mu$ has finite exponential moment. Then for every $x\in \overline{X}^h$,
\[
\lim_{a\to +\infty}\limsup_{\lambda\to 0^+}\frac{1}{\lambda} \limsup_{n\to +\infty}\frac{1}{n}\log {\E[\exp(-\lambda (G_n^a - n \sigma_{\mu}^2))]}= 0.
\]
\end{corollary}

\begin{proof}
Let $x\in \overline{X}^h$. The result will follow from Cauchy--Schwarz inequality and the following two estimates
\begin{equation}\label{1bis}
\limsup_{\lambda\to 0^+}\frac{1}{\lambda} \limsup_{n\to +\infty}\frac{1}{n}\log {\E[\exp(-\lambda (\langle M_x\rangle_n-n\sigma_{\mu}^2))]}\leq 0
\end{equation}
and
\begin{equation}\label{backtoidd2}
\limsup_{a\to +\infty} \limsup_{\lambda \to 0^+}\frac{1}{\lambda} \limsup_{n\to \infty} \frac{1}{n} \log \E[\exp(\lambda (\langle M_x \rangle_n - G_n^a))]\leq 0.
\end{equation}

\begin{enumeratei}
\item
We start by proving \eqref{1bis}.
To ease the notation, let $Y_n=\langle M_x\rangle_n-n\sigma_{\mu}^2$. Let $\epsilon>0$. By Proposition \ref{prop.bracket.large.dev}, there exists $\alpha(\epsilon)>0$ and $n_0(\epsilon)\in \N$ such that for every $n\geq n_0(\epsilon)$,
\[
\p(Y_n\leq -n\epsilon)\leq \exp(-\alpha(\epsilon) n).
\]
Noticing that $Y_n\geq - n\sigma^2_\mu$, we write
\[
Y_n=Y_n\mathds{1}_{Y_n\geq 0}+
Y_n\mathds{1}_{-n\epsilon \leq Y_n\leq 0}
+ Y_n\mathds{1}_{-n\sigma^2_\mu \leq Y_n\leq -n\epsilon}.
\]
Since $\E[\exp(-\lambda Y_n) \mathds{1}_{-n\epsilon \leq Y_n\leq 0}]\leq \exp(n\lambda \epsilon)$ and, for $n\geq n_0(\epsilon)$,
\[
\E[\exp(-\lambda Y_n)\mathds{1}_{-n\sigma_\mu^2 \leq Y_n\leq -n\epsilon}]\leq \exp(n\lambda \sigma_\mu^2)\p(Y_n\leq -n\epsilon)\leq \exp(n (\sigma_\mu^2\lambda-\alpha(\epsilon))),
\]
we get that for every $n\geq n_0(\epsilon)$,
\[
\E[\exp(-\lambda(\langle M_x\rangle_n - n\sigma_\mu^2))]\leq 1+ \exp(n\lambda \epsilon)+\exp(n\sigma_\mu^2 \lambda-\alpha(\epsilon)n).
\]
Keeping $\epsilon$ and $\lambda>0$ (small enough) fixed, we let $n\to +\infty$ and deduce that
\[
\limsup_{n\to +\infty}
\frac{1}{n}\log \E[\exp(-\lambda(\langle M_x\rangle_n - n\sigma_\mu^2))]\leq \max\{\lambda \epsilon, \lambda \sigma_\mu^2-\alpha(\epsilon)\}.
\]
Since $\alpha(\epsilon)>0$, we get by letting $\lambda\to 0^+$ (while keeping $
\epsilon$ fixed) that
\[
\limsup_{\lambda\to 0^+}\frac{1}{\lambda}\limsup_{n\to +\infty}
\frac{1}{n}\log \E[\exp(-\lambda(\langle M_x\rangle_n - n\sigma_\mu^2))]\leq \epsilon.
\]
Letting $\epsilon\to 0$, we conclude that
\[
\limsup_{\lambda\to 0^+}\frac{1}{\lambda}\limsup_{n\to +\infty}
\frac{1}{n}\log \E[\exp(-\lambda(\langle M_x\rangle_n - n\sigma_\mu^2))]\leq 0.
\]
This shows \eqref{1bis}.

\item
Finally, we show \eqref{backtoidd2}. Using the expression \eqref{transform.G} for $G_n^a$, we see that
\[
\langle M_x \rangle_n - G_n^a = \sum_{i=1}^n{\E[(\Delta_i M_x)^2 \mathds{1}_{|\Delta_i M_x|>a} | \mathcal{F}_{i-1}]} + a \sum_{i=1}^n{|\Delta_i M_x| \mathds{1}_{|\Delta_i M_x|\geq a}}.
\]
Observe that by the expression of
our martingale difference \eqref{our.martingale.difference} and by the decomposition \eqref{cocycle.decomposition},
we have a.s.~for every $i\in \N$,
\begin{equation}\label{equation.upper.MDS}|\Delta_i M_x|= |\sigma_0(X_i, L_{i-1}\cdot x)-\ell_\mu|\leq
\kappa(X_i)+\ell_\mu+2\|\psi\|_{\infty}=:\zeta_i.\end{equation}
Since the $\zeta_i$'s have the same distribution,
\[
\E[(\Delta_i M_x)^2 \mathds{1}_{|\Delta_i M_x|>a} | \mathcal{F}_{i-1}]\leq \E(\zeta_1^2 \mathds{1}_{\zeta_1>a})=:h_{\mu}(a).
\]
Observe that the constant $h_{\mu}(a)$ is independent of $i$. Since $\mu$ has finite second moment, we deduce that
\begin{multline}\label{quedire1}
\limsup_{a \to \infty}\limsup_{\lambda \to 0^+}\frac{1}{\lambda} \limsup_{n\to \infty} \frac{1}{n} \log \E\bigl[e^{\lambda \sum_{i=1}^n \mathbb{E}[(\Delta_i M_x)^2 \mathds{1}_{|\Delta_i M_x|>a} | \mathcal{F}_{i-1}]}\bigr]\\
\leq \limsup_{a\to \infty}{h_{\mu}(a)}=0.
\end{multline}
On the other hand, for every $a>0$, the random variables $a\zeta_i\mathds{1}_{|\zeta_i|>a}$ are i.i.d. Denote by $\zeta_a$ their common distribution and by $\Lambda_{\zeta_a}$ the Laplace transform of $\zeta_a$. The latter is differentiable at $0$ for every $a>0$ as $\zeta_1$ has finite exponential moment (because $\mu$ has finite exponential moment).
It follows that
\begin{multline}\label{link}
\limsup_{a\to +\infty} \limsup_{\lambda \to 0^+}\frac{1}{\lambda} \limsup_{n\to \infty} \frac{1}{n} \log \E\bigl[e^{\lambda a \sum_{i=1}^n{|\Delta_i M_x| \mathds{1}_{|\Delta_i M_x|\geq a}}}\bigr]\\
\leq 
\limsup_{a\to +\infty} \limsup_{\lambda \to 0^+}\frac{1}{\lambda} \limsup_{n\to \infty} \frac{1}{n} \log \E\bigl[e^{\lambda \sum_{i=1}^n{ a |\zeta_i| \mathds{1}_{|\zeta_i|\geq a}}}\bigr] =
\limsup_{a\to +\infty} \Lambda_{\zeta_a}'(0).
\end{multline}
But $\Lambda_{\zeta_a}'(0)=a\E[|\zeta_1| \mathds{1}_{|\zeta_1|>a}]\leq \E[\zeta_1^2 \mathds{1}_{|\zeta_1|>a}]\xrightarrow[a\to \infty]{} 0$. This concludes the proof of \eqref{backtoidd2} and hence the corollary.\qedhere
\end{enumeratei}
\end{proof}

\section{Proof of the main result}\label{sec.proof}
Having established the submartingale transform from Section \ref{sec.martingale} and the exponential decay of large deviation probabilities of the predictable quadratic variation from Section \ref{sec.large.deviation}, we are now ready to give the proofs of Theorem \ref{thm.main} and Corollary \ref{corol.rate.function}. In fact, we will prove a slightly more general version given by Theorem \ref{thm.main.tech} below.

\subsection{Statement of the main result}
To state the more general version of Theorem~\ref{thm.main}, we recall and introduce some notation. We are given a separable geodesic Gromov-hyperbolic space $X$ with a fixed based point $o \in X$. The Busemann cocycle $\sigma: \Isom(X) \times \overline{X}^h \to \R$ with respect to the base point $o$ is as defined in Section \ref{subsec.hyperbolic}. Given a countably supported probability measure $\mu$ on $\Isom(X)$ and $x \in \overline{X}^h$, we~define the upper $\Lambda_x^+$ and lower $\Lambda_x^-$ limit Laplace transforms as
\begin{align*}
\Lambda_x^+(\lambda)&:=\limsup_{n \to \infty} \frac{1}{n} \log \mathbb{E}[e^{\lambda\sigma(L_n,x)}]
\\
\tag*{and}
\Lambda_x^-(\lambda)&:=\liminf_{n \to \infty} \frac{1}{n} \log \mathbb{E}[e^{\lambda\sigma(L_n,x)}].
\end{align*}

Whenever $\mu$ has finite exponential moment both functions have values in $\R$ in a neighborhood of $0 \in \R$.

We will omit sub/super-scripts when $x\in X$, indeed, for every $x \in X$, we have $\Lambda_x^+ \equiv \Lambda_x^-\equiv \Lambda_o$ (since $|\sigma(g,x)-\sigma(g,y)|\leq 2 d(x,y)$ for $g\in G$ and $x,y\in X$). This common function $\Lambda=\Lambda_o$ is the notation used in Theorem \ref{thm.main} where we work with the basepoint $x=o \in X$.

\begin{theorem}\label{thm.main.tech}
Let $(X,d)$ be a separable geodesic Gromov-hyperbolic space and $\mu$ a non-elementary probability measure on $\Isom(X)$. Suppose that $\mu$ has a finite exponential moment. Then for every $x\in \overline{X}^h$,
\[
\lim_{\lambda \to 0} \frac{\Lambda_x^-(\lambda)-\ell \lambda }{\lambda^2}=
\lim_{\lambda \to 0} \frac{\Lambda_x^+(\lambda)-\ell \lambda }{\lambda^2}=\frac{\sigma^2_{\mu}}{2}.
\]
\end{theorem}

Sections \ref{subsec.lower.bound} and \ref{subsec.upper.bound} are devoted to the proof of Theorem \ref{thm.main.tech}.

\subsection{Proof of the lower bound}\label{subsec.lower.bound}

Here we prove the following.
\begin{proposition}\label{prop.lower.bound}
Keep the setting of Theorem \ref{thm.main.tech}. Then, for every $x\in \overline{X}^h$,
\[
\liminf_{\lambda \to 0} \frac{\Lambda_x^-(\lambda)-\ell_\mu \lambda }{\lambda^2} \geq \frac{\sigma^2_{\mu}}{2} .
\]
\end{proposition}

\begin{proof}[Proof of Proposition \ref{prop.lower.bound}]

Given a probability measure $\mu$ as in the statement and $x \in \overline{X}^h$, let $M_{x}$ be the martingale given by Lemma \ref{lemma.martingale.dec}. It satisfies
\[
\sigma(L_n,x)-n\ell_\mu=M_{x,n}+O_{x,n}(1),
\]
for every $n \in \N$, where $O_{x,n}(1)$ is a random variable that is bounded (in absolute value) uniformly in $x \in \overline{X}^h$ and $n\in \N$. Let $\sigma_\mu^2>0$ be as defined in \eqref{def.variance}. Let $x \in \overline{X}^h$ be fixed for the rest of the proof. For every $\lambda \in \R$, we have
\begin{align*}\label{observe}
\Lambda_x^-(\lambda)-\lambda \ell_{\mu} & =  \liminf_{n \to \infty}\frac{1}{n}\log \E\bigl[e^{\lambda (\sigma(L_n,x) - n\ell_{\mu})}\bigr]\\
& = \liminf_{n \to \infty} \frac{1}{n}\log \E\bigl[e^{\lambda(M_{x,n} +O_{x,n}(1))}\bigr]\\
& = \liminf_{n \to \infty} \frac{1}{n}\log \E\bigl[e^{\lambda M_{x,n}}\bigr], \end{align*}
where we used the fact that the random variables $O_{x,n}(1)$ are bounded below and above uniformly in $n \in \N$. Notice that since $\mu$ has finite exponential moment, for every $\lambda$ in a neighborhood of $0 \in \R$ (independent of $x \in \overline{X}^h$), the last quantity in the above displayed equation is finite.

We first prove that
\begin{equation}\label{eq.main0}
\frac{\sigma_{\mu}^2}{2}\leq \liminf_{\lambda\to 0^+}{\frac{\Lambda_x^-(\lambda)-\lambda \ell_{\mu}}{\lambda^2}}.
\end{equation}

Let $n\in \N$, $a>0$, and $\lambda>0$ small enough. By Proposition \ref{prop.submart.trans}, we have
\[
1\leq \E\Bigl[\exp\Bigl(\lambda M_{x,n} - \frac{\mathfrak{f}(a\lambda)}{a^2}\, G_n^a\Bigr)\Bigr],
\]
where, we recall,
\[
G_n^a=\sum_{i=1}^n \mathbb{E}[(\Delta_i M_x)^2 1_{|\Delta_i M_x| \leq a}| \mathcal{F}_{i-1}]-a\sum_{i=1}^n |\Delta_iM_x| 1_{|\Delta_i M_x| \geq a}.
\]
Let $p> 1$. By Hölder inequality, we get
\[
1\leq
\E[\exp(p\lambda M_{x,n})]^{1/p}
\E\Bigl[\exp\Bigl(- q\,\frac{\mathfrak{f}(a\lambda)}{a^2}\,G_n^a\Bigr)\Bigr]^{1/q},
\]
where $q \geq 1$ satisfies $1/p+1/q=1$.
Taking logarithm and dividing by $n\lambda^2$, adding and subtracting the term $\Psfrac{\mathfrak{f}(a\lambda)}{(a\lambda)^2}\sigma_{\mu}^2$ gives
\begin{multline}\label{eq.main1}
0\leq \frac{1}{n p \lambda^2}\log \E[\exp(p\lambda M_{x,n})]\\
+ \frac{1}{nq \lambda^2}\log \E\Bigl[\exp\Bigl(- q\,\frac{\mathfrak{f}(a\lambda)}{a^2}\, (G_n^a-n\sigma_{\mu}^2)\Bigr)\Bigr]-\frac{\mathfrak{f}(a\lambda)}{(a\lambda)^2}\sigma_{\mu}^2.
\end{multline}

Using the elementary fact $\liminf_{n\to +\infty}(a_n+b_n)\leq \liminf_n a_n + \limsup_n b_n$ for real sequences $a_n$ and $b_n$, letting $n \to \infty$ in \eqref{eq.main1}, we get
\[
0\leq p \frac{\Lambda_x^-(\lambda p)- \lambda p\ell_{\mu}}{(\lambda p)^2} + \frac{1}{ q\lambda^2}\limsup_{n\to +\infty} \frac{1}{n}\log \E\Bigl[\exp\Bigl(- q\,\frac{\mathfrak{f}(a\lambda)}{a^2}\, (G_n^a-n\sigma_{\mu}^2)\Bigr)\Bigr]- \frac{\mathfrak{f}(a\lambda)}{(a\lambda)^2}\sigma_{\mu}^2.
\]
Letting $\lambda\to 0^+$ while noting that $\mathfrak{f}(a\lambda) \underset{\lambda\to 0}{\sim} (a\lambda)^2/2$ and in particular $\eta=q\sfrac{\mathfrak{f}(a\lambda)}{a^2}\allowbreak \xrightarrow[\lambda \to 0]{} 0^+$, we obtain:
\[
\frac{\sigma_{\mu}^2}{2}\leq p \liminf_{\lambda\to 0^+}{\frac{\Lambda_x^-(\lambda)-\lambda \ell_{\mu}}{\lambda^2}}
+ \frac{1}{2}\limsup_{\eta\to 0^+}\frac{1}{\eta}
\limsup_{n\to +\infty}\frac{1}{n}\log\E[\exp(-\eta (G_n^a-n\sigma_{\mu}^2))].
\]
Letting $a \to \infty$, we deduce from Corollary \ref{corollary.laplace.bracket} that we have
\[
\frac{\sigma_\mu^2}{2} \leq p \liminf_{\lambda\to 0^+}{\frac{\Lambda_x^-(\lambda)-\lambda \ell_{\mu}}{\lambda^2}}.
\]
The desired inequality \eqref{eq.main0} is now proved by taking $p \to 1$.

The inequality
\begin{equation}\label{eq.main}
\frac{\sigma_{\mu}^2}{2}\leq \liminf_{\lambda\to 0^-}{\frac{\Lambda_x^-(\lambda)-\lambda \ell_{\mu}}{\lambda^2}}\end{equation}
is proved in precisely the same way replacing the martingale $M_{x}$ by the martingale $-M_{x}$ using the fact that both martingales have same transforms $G^a$. This completes the proof of Proposition \ref{prop.lower.bound}.
\end{proof}

\subsection{Proof of the upper bound} \label{subsec.upper.bound}
Here we prove the following.
\begin{proposition}\label{prop.upper.bound}
Keep the setting of Theorem \ref{thm.main.tech}. Then, for every $x \in \overline{X}^h$, \begin{equation}\label{eq.upper.bound}\limsup_{\lambda \to 0} \frac{\Lambda_x^+(\lambda)-\ell_\mu \lambda }{\lambda^2} \leq \frac{\sigma^2_{\mu}}{2}.\end{equation}
\end{proposition}
The proof is based on showing that for large $n\in \N$ the random variable \hbox{$\frac{1}{n}\sigma(L_n,x)- \ell_\mu$} has a subgaussian behaviour in a neighborhood of $0$. This is shown in the following proposition which controls the limit Laplace transform of the sequence of random variables $ \frac{1}{n}\sigma(L_n,x) - \ell_\mu$. The proof is based on the martingale decomposition given in Lemma \ref{lemma.martingale.dec} and standard techniques for concentration results for martingales (in particular \cite[Th.\,2.19]{wainwright}). With the notation of Section \ref{sec.hyperbolic}, the main tool for the proof of Proposition \ref{prop.upper.bound} is the following.
\begin{proposition}\label{proposition.laplace.control}
Let \begin{equation}\label{eq.vmu}
v(\mu):=\sup_{\xi\in \overline{X}^h}{\E\Bigl[\left(\sigma_0(X_1,\xi)-\ell_\mu\right)^2 \Bigr]}=\sup_{\xi\in \overline{X}^h}{\E[M_{\xi,1}^2]}.
\end{equation}
Then there exists $C>0$ such that for every $\epsilon>0$, there exists $b>0$ such that for every $|\lambda|<\sfrac{v(\mu)}{b}$, every $n\in \N$ and every $x\in \overline{X}^h$,
\begin{equation}\label{eq.laplace.control}\E\Bigl[e^{\lambda (\sigma(L_n,x) - n \ell_\mu)}\Bigr] \leq \exp\Bigl(\frac{\lambda^2 (v(\mu)+\epsilon) n}{2}+ C |\lambda|\Bigr).\end{equation}
\end{proposition}

This proposition will yield \eqref{eq.upper.bound} with $\sigma_\mu^2$ replaced by the larger quantity $v(\mu)$. To~obtain \eqref{eq.upper.bound}, we will use an acceleration technique speeding up the random walk, see the proof of Proposition \ref{prop.upper.bound}.

We now proceed with proving Proposition \ref{proposition.laplace.control}. The proof is based on the following control of the conditional expectation of the martingale difference $\Delta M_x$:

\begin{lemma}\label{lemma.martingale.condition}
For every $\epsilon>0$,
there exists a constant $b>0$ such that for every $|\lambda|<b$, $n\in \N$ and $x\in \overline{X}^h$, the following inequality holds almost surely:
\begin{equation*}
\E\left[\exp(\lambda \Delta_n M_x)\,|\,\mathcal{F}_{n-1} \right]
\leq \exp\Bigl(\frac{\lambda^2 (v(\mu)+\epsilon)}{2}\Bigr).
\end{equation*}
\end{lemma}

\begin{proof}
By expanding the expression \eqref{equation.martingale} of the martingale $M_x$ and taking conditional expectation, it suffices to show that for every $\epsilon>0$, there exists a constant $b>0$ such that for every $\xi \in \overline{X}^h$ and $|\lambda|<b$
\begin{equation}\label{eq.b.lambda.epsilon}
\int e^{\lambda(\sigma_0(g,\xi)-\ell_\mu)} d\mu(g) \leq \exp\Bigl(\frac{\lambda^2 (v(\mu)+\epsilon)}{2}\Bigr).
\end{equation}
Using the exponential moment assumption on $\mu$, let $\alpha>0$ be such that
\[
\int e^{\alpha \kappa(g)}d\mu(g)<\infty.
\]
Thanks to \eqref{cocycle.decomposition}, we have that for every $g \in \Isom(X)$ and $\xi \in \overline{X}^h$, $|\sigma_0(g,\xi)-\ell_\mu| \leq \kappa(g) + 2(\|\psi\|+\ell_\mu)$. Therefore, for every $|\lambda|<\alpha$, using dominated convergence, we have
\[
\int e^{\lambda(\sigma_0(g,\xi)-\ell_\mu)} d\mu(g)=1+\frac{\lambda^2}{2}\E[(\sigma_0(X_1,\xi)-\ell_\mu)^2]+\sum_{k=3}^{\infty}\frac{\lambda^k}{k!}\E[(\sigma_0(X_1,\xi)-\ell_\mu)^k],
\]
where we have used the fact that $\sigma_0(X_1,\xi)\!-\!\ell_\mu\!=\!M_{\xi,1}$ has mean zero (as the cocycle~$\sigma_0$ has constant drift). Now using again the fact that $|\sigma_0(g,\xi)-\ell_\mu| \leq \kappa(g) + 2(\|\psi\|+\ell_\mu)$ and that $\mu$ has finite exponential moment, we get that there exists $C>0$ (independent of $\xi$) such that for every $|\lambda|<\alpha$
\[
\int e^{\lambda(\sigma_0(g,\xi)-\ell_\mu)} d\mu(g)\leq 1+\frac{\lambda^2 v(\mu)}{2}+ C \lambda^3.
\]
This readily implies \eqref{eq.b.lambda.epsilon} and hence finishes the proof.
\end{proof}

\begin{proof}[Proof of Proposition \ref{proposition.laplace.control}]
By the tower property of the conditional expectation, we deduce from Lemma \ref{lemma.martingale.condition} that for every $\epsilon>0$, $|\lambda|<b$ (where $b=b(\mu,\epsilon)$ is given by the aforementioned lemma), every $n\in \N$ and $x\in \overline{X}^h$,
\[
\E\bigl[e^{\lambda M_{x,n}}\bigr]
= \E\bigl[e^{\lambda M_{x,n-1}} \E[e^{\lambda \Delta M_{x,n}} |\mathcal{F}_{n-1}]\bigr]
\leq \exp\Bigl(\frac{\lambda^2(v(\mu)+\epsilon)}{2}\Bigr) \E\bigl[e^{\lambda M_{x,n-1}}\bigr].
\]
Iterating the same process, we deduce that
\[
\E[\exp(\lambda M_{x,n})]\leq \exp\Bigl(\frac{ \lambda^2 (v(\mu)+\epsilon) n}{2}\Bigr).
\]
Finally, recall that $\sigma(L_n,x)-n\ell_\mu=M_{x,n}+R_{x,n}$ where $|R_{x,n}|\leq 2\|\psi\|_{\infty}=:C$. This finishes the proof of the proposition.
\end{proof}

A remark on the proof of Proposition \ref{proposition.laplace.control} is in order.

\begin{remark}
Given a martingale $M$ with unbounded differences, controlling various quantities involving the \emph{conditional expectation} of the martingale difference sequence $\Delta M$ is generally an important step to prove concentration results for the martingale~$M$; see the works of de La Pe\~{n}a \cite{de.la.pena}, Dzhaparidze--van Zanten \cite{DZ}, Fan--Grama--Liu \cite{fan.grama.liu} and Liu--Watbled \cite{liu-watbled} who prove Bennett--Bernstein type concentration inequalities generalizing results of Freedman \cite{Freedman} to the case of unbounded differences. Proposition \ref{proposition.laplace.control} avoids using these more sophisticated results thanks to Lemma \ref{lemma.martingale.condition} which, exploiting the special form of our martingales (namely, coming from an i.i.d.\ random walk on a group), gives a deterministic bound for the exponential of the conditional expectation.
\end{remark}

We are now ready to give:
\begin{proof}[Proof of Proposition \ref{prop.upper.bound}]
Using Proposition \ref{proposition.laplace.control}
and taking logarithm and dividing by $n$ on both sides of \eqref{eq.laplace.control}, letting first $n\to +\infty$, then $\lambda\to 0$, and finally $\epsilon\to 0$, we~get that
\begin{equation}\label{eq.upper.bound2} \limsup_{\lambda\to 0}
\frac{\Lambda_x^+(\lambda)-\lambda \ell_\mu}{\lambda^2}\leq \frac{v(\mu)}{2}.
\end{equation}
This yields \eqref{eq.upper.bound} with $\sigma_{\mu}^2$ replaced with the larger quantity $v(\mu)$. We now employ an acceleration trick. More precisely, consider, for every $k\in \N$, the probability measure~$\mu^{\ast k}$ (distribution of $L_k$), which is a non-elementary probability measure with finite exponential moment. Denote by $\Lambda(\mu^{\ast k},.)$ the Laplace transform based at $x=o$ for the $\mu^{\ast k}$-random walk $(L_{nk})_{n \in \N}$. In particular, $\Lambda(\mu,.)=\Lambda(.)$. Applying \eqref{eq.upper.bound2} for the
$\mu^{\ast k}$-random walk, we deduce that for every $k\geq 1$,
\[
\limsup_{\lambda\to 0}\frac{\Lambda({\mu^{\ast k}},\lambda)- \lambda \ell_{\mu^{\ast k}}}{\lambda^2}\leq \frac{ v(\mu^{\ast k})}{2}
\]
and $\Lambda({\mu^{\ast k}},.)=k \Lambda(.)$. Hence, for every $k \geq 1$,
\[
\limsup_{\lambda\to 0}\frac{\Lambda(\lambda)- \ell_\mu \lambda}{\lambda^2}\leq \frac{ v(\mu^{\ast k})}{2k}.
\]
It remains to check that
\begin{equation}\label{equation.variance.limit}
\lim_{k\to \infty}{\frac{v(\mu^{\ast k})}{k}}=\sigma_{\mu}^2.\end{equation}
By definition of $v(\mu^{\ast k})$ given in \eqref{eq.vmu} and using \eqref{identity1} (with the notation of Lem\-ma~\ref{lemma.unif.conv}), we get that for every $k \geq 1$,
\[
\frac{v(\mu^{\ast k})}{k}=\sup_{\xi\in \overline{X}^h}f_k(\xi).
\]
Finally, the uniform convergence given in Lemma \ref{lemma.unif.conv} for the sequence $(f_k)_{k\in \N}$ implies~\eqref{equation.variance.limit} and finishes the proof of the proposition.
\end{proof}

\subsection{Proof of Corollary \ref{corol.rate.function}}\label{subsec.corol}
If $\sigma_\mu=0$, it is easy to deduce from Remark \ref{rk.sigma.positive} that the rate function $I$ satisfies $I(\ell_\mu)=0$ and $I(x)=\infty$ for every $x \in \R \setminus \{\ell_\mu\}$ and hence Corollary \ref{corol.rate.function} is true in that case. We therefore suppose $\sigma^2_\mu>0$. To treat this case, we will use some standard terminology from convex analysis, for which we refer the reader to \cite{convexity.book}. Let, as usual, $\Lambda$ denote the limit Laplace transform of the sequence $\frac{1}{n} d(L_n \cdot o,o)$. Note that $\Lambda$ is convex (as it follows by a direct application of Hölder inequality), and, thanks to the finite exponential moment assumption, it takes finite values on an interval of type $(-\infty,\alpha)$ with $\alpha>0$ and hence it is continuous on this interval. Let $\Lambda^\ast$ be its Fenchel--Legendre transform. By Theorem \ref{thm.main} and \cite[Prop.\,6.1.2]{convexity.book}, we have $\partial \Lambda^\ast(\ell_\mu)=\{0\}$ where $\partial \Lambda^\ast$ is the multi-valued subdifferential function of $\Lambda^{\ast}$. Moreover, by Theorem \ref{thm.main}, $\Lambda$ has a second-order development at $0$ and therefore, by \cite[Th.\,5.1.2]{convexity.book} its subdifferential $\partial \Lambda$ is differentiable in the sense of \cite[Def.\,5.1.1]{convexity.book}. Since also $\sigma_\mu^2>0$, we can apply \cite[Prop.\,6.2.5]{convexity.book} (see also \cite[Prop.\,4.5]{gianluca}) and deduce that $\Lambda^\ast$ satisfies
\begin{equation}\label{eq.expand.lambdastar}
\Lambda^{\ast}(\ell_\mu+ x)=\frac{1}{2\sigma_\mu^2}\,x^2 + o(x^2)
\end{equation}
as $x \to 0$.
Now, let $\alpha>0$ be the constant appearing in the finite exponential moment condition, \ie $\int e^{\alpha d(g \cdot o,o)} d\mu(g)<\infty$.
Then, it follows by Varadhan's integral lemma \cite[Th.\,4.3.1]{dembo-zeitouni} that for every $\lambda<\alpha$, we have $I^\ast(\lambda)=\Lambda(\lambda)$, where $I^\ast$ is the Fenchel--Legendre transform of $I$. But since the second-order term $\sigma^2_\mu/2$ in the second-order expansion of $\Lambda$ at $0$ (given by Theorem \ref{thm.main}) is positive, it follows that the Fenchel--Legendre transforms of $I^\ast$ and $\Lambda$ coincide in a neighborhood of $\ell_\mu$, \ie
\begin{equation}\label{eq.Istarstar}
I^{\ast \ast}(x)=\Lambda^{\ast}(x)
\end{equation}
for every $x \in (\ell_\mu-\beta,\ell_\mu+\beta)$ for some $\beta>0$. But since by \cite[Th.\,1.1]{BMSS}, the function $I$ is convex (and lower semi-continuous), thanks to Fenchel--Legendre duality, we have $I \equiv I^{\ast \ast}$ and hence the corollary follows from \eqref{eq.expand.lambdastar} and \eqref{eq.Istarstar}. \qed

\begin{remark}[On finite time large deviation estimates]
Corollary \ref{corol.rate.function} is an asymptotic statement obviously in its expression (as $\lambda \to 0$) but also concerning the rate function $I$ itself (which controls, from below and above, the exponential rate of decay of probabilities of large deviations of $\frac{1}{n}d(L_n \cdot o,o)-\ell_\mu$ as $n \to \infty$). In regard to giving upper bounds for the large deviation probabilities, Corollary \ref{corol.rate.function} parallels Proposition~\ref{prop.upper.bound}. However, in the spirit of concentration estimates, as in the proof of Proposition \ref{prop.upper.bound}, we could have directly used Proposition \ref{proposition.laplace.control} together with the Chernoff bound, to obtain \textit{finite time estimates} for the large deviations of $\frac{1}{n}d(L_n \cdot o,o)-\ell_\mu$\footnote{These finite time estimates then can be used, with the acceleration trick, to prove that $\lim_{\lambda \to 0}\sfrac{I(\ell_\mu+\lambda)}{\lambda^2} \geq \sfrac{1}{2\sigma_\mu^2}$.}. This is in line with the recent work \cite{aoun-sert} where, under additional assumptions, the~appearing constants are made explicit (\eg relating with the spectral radius of the probability measure $\mu$ in the regular representation $L^2(G)$ of the isometry group $G$).
\end{remark}

\subsection{Concluding remarks and questions}\label{subsec.questions}
In this final part, we include two questions motivated by our results and make some brief comments on them.
\subsubsection{Limit Laplace transform of the Busemann cocycle}
As a direct consequence of Theorem \ref{thm.main.tech}, we have that the functions $\Lambda_x^+$ and $\Lambda_x^-$ have the same derivatives at $0$ for every $x\in \overline{X}^h$. Moreover, it is not hard to see that $\Lambda_x^+=\Lambda_y^-$ on $[0,+\infty)$ for every $x,y \in \overline{X}^h$. These suggest the following questions:

\begin{question}\label{question}
Is it true that $\Lambda_x^+=\Lambda_x^-$ for every $x\in \overline{X}^h$? More importantly, does there exist a neighborhood of $0$ such that $\Lambda_x^+=\Lambda_y^+$ for every $x,y\in \overline{X}^h$ (and similarly $\Lambda_x^-=\Lambda_y^-$)?
\end{question}

The answer to Question \ref{question} is positive for $x,y \in \partial_h X$ in standard cases when an analytic approach can be implemented. These include random walks on free groups or on classical hyperbolic spaces $\mathbb{H}^n$. Regarding the last part of the question, we note that there are simple examples which show that one cannot ask that the functions~$\Lambda_x$ and~$\Lambda_y$ coincide throughout the region where they are finite/well-defined --- take for example the random walk on the group $\mathrm{F}_2=\langle a, b\rangle$ driven by the measure $\mu=\frac{1}{2}(\delta_a+\delta_b)$ and consider $x=a^{+\infty} \in \partial \mathrm{F}_2$ and $y=a^{-\infty}\in \partial \mathrm{F}_2$.

\subsubsection{Second-order expansion below the drift without exponential moment}
The rate function $I$ appearing in \eqref{eq.ldp} for $\frac{1}{n}\kappa(L_n)$ exists without any moment assumption \cite[Th.\,2.8]{BMSS}. Moreover, if $\mu$ fails to have finite exponential moment, then the rate function~$I$ vanishes on $[\ell_\mu,+\infty)$ (see \cite[Rem.\,3.2]{BMSS}). On the other hand, it follows from Gouëzel's \cite[Th.\,1.2]{gouezel.first.moment} that $I$ is positive on $[0,\ell_\mu)$ when $\mu$ has finite first moment. This suggests the following question:

\begin{question}
Suppose $\mu$ is a non-elementary probability measure with finite second order moment. Is it true that
\[
\lim_{\lambda\to 0^-}\frac{I(\lambda+\ell_{\mu}) }{\lambda^2}=\frac{1}{2\sigma_\mu^2} ?
\]
\end{question}
Moreover, we note that thanks to Benoist--Quint \cite[\S 5]{BQ.hyperbolic}, the definition of the variance $\sigma_\mu^2$ given in \eqref{def.variance} even makes sense under the finite first moment hypothesis supposing that the isometry group $\Isom(X)$ acts cocompactly on $X$. Therefore, this suggests the subsequent question as to whether the second-order term in the second-order expansion of $I$ below the drift vanishes when $\sigma_\mu^2=\infty$. Similar questions can be asked about the second-order expansion of the limit Laplace transform $\Lambda$ below zero.

\backmatter
\bibliographystyle{jepplain+eid}
\bibliography{aoun-et-al}
\end{document}

