%~Mouliné par MaN_auto v.0.29.1 2023-09-19 15:51:17
\documentclass[CRMATH,Unicode,XML]{cedram}

\TopicFR{Statistiques}
\TopicEN{Statistics}

\providecommand*{\noopsort}[1]{}

\usepackage{braket}
\usepackage[noadjust]{cite}

\newcommand*{\dd}{\mathrm{d}}
\newcommand*{\dmu}{\dd\mu}
\newcommand*{\dtheta}{\dd\theta}
\newcommand*{\dt}{\dd t}
\newcommand*{\dx}{\dd x}
\newcommand*{\dy}{\dd y}
\newcommand*{\du}{\dd u}

\graphicspath{{./figures/}}

\newcommand*{\mk}{\mkern -1mu}
\newcommand*{\Mk}{\mkern -2mu}
\newcommand*{\mK}{\mkern 1mu}
\newcommand*{\MK}{\mkern 2mu}

\hypersetup{urlcolor=purple, linkcolor=blue, citecolor=red}

\newcommand*{\relabel}{\renewcommand{\labelenumi}{(\theenumi)}}
\newcommand*{\romanenumi}{\renewcommand*{\theenumi}{\roman{enumi}}\relabel}
\newcommand*{\Romanenumi}{\renewcommand*{\theenumi}{\Roman{enumi}}\relabel}
\newcommand*{\alphenumi}{\renewcommand*{\theenumi}{\alph{enumi}}\relabel}
\newcommand*{\Alphenumi}{\renewcommand*{\theenumi}{\Alph{enumi}}\relabel}
\let\oldtilde\tilde
\renewcommand*{\tilde}[1]{\mathchoice{\widetilde{#1}}{\widetilde{#1}}{\oldtilde{#1}}{\oldtilde{#1}}}
\let\oldexists\exists
\renewcommand*{\exists}{\mathrel{\oldexists}}
\let\oldforall\forall
\renewcommand*{\forall}{\mathrel{\oldforall}}


\title[Hahn polynomials]{New Karhunen-Lo\`eve expansions based on Hahn polynomials with application to a Sobolev test for uniformity on Johnson graphs}
\alttitle{De nouveaux développements de Karhunen-Loève basés sur les polynômes de Hahn avec application à un test d'uniformité de Sobolev sur les graphes de Johnson}

\author{\firstname{Jean-Renaud} \lastname{Pycke}}
\address{LaMME CNRS (UMR 8071), University of Paris-Saclay  (Evry), France}
\email{jeanrenaud.pycke@univ-evry.fr}

\subjclass{62F05, 33C45}
\keywords{\kwd{Karhunen-Lo\`eve expansions}
\kwd{classical orthogonal polynomials}
\kwd{distance regular graphs}
\kwd{directional statistics}}
\altkeywords{\kwd{développements de Karhunen-Loève}
\kwd{polynômes orthogonaux classiques}
\kwd{Graphes distance-réguliers}
\kwd{statistiques directionnelles}}

\begin{abstract}
We give a new family of Karhunen-Lo\`eve expansions involving Hahn polynomials. This enables us to introduce discrete analogues of Watson statistics, and a test for uniformity on Johnson's graphs. We use the fact that the zonal spherical functions on these graphs are Hahn polynomials. Our test is consistent against all alternatives and locally most powerful against some alternative.
\end{abstract}

\begin{altabstract}
Nous proposons un nouveau d\'eveloppement de Karhunen-Lo\`eve avec les polyn\^omes de Hahn. Cela nous permet d'introduire un analogue discret de la statistique de Watson pour tester l'uniformit\'e d'une distribution sur les graphes de Johnson, dont les fonctions sph\'eriques zonales sont les polyn\^omes de Hahn. Ce test peut \^etre vu comme un test de Sobolev dans le cas discret, et nous en d\'eduisons certaines de ses propri\'et\'es asymptotiques.
\end{altabstract}


\begin{document}

\maketitle

\section{Introduction}

The statistics of Cram\'er-von Mises (CVM) and Watson, denoted in the literature by $W_n^2$ and $U_n^2$ respectively, remain two of the most widely used tools for testing the uniformity of a sample from a distribution over $[0,1]$ or the unit circle (see, e.g., \cite[\S{4-5}, in particular formulas $(4.1.7),(5.4.2)$ p.~27 and 36]{durbin}, or~\cite[\S{4.2.2-3}, formulas $(4.2)$ p.~100-101]{stephens}).

Recall that in order to test uniformity on the unit circle, Watson~\cite{watson} introduced $U_n^2$, by adapting $W_n^2$. The circle is a homogeneous space and the simplest example of a compact continuous two-point homogeneous space. The field of directional statistics deals with such sample spaces, less conventional than the real line. The fundamental reference for an introduction to this quite recent domain of statistics is~\cite{mardia}. The latter book should be supplemented by~\cite{pewsey}, which provides a detailed survey of recent advances in the field. Characteristically, no reference dealing with data from sample spaces similar to Johnson's graphs occurs in this survey.

Beran~\cite{beran} and Gin\'e~\cite{gine} introduced a wide class of tests for uniformity on homogeneous spaces and compact Riemannian manifolds, but the discrete cases have received less attention than the continuous ones. As a result, directional statistics dealing with discrete sample spaces remains a widely unexplored field.

The aim of the present paper is to contribute to bridge this gap, by building a counterpart of Watson's statistic by means of Karhunen-Lo\`eve (K-L) expansions involving classical orthogonal polynomials, so that our results can be easily extended, \emph{mutatis mutandis}, to other classical discrete orthogonal polynomials. Our test for uniformity on Johnson graphs can be considered as a Sobolev test, in the discrete case, as introduced by Gin\'e~\cite{gine}.

Our paper is organized as follows. In Section~\ref{visit} we show that the two Karhunen-Lo\`eve (K-L) expansions~\eqref{dev1}, associated with CVM and Watson's statistics, via their definitions~\eqref{dev0} as V-statistics, and from which most of their properties can be derived (principal components, limiting distribution, Bahadur efficiency), are associated with Chebyshev polynomials of the first kind.

This interpretation enables us, in Theorem~\ref{aaa} of Section~\ref{kl}, to give for kernel~\eqref{ccc} the K-L development~\eqref{rgL1}, which is a discrete analogue, valid for Hahn polynomials, of development~\eqref{dev1}.

Hahn polynomials can be seen as the zonal spherical functions of Johnson graphs. Using this fact, we build in Section~\ref{sobolev} a test for uniformity on these graphs in the same way as Watson's statistic was built for testing uniformity on a circle from CVM's statistic. Theorem~\ref{last} gives a correspondence between some K-L expansions over Johnson graphs, and a one-variable Fourier series with Hahn polynomials. Theorem~\ref{bb} introduces our new statistic and gives the asymptotic distribution under the hypothesis of uniformity. Theorem~\ref{bb} also provides an alternative hypothesis under which the new test is locally most powerful.


\section{Cram\'er - von Mises and Watson statistics revisited}\label{visit}

Let $(T,\mu)$ be a probability measure space and $L_2(T,\mu)$ the associated Hilbert space of real, square-integrable functions endowed with the inner product
\[
\braket{f|g}_\mu=\int_Tf(t)g(t)\dmu(t).
\]
By a K-L expansion (with respect to $\mu$) of the bivariate symmetric kernel $K:T\times T\to\mathbb R$, we mean a pointwise convergent series of the form
\begin{equation}
K(s,t)=\sum_{k\geq 1}\mu_k\cdot\frac{f_k(s)f_k(t)}{c_k}\quad(s,t\in T),
\end{equation}
where the sequences of functions $(f_k)$ and real positive numbers $(\mu_k)$ and $(c_k)$ satisfy the integral and orthogonality relations
\[
\int_TK(s,t)f_k(s)\dmu(s)=\mu_kf_k(t),\ \braket{f_k|f_l}_\mu=c_k\delta_{k,\ell}\quad(t\in T,k,\ell\geq 1),
\]
with the Kronecker delta, or Dirac function, defined by
\[
\delta_{i,j}=\delta_i(j)=
\begin{cases}0&\text{if $i\neq j$,}\\1&\text{if $i= j$,}
\end{cases}\quad(i,j\in\mathbb Z).
\]

Concerning these expansions and statistical applications, see~\cite[Chapter~5]{schorack}. For general facts about degenerate $V$-statistics, the reader is referred to~\cite[\S{4.3}]{koro}, \cite[\S{12.3} (in particular Examples~12.9 and 12.13)]{van}, \cite[Chapter~5]{serfling}. Although it is not the most standard way, CVM and Watson statistics can be defined in the form of the $V$-statistics (see~\cite[Introduction]{pycke2007} and references therein), computed from samples $X_1,\dots,X_n\in[0,1]$ and $\theta_1,\dots,\theta_n\in[0,2\pi],$
\begin{align}\label{dev0}
W_n^2=\frac1n\sum_{i=1}^n\sum_{j=1}^nh_W(X_i,X_j),\quad U_n^2=\frac1n\sum_{i=1}^n\sum_{j=1}^nh_U(\theta_i,\theta_j),
\end{align}
where the symmetric kernels and their K-L expansions (w.r.t. the uniform distribution) are given, for $0\leq t_1\leq t_2\leq 1$ or $0\leq\theta_1, \theta_2\leq 2\pi$, by
\begin{align}\label{dev1}
h_W(t_1,t_2)&=\frac{t_1^2+(1-t_2)^2}2-\frac16
=\sum_{k=1}^\infty \frac1{k^2\pi^2}\cdot2\cos(k\pi t_1)\cos(k\pi t_2),\\
h_U(\theta_1,\theta_2)&=\frac14h_W\big(0,\pi^{-1}d[P_1(\theta_1),P_2(\theta_2)]\big)\\&=\sum_{k=1}^\infty\frac1{4k^2\pi^2}\cdot 2\{\cos(k \theta_1)\cos(k \theta_2)+\sin(k \theta_1)\sin(k \theta_2)\},
\end{align}
where $d[P_1(\theta_1),P_2(\theta_2)]=\min(|\theta_1-\theta_2|,2\pi-|\theta_1-\theta_2|)\in[0,\pi]$ denotes the distance between the circle points $P_1$ and $P_2$ associated with angles $\theta_1$ and $\theta_2$. Therefore $U_n^2$ does not depend on the origin or the orientation chosen arbitrarily on the circle. In other words, this statistic is well-defined for circular data, because its kernel is invariant under the action of $O(2)$, the group of isometries of the unit Euclidean circle. We will define an invariant kernel on Johnson graphs in a similar way by~\eqref{similar}.



Kernels $h_W$ and $h_U$, and by extension the associated $V$-statistics $W_n^2$ and $U_n^2$, are said to be degenerate (w.r.t. the uniform distribution) in view of equalities
\[
\int_0^1h_W(t_1,t_2)\dt_1=\int_0^{2\pi}h_U(\theta_1,\theta_2)\dtheta_1=0.
\]

Let us express these relations in terms of Chebyshev polynomials of the first kind, whose sequence $(T_k)_{k\geq 0}$ is defined by $T_k(\cos\theta)=\cos(k\theta)$. They satisfy, (see~\cite[\S{9.8.2}]{koek}) for $0\leq k,\ell$, the differential equation and the orthogonality relations
\begin{equation}\label{diff}
(\sigma(x)\omega(x)T_k'(x))'=-\lambda_k \omega(x)T_k(x),\ \int_{-1}^1T_k(x)T_\ell(x)\omega(x)\dx=c_k\delta_{k,\ell},
\end{equation}
with $\sigma(x)=1-x^2$, eigenvalues $\lambda_k= k^2$ and constants $c_k=1/2$ for $k\geq 1$, the weight function $\omega(x)=\pi^{-1}(1-x^2)^{-1/2}=\Omega'(x)$ being the arcsine probability density function with distribution function $\Omega$ and the associated tail $\overline \Omega$ given by
\[
\Omega(x)=\int_{-1}^x \omega(y)\dy=\pi^{-1}\arccos(-x)=1- \overline \Omega(x)\quad(-1\leq x\leq 1).
\]
It is easily checked that~\eqref{dev1} can be rewritten, for $-1\leq x_1\leq x_2\leq 1$, as
\begin{multline}\label{defl}
\pi^2 h_W(\Omega(x_1),\Omega(x_2))=\sum_{k=1}^\infty\frac1{\lambda_k}\frac{T_k(x_1)T_k(x_2)}{c_k}\\
=\int_{-1}^{x_1}\frac{\Omega(u)}{\sigma(u)\omega(u)}\du+\int_{x_2}^1\frac{\overline \Omega(u)}{\sigma(u)\omega(u)}\du-\int_{-1}^1\frac{\Omega(u)\overline \Omega(u)}{\sigma(u)\omega(u)}\du\,.
\end{multline}
Now, since expressions appearing on both sides of the second equality in~\eqref{defl} involve functions and constants associated with Chebyshev polynomials, having their analogues for Hahn polynomials, we are in a position to state the corresponding results.

\section{K-L expansions with Hahn polynomials}\label{kl}
Assume $N$ is a positive integer and $\mu,\nu\in(-1,\infty)$. In the present paper, Hahn polynomials, denoted by $H_k^{(\mu,\nu,N)}(x)$ for $0\leq k \leq N$, will be those denoted by $Q_k(x,\alpha,\beta,N)$ in~\cite[\S{9.5}]{koek} \linebreak (see formulas $(1.4.1)$ p.~5 and $(9.5.1)$ p.~204) with $\alpha=-N-\nu-1,\beta=-N-\mu-1$, or also
%$\tilde h^ {(\mu,\nu)}(x,N+1)(x)/\tilde h^ {(\mu,\nu)}(x,N+1)(0)$
\linebreak$\tilde h^{(\mu,\nu)}_k (x, N+1)/\tilde h^{(\mu,\nu)}_k (0, N+1)$
in~\cite{nikiforov} (see the first equality p.~53 and Table~2.4 p.~54), so that
\begin{align}\label{defh}
H_k^{(\mu,\nu,N)}(x)&=
\sum_{j=0}^{k}\frac{(-k)_j(-2N-\mu-\nu+k-1)_j(-x)_j}{(-N-\nu)_j(-N)_j\,j!}
\quad(0\leq k\leq N)
\end{align}
where the Pochhammer symbol $(a)_k$ is defined to be
\[
(a)_0=1,\ (a)_k=a(a+1)\cdots(a+k-1)\quad(k=1,2,3,\dots).
\]

Following~\cite[$(6.31)$ p.~259]{johnson}, consider the hypergeometric probability mass function (p.m.f.), supported by the set $\llbracket0,N\rrbracket=\{0,1,\dots,N\}$, given by
\begin{equation}
\omega^{(\mu,\nu,N)}(i)=
\begin{cases}\binom {N+\mu}{N-i}\binom {N+\nu}{i}\big /\binom{2N+\mu+\nu}{N},&\text{for $i\in\{0,1,\dots,N\}$},\\
0&\text{for $i\in\mathbb Z\setminus\{0,1,\dots,N\}$}.
\end{cases}
\end{equation}
The associated cumulative distribution function (c.d.f.) and its tail are given by
\begin{equation}
\Omega^{(\mu,\nu,N)}(i):=\sum_{j\leq i}\omega^{(\mu,\nu,N)}(j)=1-\overline \Omega^{(\mu,\nu,N)}(i)\quad(i\in \mathbb Z).
\end{equation}
Hahn polynomials satisfy, for $0\leq k,\ell\leq N$, the orthogonality relations
\begin{gather}\label{orth}
\sum_{i=0}^N\binom {N+\mu}{N-i}\binom {N+\nu}{i} H_k^{(\mu,\nu,N)}(i)H_\ell^{(\mu,\nu,N)}(i)=\delta_{k,\ell}\,d_k^{(\mu,\nu,N)},\\
d_k^{(\mu,\nu,N)}=\frac{(2N+\mu+\nu+1-k)!(N+\mu)!(N+\nu-k)!(N-k)!k!}{(2N+\mu+\nu+1-2k)(N+\mu+\nu-k)!(N+\nu)!(N+\mu-k)!N!^2}.
\end{gather}
The latter formula is usually given with parameters $\alpha,\beta$, so that we shall prove ours, for $\mu,\nu$, at the beginning of Section~\ref{tech}, with a reminder of some notations for factorials and binomial coefficients with non-integer parameters.

Hahn polynomials also satisfy the discrete analogue of~\eqref{diff} in the form of the difference equations
\begin{equation}\label{difeq}
\mathcal L^{(\mu,\nu,N)}H_k^{(\mu,\nu,N)}:=\Delta[\sigma^{(\mu,\nu)}\omega^{(\mu,\nu,N)}\nabla H_k^{(\mu,\nu,N)}] = -\lambda_k^{(\mu,\nu,N)}\omega^{(\mu,\nu,N)}H_k^{(\mu,\nu,N)},
\end{equation}
for $0\leq k\leq N$, with
\begin{equation}
\lambda_k^{(\mu,\nu,N)}=k(2N + \mu + \nu-k+1),\quad \sigma^{(\mu,\nu)}(i)=i(i+\mu)\quad(0\leq i,k\leq N) \label{lamsig3}
\end{equation}
(see~\cite[(2.1.18) p.~21]{nikiforov}), and where for any function $f:\mathbb Z\to\mathbb R$, the forward and backward shift operators are defined by
\[
\Delta f(i)=f(i+1)-f(i),\ \nabla f(i)=f(i)-f(i-1)=\Delta f(i-1)\quad(i\in\mathbb Z).
\]

In order to avoid problems at the endpoints $0$ and $N$ when dealing with difference operators, any function $f:\llbracket0,N\rrbracket\to\mathbb R$ (including Hahn polynomials) will be identified with its extension to $\mathbb Z$ obtained by setting $f(n)=0$ for $n\notin\llbracket0,N\rrbracket$. Note for such a function $f$ the fundamental property, valid for $k=0,\dots,N$,
\begin{equation}\label{ffff}
\mathcal L^{(\mu,\nu,N)}f=-\lambda_k^{(\mu,\nu,N)} \omega^{(\mu,\nu,N)}f\iff \exists c\in\mathbb R,f=cH_k^{(\mu,\nu,N)}.
\end{equation}
Let us introduce the scalar product
\begin{align}\label{scal}
\braket{f|g}_\omega=\sum_{i=0}^Nf(i)g(i)\omega^{(\mu,\nu,N)}(i).
\end{align}
In this setting, noticing that $d_0^{(\mu,\nu,N)}=\binom{2N+\mu+\nu}{N}$, the orthogonality relations~\eqref{orth} take the form
\begin{equation}\label{orthk}
\braket{H_k^{(\mu,\nu,N)}|H_\ell^{(\mu,\nu,N)}}_\omega=\frac{d_k^{(\mu,\nu,N)}}{d_0^{(\mu,\nu,N)}}\,\delta_{k,\ell}\quad(0\leq k,\ell\leq N).
\end{equation}

Finally, by analogy with~\eqref{defl}, consider the kernel
\begin{multline}\label{ccc}
h^{(\mu,\nu,N)}(i,j):=\sum_{\ell=1}^i\frac{\Omega^{(\mu,\nu,N)}(\ell-1)}{\sigma^{(\nu)}(\ell)\omega^{(\mu,\nu,N)}(\ell)}
+\sum_{\ell=j+1}^N\frac{\overline\Omega^{(\mu,\nu,N)} (\ell-1)}{\sigma^{(\nu)} (\ell)\omega ^{(\mu,\nu,N)}(\ell)}\\-\sum_{\ell=1}^N\frac{\Omega^{(\mu,\nu,N)} (\ell-1)\overline \Omega ^{(\mu,\nu,N)}(\ell-1)}{\sigma^{(\nu)} (\ell)\omega ^{(\mu,\nu,N)}(\ell)}.
\end{multline}

The discrete analogue of~\eqref{defl} is given by

\begin{theo}\label{aaa}
The K-L expansion
\begin{equation}
h^{(\mu,\nu,N)}(i,j)=\sum_{k=1}^N\frac 1{\lambda_k^{(\mu,\nu,N)}}\cdot \frac{d_0^{(\mu,\nu,N)} }{d_k^{(\mu,\nu,N)}}H_k^{(\mu,\nu,N)}(i)\, H_k^{(\mu,\nu,N)}(j)\label{rgL1}
\end{equation}
holds, for $ 0\leq i,j\leq N$, with respect to the p.m.f. $\omega^{(\mu,\nu,N)}$.
\end{theo}

\begin{proof}
From Lemma~\ref{ll}, Lemma~\ref{mm}, and~\eqref{ffff} with $k=0$, we infer that given $j$, the two functions of the variable $i$ appearing on the two sides of the equal sign in~\eqref{rgL1} are equal up to an additive constant. The latter is zero in view of Lemma~\ref{degebis}, and the equalities
\[
\braket{H_k^{(\mu,\nu,N)}|H_0^{(\mu,\nu,N)}}_{\omega}=0\quad(1\leq k\leq N).\qedhere
\]
\end{proof}
\section{A Sobolev test for uniformity on Johnson's graph}\label{sobolev}

Let us introduce some notation and basic facts about Johnson's graphs. These graphs provide an example of distance-transitive graphs, discrete analogues of the continuous compact two-point homogeneous (also called rank-one symmetric) spaces. See~\cite[\S{4.1.F} p.~136-138 for the definition of distance-transitive graphs and \S{9.1} p.~255-261 about Johnson graphs]{brouwer}; we will also use~\cite[\S{II.3}]{stanton}. In Section~\ref{tech}, our Lemma~\ref{ff} will restate and give some elements of proof for the spectrum features~\eqref{spech}.

Assume $S$ is a finite set with cardinality $m\geq 2N$. The Johnson graph $\Gamma=J(m,N)$ has as vertex set, denoted by $\mathcal V=\mathcal V(m,N)$, the collection of subsets of $S$ whose cardinality is $N$. The volume, or size of the graph, is therefore $|\mathcal V(m,N)|=\binom mN$. Two vertices $v_1,v_2$ are adjacent if $v_1\cap v_2$ has cardinality $N-1$. The distance between two vertices $v_1,v_2$ is $d(v_1,v_2)=N-|v_1\cap v_2|$ and can take any value in $\llbracket0,N\rrbracket$; consequently $J(m,N)$ has diameter $N$. The volume of any sphere with radius $i$ is
\begin{equation}\label{vols}
|S^{m,N}(i)|=\binom N{N-i}\binom{m-N}i=|\mathcal V(m,N)|\, \omega^{(0,m-2N,N)}(i)
\quad(0\leq i\leq N).
\end{equation}
Johnson's graph is $r$-regular with $r=|S^{m,N}(1)|=N(m-N)$. If $\mathbf A$ denotes the adjacency matrix of an $r$-regular graph, then $ \mathbf A$ has a decreasing sequence of, say $s$, distinct eigenvalues denoted by $(\lambda_k')_{ 0\leq k\leq s-1}$, $\lambda_k'$ having multiplicity $m_k$. The sequence $(\lambda_k',m_k)_{ 0\leq k\leq s-1}$ is the \emph{ordinary spectrum} of $\Gamma$ (see~\cite[Definition~2.2]{biggs}, \cite[p.~23]{doob}).

The combinatorial Laplacian matrix (also called Kirchhoff matrix in~\cite[p.~54]{bollobas}, admittance matrix in~\cite[p.~27]{doob}) of $\Gamma$ is $r\mathbf I- \mathbf A$, ($\mathbf I$ being the identity matrix of size the same as $\mathbf A$) with eigenvalues given by the increasing sequence
\begin{equation}\label{spectres}
\lambda_k=r-\lambda_k'\quad (0\leq k\leq s-1)
\end{equation}
and the same multiplicities (see~\cite[p.~268]{bollobas}, \cite[$(1.33')$ p.~30]{doob}). The sequence $(\lambda_k,m_k)_{ 0\leq k\leq s-1}$ is the \emph{Laplacian spectrum} of the graph. For a connected graph, $\lambda_0=0$ has multiplicity one, the corresponding eigenspace $E_0$ consisting of all constant functions. All other eigenvalues are therefore positive.

Johnson graph $J(m,N)$ has $s=N+1$ distinct eigenvalues and the Laplacian spectrum is associated with the eigenvalues of Hahn polynomials via relations
\begin{align}\label{spech}
(\lambda_k,m_k)_{ 0\leq k\leq N}&=(\lambda_k^{(0,m-2N,N)},\dim E_k^{(m,N)})_{ 0\leq k\leq N}\\
&=\left(k(m-k+1),\binom mk-\binom m{k-1}\right)_{0\leq k\leq N}
\end{align}
where $E_k^{(m,N)}$ denotes the eigenspace associated with $\lambda_k=\lambda_k^{(0,m-2N,N)}.$

Let us now, using the framework of~\cite{gine} in the discrete case, introduce the following notations. Let $V_1,\dots,V_n$ be a sample of vertices drawn from a distribution over $\mathcal V$. The uniform distribution assigns the measure $|\mathcal V|^{-1}$ to every vertex $v\in\mathcal V$.

Assume that for $0\leq k\leq s-1$, $\{g_{k,\ell}:1\leq \ell \leq \dim E_k^{(m,N)}\}$ is an orthonormal basis of $E_k^{(m,N)}$, thus providing an orthonormal basis of $L_2(J(m,N))$ with respect to the scalar product
\[
\braket{f|g}=\frac1{|\mathcal V|}\sum_{v\in\mathcal V}f(v)g(v).
\]

Given the positive real numbers $\alpha_1,\dots,\alpha_{s-1}$, a consistent, invariant Sobolev test for uniformity, is provided by the degenerate $V$-statistic defined by
\[
T_n(\{\alpha_k\})=\frac1n\sum_{i=1}^n\sum_{j=1}^n h(V_i,V_j),\ h(v_1,v_2)=\sum_{k=1}^{s-1}\alpha_k^2\left\{\sum_{\ell=1}^{\dim E_k^{(m,N)}} g_{k,\ell}(v_1)g_{k,\ell}(v_2)\right\}
\]
(see Theorem~4.4, formula $(2.7)$, $(5.5)$ in~\cite{gine}), the null hypothesis of uniformity being rejected for large values of $T_n(\{\alpha_k\})$.

Recall that the function $\tilde f:\mathcal V\to\mathbb R$ is called \emph{zonal spherical} (with respect to $v\in\mathcal V$) if $\tilde f$ is an eigenfunction of the Laplacian and can be written as $\tilde f(w)=f(d[v,w])$ for some one-variable function $f:\llbracket0,N\rrbracket\to\mathbb R$. The function $f$ is also referred to as a zonal spherical function, giving rise to $\tilde f$ after the choice of any arbitrary $v\in\mathcal V$. Following~\cite[$(5.1)$]{gine}, consider the functions defined, for $0\leq k\leq N$, by
\begin{align}\label{relban}
f_k^{(m,N)}(i)=(\dim E_k^{(m,N)})^{1/2}H_k^{(0,m-2N,N)}(i)\quad (0\leq i\leq N).
\end{align}

\begin{prop}
Assume that for $0\leq k\leq s-1$, $\bigl\{g_{k,\ell}:1\leq \ell \leq \dim E_k^{(m,N)}\bigr\}$ is an orthonormal base of $E_k^{(m,N)}$. Hahn polynomials satisfy
\begin{equation}\label{koro}
\frac1{|\mathcal V(m,N)|}\sum_{i=0}^NH_k^{(0,m-2N,N)}(i)H_\ell^{(0,m-2N,N)}(i)|S^{(m,N)}(i)|= \frac{\delta_{k,\ell}}{\dim E_k^{(m,N)}},
\end{equation}
and for functions defined by~\eqref{relban} satisfy, for $0\leq k,\ell\leq N$, Gin\'e's addition formula
\begin{equation}\label{koro1}
\sum_{\ell=1}^{\dim E_k^{(m,N)}} g_{k,\ell}(v_1)g_{k,\ell}(v_2)=(\dim E_k^{(m,N)})^{1/2}f_k^{(m,N)}(i)
\end{equation}
holds, whenever $d(v_1,v_2)=i$, for $v_1,v_2$ vertices of $J(m,N)$.
\end{prop}

\begin{proof}
In view of~\eqref{orthk}, \eqref{vols} and relation~\eqref{ff2} from Lemma~\ref{ff} of Section~\ref{tech}, \eqref{koro} is a restatement of~\eqref{orth}. Equality in~\eqref{koro1} is the discrete version of Gin\'e's addition formula (\cite[Theorem~3.2]{gine}) for the following reason. From (4) in~\cite[\S{13.1.5}, p.~492]{vilenkin}, we can assert, due to~\eqref{koro}, that $H_k^{(0,m-2N,N)}(i)$ is the $k-$th zonal spherical function of our graph, normalized by $H_k^{(0,m-2N,N)}(0)=1$. Then we notice that $(a)$ in Theorem~3.2 of~\cite{gine} agrees with our normalization $f_k^{(m,N)}(0)=(\dim E_k^{(m,N)})^{1/2}$, in view of definition $\eqref{relban}$.
\end{proof}

As a corollary of this proposition we obtain

\begin{theo}\label{last}
Assume $\alpha_1,\dots,\alpha_N\in\mathbb R$ and let $v_1,v_2$ be two vertices of the graph $J(m,N)$. The equality
\begin{align}
\nonumber
\sum_{k=1}^N\left\{\alpha_k^2\cdot \sum_{\ell=1}^{\dim E_k^{(m,N)}} g_{k,\ell}(v_1)g_{k,\ell}(v_2)\right\}= \sum_{k=1}^N \alpha_k^2\cdot \frac{d_0^{(0,m-2N,N)}H_k^{(0,m-2N,N)}(d[v_1,v_2])}{d_k^{(0,m-2N,N)}}
\end{align}
holds. In particular
\begin{align}\label{similar}
H^{(m,N)}(v_1,v_2)&:=
\sum_{k=1}^N\left\{\frac1{k(m-k+1)}\cdot \sum_{\ell=1}^{\dim E_k^{(m,N)}} g_{k,\ell}(v_1)g_{k,\ell}(v_2)\right\}\\&=h^{(0,m-2N,N)}(0,d[v_1,v_2]).
\end{align}
\end{theo}

\begin{proof}
The first equality is a direct consequence of~\eqref{koro1} and~\eqref{ff2}. Then the particular case $\alpha_k^2=1/\lambda_k^{(0,m-2N,N)}$ for $1\leq k\leq N$, in view of~\eqref{rgL1}, yields~\eqref{similar}.
\end{proof}

A kernel associated with $H^{(m,N)}$ for statistical purposes in Gin\'e's framework (see~\cite[$(5.3)'$--$(5.4)'$]{gine}) is
\begin{align}\label{alt}
G^{(m,N)}(v_1,v_2)&:=\sum_{k=1}^N\frac{1}{\sqrt{k(m-k+1)}}\left\{\sum_{\ell=1}^{\dim E_k^{(m,N)}} g_{k,\ell}(v_1)g_{k,\ell}(v_2)\right\}.
\end{align}
We will denote by $\|G^{(m,N)}\|_\infty$ the number $\max_{v_2}G^{(m,N)}(v_1,v_2)$, which is independent of $v_1$, since our graph is homogeneous.

If $d$ is a positive integer, let $\chi(d)$ denote a chi-square random variable with $d$ degrees of freedom. The following result combines standard properties of degenerate $V$-statistics with~\cite[Theorem~5.3]{gine}.

\begin{theo}\label{bb}
Let $J(m,N)$ be a Johnson graph, with notations~\eqref{spech} for the Laplacian spectrum and definitions~\eqref{ccc}, \eqref{similar}. Let $V_1,V_2,\dots$ be a sequence of vertices from the uniform distribution over $J(m,N)$. Gin\'e's statistic
\begin{equation}
T_n\biggl(\biggl\{\sqrt{\lambda_k^{(0,m-2N,N)}}\biggr\}\biggr)=\frac1n\sum_{i=1}^n\sum_{j=1}^nH^{(m,N)}(V_i,V_j)=\frac1n\sum_{i=1}^n\sum_{j=1}^nh^{(0,m-2N,N)}(0,d[V_i,V_j])
\end{equation}
converges in law toward the random variable
\[
\sum_{k=1}^N\frac{\chi_k(\dim E_k^{(m,N)})}{\lambda_k^{(0,m-2N,N)}}
\]
where the random variables $\chi_k(\dim E_k^{(m,N)})$, $1\leq k\leq N$ are independent.

Furthermore, the test based on the rejection of uniformity for large values of $T_n$ is most powerful invariant except for terms of order $O(\alpha^3)$, against the alternative p.m.f.
\[
w\in\mathcal V\mapsto \frac1{|\mathcal V(m,N)|}\big(1+\alpha\, \frac{G^{(m,N)}(v,w)}{\|G^{(m,N)}\|_\infty}\big)\quad (0<|\alpha|<1)
\]
where $v$ is any fixed vertex of $J(m,N)$.
\end{theo}

The lack of data or models from real life associated with Johnson graphs deprives us of any privileged alternative distribution, under which the power of our new test for uniformity should be discussed through simulations. Therefore numerical simulations involving some specific alternatives will only be the object of some forthcoming papers.

To our knowledge no specific probabilistic or statistical models have been discussed in the literature, so that we just outline directions for further research. Gin\'e's alternative~\eqref{alt} arises from Fourier analysis and might not be of particular interest. Other alternative distributions are of the form
\begin{equation}
w\in\mathcal V\mapsto \frac1{|\mathcal V(m,N)|}\big(1+\alpha H_1^{(0,m-2N,N)}(d(v,w))\big)
\end{equation}
with $|\alpha|$ small enough and where $v$ is any fixed vertex of $J(m,N)$. They are perturbations of the uniform distribution by a zonal spherical function associated with the first non-constant Hahn polynomial, in other words the analogues, for $J(m,N)$, of the cardioid distributions for the circle (see~\cite[\S{3.5.5}]{mardia}).

Other alternatives can be associated with the subgroups of the full isometry group of $J(m,N)$ (given, e.g., in~\cite[Theorem~9.1.2]{brouwer}). The uniform distribution being characterised by its invariance with respect to the full group, some proper subgroup may determine a wider family of distributions, in the same way as, for instance, on the Euclidean sphere, rotations about a given axis determine a family of rotationally invariant distributions.

All these alternatives arise from the so-called transformation models, see~\cite[3.5.1]{mardia}, and references therein about this topic.
\section{Useful technical results}\label{tech}
Throughout this section we will sometimes drop the superscripts and write, e.g. $\lambda_k$ instead of $\lambda_k^{(\mu,\nu,N)}$.

Following~\cite[\S{1.1}]{johnson} and~\cite[\S{1.3}]{koek}, we use the generalized factorial and binomial coefficients defined by
\[
r!=\Gamma(r+1),\ \binom rk=(-1)^k\frac{(-r)_k}{k!}\quad(r>-1, k\in\mathbb N),
\]
where $\Gamma$ is the gamma function, and set
\[
\binom{-r}k=(-1)^k\binom{r+k-1}{k},\quad(r\geq 1,k\in\mathbb N).
\]
Note the identity
\[
(-r)_k=(-1)^k\frac{r!}{(r-k)!}\quad(r\geq k)
\]
The traditional orthogonality relation for Hahn polynomials given by~\cite[\S{9.5}]{koek}, reads
\begin{equation}
\sum_{i=0}^N\binom{\alpha+i}i\binom{\beta+N-i}{N-i}Q_k(i;\alpha,\beta,N)Q_\ell(i;\alpha,\beta,N)
=\frac{(-1)^k(k+\alpha+\beta+1)_{N+1}(\beta+1)_kk!}{(2k+\alpha+\beta+1)(\alpha+1)_k(-N)_k}=:d_k'
\end{equation}
With parameters $\nu=-\alpha-N-1,\mu=-\beta-N-1$, the above identities give
\begin{multline}
\binom{\alpha+k}k\binom{\beta+N-k}{N-k}=\binom{-N-\nu-1+k}k\binom{-N-\mu-1+N-k}{N-k}\\=(-1)^k\binom{N+\nu}k(-1)^{N-k}\binom{\mu+N}{N-k}.
\end{multline}
On the other hand,
\begin{align}
(k+\alpha+\beta+1)_{N+1}&=(k-2N-\mu-\nu-1)_{N+1}=(-1)^{N+1}\frac{(2N+\mu+\nu+1-k)!}{(N+\mu+\nu-k)!},\\
(\beta+1)_k=(-N-\mu)_k&=(-1)^k\frac{(N+\mu)!}{(N+\mu-k)!},\ (\alpha+1)_k=(-N-\nu)_k=(-1)^k\frac{(N+\nu)!}{(N+\nu-k)!},\\
(-N)_k&=(-1)^k\frac{N!}{(N-k)!},\ (2k+\alpha+\beta+1)=-(2N+\mu+\nu+1-2k),
\end{align}
so that
\[
d_k'=\frac{(-1)^N(2N+\mu+\nu+1-k)!(N+\mu)!(N+\nu-k)!(N-k)!k!}{(2N+\mu+\nu+1-2k)(N+\mu+\nu-k)!(N+\nu)!(N+\mu-k)!N!^2},
\]
and~\eqref{orth} readily follows.


Consider the three-variable kernel, defined over $\llbracket0,N\rrbracket^3$ by
\begin{align}
\tilde h^{(\mu,\nu,N)}(i,j,z)&=
\frac{\bigl(\mathbf {1}_{\{i\leq z-1\}}-\Omega^{(\mu,\nu,N)}(z-1)\bigr)\cdot\bigl(\mathbf {1}_{\{j\leq z-1\}}-\Omega^{(\mu,\nu,N)}(z-1)\bigr)}{\sigma^{(\nu)}(z)\omega^{(\mu,\nu,N)}(z)^2}\quad(z=1,\dots,N),\\
\tilde h^{(\mu,\nu,N)}(i,j,0)&=0,
\end{align}
and $\tilde h^{(\mu,\nu,N)}(i,j,z)=0$ for $i$, $j$ or $z$ belonging to $\mathbb Z\setminus \llbracket0,N\rrbracket$. Throughout this subsection, $I$ and $Z$ will denote two independent random variables with p.m.f. $\omega^{(\mu,\nu,N)}$.

Elementary computations give, for $0\leq i\leq j\leq N$,
\begin{multline}
E_Z\big[\tilde h(i,j,Z)\big]= \sum_{\ell=1}^{i}\frac{\Omega(\ell-1)^2}{\sigma(\ell)\omega(\ell)}-\sum_{\ell=i+1}^{j}\frac{\overline\Omega(\ell-1)\Omega(\ell-1)}{\sigma(\ell)\omega(\ell)}+\sum_{\ell=j+1}^{N}\frac{\overline\Omega(\ell-1)^2}{\sigma(\ell)\omega(\ell)}\\
=\sum_{\ell=1}^i\frac{\Omega(\ell-1)}{\sigma(\ell)\omega(\ell)} +\sum_{\ell=j+1}^N\frac{\overline\Omega (\ell-1)}{\sigma (\ell)\omega (\ell)}-\sum_{\ell=1}^N\frac{\Omega (\ell-1)\overline \Omega (\ell-1)}{\sigma (\ell)\omega (\ell)} =h(i,j)
\end{multline}
(for the second equality use relations $\Omega^2+\overline \Omega\Omega=\Omega$ and $\overline \Omega^2+\overline \Omega\Omega=\overline \Omega$). Then from $E_I[\mathbf{1}_{\{I\leq z-1\}}-\Omega(z-1)]=0,$ we infer relations $E_I [\tilde h(I,j,z)]=0$ for every $j,z\in\mathbb Z$, and $E_Z \{E_I [\tilde h(I,j,Z)]\}=0.$ Then by the discrete Fubini theorem we obtain $E_I\{E_Z [\tilde h(I,j,Z)]\}=0$, which means $E_I\big[h(I,j)\big]=0.$ We have proved

\begin{lemm}\label{degebis}
The kernel $(i,j)\mapsto h^{(\mu,\nu,N)}(i,j)$ is degenerate with respect to $\omega^{(\mu,\nu,N)}$. In other words
\[
\forall j\in\{0,1,\dots,N\}:\sum_{i=0}^N h^{(\mu,\nu,N)}(i,j)\omega^{(\mu,\nu,N)}(i)=0.
\]
\end{lemm}

In view of~\eqref{orthk}, an expansion of the Dirac distribution is given by the dual relations
\begin{equation}
\omega^{(\mu,\nu,N)} ({\,\cdot\,})\sum_{k=0}^N \frac{d_0^{(\mu,\nu,N)}H_k^{(\mu,\nu,N)}({\,\cdot\,})H_k^{(\mu,\nu,N)}(j)}{d_k^{(\mu,\nu,N)}}=\delta_j({\,\cdot\,})\quad(0\leq j\leq N).
\end{equation}
The fact that $H_0^{(\mu,\nu,N)}=1$ then implies

\begin{lemm}
Assume $0\leq j\leq N$. Then
\begin{align}
\omega^{(\mu,\nu,N)} ({\,\cdot\,})\sum_{k=1}^N \frac{d_0^{(\mu,\nu,N)}H_k^{(\mu,\nu,N)}({\,\cdot\,})H_k^{(\mu,\nu,N)}(j)}{d_k^{(\mu,\nu,N)}}&=\delta_j({\,\cdot\,})-\omega^{(\mu,\nu,N)}({\,\cdot\,}).
\end{align}
\end{lemm}

By linearity and in view of~\eqref{difeq}, we obtain as a corollary

\begin{lemm}\label{ll}
Assume $0\leq j\leq N$. Then
\begin{align}
\mathcal L^{(\mu,\nu,N)}\left\{\sum_{k=1}^N\frac{d_0^{(\mu,\nu,N)}H_k^{(\mu,\nu,N)}({\,\cdot\,})H_k^{(\mu,\nu,N)}(j)}{\lambda_k^{(\mu,\nu,N)}d_k^{(\mu,\nu,N)}}\right\}&=\omega^{(\mu,\nu,N)}({\,\cdot\,})-\delta_j({\,\cdot\,})
\end{align}
\end{lemm}

\begin{lemm}\label{mm}
Assume $0\leq j\leq N$. Then
\begin{equation}
\mathcal L^{(\mu,\nu,N)}\big[h^{(\mu,\nu,N)}(\cdot,j)\big]=\omega^{(\mu,\nu,N)}({\,\cdot\,})-\delta_j({\,\cdot\,}).
\end{equation}
\end{lemm}
\begin{proof}
The difference operators acting on the variable $i$, we obtain successively
\begin{align}
\nabla \{1_{\{i \leq z-1\}}-\Omega(z-1)\}&=1_{\{i \leq z-1\}}-1_{\{i-1 \leq z-1\}}=-1_{\{i= z\}}\quad(i,z\in\mathbb Z),\\
\nabla \tilde h(i,j,z)&=-1_{\{i= z\}}\cdot\frac{\{1_{\{j\leq z-1\}}-\Omega(z-1)\}}{\sigma(z)\omega(z)^2}\quad(i,z\in\llbracket 1,N\rrbracket),\\
\nabla h(i,j)&= \nabla \{E_Z \tilde h(i,j,Z)\}=E_Z\{\nabla \tilde h(i,j,Z)\}\\&=-\omega(i)\cdot \frac{\{1_{\{j\leq i-1\}}-\Omega(i-1)\}}{\sigma(i)\omega(i)^2}\quad(i\in\llbracket 1,N\rrbracket),\\ \sigma(i)\omega(i)\nabla h(i,j)&=\Omega(i-1)-1_{\{j\leq i-1\}}\quad(i\in\llbracket 1,N\rrbracket).
\end{align}
The latter is also true for $i\leq 0$ or $i>N$ (both sides being equal to zero), so that we may write, for $i\in\mathbb Z$,
\begin{align}
\Delta[\sigma(i)\omega(i)\nabla h(i,j)]&=\Delta[\Omega(i-1)-1_{\{j\leq i-1\}}]=\omega(i)-\delta_j(i),
\end{align}
and the result is proved.
\end{proof}

\begin{lemm}\label{ff}
The graph $J(m,N)$ has eigenvalues
\begin{equation}\label{rtt}
\lambda_k^{(0,m-2N,N)}=k(m-k+1)\quad(0\leq k\leq N)
\end{equation}
with multiplicities
\begin{equation}
\dim E_k^{(m,N)}=\binom mk-\binom {m}{k-1}=\frac{m!(m-2k+1)}{k!(m-k+1)!}=\frac{d_0^{(0,m-2N,N)}}{d_k^{(0,m-2N,N)}}.\label{ff2}
\end{equation}
\end{lemm}

\begin{proof}
Let us use Theorem~9.1.2 (with $n=m, e=N$) in~\cite[p.~255]{brouwer}, or~\cite[\S{II.3} p.~431]{stanton} (with $v=m$ and $n=N$). The eigenvalues of the adjacency matrix of the graph are $\lambda_k'=(N-k)(m-N-k)-k$, $0\leq k\leq N$, and $\eqref{rtt}$ follows from~\eqref{spectres} with $r=N(m-N)$ and~\eqref{lamsig3}.
For the value of $\dim E_k^{(m,N)}$, see the last equality p.~431 in~\cite{stanton} and the last equality in~\cite[Theorem~9.1.2]{brouwer}. The second equality in~\eqref{ff2} is simple calculus; for the third, use~\eqref{orth}, yielding
\begin{equation}
d_k^{(0,m-2N,N)}=\frac{(m-k+1)!k!}{(m-2k+1)(m-N)!N!},\ d_0^{(0,m-2N,N)}=\frac{m!}{(m-N)!N!}.
\end{equation}
\end{proof}

\bibliographystyle{crplain}
\bibliography{CRMATH_Pycke_20221191}
\end{document}
