%~Mouliné par MaN_auto v.0.32.3 2024-05-28 18:15:28
\documentclass[CRMATH, Unicode, XML, biblatex]{cedram}

\TopicFR{\'Equations aux dérivées partielles, Théorie du contrôle}
\TopicEN{Partial differential equations, Control theory}

%\usepackage{ulem}%todelete

\addbibresource{CRMATH_Santambrogio_20230495.bib}

\usepackage{esint}
\usepackage{colonequals}

\newcommand{\R}{\ensuremath{\mathbb{R}}}
\newcommand{\N}{\ensuremath{\mathbb{N}}}
\newcommand{\1}{\ensuremath{\mathbb{1}}}
\newcommand{\de}{\mathrm{d}}
\newcommand{\ve}{\varepsilon}
\newcommand{\e}{\operatorname{e}}

\DeclareMathOperator{\Lip}{Lip}
\DeclareMathOperator{\argmax}{argmax}
\DeclareMathOperator{\BV}{BV}
\DeclareMathOperator{\prox}{prox}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\spt}{spt}

\newcommand*{\dx}{\de x}
\newcommand*{\dy}{\de y}
\newcommand*{\ds}{\de s}
\newcommand*{\dt}{\de t}

\let\oldmapsto\mapsto
\renewcommand*{\mapsto}{\mathchoice{\longmapsto}{\oldmapsto}{\oldmapsto}{\oldmapsto}}
\renewcommand*{\to}{\mathchoice{\longrightarrow}{\rightarrow}{\rightarrow}{\rightarrow}}

\graphicspath{{./figures/}}

\newcommand*{\mk}{\mkern -1mu}
\newcommand*{\Mk}{\mkern -2mu}
\newcommand*{\mK}{\mkern 1mu}
\newcommand*{\MK}{\mkern 2mu}

\hypersetup{urlcolor=purple, linkcolor=blue, citecolor=red}

\newcommand*{\relabel}{\renewcommand{\labelenumi}{(\theenumi)}}
\newcommand*{\romanenumi}{\renewcommand*{\theenumi}{\roman{enumi}}\relabel}
\newcommand*{\Romanenumi}{\renewcommand*{\theenumi}{\Roman{enumi}}\relabel}
\newcommand*{\alphenumi}{\renewcommand*{\theenumi}{\alph{enumi}}\relabel}
\newcommand*{\Alphenumi}{\renewcommand*{\theenumi}{\Alph{enumi}}\relabel}
\let\oldtilde\tilde
\renewcommand*{\tilde}[1]{\mathchoice{\widetilde{#1}}{\widetilde{#1}}{\oldtilde{#1}}{\oldtilde{#1}}}
\let\oldexists\exists
\renewcommand*{\exists}{\mathrel{\oldexists}}
\let\oldforall\forall
\renewcommand*{\forall}{\mathrel{\oldforall}}

\makeatletter
\def\@setafterauthor{%
  \vglue3mm%
%  \hspace*{0pt}%
\begingroup\hsize=12cm\advance\hsize\abstractmarginL\raggedright
\noindent
%\hspace*{\abstractmarginL}\begin{minipage}[t]{10cm}
   \leftskip\abstractmarginL
  \normalfont\Small
  \@afterauthor\par
\endgroup
\vskip2pt plus 3pt minus 1pt
}
\makeatother

%\def\multiciterangedelim{--}%devrait être déjà intégré pourtant ??

\title{Optimal trajectories in $L^1$ and under $L^1$ penalizations}
\alttitle{Trajectoires optimales dans $L^1$ et sous pénalisation de type $L^1$}

\author{\firstname{Annette} \lastname{Dumas}}
\address{Institut Camille Jordan, Universit\'e Claude Bernard - Lyon 1; 43 Boulevard du 11 Novembre 1918, 69622 Villeurbanne cedex, France}
\email{dumas@math.univ-lyon1.fr}

\author{\firstname{Filippo} \lastname{Santambrogio}\IsCorresp}
\address[1]{Institut Camille Jordan, Universit\'e Claude Bernard - Lyon 1; 43 Boulevard du 11 Novembre 1918, 69622 Villeurbanne cedex, France}
\email{santambrogio@math.univ-lyon1.fr}

\begin{abstract}
Motivated by a MFG model where the trajectories of the agents are piecewise constants and agents pay for the number of jumps, we study a variational problem for curves of measures where the cost includes the length of the curve measures with the $L^1$ distance, as well as other, non-autonomous, cost terms arising from congestion effects. We prove several regularity results (first in time, then in space) on the solution, based on suitable approximation and maximum principle techniques. We then use modern algorithms in non-smooth convex optimization in order to obtain a numerical method to simulate such solutions.
\end{abstract}

\begin{altabstract}
Motivé par un modèle de MFG dans lequel les trajectoires des agents sont constantes par morceaux et où les agents paient un coût dépendant du nombre de sauts, nous étudions un problème variationnel pour des courbes de mesures où le coût inclut la longueur de la courbe mesurée en termes de la distance $L^1$, ainsi que d'autres termes non autonomes représentant des effets de congestion. Nous démontrons plusieurs résultats de régularité (d'abord en temps, puis en espace) sur la solution, en utilisant des techniques d'approximation et le principe du maximum. Ensuite, des algorithmes modernes d'optimisation convexe non lisse nous permettent d'obtenir une méthode numérique pour simuler de telles solutions.
\end{altabstract}

\keywords{\kwd{BV functions}
\kwd{non-autonomous calculus of variations}
\kwd{regularity}
\kwd{non-smooth optimization}}

\altkeywords{\kwd{fonctions BV}
\kwd{calcul des variations non-autonome}
\kwd{régularité}
\kwd{optimisation non lisse}}

\begin{document}

%todelete
\let\oldemph\emph
\let\emph\textit
%todelete

\maketitle

%todelete
\let\emph\oldemph
%todelete

\section{Introduction}\label{sec1}

This work studies a class of variational problems which arises in a variant of one of the most classical Mean Field Game models. This variant could be used as a brick for describing the evolution of the real estate market but the mathematical questions which appear are of independent interest and the present paper only concentrates on the variational problem without addressing the full model.



The theory of Mean Field Games was introduced around 2006 at the same time by Lasry and Lions, \cite{LL06cr1,LL06cr2,LasLio,Lions-lectures}, and by Caines, Huang and Malham\'e, \cite{HCMieeeAC06}, in order to describe the evolution of a population of rational agents, each one choosing (or controlling) a path in a state space, according to some preferences which are affected by the presence of other agents nearby in a way physicists call mean-field effect. The evolution is described through a Nash equilibrium in a game with a continuum of players.

In the simplest MFG models we look at a population of agents moving inside a domain $\Omega$ and we suppose that every agent chooses his own trajectory solving a minimization problem of the form
\begin{equation*}\label{cost}
\min\;\int_0^T \left(\frac{|x'(t)|^2}{2}+h(t,\rho_t,x(t))\right)\de t + \Psi(x(T)),
\end{equation*}
with given initial point $x(0)$. The mean-field effect will be modeled through the fact that the running cost $h$ explicitly depends on the density $\rho_t$ of the agents at time $t$.

The initial density of the players $\rho_0$ is given. The goal in MFG is to find an evolution $t\mapsto \rho_t$ such that, when agents consider the above optimization problem for such a given $\rho$, the trajectories they choose globally reconstruct the same density $\rho$. This is a non-trivial fixed point condition which can be described via a system of PDEs (we refer to~\cite{Carnotes}, for instance). In some cases, and in particular when $h(t,\rho,x)=V(t,x)+g(\rho(x))$ for an increasing function $g:\R_+\to\R$ (where, by abuse of notation, $\rho$ also denotes the density of the measure $\rho$, which has to be found absolutely continuous), the fixed point can be found via an overall minimization problem. The function $g$ models in this case the cost for congestion (i.e.~agents have a higher cost when passing through congested regions, where the density is larger).

The corresponding variational formulation is the following: we consider all the possible population evolutions, i.e.~curves $t\mapsto\rho_t\in\mathcal P (\Omega)$ and we minimize the following energy
\[
\int_0^T\left(\frac12 |\dot\rho|_{W_2}^2(t)+\mathcal F_{V_t}(\rho_t)\right)\dt+ \int_\Omega \Psi \de\rho_T,
\]
where $\mathcal F_V(\rho):=\int V(x)\rho(x)+f(\rho(x))\dx,$ where $f$ is the anti-derivative of $g$, i.e.~$f'(s)=g(s)$ for $s\in\R_+$ with $f(0)=0$. We fix by convention $f(s)=+\infty$ for $s<0$. The functional $\mathcal F_V$ is set to $+\infty$ if $\rho$ is not absolutely continuous. Here, at each instant of time, we use the functional associated with the time-dependent potential $V_t:=V(t,\cdot\,)$. The minimization problem above is a variant of the well-known dynamic formulation of optimal transport (see~\cite{BenBre}) which includes congestion effects (as in~\cite{ButJimOud}). Note in particular that $f$ is convex, as its derivative is the increasing function $g$, and so is $\mathcal F_V$. The notation $ |\dot\rho|_{W_2}(t)$ stands for the metric derivative (see~\cite{AmbTil}) of the curve $t\mapsto \rho_t$ in the space of probability measures, endowed with the Wasserstein distance $W_2$ (a distance induced by optimal transport, see~\cite{OTAM,villani}). The reason for this distance to appear is related to the presence of the quadratic term $|x'(t)|^2$ in the minimization problem solved by each agent. If this term was replaced by $|x'(t)|^p$ we would have $|\dot\rho|_{W_p}^p(t)$ instead of $ |\dot\rho|_{W_2}^2(t)$.

We are now interested in a different individual cost, and more precisely we replace the term $\int_0^T \frac{|x'(t)|^2}{2}\de t$, defined on curves $x\in H^1([0,T])$ with the cost $S(x)$, defined for $x\in BV([0,T])$ in the following way: if $x$ is piecewise constant $S(x)$ equals the number of its jumps, while $S(x)=+\infty$ if $x$ is not piecewise constant. We then face discontinuous trajectories, and the individual optimization problem can be considered as an impulse control problem (for which we refer, for instance, to the classical book~\cite{BenLiopere}). The variational formulation of such a new Mean Field Game is based on the observation (see, for instance, \cite{villani}) that the Wasserstein distance associated with a transport cost
\[
c(x,y)=
\begin{cases}
1&\text{if }x\neq y\\
0&\text{if }x=y
\end{cases}
\]
is equal to the total variation distance between the two measures times a factor $1/2$. Indeed, when transporting with this cost a measure $\mu$ onto a measure $\nu$ with the same mass, the mass which needs to move is equal to the part of the mass of $\mu$ which is not in common with $\nu$, which means half of the mass $\|\mu-\nu\|$. When $\mu,\nu$ are absolutely continuous, this gives $\frac12 \|\mu-\nu\|_{L^1}$.

This motivates the study of a problem where the term $\int_0^T\frac12 |\dot\rho|_{W_2}^2(t)\de t$ is replaced by the length of the curve $t\mapsto \rho_t$ computed according to the $L^1$ distance (we can restrict to absolutely continuous measures $\rho_t$ because of the functional $\mathcal F_{V_t}$). Hence, formally, we obtain
\[
\min \int_0^T\left(\frac12 \|\dot\rho\|_{L^1}+\mathcal F_{V_t}(\rho_t)\right)\dt+ \int_\Omega \Psi \de \rho_T.
\]
The term $\int_0^T \|\dot\rho\|_{L^1}\dt$ should be understood as the length of the curve $\rho$ (its total variation) w.r.t. the $L^1$ distance.

Because of the BV behavior of the curves, the Dirichlet condition $\rho(0)=\rho_0$, as well as the final penalization on $\rho_T$, have to be suitably interpreted. Indeed, it is always possible to jump exactly at time $t=0$ or $t=T$, so that the Dirichlet condition at $t=0$ can be replaced by a penalization $\frac12\|\rho(0^+)-\rho_0\|_{L^1}$ and for the final penalization, we can replace $\int_\Omega \Psi \de\rho_T$ with $\inf_{\mu\in \mathcal{P}(\Omega)} \frac12\|\mu-\rho_T\|+\int \Psi \de \mu $. This last quantity can be computed and equals $\int \tilde \Psi \de \rho_T$, where $\tilde\Psi=\min\{\Psi,\inf\Psi+1\}$. This is perfectly coherent with the individual optimization problem: if agents are allowed to jump at a cost $1$, the final cost $\Psi$ is automatically replaced by $\tilde \Psi$ as there is no point in paying $\Psi(x)$ whenever $\Psi(x)>\Psi(x')+1$ for some point $x'$. Up to subtracting a constant from the final penalization, we can thus suppose that we have $|\Psi|\leq 1/2$.

More generally, we will study in this note the variational problem
\[
\min \int_0^T\int_\Omega\left(\lambda |\dot\rho|+f(\rho_t)+V_t\rho_t\right)\dx\,\de t+ \int_\Omega \Psi \de \rho_T
\]
with a final cost $\Psi$ satisfying $|\Psi|\leq \lambda$ and an initial condition $\rho(0)=\rho_0$ which can also be replaced by a penalization $\lambda \|\rho(0^+)-\rho_0\|_{L^1}$. A variant will be the infinite-horizon case with a discount factor $r>0$, i.e.~the variational problem
\[
\min \int_0^\infty\int_\Omega \e^{-rt}\left(\lambda |\dot\rho|+f(\rho_t)+V_t\rho_t\right)\dx\,\de t
\]
under the same initial condition.

For simplicity, we will only consider the case where the function $f$ is uniformly convex (think of $f(\rho):=\frac 12\rho^2$). We still establish regularity results in both time and space for the optimal solution (which is unique because of strict convexity). This result, besides its mathematical interest, also has at least two applications in the MFG theory which motivates the problem. First, it proves that, despite individual trajectories being discontinuous, the global behavior of the density of agents $\rho(t,x)$ is smooth, coherently with the experience about the evolution of residential areas. Second, it provides the necessary mathematical properties on the individual running cost $g(\rho)+V$ so as to rigorously prove that minimizers of the variational problem are indeed equilibria of the game. This requires, as we will briefly explain at the end of Section~\ref{sec5}, the continuity (in space) of the running cost, or at least its boundedness; we refer to~\cite{CIME} for more details.

Then, we will also provide a numerical method to approximate the solutions using convex optimization tools, able to deal with the non-smooth penalization given by the $L^1$ term.

Since proving regularity in a problem set on BV curves could be challenging, in order to develop the relevant techniques (which will be based on a suitable use of the maximum principle) we will first start from a simpler, yet not-so-standard, case, where curves, instead of being valued in the functional space $L^1$, will be simply valued in the euclidean space $\R^d$. This will be the object of Section~\ref{sec2}, where we will prove Lipschitz behavior in the open interval $(0,T)$. Some explicit examples will also be analyzed, in particular for $d=1$, in order to have some cases which could be used as a test for the numerical methods of Section~\ref{sec6}.

The analysis of the infinite-dimensional case, valued in $L^1$, will be the object of Section~\ref{sec3}, and will be performed by means of many approximations. Some of them are common with the Euclidean case, but there is an extra approximation which comes from discretization: the infinite-dimensional problem is indeed approximated by a sequence of finite-dimensional ones, where the domain $\Omega$ is divided into small cells and only piecewise constant densities are considered. Note that in this Eulerian discretization (different from a Lagrangian one where one should follow the jumping trajectories of the particles) the problem becomes very similar to the one studied in Section~\ref{sec2} with the only exception that the norm used on the finite-dimensional space is not the Euclidean one (i.e.~$\ell^2$) but the $\ell^1$ norm. This makes things slightly more involved, but, surprisingly, the final Lipschitz regularity result will be expressed anyway in the $L^2$ norm.

Section~\ref{sec4} contains the modifications of the proof strategy which are needed to handle the infinite-horizon case, both in the finite and infinite dimensional case. Then, Section~\ref{sec5} addresses the problem of space regularity. We will prove that the solution $\rho(t,x)$ shares the same modulus of continuity in $x$ (uniformly in $t$) of the Dirichlet data and of the time-dependent potential $V(t,x)$, and in this proof, differently from what is done in Sections~\ref{sec2}, \ref{sec3} and \ref{sec4}, the Dirichlet data will be attacked by approximation but without replacing them with penalizations (the $L^1$ cost will be approximated by other superlinear costs on the velocity, as it was already done in the other sections as well). Indeed, for the previous results, it was crucial to use the transversality conditions coming from a suitable approximation of these penalizations, while here, on the contrary, the transversality conditions are harder to consider. This is why the first regularity result is presented in the case where the problem is endowed with Dirichlet conditions at both $t=0$ and $t=T$, which is not the natural framework we will be interested in. In order to consider some instances of the problem which are more interesting for applications, we will consider the infinite time horizon with Dirichlet data at $t=0$ (in this case we do not have penalization at the end), where the whole analysis can be performed, as well as the finite-horizon case when the final penalization $\Psi$ is piecewise constant: in this last case we can prove continuity of $\rho$ on each piece where $\Psi$ is constant, which is enough, for instance, to obtain $\rho\in L^\infty$.

Finally, Section~\ref{sec6} uses proximal methods from non-smooth convex optimization to attack in a numerical way all these problems, and some examples will be considered. A periodic and explicit case presented at the end of Section~\ref{sec2} will be solved numerically as an example in order to validate the method.


\section{Lipschitz regularity in the Euclidean setting}\label{finite_dim}
\label{sec2}

As a starting point for our analysis, we consider here the following easier problem
\begin{equation}\label{TVF}
\min\left\{ TV(\gamma;[0,1])+\int_0^1 F(t,\gamma(t))\de t + \psi_0(\gamma(0))+\psi_1(\gamma(1))\;:\; \gamma\in BV([0,1];\R^d)\right\}.
\end{equation}
Here $TV(\gamma;[0,1])$ denotes the total variation of $\gamma$ on $[0,1]$, i.e.
\[
TV(\gamma;[0,1]):=\sup\left\{\sum_{k=0}^{N-1}|\gamma(t_k)-\gamma(t_{k+1})|\,:\, 0=t_0<t_1<\dots<t_N=1\right\},
\]
a value which also coincides with the total mass of the vector measure $\gamma'$.

The functions $\psi_0,\psi_1:\R^d\to[0,+\infty]$ are just supposed to be l.s.c. and bounded from below, and among possible choices we mention those which impose Dirichlet boundary conditions, i.e.
\[
t=0,1\qquad\psi_t(x)=
\begin{cases} 0 &\text{ if }x=x_t,\\ +\infty & \text{ if not.}
\end{cases}
\]

We stress the fact that functions in BV spaces are not continuous and can have jumps; even if we consider that BV functions of one variable are defined pointwise, it is possible to change very easily their value at a point. In particular, Dirichlet boundary conditions have a very particular meaning: a curve which takes the value $x_0$ at $t=0$ but immediately jumps at another point at $t=0^+$ is considered to satisfy the condition $\gamma(0)=x_0$. In particular, it is possible to freely choose a curve $\gamma$ on $(0,1)$ and then add a jump to $x_0$ or $x_1$ at the boundary in order to satisfy the corresponding Dirichlet boundary condition, of course adding a price $|\gamma(0^+)-x_0|$ or $|\gamma(1^-)-x_1|$ to the total variation. In this way, we could decide to identify the values of $\gamma$ at $t=0$ or $t=1$ with their right or left limits at these points, respectively, and replace Dirichlet boundary conditions with a boundary penalization. This could also be done for more general penalizations $\psi_0,\psi_1$, for which it is useful to define the relaxed functions
\[
\tilde\psi_t(x):=\inf_y |y-x|+\psi_t(y).
\]
It is important to observe that the functions $\tilde\psi_t$ are automatically $1$-Lipschitz continuous, as an inf of $\Lip_1$ functions of the variable $x$, indexed with the parameter $y$.

In this way the problem~\eqref{TVF} becomes
\begin{equation*}
\min\left\{ TV(\gamma;[0,1])+\int_0^1 F(t,\gamma(t))\de t + \tilde\psi_0(\gamma(0^+))+\tilde\psi_1(\gamma(1^-))\;:\; \gamma\in BV([0,1];\R^d)
\right\}
\end{equation*}
or, equivalently, we can replace $ \tilde\psi_0(\gamma(0^+))+\tilde\psi_1(\gamma(1^-))$ with $ \tilde\psi_0(\gamma(0))+\tilde\psi_1(\gamma(1))$ and impose continuity of $\gamma$ at $t=0,1$.

\begin{lemm}\label{preveLFpsi}
Let $L:\R^d\to\R$ be a smooth and uniformly convex function which is supposed to be radial: $L(v):=\ell(|v|)$ for a convex and increasing function $\ell:\R_+\to\R$. Let $F:[0,1]\times\R^d\to\R$ be a $C^2$ time-dependent potential satisfying $D^2_{xx}F(t,x)\geq c_0\mathrm I$ for a certain constant $c_0>0$ and $|\partial_t \nabla_x F(t,x)|\leq C_0$, and $\psi_0,\psi_1:\R^d\to\R$ two Lipschitz continuous functions. Consider a solution $\gamma$ of
\[
\min\left\{ \int_0^1 (L(\gamma'(t))+F(t,\gamma(t)))\de t+\psi_0(\gamma(0))+\psi_1(\gamma(1))\; :\; \gamma\in H^1([0,1])\right\}.
\]
Then $\gamma$ is Lipschitz continuous and satisfies $|\gamma'|\leq C$ where $C$ is defined by
\[
C:=\max\left\{\frac{C_0}{c_0},(\ell')^{-1}(\Lip\psi_0), (\ell')^{-1}(\Lip\psi_1)\right\}.
\]
\end{lemm}

\begin{proof}
Let us start from the Euler--Lagrange system of the above optimization problem. We have
\[
\begin{cases} (\nabla L(\gamma'))'(t)=\nabla_x F(t,\gamma(t))&\\
\nabla L(\gamma'(0))=\nabla\psi_0(\gamma(0))&\\
\nabla L(\gamma'(1))=-\nabla\psi_1(\gamma(1)).
\end{cases}
\]
First we observe that $\gamma\in C^0$ and $F\in C^1$ imply that the right-hand side in the first equation is a continuous function, so that we have $\nabla L(\gamma')\in C^1$. Inverting the injective function $\nabla L$ we obtain $\gamma\in C^2$ and, since $F\in C^2$, we obtain $\gamma\in C^3$.

Then, the transversality conditions show $|\nabla L(\gamma'(t))|\leq \Lip\psi_t$ for $t=0,1$. Using $|\nabla L(v)|=\ell'(|v|)$ we see $|\gamma'(t)|\leq C$ for $t=0,1$.

Let us now consider the maximal value of $|\gamma'(t)|$. This maximum exists on $[0,1]$ since $\gamma\in C^1$ and if it is attained on the boundary $t=0,1$ the desired Lipschitz bound $|\gamma'|\leq C$ is satisfied. We can now suppose that it is attained in $(0,1)$. Since $\ell'$ is increasing and non-negative, the maximal points of $|\gamma'|$ and of $|\nabla L(\gamma')|^2$ are the same. We can then write the optimality condition differentiating once and twice in $t$: we do have
\[
\nabla L(\gamma'(t))\cdot (\nabla L(\gamma'))'(t)=0;\quad \nabla L(\gamma'(t))\cdot (\nabla L(\gamma'))''(t)+| (\nabla L(\gamma'))'(t)|^2\leq 0.
\]
In the last condition we can ignore the positive term $| (\nabla L(\gamma'))'(t)|^2$ and observe that, since $\nabla L(\gamma'(t))$ and $\gamma'(t)$ are vectors with the same orientation (we do have $\nabla L(\gamma'(t))=\frac{\ell'(|\gamma'(t)|)}{|\gamma'(t)|}\gamma'(t)$), we have $\gamma'(t)\cdot (\nabla L(\gamma'))''(t)\leq 0$.

We now differentiate in time the Euler--Lagrange equation and take the scalar product times $\gamma'$, and obtain
\[
0\geq \gamma'(t)\cdot (\nabla L(\gamma'))''(t)=(\nabla_x F(t,\gamma(t)))'\cdot \gamma'(t) =\partial_t\nabla_x F(t,\gamma(t))\cdot \gamma'(t)+\gamma'(t)\cdot D^2_{xx}F(t,\gamma(t))\gamma'(t).
\]
We deduce
\[
c_0|\gamma'(t)|^2\leq |\gamma'(t)|\,|\partial_t\nabla_x F(t,\gamma(t))|,
\]
which implies $|\gamma'(t)|\leq \frac{C_0}{c_0}\leq C$ and concludes the proof.
\end{proof}

We now use the above result on an approximation of the original problem in BV.

\begin{prop}\label{minTVBVprop}
Consider
\begin{equation}\label{minTVBV}
\min\left\{ TV(\gamma;[0,1])+\int_0^1 F(t,\gamma(t))\de t+\psi_0(\gamma(0^+))+\psi_1(\gamma(1^-))\; :\; \gamma\in BV([0,1];\R^d)\right\}
\end{equation}
where $F:[0,1]\times\R^d\to\R$ is a $C^2$ time-dependent potential satisfying $D^2_{xx}F(t,x)\geq c_0\mathrm I$ for a certain constant $c_0>0$ and $|\partial_t \nabla_x F(t,x)|\leq C_0$, and $\psi_0,\psi_1\in \Lip_1(\R^d)$ are two penalization functions.

Then a minimizer $\gamma$ for the above problem exists, is unique, and is actually Lipschitz continuous with $|\gamma'|\leq\frac{C_0}{c_0}$.
\end{prop}

Note that we directly state the problem using $\Lip_1$ penalizations instead of first fixing $\psi_0$ and $\psi_1$ and then passing to $\tilde \psi_0$ and $\tilde\psi_1$, but we have already explained why we can restrict to this case. Yet, an important remark is needed:

\begin{rema}\label{initialjump}
The solutions with penalizations $\psi_t$ and $\tilde\psi_t$ ($t=0,1$) coincide in $(0,1)$, but the solution with the original (non-$\Lip_1$) penalizations could jump at $t=0$ or $t=1$, and this jump is intrinsically considered in the definition of $\tilde\psi_t$.
\end{rema}
\begin{proof}
Given $\ve>0$, we define $\ell_\ve:\R_+\to\R_+$ via $\ell_\ve(s):=\sqrt{\ve^2+s^2}+\ve h(s)$, where $h:\R_+\to\R_+$ is a smooth, convex, and increasing function, with $\liminf_{s\to\infty} h''(s)>0$. We then define $L_\ve:\R^d\to\R$ via $L_\ve(v)=\ell_\ve(|v|)$, so that $L_\ve$ is smooth, uniformly convex, and radial\footnote{Note that the easiest choice for $h$ is $h(s)=\frac12s^2$, but other choices are possible and reasonable, and the only role of $h$ is to guarantee a lower bound on the Hessian of $L_\ve$ (and in particular, to provide a quadratic behavior to $\ell_\ve$ so that the problem is well-posed in $H^1$). Later on we will see the interest for other choices of $h$.}.

We also choose some numbers $\alpha_\ve<1$ in such a way that $\lim_{\ve\to 0}\alpha_\ve=1$ and $\lim_{\ve\to 0}\frac{\ve^2}{1-\alpha_\ve^2}=0$ (for instance $\alpha_\ve=\sqrt{1-\ve}$).

We consider $\gamma_\ve$ the solution of the variational problem
\[
\min\left\{ \int_0^1 (L_\ve(\gamma'(t))+F(t,\gamma(t)))\de t+\alpha_\ve(\psi_0(\gamma(0))+\psi_1(\gamma(1)))\; :\; \gamma\in H^1([0,1])\right\}.
\]
Since $L_{\ve}$ is convex, a solution exists by the direct method of the calculus of variations, and it is unique because of the strict convexity of the function $F$.

We want to apply Lemma~\ref{preveLFpsi} to this approximated optimization problem. We first compute $\ell_\ve'(s)=\frac{s}{\sqrt{\ve^2+s^2}}+\ve h'(s)\geq \frac{s}{\sqrt{\ve^2+s^2}}$ and observe that we have $(\ell_\ve')^{-1}(r)\leq \frac{r\ve}{\sqrt{1-r^2}}$. Since $\Lip(\alpha_\ve \psi_i)=\alpha_\ve \Lip(\psi_i)\leq \alpha_\ve$ we obtain from Lemma~\ref{preveLFpsi}
\[
|\gamma_\ve'|\leq \max\left\{\frac{C_0}{c_0}, \frac{\alpha_\ve\ve}{\sqrt{1-\alpha_\ve^2}}\right\},
\]
and we observe that our choice of $\alpha_\ve$ implies that the second term in the max above tends to $0$ as $\ve\to 0$. This means that the Lipschitz constant of $\gamma_\ve$ is at most $\frac{C_0}{c_0}$ if $\ve$ is small enough.

By comparing $\gamma_\ve$ with the constant curve $\gamma=0$ we obtain
\[
\int_0^1 F(t,\gamma_\ve(t))\de t+\alpha_\ve(\psi_0(\gamma_\ve(0))+\psi_1(\gamma_\ve(1)))\leq \int_0^1 F(t,0)\de t+ \alpha_\ve(\psi_0(0)+\psi_1(0))\leq C.
\]
This estimate includes an $L^2$ estimate on $\gamma_\ve$ and, because of the uniform Lipschitz condition on $\gamma_\ve$, it also implies that the curves $\gamma_\ve$ are equibounded. We can then apply Arzel\`a--Ascoli's theorem to obtain a limit curve $\gamma_\ve\to \gamma_0$. This curve $\gamma_0$ is of course $\frac{C_0}{c_0}$-Lipschitz continuous, and we can prove that it solves Problem~\eqref{minTVBV}.

Indeed, the optimality of $\gamma_\ve$, together with the inequality $L_\ve(v)\geq |v|$ (which allows to bound the total variation from above with the integral of $L_\ve$), shows that we have
\begin{multline*}
TV(\gamma_\ve;[0,1])+\int_0^1F(t,\gamma_\ve(t))\de t+\alpha_\ve(\psi_0(\gamma_\ve(0))+\psi_1(\gamma_\ve(1)))\\
\leq \int_0^1L_\ve(\gamma'(t))\de t + \int_0^1F(t,\gamma(t))\de t+\alpha_\ve(\psi_0(\gamma(0))+\psi_1(\gamma(1)))
\end{multline*}
for every $\gamma\in H^1$. If we let $\ve$ tend to 0 and use the lower semicontinuity of TV for the uniform convergence, we obtain
\begin{multline*}
TV(\gamma_0;[0,1])+\int_0^1F(t,\gamma_0(t))\de t+\psi_0(\gamma_0(0))+\psi_1(\gamma_0(1))\\
\leq \int_0^1|\gamma'(t)|\de t +\int_0^1F(t,\gamma(t))\de t+\psi_0(\gamma(0))+\psi_1(\gamma(1)),
\end{multline*}
where we used the dominated convergence $L_\ve(\gamma')\to|\gamma'|$ as $\ve\to 0$, and $\alpha_\ve\to 1$.

This shows the optimality of $\gamma_0$ compared to any $H^1$ curve. It is now enough to approximate any BV curve with $H^1$ curves. We take $\gamma\in BV([0,1])$, we define it as equal to $\gamma(0^+)$ on $[-1,0]$ and to $\gamma(1^-)$ on $[1,2]$ and we convolve it with a smooth compactly supported kernel $\eta_\delta$ tending to the identity so as to smooth it, thus obtaining a sequence of curves $\gamma*\eta_\delta$ such that $TV(\gamma*\eta_\delta;[-1,2])=\int_{-1}^2 |(\gamma*\eta_\delta)'(t)|\de t\leq TV(\gamma,(0,1))$; moreover, $\gamma*\eta_\delta$ is uniformly bounded and converges to $\gamma$ at all continuity points of $\gamma$, which means that the convergence holds a.e. and at the boundary point. This proves
\begin{multline*}
\limsup_{\delta\to 0} \int_0^1|(\gamma*\eta_\delta)'(t)|\de t +\int_0^1F(t,\gamma*\eta_\delta(t))\de t+\psi_0(\gamma*\eta_\delta(0))+\psi_1(\gamma*\eta_\delta(1))\\
\leq TV(\gamma,(0,1))+\int_0^1F(t,\gamma(t))\de t+\psi_0(\gamma(0))+\psi_1(\gamma(1))
\end{multline*}
and concludes the proof of the optimality of $\gamma_0$.

What we proved implies the existence of an optimal curve for~\eqref{minTVBV}, and its uniqueness comes, again, from the strict convexity of the term with $F$.
\end{proof}

In the case where the space of curves is $\BV([0,1];\R^d)$ with $d=1$, we can obtain a very interesting behavior.

\begin{prop}\label{1Dcasestop}
When $d=1$, i.e.~the target space of the curves in Problem~\eqref{minTVBV} is one-dimensional, the minimizer $\gamma$ satisfies $|\gamma'(t)|\,|\nabla_x F(t,\gamma(t))|=0$ a.e., i.e.~at each instant of time either $\gamma$ does not move or it is already located at the optimal point for $F(t,\cdot\,)$.
\end{prop}

\begin{proof}
We consider the same approximation as in Proposition~\ref{minTVBVprop}, using the function $h(s)=(s-M)_+^2$ for a very large $M$. The uniform Lipschitz bound proven in Lemma~\ref{preveLFpsi} and Proposition~\ref{minTVBVprop} makes the choice of $\ell_\ve(s)$ for large values of $s$ irrelevant, so that we can write the Euler--Lagrange equation for the minimizer $\gamma_\ve$ in the form
\[
\frac{\ve^2}{(\ve^2+|\gamma_\ve'|^2)^{3/2}}\gamma_\ve''=\left(L_\ve'(\gamma_\ve')\right)'=\nabla_xF(t,\gamma_\ve),
\]
where we explicitly computed\footnote{Note that this computation is based on a 1D cancellation effect, since in higher dimension we have
\[
D^2L_\ve(v)=\frac{(\ve^2+|v|^2)I-v\otimes v}{(\ve^2+|v|^2)^{3/2}}
\]
and the matrices $|v|^2I$ and $v\otimes v$ do not cancel out.} the second derivative of $L_\ve$ ignoring the term in $h$.

We write this as $\ve^2\gamma_\ve''=(\ve^2+|\gamma_\ve'|^2)^{3/2}\nabla_xF(t,\gamma_\ve)$. First, note that this implies a uniform bound $\ve^2|\gamma_\ve''|\leq C$. Then, we differentiate it in time, thus obtaining
\[
\ve^2\gamma_\ve'''=3(\ve^2+|\gamma_\ve'|^2)^{1/2}\gamma_\ve'\cdot\gamma_\ve''\nabla_xF(t,\gamma_\ve)+(\ve^2+|\gamma_\ve'|^2)^{3/2}\left(\nabla_xF(t,\gamma_\ve)\right)'.
\]
Re-using the Euler--Lagrange equation we have
\[
\ve^2\gamma_\ve'''=3\ve^2(\ve^2+|\gamma_\ve'|^2)^{-1}(\gamma_\ve'\cdot\gamma_\ve'')\gamma_\ve''+(\ve^2+|\gamma_\ve'|^2)^{3/2}\left(\nabla_xF(t,\gamma_\ve)\right)'.
\]
We observe that the last term $(\ve^2+|\gamma_\ve'|^2)^{3/2}\left(\nabla_xF(t,\gamma_\ve)\right)'$ is bounded thanks to the Lipschitz bound on $\gamma_\ve$ and the regularity of $F$. We multiply by $\gamma_\ve'$ and obtain
\begin{equation}\label{epsquad}
|\ve^2\gamma_\ve'''\cdot\gamma_\ve'-3\ve^2(\ve^2+|\gamma_\ve'|^2)^{-1}(\gamma_\ve'\cdot\gamma_\ve'')^2|\leq C.
\end{equation}
We then compute
\[
\ve^2\int_0^1|\gamma_\ve''|^2 \dt=-\ve^2\int_0^1\gamma_\ve'''\cdot\gamma_\ve'\dt+\left[\ve^2\gamma_\ve'\cdot \gamma_\ve''\right]_{0}^1\leq C.
\]
The last inequality is justified by~\eqref{epsquad}, by the sign of the term $\ve^2(\ve^2+|\gamma_\ve'|^2)^{-1}(\gamma_\ve'\cdot\gamma_\ve'')^2$, and by the bound on the boundary term, which is the product of two bounded quantities: $\gamma_\ve'$ and $\ve^2\gamma_\ve''$.

Coming back to the equality $\ve^2\gamma_\ve''=(\ve^2+|\gamma_\ve'|^2)^{3/2}\nabla_xF(t,\gamma_\ve)$ we take the $L^2$ norms of both sides, thus obtaining
\[
\int_0^1 |\gamma_\ve'|^6|\nabla_xF(t,\gamma_\ve)|^2 \dt\leq \int_0^1(\ve^2+|\gamma_\ve'|^2)^{3}|\nabla_xF(t,\gamma_\ve)|^2 \dt=\int_0^1 \ve^4|\gamma_\ve''|^2 \dt\leq C\ve^2.
\]
We deduce $\int_0^1 |\gamma_\ve'|^6|\nabla_xF(t,\gamma_\ve)|^2\de t\to 0$ and, at the limit, by lower semicontinuity, we have the claim.
\end{proof}

We will now analyze in detail a simple example, both for future use and for better understanding the properties of the minimizers.

We consider the periodic problem
\begin{equation}\label{TVF-per}
\min\left\{ J(\gamma):=\lambda TV(\gamma;\mathbb S^1)+\int_{\mathbb S^1}\frac12|\gamma(t)-\omega(t)|^2\de t \;:\; \gamma\in BV(\mathbb S^1;\R)\right\},
\end{equation}
where $\lambda>0$ and $\omega:\mathbb S^1\to\R$ is a fixed curve. We suppose that $\omega$ is a Lipschitz continuous function such that there exists a finite decomposition of $\mathbb S^1$ into essentially disjoint intervals $I_k=[a_k,b_k]$ ($k=1,\dots,4N$ for $N\geq 1$) such that $a_{k+1}=b_k$ and $b_{4N}=a_1$ and satisfying for each $n=0,\dots,N-1$, the following conditions
\begin{itemize}
\item $\omega$ is non-decreasing on $I_{4n+1}$;
\item $\omega(a_{4n+2})=\omega(b_{4n+2}):=c_{4n+2}$ and $\omega\geq c_{4n+2}$ on $I_{4n+2}$;
\item $\omega$ is non-increasing on $I_{4n+3}$;
\item $\omega(a_{4n+4})=\omega(b_{4n+4}):=c_{4n+4}$ and $\omega\leq c_{4n+4}$ on $I_{4n+4}$;
\item $\int_{I_{4n+2}}\omega-c_{4n+2}=\int_{I_{4n+4}}c_{4n+4}-\omega=2\lambda$.
\end{itemize}


We then define a curve $\gamma$ and a function $z$ via
\[
\begin{aligned}
&\text{on }I_{4n+1}&\quad& \gamma=\omega &\text{ and }\quad&z=\lambda,\vphantom{\sum^i}\\[0.1em]
&\text{on }I_{4n+2}&\quad& \gamma=c_{4n+2}&\text{ and }\quad&z(t)=\lambda-\int_{a_{4n+2}}^t(\omega-c_{4n+2}),\\[0.5em]
&\text{on }I_{4n+3}&\quad& \gamma=\omega &\text{ and }\quad&z=-\lambda,\\[0.1em]
&\text{on }I_{4n+4}&\quad& \gamma=c_{4n+4} &\text{ and }\quad&z(t)=-\lambda-\int_{a_{4n+4}}^t(\omega-c_{4n+4}).
\end{aligned}
\]
We can check that $\gamma$ and $z$ are Lipschitz continuous functions, that we have $z'=\gamma-\omega$ as well as $|z|\leq \lambda$, $z=\pm\lambda$ when $\gamma'\neq 0$ and $z$ and $\gamma'$ have the same sign. Hence, $z(t)\in \partial(\lambda|{\,\cdot\,}|)(\gamma'(t))$. This means that we have, for any curve $\tilde\gamma$, the inequality
\begin{equation}\label{lambdatv}
\lambda TV(\tilde\gamma;\mathbb S^1)\geq\lambda TV(\gamma;\mathbb S^1)+\int_{\mathbb S^1}(\tilde\gamma'-\gamma')\cdot z.
\end{equation}

We claim that $\gamma$ is a solution of Problem~\eqref{TVF-per}. Indeed, for any other competitor $\tilde\gamma$, we have
\[
J(\tilde\gamma)=\lambda TV(\tilde\gamma;\mathbb S^1)+\int_{\mathbb S^1}\frac12|\tilde\gamma-\omega|^2 \geq J(\gamma)+\int_{\mathbb S^1}(\tilde\gamma'-\gamma')\cdot z+(\tilde\gamma-\gamma)\cdot(\gamma-\omega)=J(\gamma),
\]
where the last equality is obtained by integrating by parts and using $z'=\gamma-\omega$. The previous inequality comes from the use of~\eqref{lambdatv} and from expanding the square.

This explicit example confirms the behavior predicted in Proposition~\ref{1Dcasestop}: the optimizer in the scalar case either coincides with the minimal point of $F(t,\cdot\,)$ (here such a point is equal to $\omega(t)$) or it does not move. By using the numerical method described in Section~\ref{section_num}, the solution to the problem~\eqref{TVF-per} is displayed in Figure~\ref{fig0} when $\omega\colon \mathbb S^1 \to \R$ is a curve such that $\omega(t)= \sin(2\pi t) + 3 \sin(3\cdot 2\pi t)$. The periodic function $\omega$ verifies the hypothesis listed above and we can see that the solution $\gamma$ either follows $\omega$ or it is constant on each interval $I_k$. This numerical simulation confirms the solution and at the same time it validates the numerical method which will be presented more precisely in Section~\ref{section_num}.

\begin{figure}[!ht]
\centering
\includegraphics[width=11cm]{Figure_0.png}
\caption{The simulation of the solution $\gamma$ to the 1D periodic case~\eqref{TVF-per} with $\omega(t) = \sin(2\pi t) + 3 \sin(3\cdot 2\pi t)$. The parameters are displayed in Table~\ref{table1} except for $S=1$ and $\lambda =0.04$. The blue solid line corresponds to the solution $\gamma$, while the red dashed line is the profile of $\omega$. Each interval $I_k$ is delimited by the vertical dotted gray lines. }\label{fig0}
\end{figure}

\section{Lipschitz regularity in the \texorpdfstring{$L^1$}{L1} setting} \label{infinite_dim}
\label{sec3}

In this section, we prove the regularity in time of the density $\rho$ which solves the following problem:
\begin{equation*}
\min_{\substack{\rho \in E \\\rho\geq 0\\ \forall t\in[0,T],~\int_{\Omega}\rho(t,x)\dx=1}} \int_0^T \int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x))\right) \dx{} \dt +\psi_0(\rho(0)) + \psi_T(\rho(T)) \colonequals F(\rho),
\end{equation*}
where $E \colonequals \BV([0,T]; L^1(\Omega))\cap L^2([0,T]\times\Omega)$ (for one-variable BV functions valued in a Banach space such as $L^1$ we refer, for instance, to~\cite{MorBV1,MorBV2}). The function $f$ will be supposed to be uniformly convex (i.e.~$f''\geq c_0>0$), and the time-dependent potential $V$ will be supposed to belong to $\Lip([0,T];L^2(\Omega))$. The domain $\Omega$ is a finite measure set in $\R^d$, and we assume for simplicity that it has unit volume.

\subsection*{A few words on the penalizations \texorpdfstring{$\psi_0$}{psi 0} and \texorpdfstring{$\psi_T$}{psi T}}

We aim to apply the results when $\psi_T$ is of the following form
\begin{align*}
\psi_T\colon L^1(\Omega)&\to \R \\
\rho &\mapsto \int_{\Omega} \varphi_T(x)\rho(x)\dx,
\end{align*}
where $\varphi_T: \Omega \to \R$ is $L^\infty$ and $\|\varphi_T\|_{\infty}\leq 1$. In this case, $\psi_T$ is \emph{1-Lipschitz} for the norm $\|{\,\cdot\,}\|_{L^1(\Omega)}$, and it is also weakly continuous in $L^1(\Omega)$ (and hence in $L^2(\Omega)$).

When it comes to $\psi_0$, we aim to consider the following case:
\begin{align*}
\psi_0\colon L^1(\Omega) &\to \R \\
 \rho& \mapsto \|\rho-m_0\|_{L^1(\Omega)},
\end{align*}
where $m_0\in L^1(\Omega)$. In this case as well, $\psi_0$ is \emph{1-Lipschitz} for the norm $\|{\,\cdot\,}\|_{L^1(\Omega)}$. It is also weakly lower-semicontinuous in $L^1(\Omega)$ and $L^2(\Omega)$.

More generally, our results apply when the penalizations $\psi_0$ and $\psi_T$ are of the following form:
\[
\psi_t(\rho):=\int_\Omega a_t(x,\rho(x))\dx,
\]
for two functions $a_t$ which are 1-Lipschitz continuous and convex in the second variable. In this way the functionals $\psi_t$ are both continuous for the strong $L^1$ convergence (actually, $\Lip_1$) and lower semicontinuous for the weak $L^1$ convergence. This general framework includes the two examples above.

Finally, if $\psi_0$ and $\psi_T$ are defined as previously, we will define $\psi_{0,\alpha} := \alpha \psi_0$ and $\psi_{T,\alpha} := \alpha \psi_T$.

\subsection*{Approximations}

As in the previous section, the absolute value $|\dot\rho|$ will be approximated by a smoother function $L_{\epsilon}$ which will be specified later. The mass constraint $\int \rho_t = 1$ will be imposed via a penalization method, adding $\frac{(\int \rho-1)^2}{2\delta}$. The penalizations at the boundary of $[0,T]$, $\psi_0$ and $\psi_T$, will also be approximated by multiplying them by $\alpha<1$. Finally, the positivity constraint will be handled by approximating $f$ with a sequence $f_n:\R\to\R$ obtained via $f_n(\rho):=f(\rho)+n(\rho_-)^2$ so that negative values are penalized but the uniform convexity of $f$ is preserved since we have $f_n''\geq c_0$ for the same $c_0>0$.

Taking together these approximations, we obtain the following problem:
\begin{multline*}
\min_{\rho\in E}\int_0^T \left(\int_{\Omega} L_{\epsilon}(\dot{\rho}(t,x)) + f_n(\rho(t,x)) + V(t,x)\rho(t,x)\dx +\frac{\left(\int \rho(t,y)\dy-1\right)^2 }{2\delta} \right) \dt \\
+\psi_{0,\alpha}(\rho(0)) + \psi_{T,\alpha}(\rho(T)) \colonequals F_{n}(\rho).
\end{multline*}
Note that the approximated functional is written as $F_n$, meaning that we have fixed a suitable sequence of values $(\epsilon,\delta,\alpha)$ such that $\epsilon_n \to 0$, $\delta_n \to 0$ and $\alpha_n \to 1$.

In order not to have difficulties with the infinite-dimensional space $L^1(\Omega)$, we will also use a finite-dimensional discretization. This consists in imposing that the functions $\rho(t)$ belong to a finite-dimensional subspace. More precisely, we divide the space $\Omega$ into $n$ small areas of volume $\frac 1 n$ called $A_i^n$ (we need their diameter to tend to $0$) and we take $\rho_n(t)\colon \Omega\to \R$ such that it is constant on each area. This means that $\rho_n(t)$ takes at most $n$ different values and its mass is constant equal to $\rho_i(t)$ on each region $A_i^n$ (and its density equals $n\rho_i(t)$). The problem can be considered as a restriction of the previous one to the subset of $E$ composed of densities which are piecewise constant functions (constant on each $A^n_i$) for every $t$, or it can be rewritten as follows:
\begin{multline*}
\min_{\rho\in BV([0,T];\R^n)}
\int_0^T \left(\left(\sum_{i=1}^n \frac 1 n(L_{\epsilon}(n \dot{\rho}_i(t)) + f_n(n \rho_i(t))) + \rho_i(t) \fint_{A_i^n} V(t,x)\dx \right)+ \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta}\right) \dt \\
+\psi_{0,\alpha}(\rho(0)) +\psi_{T,\alpha}(\rho(T)),
\end{multline*}
where the functionals $\psi_{0,\alpha}$ and $\psi_{T,\alpha}$ can also be written in terms of values $\rho_i(0)$ and $\rho_i(T)$: they are of the form
\[
\psi_{0,\alpha}(\rho(0)):=\alpha\sum_i a^n_{i,0}(\rho_i(0))\;\text{ and }\;\psi_{T,\alpha}(\rho(T)):=\alpha\sum_i a^n_{i,T}(\rho_i(T))
\]
for some $\Lip_1$ functions $a^n_{i,t}$, which are precisely given by
\[
a^n_{i,t}(u):=\frac 1n\fint_{A^n_i}a_t(x,nu)\dx.
\]

In the set $E$, we say that $\rho_n$ converges to $\rho$ in $E$ in the sense of~\eqref{cv_in_E}, if
\begin{equation}\label{cv_in_E}
\exists C \text{ s.t. }\|\rho_n(t)\|_{L^2(\Omega)}\leq C\text{ for every $n$ and every $t$ and } \rho_n(t)\rightharpoonup \rho(t) \text{ uniformly in }t
\end{equation}
where the uniform $L^2$ bound allows one to metrize the weak $L^2$ convergence and the uniform convergence is defined accordingly.

We observe that the convergence in the sense of~\eqref{cv_in_E} implies the weak convergence $\rho_n\rightharpoonup\rho$ weakly in $L^2([0,T]\times \Omega)$.

\begin{lemm}\label{F_lsc}
Let $(\rho_n)_n$ be a sequence converging to $\rho\in E$ in the sense of~\eqref{cv_in_E} such that $F_n(\rho_n)$ is bounded. Then, we have $\rho\ge 0$ and, for a.e.~$t\in[0,T]$, $\int_{\Omega}\rho(t,x)\dx=1$; moreover
\begin{equation*}
F(\rho)\leq \liminf_n F_n(\rho_n).
\end{equation*}
\end{lemm}

\begin{proof}
First, we have
\[
\int_0^T\! \int_{\Omega} |\dot{\rho}_n(t,x)| \dx \dt
\leq
\int_0^T\! \int_{\Omega} L_{\epsilon}(\dot{\rho}_n(t,x)) \dx \dt
\leq F_n(\rho_n)
\leq C,
\]
so that $\|\dot{\rho}_n\|_{L^1([0,T]\times \Omega)}$ is bounded. By embedding $L^1([0,T]\times\Omega)$ into $M([0,T]\times\Omega)$, the space of Radon measures, there exists a subsequence of $(\dot{\rho}_n)_n$ which converges weakly in ${M([0,T]\times\Omega)}$ towards a measure which can only be $\dot\rho$. The semicontinuity of the mass for the weak convergence provides
\[
\int_0^T\! \int_\Omega|\dot{\rho}(t,x)|\dx\dt
\leq \liminf_{n\to\infty} \| \dot{\rho}_n\|_{L^1([0,T]\times\Omega)}\leq \liminf_{n\to\infty}\int_0^T\! \int_{\Omega} L_{\epsilon}(\dot{\rho}_n(t,x)) \dx \dt
\]
(where, actually, the first integral is to be intended as a mass in the sense of measures, or as the total variation of the curve $t\mapsto \rho(t)$ in $L^1(\Omega)$). Then, using again $\rho_n\overset{L^2}{\rightharpoonup}\rho$, we obtain
\begin{equation*}
\int_0^T \!\int_{\Omega} V(t,x) \rho(t,x) \dx{} \dt =
\lim_{n\to \infty} \int_0^T\! \int_{\Omega} V(t,x) \rho_n(t,x) \dx{} \dt.
\end{equation*}
Using $f\leq f_n$ and the convexity of $f$, which implies the weak lower semicontinuity of $\rho\mapsto \int\!\!\int f(\rho(t,x))\dx\dt$, we have
\[
\int_0^T\! \int_{\Omega} f(\rho(t,x)) \dx{} \dt
\leq
\liminf_{n\to \infty} \int_0^T\! \int_{\Omega} f_n(\rho_n(t,x)) \dx{} \dt.
\]
Moreover, by the definition of $f_n$, we also obtain
\[
\int_0^T\!\int_\Omega (\rho_-)^2(t,x)\dx\dt\leq \liminf_n\int_0^T\!\int_\Omega ((\rho_n)_-)^2(t,x)\dx\dt=0
\]
since $\int\!\!\int ((\rho_n)_-)^2(t,x)\dx\dt\leq \frac Cn$. This shows $\rho\geq 0$.

Since for all $t\in[0,T]$, $\rho_n(t) \rightharpoonup \rho(t)$ weakly in $L^2(\Omega)$, we have in particular $\rho_n(0) \rightharpoonup \rho(0)$ and $\rho_n(T) \rightharpoonup \rho(T)$ weakly in $L^2(\Omega)$ and consequently weakly in $L^1(\Omega)$, so by the lower semi-continuity of $\psi_0$ and $\psi_T$, we have
\begin{equation*}
\psi_0(\rho(0)) \leq \liminf_n \psi_0(\rho_n(0))\quad
\text{and}\quad
\psi_T(\rho(T)) \leq \liminf_n \psi_T(\rho_n(T)).
\end{equation*}
Since $(\|\rho_n(0)\|_{L^2(\Omega)})_n$ and $(\|\rho_n(T)\|_{L^2(\Omega)})_n$ are bounded we also have $L^1$ bounds and hence
\[
\lim_n \alpha_n\psi_0(\rho_n(0)) = \lim_n \psi_0(\rho_n(0))
\text{ and }
\lim_n \alpha_n\psi_T(\rho_n(T)) = \lim_n \psi_T(\rho_n(T)).
\]

Finally, we use the positivity of the term
\[
\int_0^T\frac{\left(\int \rho_n(t,y)\dy-1\right)^2 }{2\delta} \dt
\]
to obtain
\[
F(\rho) \leq
\liminf_n F_n(\rho_n),
\]
and its boundedness to obtain $\int \rho(t)=\lim\int \rho_n(t)=1$ for a.e. $t$.
\end{proof}

\begin{lemm}\label{D_dense}
Suppose that $m_0$ is such that $\int f(m_0)<+\infty$ (in particular $m_0\in L^2(\Omega)$). For all $\rho\in E$ there exists a sequence $(\rho_n)_n$ in $D = H^1([0,T];L^2(\Omega))$ which converges to $\rho$ strongly in $L_{t,x}^2$ and which satisfies
\begin{align}
\nonumber
\rho_n(0)&=m_0\\
\|\dot{\rho}_n\|_{L^1([0,T]\times\Omega)}
&\leq
\|\dot{\rho}\|_{L^1([0,T]\times\Omega)} + \|\rho(0^+)- m_0\|_{L^1(\Omega)} \label{ineg1}
\\
&= \|\dot{\rho}\|_{L^1([0,T]\times\Omega)} + \psi_0(\rho(0^+)) \nonumber\\
\limsup_n \int_0^T \int_{\Omega} f(\rho_n(t,x)) \dx \dt
&\leq \int_0^T \int_{\Omega} f(\rho(t,x)) \dx \dt \label{ineg2}
\\
\lim_{n\to\infty} \int_0^T \int_{\Omega} \rho_n(t,x)V(t,x)\dx \dt &=
\int_0^T \int_{\Omega} \rho(t,x)V(t,x)\dx \dt \label{lim3}
\\
\lim_n\psi_T(\rho_n(T)) &=
\psi_T(\rho(T^-))\label{ineg4}
\end{align}

In addition, if for all $t\in[0,T]$, $\int_{\Omega} \rho(t,x)\dx = 1$, then for all $n\in \N$, $\int_{\Omega} \rho_n(t,x)\dx = 1$.

\end{lemm}

\begin{proof}
Let $\rho\in E$. We denote by $\eta_n$ a sequence of mollifiers in the $t$ variable (i.e.~$\eta_n$ is a sequence of smooth probability measures on $\R$ such that $\eta_n\rightharpoonup\delta_0$), and we suppose $\spt\eta_n\subset [0,\frac 1n]$.

The function $\rho$ is only defined on the time interval $[0,T]$, but we can extend $\rho$ to a function $\tilde{\rho}: [-1,T]\times \Omega \to \R$ setting $\tilde\rho(t)=m_0$ for all $t<0$. The total variation in $L^1$ of this extension is equal to the sum of that of $\rho$ and the $L^1$ distance between $m_0$ and $\rho(0^+)$.

We then define $\rho_n$ as the convolution of $\tilde\rho$ with the mollifiers $\eta_n$ in time:
\[
\rho_n(t,x) := \int \eta_n(t-s)\tilde\rho(s,x)\ds.
\]
With the assumption on the support of $\eta_n$ this convolution is well-defined on $[0,T]$ even if $\tilde\rho$ has not been extended on $]T,+\infty[$. Moreover, we have $\rho_n(0)=m_0$.

Note that we have
\[
\left|\dot{\rho}_n(t,x)\right|=\left|\int \eta_n'(t-s)\tilde\rho(s,x)\ds\right|\leq C(n)\int_{t-1/n}^t |\tilde\rho(s,x)|\ds
\]
so that we have
\[
\|\dot{\rho}_n(t)\|_{L^2(\Omega)}\leq C(n)\int_{t-1/n}^t \|\tilde\rho(s)\|_{L^2(\Omega)}\ds\leq C(n)
\]
(we use here the assumption $m_0\in L^2(\Omega)$). This proves $\rho_n\in H^1([0,T];L^2(\Omega))$.

The convexity of the total variation easily implies~\eqref{ineg1}. Again by convexity, for any convex function $g$ we have
\[
\int_0^T\!\int_\Omega g(\rho_n(t,x))\dx\dt\leq \int_{-1/n}^T\int_\Omega g(\tilde\rho(t,x))\dx\dt\to \int_{0}^T\!\int_\Omega g(\rho(t,x))\dx\dt,
\]
where the last limit is valid whenever $\int_\Omega g(m_0)<+\infty$. Applying this to $g(\rho)=\rho^2$ proves that $\rho_n$ converges strongly in $L^2_{t,x}$ to $\rho$ (it provides an $L^2$ bound, hence a weak limit up to subsequences; this weak limit can be identified as $\rho$ by testing against continuous functions; the limit is actually strong because the $L^2$ norm converges to that of the limit). This implies, in particular, \eqref{lim3}. As for~\eqref{ineg2}, it is enough to use $g=f$.

To prove~\eqref{ineg4}, we use the property of bounded variation functions: $\rho$ admits a left limit at $T$ in $L^1$, i.e.~for every $\ve>0$ there exists $\delta>0$ such that $\|\rho(t)-\rho(T^-)\|_{L^1}\leq\ve$ for every $t\in [T-\delta,T[$. By convexity this implies, as soon as $\frac1n<\delta$, $\|\rho_n(T)-\rho(T^-)\|_{L^1}\leq \ve$ and shows $\rho_n(T)\to\rho(T^-)$ strongly in $L^1$. This implies~\eqref{ineg4}.

If we suppose in addition that for all $t\in[0,T]$, $\int_{\Omega}\rho(t,x)\dx=1$ the same will be true for $\rho_n$ by convexity, which concludes the proof of the statement.
\end{proof}

\begin{theo}\label{Lip_reg}
Suppose that $\psi_0\colon L^1(\Omega)\to \R$ and $\psi_T\colon L^1(\Omega)\to \R$ are 1-Lipschitz and weakly lower semicontinuous on $L^1(\Omega)$ and that $V\colon [0,T]\times \Omega\to \R$ belongs to $\Lip([0,T];L^2(\Omega))$. Suppose also that $f\colon\R\to \R$ is $c_0$-convex, i.e.~$f''\geq c_0$ on $\R$.

Then, there exists a unique minimizer $\rho$ to the problem
\begin{equation}\label{PB1}
\min_{\substack{\rho \in E \\ \forall t\in[0,T],~\int_{\Omega}\rho(t,x)\dx=1}} F(\rho).
\end{equation}
This solution belongs to $\Lip([0,T];L^2(\Omega))$ and it satisfies
\begin{equation}\label{Lip}
\sup_{t\in[0,T]} \int_{\Omega} |\dot{\rho}(t,x)|^2 \dx \leq C,
\end{equation}
where $C = \frac{C_0^2}{c_0^2}$ with $C_0^2 = \sup_{t\in[0,T]} \|V'(t,\cdot\,) \|_{L^2(\Omega)}^2 $.
\end{theo}

\begin{proof}
To obtain the regularity~\eqref{Lip}, we will approximate the problem~\eqref{PB1} as already described at the beginning of this section; i.e.~solving
\begin{multline}\label{PB2}
\min_{\rho\in E_n}
\int_0^T \left(\left(\sum_{i=1}^n \frac 1 n\left(L_{\epsilon}(n \dot{\rho}_i(t)) + f_n(n \rho_i(t)) \right) + n \rho_i(t) \int_{A_i^n} V(t,x)\dx \right) + \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta} \right)\dt \\
+\psi_{0,\alpha}(\rho(0)) + \psi_{T,\alpha}(\rho(T)),
\end{multline}
where we denote by $E_n$ the set of piecewise constant (in space) functions $\rho\in E$ such that
\[
\forall t\in[0,T], \forall x \in\Omega,~\rho(t,x) = \sum_{i=1}^n n \rho_i(t) \1_{A_i^n}(x),
\]
where $\rho_i\colon [0,T] \to \R$ is a real-valued function. With this definition, the mass of $\rho$ on each $A_i^n$ equals $\rho_i(t)$.

In particular, if we consider the curves $\rho_i(t)$, for each $i$ the curve $\rho_i$ solves
\begin{multline}\label{PB3}
\min_{\rho_i\in H^1([0,T];\R)}
\int_0^T \left(L_{\epsilon}(n \dot{\rho}_i(t)) + f_n(n \rho_i(t)) + n \rho_i(t) V_i^n(t) + \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta} \right)\dt \\
+\alpha a^n_{i,0}(\rho_i(0)) +\alpha a^n_{i,T}(\rho_i(T))
\end{multline}
where $V_i^n(t) \colonequals \frac 1 {|A_i^n|} \int_{A_i^n} V(t,x)\dx = n \int_{A_i^n} V(t,x)\dx $. The function $V^n(t)$ defined to be equal to $V^n_i(t)$ in $A^n_i$ satisfies
\[
\|V^n(t)\|_{L^2(\Omega)}\leq \|V(t)\|_{L^2(\Omega)},\quad \|V^n\|_{L^2([0,T]\times\Omega)}\leq \|V\|_{L^2([0,T]\times \Omega)},\quad \|\partial_t V^n(t)\|_{L^2(\Omega)}\leq \|\partial _tV(t)\|_{L^2(\Omega)},
\]
all these inequalities being a consequence of Jensen's inequality.

The proof is divided in several steps.

\begin{proof}[Step 1]
The minimizers of~\eqref{PB2}, which is a finite-dimensional variational problem in $H^1$, exist by the direct method.
\let\qed\relax
\end{proof}

\begin{proof}[Step 2]
Let $\rho_n$ be a minimizer of $F_n$ for all $n$. In this step, we bound $\| \dot{\rho}_n(t) \|_{L^2(\Omega)}$ independently of $n$ so that we will be able to pass to the limit $n\to\infty$. We remind that $\rho_n$ is piecewise constant in space: $\rho_n(t,x)\colonequals \sum_{i=1}^n n\rho_{n,i}(t)\1_{A_i^n}(x)$. In the following, we fix $n$ and write $\rho_i(t)$ instead of $\rho_{n,i}(t)$ to lighten the notation.

The Euler--Lagrange equation of~\eqref{PB3} is
\begin{equation}\label{E-L}
\left(L'_{\epsilon}(n \dot{\rho}_i(t))\right)' = V_i^n(t) + f_n'(n \rho_i(t)) + \frac{\left(\sum_j \rho_j(t) -1\right)}{n\delta}.
\end{equation}
Differentiating the equation~\eqref{E-L} yields
\begin{equation*}
\left(L'_{\epsilon}(n \dot{\rho}_i(t))\right)'' = (V_i^n)'(t) + n \dot{\rho}_i(t)f_n''(n \rho_i(t)) + \frac{\sum_j \dot{\rho}_j(t)}{n\delta}.
\end{equation*}
Multiplying by $\dot{\rho}_i(t)$ and summing over $i$ gives
\begin{equation*}
\sum_{i=1}^n \dot{\rho}_i(t)(L'_{\epsilon}(n \dot{\rho}_i(t)))'' = \sum_{i=1}^n \left((V_i^n)'(t) \dot{\rho}_i(t) + n(\dot{\rho}_i(t))^2 f_n''(n \rho_i(t))\right) + \frac{\left(\sum_j \dot{\rho}_j(t)\right)^2}{n\delta }.
\end{equation*}
Since the term $\left(\sum_j \dot{\rho}_j(t)\right)^2$ is nonnegative, using $f_n''\geq c_0$ as well, we obtain the inequality
\begin{equation}\label{ineq_EL}
\sum_{i=1}^n \dot{\rho}_i(t)\left(L'_{\epsilon}(n \dot{\rho}_i(t))\right)''
\geq \sum_{i=1}^n \left((V_i^n)'(t) \dot{\rho}_i(t) + c_0n(\dot{\rho}_i(t))^2\right).
\end{equation}

Now, we need to estimate the left-hand side of~\eqref{ineq_EL}. By expanding the second derivative, we have
\begin{equation}
\begin{aligned}\label{ineq_EL2}
\sum_{i=1}^n \dot{\rho}_i(t)\left(L'_{\epsilon}(n \dot{\rho}_i(t))\right)'' &= \sum_{i=1}^n \dot{\rho}_i(t) \left(n\ddot{\rho}_i(t) L''_{\epsilon}(n \dot{\rho}_i(t))\right)' \\
&= \sum_{i=1}^n \dot{\rho}_i(t) \left((n\ddot{\rho}_i(t))^2 L'''_{\epsilon}(n \dot{\rho}_i(t)) + n\dddot{\rho}_i(t) L''_{\epsilon}(n \dot{\rho}_i(t)) \right).
\end{aligned}
\end{equation}
Let us define the function $h \colon \R \to \R $ such that $h(s)= s L'_{\epsilon}(s) - L_{\epsilon}(s)$. Then, $h$ verifies
\[
h'(s) = s L''_{\epsilon}(s) \;\text{ and } \; h''(s) = L''_{\epsilon}(s) + s L'''_{\epsilon}(s).
\]
We now consider
\begin{equation}\label{h}
\max_{t\in[0,T]} \sum_{i=1}^n h(n \dot{\rho}_i(t)).
\end{equation}
Two cases will be distinguished:
\begin{itemize}
\item the maximum is reached on $(0,T)$,
\item the maximum is reached on $\{0,T\}$.
\end{itemize}
\begin{itemize}
\item[(i)] Suppose there exists $t_0 \in(0,T)$ such that $\sum_{i=1}^n h(n \dot{\rho}_i(t_0)) = \max_{t\in[0,T]} \sum_{i=1}^n h(n \dot{\rho}_i(t)) $.
\end{itemize}
In particular, we have
\begin{align*}
&\sum_{i=1}^n n \ddot{\rho}_i(t_0) h'(n \dot{\rho}_i(t_0)) = \sum_{i=1}^n n^2 \ddot{\rho}_i(t_0) \dot{\rho}_i(t_0) L''_{\epsilon}(n \dot{\rho}_i(t_0)) = 0 \\
\text{and }\quad&\sum_{i=1}^n n \dddot{\rho}_i(t_0) h'(n \dot{\rho}_i(t_0)) + (n \ddot{\rho}_i(t_0))^2 h''(n \dot{\rho}_i(t_0)) \leq 0,\\
\text{i.e.~}\quad& \sum_{i=1}^n n^2 \dddot{\rho}_i(t_0) \dot{\rho}_i(t_0) L''_{\epsilon}(n \dot{\rho}_i(t_0)) + (n \ddot{\rho}_i(t_0))^2 \left(L''_{\epsilon}(n \dot{\rho}_i(t_0)) + n \dot{\rho}_i(t_0) L'''_{\epsilon}(n \dot{\rho}_i(t_0))\right) \leq 0.
\end{align*}
Inserting~\eqref{ineq_EL} and~\eqref{ineq_EL2} in $t_0$, the last inequality becomes
\begin{equation}\label{ineq_V}
\sum_{i=1}^n \left((V_i^n)'(t_0) \dot{\rho}_i(t_0) + c_0n\dot{\rho}_i(t_0)^2 \right) \leq - \sum_{i=1}^n (n\ddot{\rho}_i(t_0))^2 L''_{\epsilon}(n \dot{\rho}_i(t_0)) \leq 0.
\end{equation}
Let us compute the expression of $h$ explicitly:
\begin{align*}
h(s) &= s L'_{\epsilon}(s) - L_{\epsilon}(s)\\
&= \frac{s^2}{\sqrt{s^2 + \epsilon^2}} + 2\epsilon s^2 - \sqrt{s^2+\epsilon^2} - \epsilon s^2= \frac{s^2 - s^2 -\epsilon^2}{\sqrt{s^2 + \epsilon^2}} + \epsilon s^2\\
& = - \frac{\epsilon^2}{\sqrt{s^2 + \epsilon^2}} + \epsilon s^2 < \epsilon s^2.
\end{align*}
Since $t_0$ is a maximizer of $\sum_{i=1}^n h(n \dot{\rho}_i)$, for all $t\in[0,T]$, we obtain
\begin{equation}\label{h_ineq}
\sum_{i=1}^n h(n \dot{\rho}_i(t)) = \sum_{i=1}^n - \frac{\epsilon^2}{\sqrt{(n \dot{\rho}_i(t))^2 + \epsilon^2}} + \epsilon (n \dot{\rho}_i(t))^2 \\
\leq \sum_{i=1}^n h(n \dot{\rho}_i(t_0)) < \epsilon \sum_{i=1}^n (n \dot{\rho}_i(t_0))^2.
\end{equation}
In particular, we have
\begin{equation*}
\sum_{i=1}^n - \frac{\epsilon}{\sqrt{(n \dot{\rho}_i(t))^2 + \epsilon^2}} + (n \dot{\rho}_i(t))^2 < \sum_{i=1}^n (n \dot{\rho}_i(t_0))^2.
\end{equation*}
Since $\frac{\epsilon}{\sqrt{(n \dot{\rho}_i(t))^2 + \epsilon^2}} \leq 1$, for all $t\in [0,T]$,
\begin{equation}\label{ineq_t}
\sum_{i=1}^n n \dot{\rho}_i(t)^2 < 1 + \sum_{i=1}^n n \dot{\rho}_i(t_0)^2.
\end{equation}

Besides, thanks to Cauchy--Schwarz inequality, the inequality~\eqref{ineq_V} becomes
\begin{align}\label{ineq_rho_dot}
c_0 \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 &\leq -\sum_{i=1}^n (V_i^n)'(t_0) \dot{\rho}_i(t_0) \\ \nonumber &\leq \sqrt{ \sum_{i=1}^n \frac 1 n (V_i^n)'(t_0)^2 } \sqrt{ \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 }\\\nonumber &\leq \sqrt{ \sup_{t\in[0,T]} \sum_{i=1}^n \int_{A_i^n}(V^n)'(t)^2\dx} \sqrt{ \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 } \\\nonumber &\leq \sup_{t\in[0,T]} \|(V^n)'(t,\cdot\,) \|_{L^2(\Omega)} \sqrt{ \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 } \\ \nonumber &\leq \sup_{t\in[0,T]} \|V'(t,\cdot\,) \|_{L^2(\Omega)} \sqrt{ \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 }.
\end{align}
Finally, \eqref{ineq_rho_dot} gives
\begin{equation}\label{ineq_t0}
c_0^2 \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 \leq \sup_{t\in[0,T]} \|V'(t,\cdot\,) \|_{L^2(\Omega)}^2.
\end{equation}

By gathering the inequalities~\eqref{ineq_t} and~\eqref{ineq_t0}, there exists a constant $$C_0^2\colonequals \sup_{t\in[0,T]} \|V'(t,\cdot\,) \|_{L^2(\Omega)}^2 \geq0$$ independent from $n$ such that for all $t\in[0,T]$,
\begin{equation}\label{born_unif}
\sum_{i=1}^n n \dot{\rho}_i(t)^2 \leq 1+ \sum_{i=1}^n n \dot{\rho}_i(t_0)^2 \leq 1+\frac{C_0^2}{c_0^2}.
\end{equation}
Yet, for all $t\in [0,T]$, the $L^2$-norm of $\dot{\rho}_n(t)$ is
\begin{equation*}
\|\dot{\rho}_n(t)\|^2_{L^2(\Omega)} = \sum_{i=1}^n \int_{A_i^n} (n\dot{\rho}_i(t))^2 = \sum_{i=1}^n n \dot{\rho}_i(t)^2,
\end{equation*}
so, for all $n\in \mathbb{N}$, a minimizer of~\eqref{PB3} verifies
\begin{equation}\label{ineq_]0,T[}
\sup_{t\in [0,T]} \| \dot{\rho}_n(t)\|_{L^2(\Omega)}^2 \leq 1+\frac{C_0^2}{c_0^2}.
\end{equation}
\begin{itemize}
\item[(ii)] Suppose that the maximum of~\eqref{h} is reached at $0$, i.e.
\[
\sum_{i=1}^n h(n \dot{\rho}_i(0))= \max_{t\in[0,T]} \sum_{i=1}^n h(n \dot{\rho}_i(t)).
\]
\end{itemize}
The transversality condition yields
\[
L_{\epsilon}' (n\dot{\rho}_i(0)) = (\alpha a^n_{i,0})'(\rho_i(0)),
\]
and we know that $\alpha a^n_{i,0}$ is $\Lip_\alpha$. Using $L_{\epsilon}'(s) = \frac{ s}{\sqrt{s^2+\epsilon^2}} + 2 \epsilon s$, we get
\[
\left| \frac{n\dot{\rho}_i(0)}{\sqrt{(n\dot{\rho}_i(0))^2+\epsilon^2}} + 2 \epsilon n \dot{\rho}_i(0) \right| \leq \alpha.
\]
The same computations as in Section~\ref{finite_dim} lead to
\[
|n\dot{\rho}_i(0)|\leq\frac{\alpha\epsilon}{\sqrt{1-\alpha^2}}.
\]
Thanks to this inequality, we can conclude similarly to~\eqref{born_unif} that
\begin{align*}
\sum_{i=1}^n n \dot{\rho}_i(t)^2 &< 1 + \sum_{i=1}^n n\dot{\rho}_i(0)^2, \\
& \leq 1 + \frac{n^2 \alpha^2 \epsilon^2}{n^2 (1 - \alpha^2)} = 1 + \frac{\alpha^2 \epsilon^2}{1 - \alpha^2},
\end{align*}
so, if $\alpha \to 1$ and $\epsilon \to 0$ in a way that $\frac{\alpha^2 \epsilon^2}{1 - \alpha^2}$ remains below $1$, we have
\begin{equation}\label{ineq_0T}
\sup_{t\in[0,T]} \| \dot{\rho}_n(t)\|_{L^2(\Omega)}^2 =\sup_{t\in[0,T]} \sum_{i=1}^n n \dot{\rho}_i(t)^2
\leq 2.
\end{equation}

\begin{rema}\label{rem_better_upper_bounds}
The upper bounds in~\eqref{ineq_]0,T[} and~\eqref{ineq_0T} can be improved to quantities which are arbitrarily close to $C_0^2/c_0^2$ and $0$, respectively.

For the first one, it suffices to define a slightly different approximation of $|{\,\cdot\,}|$. If one takes $L_{\epsilon,A}(s)= \sqrt{s^2+\epsilon^2} + A \epsilon s^2$ instead of $L_{\epsilon}$, the computation remains the same, except for~\eqref{h_ineq}, which becomes
\begin{align*}
\sum_{i=1}^n h(n \dot{\rho}_i(t)) = \sum_{i=1}^n - \frac{\epsilon^2}{\sqrt{(n \dot{\rho}_i(t))^2 + \epsilon^2}} + A\epsilon (n \dot{\rho}_i(t))^2
\leq \sum_{i=1}^n h(n \dot{\rho}_i(t_0)) < A\epsilon \sum_{i=1}^n (n \dot{\rho}_i(t_0))^2,
\end{align*}
and yields the inequality
\begin{align*}
A\sum_{i=1}^n n \dot{\rho}_i(t)^2 &< 1 + A \sum_{i=1}^n n \dot{\rho}_i(t_0)^2,\\
\text{i.e.~}\sum_{i=1}^n n \dot{\rho}_i(t)^2 &< \frac 1 A + \sum_{i=1}^n n \dot{\rho}_i(t_0)^2.
\end{align*}
Since the constant $A$ can be chosen as large as we want, the bound can be made arbitrarily close to $C_0^2/c_0^2$.

For the second estimate, it is enough to choose $\alpha \to 1$ such that $\frac{\alpha^2 \epsilon^2}{1 - \alpha^2} \to 0$ which allows to replace the second term in~\eqref{ineq_0T} by almost $0$ (and the first one can be taken small as well, as we have just explained).
\end{rema}

The same computations lead to the same conclusion if the maximum were reached at $T$.

To conclude, by~\eqref{ineq_]0,T[}, \eqref{ineq_0T} and Remark~\ref{rem_better_upper_bounds}, we have shown that
\begin{equation*}
\forall n,
\sup_{t\in[0,T]} \| \dot{\rho}_n(t)\|_{L^2(\Omega)}^2
\leq \max \left\{\frac{C_0^2}{c_0^2}, 0\right\} = \frac{C_0^2}{c_0^2} \colonequals C.
\end{equation*}
\let\qed\relax
\end{proof}

\begin{proof}[Step 3]
In this step, we prove that there exists a subsequence of $(\rho_n)_n$ which converges to a function $\rho\in \mathcal{C}([0,T], L^2(\Omega))$.

The sequence $(\rho_n(t))_n$ is equi-Lipschitz in $L^2(\Omega)$ but the norm $L^2_{t,x}$ is also bounded, which provides a uniform bound $\|\rho_n(t)\|_{L^2(\Omega)}\leq C$. Bounded sets in $L^2(\Omega)$ are not compact for the strong convergence, but they are compact, by the Banach--Alaoglu theorem, for the weak convergence. The equicontinuity that we have is in the strong sense, so it also holds in the weak sense, and we can then apply the Arzel\`a--Ascoli theorem to extract a subsequence of $(\rho_n)_n$ which converges to $\rho$ in the sense of~\eqref{cv_in_E}.

Additionally, we obtain by the lower-semicontinuity property of the $L^2$-norm that the limit curve $\rho$ is also Lipschitz in time for the strong $L^2$ norm:
\begin{equation*}
\forall (t,s) \in [0,T]^2,~\| \rho(t) - \rho(s) \|_{L^2(\Omega)}
\leq C |t-s|.
\end{equation*}
\let\qed\relax
\end{proof}

\begin{proof}[Step 4]
The goal of this step is to prove that the limit $\rho$ found previously is actually a minimizer of~\eqref{PB1}. Comparing to a suitable competitor (for instance a discretization of $m_0$, constant in time), we can see that there exists $C>0$ such that $F_n(\rho_n) \leq C$ for all $n$.

First, let us notice that $\int_{\Omega}\rho(t,x)\dx=1$, for all $t\in[0,T]$.

Indeed, we have
\[
\int_0^T \left(\int_{\Omega} \rho(t,x)\dx -1\right)^2 \dt\leq\liminf_n\int_0^T \left(\int_{\Omega} \rho_n(t,x)\dx -1\right)^2 \dt = 0.
\]

Second, by Lemma~\ref{F_lsc}, we have that $F(\rho) \leq \liminf_n F_n(\rho_n)$.

Third, let $m \in D=H^1([0,T];L^2(\Omega))$ be such that $\int_{\Omega}m(t,y)\dy=1$. In what follows, we prove that there exists a sequence $(m_n)_n$ such that $\limsup_n F_n(m_n)\leq F(m)$.

We define the sequence $(m_n)_n$ by choosing piecewise constant functions such that
\begin{equation}\label{m_n}
\forall t\in[0,T],\ \forall x\in A_i^n, \quad m_n(t,x)= n \int_{A_i^n} m(t,y)\dy
\end{equation}
which verifies $\int_{\Omega} m_n(t,y)\dy = \int_{\Omega} m(t,y)\dy=1$. This lets the term penalizing the mass of $m_n$ disappear in $F_n(m_n)$ and, using $m\geq 0$ and $m_n\geq 0$, the penalization of the negative part disappears as well:
\begin{align*}
F_n(m_n) &=
\int_0^T \int_\Omega L_{\epsilon}(\dot{m}_n(t,x)) + f(m_n(t,x)) +m_n(t,x) V^n(t,x) \dx\dt +\psi_{0,\alpha}(m_n(0)) + \psi_{T,\alpha}(m_n(T)).
\end{align*}


We observe that, by Jensen's inequality, exactly as it happens for $V^n$, the sequence $m_n$ is bounded in $L^2$ (the norm is bounded by that of $m$) and clearly weakly converges to $m$ (it is enough to test against continuous test functions). So, it strongly converges to $m$ in $L^2$. This is true both in $L^2_{t,x}$ and in $L^2_x$ for every $t$. In particular, the boundary terms and the linear term $\int m_n V^n$ converge to the corresponding terms with $m$. As for the term $\int f(m_n)$, it can be bounded (again thanks to Jensen's inequality) by $\int f(m)$. We are left to bound the term involving the time-derivative.

Using again the Jensen inequality we have $\int L_\ve(\dot{m}_n)\leq \int L_\ve(\dot m)$ and using $m\in D$ (i.e.~$\dot m\in L^2_{t,x}$) the quantity in the right-hand side tends to $ \|\dot{m}\|_{L^1([0,T]\times\Omega)}.$

Summing up, we have proved that for all $m\in D$ such that $\int_{\Omega} m(t,x)\dx=1$, there exists a sequence $(m_n)_n$ which converges to $m$ strongly in $L^2([0,T]\times\Omega)$ and which verifies
\begin{equation}\label{limsup_competitor}
\limsup_n F_n(m_n)\leq F(m).
\end{equation}

Now, we can prove that $\rho$ minimizes $F$. Let $m\in D$ be a competitor such that $\int_{\Omega} m(t,x)\dx=1$ and $(m_n)_n$ the sequence defined in~\eqref{m_n}. Since for all $n\in\N$, $\rho_n$ minimizes $F_n$, we have
\[
\forall n\in\N,\quad F_n(\rho_n)\leq F_n(m_n).
\]
By Lemma~\ref{F_lsc} and the property~\eqref{limsup_competitor}, we obtain
\begin{equation}\label{competitor}
\forall m \in D,\quad F(\rho)\leq F(m).
\end{equation}

The inequality is true for all $m\in D$; it remains to show that it is true for all $m\in E$.

Let $m\in E$ be such that $\int_{\Omega} m(t,x)\dx=1$. By Lemma~\ref{D_dense}, there exists a sequence $(m_n)_n \subset D$ which converges to $m$ strongly in $L_{t,x}^2$ and such that $\int_{\R^N} m_n(t,x)\dx=1$ for all $n\in\N$. This sequence may be different from~\eqref{m_n}. By Inequality~\eqref{competitor}, we have
\[
\forall n \in \N,\quad F(\rho) \leq F(m_n).
\]
By using the properties~\eqref{ineg1}, \eqref{ineg2} and~\eqref{lim3} and the fact that $m$ is supported in $[0,T]\times\Omega$, we obtain
\[
\forall m\in E,\quad F(\rho) \leq F(m),
\]
which shows that the limit $\rho$ is a minimizer of $F$.

To conclude, the function $\rho$ is a solution to the problem~\eqref{PB1} and verifies the property~\eqref{Lip}.

Moreover, the solution $\rho$ is unique by the strict convexity of $F$, given by $f$.
\end{proof}
\let\qed\relax
\end{proof}


\section{Infinite horizons problems} \label{infinite_horizon}
\label{sec4}

Similar results can be proven in the infinite horizon case, with an exponential discount factor:
\begin{equation*}
\min_{\substack{\rho \in E_{\infty}; \\ \forall t\in\R_+,~\int_{\Omega}\rho(t,x)\dx=1}}
\int_0^{\infty} \e^{-rt}\int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x))\right) \dx{}\dt +\psi_0(\rho(0)) \colonequals F_\infty(\rho)
\end{equation*}
where $E_{\infty} \colonequals \BV_{loc}(\R_+; L^1(\Omega))\cap L^2_{loc}(\R_+\times\Omega)$. This kind of infinite horizon problem is very classical in economic models and we will not discuss its economic motivations any further. From the mathematical point of view, it is interesting as we get rid of some difficulties (which will appear in the next section) related to the transversality condition at $t=T$ but the computations which provide the time regularity can be re-done up to minor modifications.

The proof is similar to Theorem~\ref{Lip_reg}. We will need to use the following approximated functional
\[
F_T(\rho) \colonequals
\int_0^T\e^{-rt} \int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x))\right) \dx{}\dt +\psi_0(\rho(0))
\]
and its approximation
\begin{multline*}
F_T^n(\rho) \colonequals
\int_0^T \e^{-rt}\left(\sum_{i=1}^n \frac 1 n\left(L_{\epsilon}(n \dot{\rho}_i(t)) + f_n(n \rho_i(t)) + \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta} \right) + n \rho_i(t) \int_{A_i^n}\! V(t,x)\dx \right) \dt\\
 +\psi_{0,\alpha}(\rho(0)),
\end{multline*}
where $L_{\epsilon}$ and the partition $\Omega = \cup_{i=1}^n A_i^n$ are the same as in the proof of Theorem~\ref{Lip_reg}.

\begin{theo}
Suppose that $\psi_0\colon L^1(\Omega)\to \R$ is 1-Lipschitz and weakly lower semicontinuous on $L^1(\Omega)$ and that $V\colon \R_+\times \Omega\to \R$ belongs to $\Lip(\R_+;L^2(\Omega))$.
Suppose also that $f\colon\R\to \R$ is $c_0$-convex, i.e.~$f''\geq c_0$.

Then, there exists a unique solution $\rho$ to the problem
\begin{equation*}
\min_{\substack{\rho \in E_{\infty}; \\ \forall t\in\R_+,~\int_{\Omega}\rho(t,x)\dx=1}} F_\infty(\rho),
\end{equation*}
and it satisfies
\begin{equation}\label{Lip_reg_infty}
\sup_{t\in\R_+} \int_{\Omega} |\dot{\rho}(t,x)|^2 \dx \leq C_{\infty},
\end{equation}
where $C_{\infty} = \frac{C_0^2}{c_0^2}>0 $ only depends on $V$ with $C_0^2 = \sup_{t\in\R_+} \|V'(t, \cdot\,) \|_{L^2(\Omega)}^2$ and on $c_0$.
\end{theo}

\begin{proof}
The proof will be very similar to the case discussed in Section~\ref{infinite_dim}, and we will only highlight the differences. First, there is an extra approximation due to the infinite horizon: we fix $T$ (and, later, we will consider $T\to\infty$) and we solve
\begin{equation}\label{PBinfty2}
\min_{\rho\in E_n} F^T_n(\rho).
\end{equation}

We adapt the proof of Theorem~\ref{Lip_reg} and we mention here the main differences.

\begin{proof}[Step 1]
Using the fact that $\e^{-rT}\leq \e^{-rt}$, we can conclude similarly to Step 1 in Theorem~\ref{Lip_reg} that there exists a solution to problem~\eqref{PBinfty2}.
\let\qed\relax
\end{proof}

\begin{proof}[Step 2]
Let $\rho_n$ (denoted $\rho$ in this step for simplicity) be a minimizer of $F^T_n$. We exploit the Euler--Lagrange equation for each component $\rho_i(t)$ in order to obtain bounds. The equation is slightly different now, due to the coefficient $\e^{-rt}$. We have
\begin{align}\label{E-L_infty}
\left(\e^{-rt} L_{\epsilon}'(\dot{\rho}_i)\right)' & =
\e^{-rt} \left(V_i^n(t,x_i) + f_n'(n \rho_i) + \frac{\sum_j \rho_j -1}{\delta}\right) \\ \nonumber
\text{ i.e.~}- r L_{\epsilon}'(\dot{\rho}_i) + (L_{\epsilon}' (\dot{\rho}_i))' &= V_i^n(t,x_i) + f_n'(n \rho_i) + \frac{\sum_j \rho_j -1}{\delta n}. \nonumber
\end{align}
Similarly to the proof of Theorem~\ref{Lip_reg}, we differentiate Eq.~\eqref{E-L_infty}, multiply it by $\dot{\rho}_i$ and sum over $i$:
\[
\sum_i (-r \ddot{\rho}_i \dot{\rho}_i L_{\epsilon}''(\dot{\rho}_i) + \dot{\rho}_i (L_{\epsilon}'(\dot{\rho}_i))'') = \sum_i \left(\dot{\rho}_i (V_i^n)'(t,x_i) + n \dot{\rho}_i^2 f_n''(n\rho_i)\right) + \frac{\left(\sum_i \dot{\rho}_i\right)^2}{\delta n}.
\]

Compared to Section~\ref{infinite_dim}, there is an extra term equal to $\sum_i \left(-r \ddot{\rho}_i \dot{\rho}_i L_{\epsilon}''(\dot{\rho}_i)\right)$. We now consider again the function $h$ such that
\[
h(s) = s L_{\epsilon}'(s) - L_{\epsilon}(s)
\]
and look at $\max_{t\in[0,T]} \sum_{i=1}^n h(n\dot{\rho}_i(t))$.


The extra term in the Euler--Lagrange equation appears when considering the maximum in $(0,T)$, but in this case we also have
\[
\sum_{i=1}^n h'(n\dot{\rho}_i(t))\ddot{\rho}_i(t)=0.
\]
Using $h'(s)=sL_\ve''(s)$ we see that this lets the extra term vanish, and the computations are then unchanged.

As for the case where the maximum is reached at $t=0$, the transversality condition is exactly the same as in Section~\ref{infinite_dim} because $\e^{-rt}=1$ at $t=0$. When the maximum is reached at $t= T$, the transversality condition now gives $\e^{-rT}L_\ve'(n\dot{\rho}_i(T))=0$ for every $i$. This allows us to obtain $\dot{\rho}_i(T)=0$ and the maximum cannot be reached at $t=T$.

This allows us to bound $\sum_i n\dot{\rho}_i(t)^2$ by a constant $C_{\infty} \colonequals C_0^2 /c_0^2$ which only depends on $c_0$ and $\sup_{t\in[0,T)} \|V'(t, \cdot\,) \|_{L^2(\Omega)}^2\leq \sup_{t\in\R_+} \|V'(t, \cdot\,) \|_{L^2(\Omega)}^2 \colonequals C_0^2$.
\let\qed\relax
\end{proof}

\begin{proof}[Step 3]
Now, we pass to the limit in $n$ for fixed $T$. This follows the very same procedure based on the Arzel\`a--Ascoli's theorem, as in Section~\ref{infinite_dim}. In this way, we obtain a family $(\rho_T)_T$ of equilipschitz functions in $t$ valued in $L^2(\Omega)$, satisfying the very same uniform bound on the time derivative, and each $\rho_T$ minimizes $F_T$ by proceeding the same way as in Step 4 of Theorem~\ref{Lip_reg}.
\let\qed\relax
\end{proof}

\begin{proof}[Step 4]
We pass to the limit $T\to\infty$. The previous step provides a family $(\rho_T)_{T\in\N}$ of equilipschitz minimizers of $F_T$. By choosing a density $m$ such that $\int f(m)<+\infty$ and comparing $\rho_T$ to the constant curve equal to $m$, using the integrability of the exponential discount coefficient $\e^{-rt}$, we obtain a uniform bound $F_T(\rho_T)\leq C$ (independent of $T$). Let us fix $T_0<\infty$. Since $\rho_T$ is a minimizer of $F_T$, we have $F_{T_0}(\rho_T)\leq F_{T}(\rho_T)\leq C$. Thus we have a bound (depending on $T_0$, because of the coefficient $\e^{-rt}$)
\[
\sup_{t\in[0,T_0]} \| \rho_T(t)\|_{L^2(\Omega)}
\leq C_{T_0}.
\]
This allows us to apply Arzel\`a--Ascoli's theorem for the convergence in the sense of~\eqref{cv_in_E}, and extracting a diagonal subsequence we obtain a subsequence of $(\rho_T)_T$ which converges towards $\rho\in \mathcal{C}(\R_+,L^2(\Omega))$ in the sense of~\eqref{cv_in_E}, on each interval $[0,T_0]$.
\let\qed\relax
\end{proof}

\begin{proof}[Step 5]
In this step, we show that $\rho$ defined previously is a minimizer of $F_\infty$. Let us denote by $(\rho_{T_k})_k$ the subsequence which converges to $\rho$. Let $m\in E_{\infty}$ be a competitor and $T$ a fixed value. For all $T_k\geq T$, we have
\[
F_{T}(\rho_{T_k})\leq F_T(m)\leq F_\infty(m).
\]
By the lower semi-continuity of $F_{T}$ we obtain $F_T(\rho)\leq F_\infty(m)$ and, taking the limit $T\to\infty$, we see $F_\infty(\rho)\leq F_\infty(m)$.

This shows that $\rho$ is a minimizer of $F_\infty$. Additionally, it verifies
\[
\forall t,s\in\R_+,~
\|\rho(t)-\rho(s)\|_{L^2(\Omega)}
\leq C_{\infty}|t-s|,
\]
hence~\eqref{Lip_reg_infty}.
\end{proof}
\let\qed\relax
\end{proof}


\section{Regularity in space}\label{sec5}

While the regularity shown in the previous sections is in time, we can show regularity in space under additional conditions on $V$ and on the boundary conditions. We will start from the problem with Dirichlet boundary conditions:
\begin{equation}\label{PB_Diri}
\min_{\substack{\rho \in E \\ \rho(0,\cdot\,)= m_0, \rho(T,\cdot\,)= m_T \\ \forall t\in[0,T], \int_{\Omega}\rho(t,x)\dx = 1}}
\int_0^T \int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x)) \right) \dx{}\dt \colonequals F(\rho),
\end{equation}
where we remind that $E \colonequals \BV([0,T]; L^1(\Omega))\cap L^2([0,T]\times\Omega)$. Similarly to Section~\ref{infinite_dim} and~\ref{infinite_horizon}, we approximate the problem~\eqref{PB_Diri} by
\begin{equation}\label{PB_Diri2}
\min_{\substack{\rho \in E_n \\ \rho(0,\cdot\,)= m^n_0, \\ \rho(T,\cdot\,)= m^n_T}}\!
\int_0^T\! \left(\left(
\sum_{i=1}^n \frac 1 n L_{\epsilon}(n\dot{\rho}_i(t)) + n \rho_i(t) \int_{A_i^n}\!\!\! V(t,x)\dx + \frac1n f(n\rho_i(t)) \right) + \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta} \right) \dt
\colonequals F_{n}(\rho),
\end{equation}
with the usual choices for $L_{\epsilon}(s) = \sqrt{s^2 + \epsilon^2} + \epsilon s^2$ and $E_n$, the set of piecewise constant functions $\rho\in E$ such that
\[
\forall t\in[0,T], \ \forall x \in\Omega,\quad \rho(t,x) = \sum_{i=1}^n n \rho_i(t) \1_{A_i^n}(x),
\]
where $\rho_i\colon [0,T] \to \R$ is a real-valued function. Note that here we prefer not replace the Dirichlet boundary conditions with $L^1$ penalizations, so that we need to discretize the initial and final data as well. We then define $m^n_t$ to be piecewise constant approximations of $m_t$ (for $t=0,T$), taking in particular $m_{i,t}^n \colonequals \frac 1 {|A_i^n|} \int_{A_i^n} m_t(x)\dx$. We also set, as usual, $V_i^n(t) \colonequals \frac 1 {|A_i^n|} \int_{A_i^n} V(t,x)\dx$.

We will consider the problem solved by $\rho_i(t)$ for all $i\in\{1,\dots, n\}$:
\begin{equation}\label{PB_Diri3}
\min_{\substack{n\rho_i(0)=m^n_{i,0}, \\ n\rho_i(T)=m^n_{i,T} }}
\int_0^T \left(L_{\epsilon}(n \dot{\rho}_i(t)) + f(n \rho_i(t)) + n\rho_i(t) V_i^n + \frac{\left(\sum_j \rho_j(t) -1\right)^2}{2\delta} \right)\dt.
\end{equation}

The Euler--Lagrange system of~\eqref{PB_Diri3} is
\begin{equation}\label{system}
\left \{
\begin{aligned}
\left(L_{\epsilon}'(n\dot{\rho}_i(t))\right)'&= V_i^n(t) + f'(n\rho_i(t)) + c(t),\\
n\rho_i(0)& =m^n_{i,0}, \\
n\rho_i(T)& =m^n_{i,T},
\end{aligned}
\right.
\end{equation}
where $c(t) \colonequals \frac{\int \rho(t,y) \dy-1}{\delta n}$.

\begin{lemm}\label{prop_bornitude}
If $\rho$ is a minimizer of~\eqref{PB_Diri2}, then for every $i,j$ we have the following inequality:
\begin{equation*}
n\sup_{t\in[0,T]} \left(\rho_i(t) - \rho_j(t)\right)
\leq \max \left(\sup_{t\in[0,T]} \left(V_j^n(t)- V_i^n(t)\right), m^n_{i,0}-m^n_{j,0}, m^n_{i,T}-m^n_{j,T}\right).
\end{equation*}
\end{lemm}

\begin{proof}
We consider
\begin{equation*}
\max_{t\in[0,T]} \rho_i(t) -\rho_j(t).
\end{equation*}
We distinguish three cases:
\begin{itemize}
\item the maximum is reached on $(0,T)$,
\item the maximum is reached at $0$
\item the maximum is reached at $T$.
\end{itemize}
\begin{enumerate}\romanenumi
\item \label{p10i} If the maximum is attained at $t_0\in(0,T)$, we have $\dot{\rho}_i(t_0) =\dot{\rho}_j(t_0)$ as well as $\ddot{\rho}_i(t_0) \leq\ddot{\rho}_j(t_0)$. This implies $\ddot{\rho}_i (t_0) L_{\epsilon}''(n\dot{\rho}_i(t_0))
\leq \ddot{\rho}_j (t_0) L_{\epsilon}''(n\dot{\rho}_j(t_0))$. By using~\eqref{system} and subtracting the equation for $i$ and that for $j$, we have
\begin{align*}
n\rho_i(t_0) - n\rho_j(t_0)
&\leq \frac{1}{c_0}\left(f_n'(n\rho_i(t_0)) - f_n'(n\rho_j(t_0))\right) \\
&= \ddot{\rho}_i (t_0) L_{\epsilon}''(n\dot{\rho}_i(t_0)) - \ddot{\rho}_j (t_0) L_{\epsilon}''(n\dot{\rho}_j(t_0))+V_j^n(t_0)- V_i^n(t_0)\\
&\leq V_j^n(t_0)- V_i^n(t_0) \leq \sup_{t\in[0,T]} \left(V_j^n(t)- V_i^n(t)\right).
\end{align*}
Consequently, $n(\rho_i(t_0) - \rho_j(t_0))$ is bounded by $\sup_{t\in[0,T]} \left(V_j^n(t)- V_i^n(t)\right)$.

\item \label{p10ii} If the maximum is reached at $t=0$ we have
\[
n\rho_i(t)-n\rho_j(t)\leq m^n_{i,0}-m^n_{j,0}
\]
using the Dirichlet condition.

\item \label{p10iii} If the maximum is reached at $t=T$ we have
\[
n\rho_i(t)-n\rho_j(t)\leq m^n_{i,T}-m^n_{j,T}
\]
using the other Dirichlet condition.
\end{enumerate}
The claim follows by putting together the three cases.
\end{proof}

\begin{theo}\label{rho_bounded}
Let $\rho$ be a minimizer of~\eqref{PB_Diri} and $\omega$ a function such that
\[
\frac{1}{c_0}(V(t,x)-V(t,x')),m_s(x)-m_s(x')\leq \omega(x-x')
\]
for all $t,x,x'$ with $s=0,T$. Suppose either that $\omega$ is a constant or that $\lim_{z\to 0}\omega(z)=0$ (i.e., $\omega$ is a modulus of continuity).

Then we have for a.e. $t,x,x'$
\[
\rho(t,x)-\rho(t,x')\leq \omega(x-x').
\]
\end{theo}


\begin{proof}
We first consider the curves $\rho_n$ minimizing~\eqref{PB_Diri2}. We write the estimate from Lemma~\ref{prop_bornitude} in terms of the densities $\rho_n(t,x)=n\rho_i(t)$. We obtain
\[
\rho_n(t,x)-\rho_n(t,x')\leq \omega(x-x')+\ve_n.
\]
Indeed, if the function $\omega$ is a constant $M$, then the oscillations of $\frac{V}{c_0}, a$ and $b$ are bounded by $M$ and so $n\rho_i-n\rho_j\leq M$ as well. Otherwise, if $\omega$ is a modulus of continuity, then the functions $\frac{V}{c_0}, a$ and $b$ are continuous and their averages on small pieces $A^n_i$ can be replaced by the values at the center of these pieces up to a small error $\ve_n$ (which also takes into account that $x$ and $x'$ can differ from the centers of the corresponding pieces).

The family $(\rho_n)_n$ is bounded in $L^2([0,T]\times \Omega)$. Differently from the case of Section~\ref{infinite_dim}, we do not have Lipschitz estimates in time (note that this is not the same approximation as in Section~\ref{infinite_dim}, since we impose the Dirichlet boundary conditions instead of penalizing them), but luckily we will not need them. Indeed, the equicontinuity in time was essential to obtain uniform and pointwise bounds and deal with the boundary terms. Here we just use weak convergence in $L^2([-1,T+1]\times\Omega)$ after extending $\rho_n$ to $a$ on $[-1,0]$ and to $b$ on $[T,T+1]$. It is easy to see that $\rho_n$ admits a weakly converging subsequence and that the limit solves~\eqref{PB_Diri}. Note that the extension before $t=0$ and after $t=T$ is needed to include the possible jump at those instants of time in the total variation and hence play the role of the Dirichlet boundary condition.

The conclusion comes from the following Lemma~\ref{limitomega}.
\end{proof}

\goodbreak
\begin{lemm}\label{limitomega}
Let $\rho_n$ be a sequence weakly converging to $\rho$ in $L^2([0,T]\times\Omega)$ and suppose that there exists a function $\omega$ such that
\begin{equation*}
\forall n\in \N^*,\ 
\forall t\in[0,T],\ 
\forall x,x'\in\Omega,
\quad
\rho_n(t,x) - \rho_n(t,x')
\leq \omega(x-x')+\ve_n
\end{equation*}
for a sequence $\ve_n\to 0$.

Then we have
\begin{equation*}
\rho(t,x) - \rho(t,x')
\leq \omega(x-x')
\end{equation*}
whenever $(t,x)$ and $(t,x')$ are Lebesgue points of $\rho$.
\end{lemm}

\begin{proof}
Let us take $(t_0,x_0)$ and $(t_0,y_0)$ in $[0,T]\times\Omega$, two Lebesgue points of $\rho$. Let $r>0$ be such that $(t_0-r,t_0+r)\subset [0,T]$, $B(x_0,r)\subset \Omega $ and $B(y_0,r)\subset \Omega $. Then we have in particular
\begin{align}\label{cv_faible1}
&\fint_{t_0-r}^{t_0+r} \fint_{B(x_0,r)} \rho_n(t,x)\dx \dt
\underset{n\to \infty}{\to}
\fint_{t_0-r}^{t_0+r} \fint_{B(x_0,r)} \rho(t,x)\dx \dt, \\ \label{cv_faible2}
\text{and }\quad & \fint_{t_0-r}^{t_0+r} \fint_{B(y_0,r)} \rho_n(t,x)\dx \dt
\underset{n\to \infty}{\to}
\fint_{t_0-r}^{t_0+r} \fint_{B(y_0,r)} \rho(t,x)\dx \dt.
\end{align}
By assumption we have the inequality
\begin{multline*}
\fint_{t_0-r}^{t_0+r} \left(\fint_{B(x_0,r)} \rho_n(t,x) \dx -\fint_{B(y_0,r)} \rho_n(t,x')\dx' \right) \dt\\
\begin{aligned}
&= \fint_{t_0-r}^{t_0+r} \fint_{B(x_0,r)} \left(\rho_n(t,x) - \rho_n(t,x-x_0+y_0)\right)\dx \dt \\
&\leq \omega(x_0-y_0)+\ve_n.
\end{aligned}
\end{multline*}
By taking the limit $n\to\infty$ and applying~\eqref{cv_faible1} and~\eqref{cv_faible2} to the left-hand side of the inequality, we~get
\begin{equation*}
\fint_{t_0-r}^{t_0+r} \left(\fint_{B(x_0,r)} \rho(t,x) \dx -\fint_{B(y_0,r)} \rho(t,x')\dx' \right) \dt\leq \omega(x_0-y_0).
\end{equation*}
Since $(t_0,x_0)$ and $(t_0,y_0)$ are Lebesgue points, we can pass to the limit $r\to 0$ and obtain
\[
\rho(t_0,x_0) - \rho(t_0,y_0)
\leq \omega(x_0-y_0).\qedhere
\]
\end{proof}

The case where the Dirichlet conditions are replaced by penalizations is harder to deal with. The only case that is easy to consider requires that the transversality condition is the same for $\rho_i$ and $\rho_j$. We can obtain the following results, for which we just sketch the modifications to the previous proofs.

\begin{theo}\label{5.4pen}
Let $\rho$ be a minimizer of
\begin{equation*}
\min_{\substack{\rho \in E \\ \rho(0,\cdot\,)= m_0, \\ \forall t\in[0,T], \int_{\Omega}\rho(t,x)\dx = 1}}
\int_0^T \int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x)) \right) \dx{}\dt +\int \Psi_T \rho(T,x).
\end{equation*}
Let $A\subset\Omega$ be a set where $\Psi_T$ is constant. Suppose that $\omega$ is a function (either constant or a modulus of continuity) such that $\frac{1}{c_0}(V(t,x)-V(t,x')),m_0(x)-m_0(x')\leq \omega(x-x')$ for all $t,x,x'$. Then we have for a.e. $x,x'\in A$
\[
\rho(t,x)-\rho(t,x')\leq\omega(x-x').
\]
\end{theo}

\begin{proof}
The approximation will be the same as before, but the Dirichlet boundary condition at $t=T$ is replaced by a transversality condition. Choosing a decomposition into pieces $A^n_i$ such that $x,x'$ belong to two pieces contained in $A$, this transversality condition will be the same for the two curves $\rho_i(t)$ and $\rho_j(t)$ that we need to consider to estimate $\rho(t,x)-\rho(t,x')$. In particular, we will have $\dot\rho_i(T)=\dot\rho_j(T)$. Hence, when considering $\max_t \rho_i(t)-\rho_j(t)$, the maximum could be attained at $t_0=T$ but in this case the first-order optimality condition will be satisfied, and this allows us to obtain the second-order one, which is the main tool to estimate $\rho_i(t_0)-\rho_j(t_0)$. The rest of the analysis goes as in the rest of the Section.
\end{proof}

\begin{theo}
Let $\rho$ be a minimizer of
\begin{equation*}
\min_{\substack{\rho \in E_{\infty} \\ \rho(0,\cdot\,)= m_0, \\ \forall t\in\R_+, \int_{\Omega}\rho(t,x)\dx = 1}}
\int_0^{\infty} \e^{-rt} \int_{\Omega} \left(|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + f(\rho(t,x)) \right) \dx{}\dt \colonequals F(\rho).
\end{equation*}
Suppose that $\omega$ is a function (either constant or a modulus of continuity) such that $\frac{1}{c_0}(V(t,x)-V(t,x')),m_0(x)-m_0(x')\leq \omega(x-x')$ for all $t,x,x'$. Then we have for a.e. $x,x'$
\[
\rho(t,x)-\rho(t,x')\leq\omega(x-x').
\]
\end{theo}

\begin{proof}
We first replace $F$ with $F_T$ as in Section~\ref{infinite_horizon}. Then, the claim is the same as in Theorem~\ref{5.4pen} with $\Psi_T=0$ (hence we can take $A=\Omega$) with the only difference that we have an extra coefficient $\e^{-rt}$. This lets an extra term $-rL_\ve'(n\dot\rho_i)$ appear in the Euler--Lagrange equation, but this term cancels when taking the difference between $i$ and $j$ because of the first-order optimality condition for $t_0$. The rest of the analysis goes as in Theorem~\ref{5.4pen} and in the rest of the Section. This provides a uniform estimate on $\rho_T$, independent of $T$, and we can then take the limit $T\to\infty$.
\end{proof}

We conclude summarizing the result that we can obtain in terms of spatial regularity:
\begin{itemize}
\item For the problem with two Dirichlet boundary conditions, if $V,m_0,$ and $m_T$ are continuous, then the solution $\rho$ shares the same modulus of continuity of $\frac{V}{c_0},m_0,$ and $m_T$. Combining this with the $L^2$ Lipschitz regularity in time obtained in Section~\ref{infinite_dim}, this gives a uniform continuity result in $(t,x)$.
\item In the same problem, if $V,m_0,$ and $m_T$ are only bounded, then $\rho$ is bounded, since its oscillation is bounded by that of $\frac{V}{c_0},m_0,$ and $m_T$ and its $L^2$ norm is also bounded.
\item For the problem with a Dirichlet boundary condition at $t=0$ and a penalization $\Psi_T$ at $t=T$, if $\Psi_T$ is piecewise constant and $V$ and $m_0$ are continuous, then $\rho$ is piecewise continuous, and hence bounded. The solution $\rho$ is also bounded if we only assume $V$ and $m_0$ to be bounded, and $\Psi_T$ to be piecewise constant.
\item In the infinite-horizon problem with Dirichlet boundary condition at $t=0$, the solution shares the same modulus of continuity of $\frac{V}{c_0} $ and $m_0$ and is uniformly continuous in $(t,x)$. It is uniformly bounded if $V$ and $m_0$ are bounded.
\item In the periodic case (which we briefly presented in Section~\ref{finite_dim} but did not develop here) the solution $\rho$ shares the same continuity or boundedness of $\frac{V}{c_0} $.
\end{itemize}
All the above continuity results are crucial for applications to the theory of Mean Field Games, since the variational problem we studied, with a congestion penalization in the form of $f(\rho)$, corresponds to an MFG where every agent minimizes a cost over trajectories $\gamma$ involving the integral of a running cost of the form $\int_0^T (V+f'(\rho))(t,\gamma(t))\dt$. This cost is not even well-defined if $\rho(t,x)$ is not a continuous function of $x$! When $\rho$ is not continuous but it is bounded, a clever construction due to Ambrosio and Figalli (\cite{AmbFig}) and re-used in the frameworks of MFG in~\cite{CarMesSan} allows one to choose a particular representative of this running cost for which it is possible to prove the desired equilibrium results. This explains our interest in the spatial regularity and/or the boundedness of $\rho$.


\section{Numerical approximation} \label{section_num}
\label{sec6}

In this section, numerical simulations are carried out on the following problem
\begin{equation}\label{PB1_algo}
\min_{\substack{\rho \in E \\ \forall t\in[0,T],~\int_{\Omega}\rho(t,x)\dx=1 \\ \rho\geq 0}}
\int_0^T\! \int_{\Omega} \left(\lambda|\dot{\rho}(t,x)| + V(t,x) \rho(t,x) + \frac{\rho(t,x)^2} 2\right) \dx{} \dt +\psi_0(\rho(0)) + \psi_T(\rho(T)) \colonequals F(\rho)
\end{equation}
and on some of its variants (Dirichlet conditions, periodic case\dots). We study a particular case of $F$ where $f(\rho) = \rho^2/2$. The parameter $\lambda>0$ allows us to add more or less importance to the $L^1$-norm.

For these numerical examples, we take the domain $\Omega \colonequals [0,S]$ to be one-dimensional with $S>0$. In this section, we study the following cases:
\begin{itemize}
\item periodic solutions in 1D (only the time variable, Section~\ref{num_1D}, Figure~\ref{fig1}).
\item periodic solutions in 2D (periodic in time, Section~\ref{num_2Dperio}, Figures~\ref{fig3}, \ref{fig4}).
\item non-periodic solutions in 2D, with or without Dirichlet conditions or penalizations at the time boundary (Section~\ref{num_2Dnonperio}, Figures~\ref{fig5}, \ref{fig6}, and~\ref{fig7}).
\end{itemize}

\subsection{The numerical method}\label{num_meth}
Let $\{t_0, \dots, t_K\}$ and $\{x_0, \dots, x_N\}$ be the regular subdivisions of $[0,T]$ and $[0,S]$, respectively. To approximate the integral in $F$, the left-rectangle method will be used, hence we will consider solutions $(\rho(t_i,x_j))_{\substack{0\leq i\leq K-1\\0\leq j \leq N-1}}$ in the vector space $\R^{K\cdot N}$. With an abuse of notation, we also write $V \in \R^{K\cdot N}$ the vector $(V(t_i,x_j))_{\substack{0\leq i\leq K-1\\0\leq j \leq N-1}}$. In the following, the notations $\langle{\,\cdot\,|\,\cdot\,}\rangle$ and $\|{\,\cdot\,}\|$ designate respectively the scalar product and the Euclidean norm in a vector space of finite dimension.

The problem that will be numerically solved can be written in the form
\begin{equation*}
\min_{\rho\in \R^{K\cdot N}} f(\rho) + g(\mathcal{A}\rho),
\end{equation*}
where, of course, $f$ and $g$ have nothing to do with the functions introduced in the previous sections. Here $\rho\in \R^{K\cdot N}$, $f(\rho)= h\langle V\,|\,\rho\rangle+h \frac{\|\rho\|^2}2$ is a proper, closed and $h$-strongly convex function and $g(\rho_0, \rho_1, \rho_2) = \lambda \|\rho_0\|_1 + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2)$ is a proper, closed and convex function. The notation $\|{\,\cdot\,}\|_1$ refers to the $l_1$-norm in a vector space of finite dimension and $\delta_{C_i}$ is the indicator function defined by
\begin{equation*}
\delta_{C_i}(\rho) =
\begin{cases}
0, & \rho \in C_i,\\
+\infty, & \rho \notin C_i,
\end{cases}
\end{equation*}
where $C_0 = (\R_+)^{K\cdot N}$ and $C_1 = \{\rho\in \R^{K\cdot N} ; \forall i \in\{0,\dots,K-1\},~\sum_{j=0}^{N-1} \rho(t_i,x_j)\cdot l=1 \}$ are the sets of constraints. The linear transformation $\mathcal{A}$ is $\mathcal{A}(\rho) = (A\rho, \rho, \rho)$, where the matrix $A$ will be detailed for each case.

An algorithm to approximate the solution of this problem is already known and is called the (Fast) Dual Proximal Gradient Method, which can be found in~\cite{Beck}. We will use its primal representation which does not involve the dual representation of the problem.

Since $g$ is a sum of separable functions, its proximal operator is
\begin{equation*}
\prox_g(\rho_0,\rho_1,\rho_2) = \left(\prox_{\lambda\|{\,\cdot\,}\|_1}(\rho_0),
\prox_{\delta_{C_0}}(\rho_1), \prox_{\delta_{C_1}}(\rho_2)\right).
\end{equation*}
We remind that for all $x\in\R$,
\begin{equation*}
\prox_{\lambda|{\,\cdot\,}|}(x) = [|x| -\lambda]_+ \sgn(x)
\text{ and }
\prox_{\delta_C}(x) = P_C(x)
\end{equation*}
where $P_C$ is the projection on the set $C$.

\goodbreak
The algorithm is described below.\bigskip

\framebox{
\begin{minipage}{0.6\linewidth}
\textbf{Initialization:} $L\geq \frac{\|\mathcal{A}\|^2}{h}$, $w^0=y^0 \in (\R^{K\cdot N})^3$, $t^0=1$.\\
\textbf{Step for $k\geq 0$:}
\begin{itemize}
\item $u^k = \argmax_u \{\langle u,\mathcal{A}^T w^k\rangle - f(u)\} = \frac{\mathcal{A}^T w^k}{h}-V$

$ = \frac 1 h (A^T w_0^k + w_1^k + w_2^k) -V$
\item $y_0^{k+1} = w_0^k - \frac 1 L A u^k + \frac 1 L \prox_{L \lambda\|{\,\cdot\,}\|_1} (A u^k - L w_0^k)$
\item $y_1^{k+1} = w_1^k - \frac 1 L u^k + \frac 1 L \prox_{L \delta_{C_0}} (u^k - L w_1^k)$
\item $y_2^{k+1} = w_2^k - \frac 1 L u^k + \frac 1 L \prox_{L \delta_{C_1}} (u^k - L w_2^k)$
\item $t^{k+1} = \frac{k+1+a}a$
\item $w^{k+1} = y^{k+1} + \frac{t^k -1}{t^{k+1}}(y^{k+1} -y^k)$
\end{itemize}
\end{minipage} }
\bigskip


The role of the parameter $t^k$ is to accelerate the algorithm, in the spirit of Nesterov's accelerated gradient or (in the proximal case) of the FISTA algorithm (see~\cite{Nest,FISTA}). The constant $a$ is greater than or equal to 2 and its value is displayed in Tables~\ref{table1} and~\ref{table2}.

Note that our problem is essentially, up to minor modifications and the presence of extra constraints (unit mass, positivity, Dirichlet conditions), a simplified version of some standard problems in image denoising based on total variation (see, for instance, the classical paper~\cite{TVdenoising}): here, whether the problem is 1D or 2D or higher-dimensional, the main feature is that the total variation is only computed in time.

The following sections describe different examples of solutions to~\eqref{PB1_algo} by using this algorithm. For each case, the differences with the description above will be specified.

\subsection{1D-periodic} \label{num_1D}

When we consider the periodic problem (in time, so that the interval $[0,T]$ becomes a circle of length $T$), and we assume $S=T$ and $V(t,x) = v(t-x)$ for an $S$-periodic function $v$, it is possible to reduce the problem to the one-dimensional case, namely a problem with only one variable in $[0,T]$ instead of $[0,T]\times[0,S]$. One expects the solution $\rho(t,x) = u(t-x)$ to be transported according to time. The uniqueness of the solution and the symmetry with respect to translations in both time and space (replacing $(t,x)$ with $(t+\delta,x+\delta)$) show that the solution should indeed be of this form. Then, a change of variables $y=t-x$ can be carried out in $F$ as follows:
\begin{align*}
F(\rho) &=
\int_0^T\! \int_0^S\left(\lambda|\dot{u}(t-x)| +v(t-x) u(t-x) + \frac{u(t-x)^2} 2\right) \dx{} \dt \\
&= \int_0^T\! \int_t^{t-S} -\left(\lambda|\dot{u}(y)| +v(y) u(y) + \frac{u(y)^2} 2\right) \dy{} \dt \\
&= T \int_0^S \left(\lambda|\dot{u}(y)| +v(y) u(y) + \frac{u(y)^2} 2\right) \dy{}.
\end{align*}

The problem hence reduces to the search for an $S$-periodic solution with one variable. Incidentally, up to multiplicative and additive constants this problem is equivalent to minimizing
\[
\int_0^S \left(\lambda|\dot{u}(y)| + \frac{|u(y)+v(y)|^2} 2\right) \dy,
\]
which is exactly the problem described as an example at the end of Section~\ref{finite_dim}, with $\omega=-v$, except for the constraints $\int_0^S u(y)\dy=1$ and $u\geq 0$.


In the following subsections, we will see that the solution obtained through numerical simulations coincides with $u$ computed as above.

For the numerical simulation shown in Figure~\ref{fig1} we take $v(y) = -a_0 \cos(\frac{2\pi}{S}y)$ and the choice of the parameters is shown in Table~\ref{table1}.

Let $\{y_0, \dots, y_K\}$ be a regular subdivision of $[0,S]$ and $h\colonequals y_1-y_0$ the step-size of the subdivision. By the left-rectangle method, the problem is approximated by
\begin{equation*}
\sum_{i=0}^{K-1}
\left(
\lambda |u(y_{i+1}) - u(y_i)| + v(y_i) u(y_i) h + \frac{u(y_i)^2}2 h
\right) + \delta_{C_0}(u) + \delta_{C_1}(u).
\end{equation*}

\begin{figure}[!ht]
\centering
\includegraphics[width=11cm]{Figure_1.png}
\caption{The simulation of the solution $u$ to the 1D periodic case with $v(y) = -a_0 \cos(\frac{2\pi}{S}y)$. The parameters are displayed in Table~\ref{table1}. The blue solid line corresponds to the solution $u$, while the red dashed line is the profile of $c-v$ with $c=1/S$.}\label{fig1}
\end{figure}

\begin{table}[!ht]
\caption{Parameters for the solution to the problem in Figure~\ref{fig1}.}\label{table1}
\centering
\begin{tabular}{|c|c|}
\hline Parameter & Value \\
\hline $S$ & 10\\
\hline $K$ & 500\\
\hline $h$ & 0.02\\
\hline $a$ & 200\\
\hline $L$ & $6/h$\\
\hline $a_0$ & 0.1\\
\hline $\lambda$ & 0.1\\
\hline
\end{tabular}
\end{table}

We observe that with this choice of parameters the solution $u$ is strictly positive and ``follows'' the profile of $c-v$ for a constant $c$ which appears as a Lagrange multiplier for the mass constraint and allows to obtain unit mass. The function $u$ is also a solution of the problem at the end of Section~\ref{finite_dim} with $\omega=c-v$ and its profile, shown in Figure~\ref{fig1}, is consistent with the explicit description of the solution which we gave.

\begin{rema}\label{lambda}
If $v(y) = -a_0 \cos(\frac{2\pi}S y)$, it is possible to compute the critical $\lambda$ at which the aspect of the solution $u$ switches from Figure~\ref{fig1} to the constant solution. The Euler--Lagrange equation of
\begin{equation*}
\min_{\substack{u\in \BV([0,S];\R); \\
\int_0^S u(y)\dy = 1,~u\geq 0}}
\int_0^S \left(\lambda|\dot{u}(y)| +v(y) u(y) + \frac{u(y)^2} 2\right) \dy{}
\end{equation*}
is $z'=v+u-c$ on $[0,S]$ where $z(y)\in \partial(\lambda|{\,\cdot\,}|)(u'(y))$ and $c=1/S$ is the constant due to the mass constraint $\int_0^S u(y)\dy =1$. Assuming that the solution becomes constant when $\epsilon = \frac S 4$ and extending the solution periodically on $[-S,0]$, we integrate the equation $z'=v+u-c$ over $[-\epsilon,\epsilon]$:
\begin{align*}
-2\lambda = \int_{-\epsilon}^{\epsilon} z'(y) \dy &=
\int_{-\epsilon}^{\epsilon} (v(y) + u(y) - c)\dy =\int_{-\epsilon}^{\epsilon} (v(y)-v(\epsilon))\dy \\
&=\int_{-\epsilon}^{\epsilon} \left(-a_0\cos\Biggl(\frac{2\pi}S y\Biggr) +a_0\cos\Biggl(\frac{2\pi}S \epsilon\Biggr)\right)\dy =-a_0 \frac{S}{\pi} \sin\Biggl(\frac{2\pi}S \epsilon\Biggr) +2\epsilon a_0 \cos\Biggl(\frac{2\pi}S \epsilon\Biggr).
\end{align*}
By taking $\epsilon = \frac S 4$, we obtain that $\lambda = \frac{a_0 S}{2\pi}$.

When $\lambda > \frac{a_0 S}{2\pi}$, the solution $u$ is constant equal to $c$, while the condition $\lambda < \frac{a_0 S}{2\pi}$ gives the solution $u$ as in Figure~\ref{fig1}. This explains the choice of the parameter $\lambda$.
\end{rema}

\subsection{2D periodic in time} \label{num_2Dperio}

We consider now the case where we keep two variables, but we assume the time domain to be periodic. This case is slightly simpler to handle from the point of view of the discretization of the time derivative.

By using the subdivisions described in Section~\ref{num_meth}, we approximate the integral $F$ by the left-rectangle method as follows:
\begin{equation*}
\sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\left(
\lambda \bigl|\rho(t_{i+1},x_j) - \rho(t_i,x_j)\bigr| l + V(t_i,x_j) \rho(t_i,x_j) h l + \frac{\rho(t_i,x_j)^2}2 h l
\right),
\end{equation*}
where $h\colonequals t_1-t_0$ and $l\colonequals x_1-x_0$ are the step-sizes of each subdivision. We will consider that $\rho(t_0,x_j)= \rho(t_K,x_j)$ for all $j\in\{0,\dots,N-1\}$.

With this discretization, we look for a solution in the space $\R^{K\cdot N}$ where $\rho$ is viewed as a vector.

The discretized problem is
\begin{equation}\label{Discret_l}
\min_{\rho\in \R^{K\cdot N}}
\sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\left(
\lambda \bigl|\rho(t_{i+1},x_j) - \rho(t_i,x_j)\bigr| l + V(t_i,x_j) \rho(t_i,x_j) h l + \frac{\rho(t_i,x_j)^2}2 h l
\right) + \delta_{C_0}(\rho) + \delta_{C_1}(\rho).
\end{equation}

To avoid numerical errors due to small values, searching for a minimizer of~\eqref{Discret_l} is the same as searching for a minimizer of the problem divided by $l$:
\begin{equation*}\label{Discret}
\min_{\rho\in \R^{K\cdot N}}
\sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\left(
\lambda |\rho(t_{i+1},x_j) - \rho(t_i,x_j)| + V(t_i,x_j) \rho(t_i,x_j) h + \frac{\rho(t_i,x_j)^2}2 h
\right) + \delta_{C_0}(\rho) + \delta_{C_1}(\rho).
\end{equation*}

The function $f\colon \R^{K\cdot N} \to \R$ defined by
\begin{equation}\label{f}
f(\rho) =
\sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\left(V(t_i,x_j) \rho(t_i,x_j) h + \frac{\rho(t_i,x_j)^2}2 h
\right) = h \langle V\,|\, \rho \rangle +h \frac{\|\rho \|^2}2
\end{equation}
corresponds to the function $f$ described in Section~\ref{num_meth}.

The linear transformation $\mathcal{A}\colon \R^{K\cdot N} \to (\R^{K\cdot N})^3$ is the same as in Section~\ref{num_meth}. The square matrix $A$ is of order $K\cdot N$ and its coordinates are
\begin{equation*}
A_{i,j} =
\begin{cases}
-1 & \text{if }j =i, \\
1& \text{if }j \equiv i+N [K\cdot N], \\
0 & \text{otherwise, }
\end{cases}
\end{equation*}
namely
\begin{equation*}
A =
\begin{pmatrix}
-I_N & I_N & & \\
& \ddots& \ddots & \\
& & -I_N & I_N \\
I_N & & & -I_N
\end{pmatrix}
\end{equation*}
where $I_N$ is the identity matrix of order $N$ and blank space corresponds to zeros.

The function $g\colon (\R^{K\cdot N})^3 \to \R$ is defined as in Section~\ref{num_meth}:
\begin{equation}
\begin{aligned}\label{g}
g(\rho_0, \rho_1, \rho_2) &= \sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\lambda |\rho_0(t_i,x_j) | + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2) \\
&= \lambda \| \rho_0 \|_1 + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2).
\end{aligned}
\end{equation}

\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{Figure_3.png}
\caption{Profile of the solution to~\eqref{PB1_algo} with $V(t,x) = -a_0 \cos(\frac{2\pi}S (t-x))$ at times $t=0$, $2$, $4$, $6$, $8$, $9.9$. The blue solid line describes the solution $\rho$ at each specified time and the red dashed line is the profile of $0.1-V$ at each time. The parameters are displayed in Table~\ref{table2}.}\label{fig3}
\end{figure}

In Figure~\ref{fig3}, we consider the same case as in Figure~\ref{fig1}, but with a 2D approach. The profile of the solution $\rho$ at time 0 is the same as in Figure~\ref{fig1}. In Figure~\ref{fig1} the problem is viewed in a one-dimensional space, and here we can see that the solution is transported in time from the left to the right.

\begin{table}[!ht]
\caption{Parameters for the numerical simulation of the solution to~\eqref{PB1_algo}.}\label{table2}
\centering
\begin{tabular}{|c|c|}
\hline Parameter & Value \\
\hline $T$ & 10\\
\hline $S$ & 10\\
\hline $K$ & 100\\
\hline $N$ & 500\\
\hline $a$ & 200\\
\hline $h$ & 0.02\\
\hline $L$ & $6/h$\\
\hline $a_0$ & 0.1\\
\hline $\lambda$ & 0.1\\
\hline
\end{tabular}
\end{table}


The next case (see Figure~\ref{fig4}) we show is different. Here we impose a Dirichlet boundary condition at time 0 which corresponds here to $m_0$ being the constant density equal to $1/S$. This is the same as studying the non-periodic problem on $[0,T]$ and imposing two Dirichlet boundary conditions, which are (by chance) equal. In this case, keeping the periodic structure allows us to use a simpler form of the matrix $A$.

Let $m_0\in \R^N$ be a vector verifying the constraints, i.e.,
\begin{equation*}
m_0 \in (\R_+)^N
\quad\text{and}\quad
\sum_{j=0}^{N-1} m_{0,j} l =1.
\end{equation*}
Let us define the new set $C_2 = \{\rho \in \R^{K\cdot N};~\forall j \in \{0,\dots,N-1\},\ \rho(0, x_j) = m_{0,j} \} $ which encodes the constraint $\rho(0) = m_0$.

The function $f$ is the same as in~\eqref{f}. The linear transformation is now $\mathcal{A}\rho = (A\rho,\rho,\rho,\rho)$ and the function $g\colon (\R^{K\cdot N})^4 \to \R$ is
\begin{equation}
\begin{aligned}
g(\rho_0, \rho_1, \rho_2,\rho_3) &=
\sum_{i=0}^{K-1} \sum_{j=0}^{N-1}
\lambda |\rho_0(t_i,x_j) | + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2)+\delta_{C_2}(\rho_3) \\
&= \lambda \| \rho_0 \|_1 + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2) +\delta_{C_2}(\rho_3).
\end{aligned}
\end{equation}

\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{Figure_4.png}
\caption{Simulation of the solution to~\eqref{PB1_algo} at times $t=0$, $0.1$, $1$, $2$, $3$, $4$, $5$, $6$, $7$, $8$, $9$, $9.9$ with $V(t,x) = -a_0 \cos(\frac{2\pi}S (t-x))$ and $m_0(x) = 1/S $ with parameters from Table~\ref{table2} except for parameter $L$ where $L = 7/h$. The blue solid line is the profile of the solution $\rho$ and the red dashed line corresponds to $0.1-V$ taken at each time $t$. }\label{fig4}
\end{figure}

In Figure~\ref{fig4} we see that the Dirichlet condition is indeed verified at $t=0$ and then, immediately, the solution jumps (coherently with Remark~\ref{initialjump}) from the constant state to $t=0.1$. One can notice that the profile is different from Figure~\ref{fig3} in that on the subintervals where the solution does not follow $0.1-V$, it is not constant anymore. However, between times $t=4$ and $t=6$, the solution comes back to the profile when there is no boundary condition, namely, it is constant when it does not follow $0.1-V$. When $t\geq 6$, the profile of the solution varies again on the subintervals where it should be constant.

\subsection{2D, non periodic} \label{num_2Dnonperio}

Unlike Sections~\ref{num_1D} and~\ref{num_2Dperio}, we no longer impose a periodic time behavior, and the value $\rho(t_K, x_j)$ of the solution $\rho \in \R^{(K+1)\cdot N}$ may be different from $\rho(t_0, x_j)$. The integral in $F$ is now approximated by:
\begin{equation}\label{Discret_nonperio}
\sum_{j=0}^{N-1}\left(
\sum_{i=0}^{K-1}
\lambda \bigl|\rho(t_{i+1},x_j) - \rho(t_i,x_j)\bigr| l+
\sum_{i=0}^K
\left(V(t_i,x_j) \rho(t_i,x_j) h l + \frac{\rho(t_i,x_j)^2}2 h l
\right) \right).
\end{equation}

\goodbreak
\begin{rema}
The formula~\eqref{Discret_nonperio} is different from~\eqref{Discret_l} in the sense that there is the additional term \linebreak
$\sum_{j=0}^{N-1} \bigl(V(t_K,x_j) \rho(t_K,x_j) h l + \frac{\rho(t_K,x_j)^2}2 h l\bigr)$ that depends on $t_K$ and should not appear in the left-rectangle method. However, with this choice of discretization, the function $f$ is strongly convex in $\rho\in \R^{(K+1)\cdot N}$, which allows us to apply the algorithm, and the formula~\eqref{Discret_nonperio} still converges towards the integral $\int_0^T \int_0^S \left(\lambda|\dot{\rho}| + V \rho + \frac{\rho^2} 2\right)$ as $h\to 0$ and $l\to 0$.
\end{rema}

The new function $f$ is
\begin{align}\label{f_nonperio}
f(\rho) \colon \R^{(K+1)\cdot N} &\to \R \\ \nonumber \rho &\mapsto h \langle V\,|\,\rho\rangle + h\frac{\|\rho\|^2}2 = \sum_{i=0}^K \sum_{j=0}^{N-1}
\left(V(t_i,x_j) \rho(t_i,x_j) h + \frac{\rho(t_i,x_j)^2}2 h \right).
\end{align}
The function $g$ is defined as in~\eqref{g} and $\mathcal{A}$ is the same as in Section~\ref{num_meth} with a different matrix $A$ which is rectangle of dimension $(K\cdot N) \times \left((K+1)\cdot N\right)$ such that
\begin{equation*}
A_{i,j} =
\begin{cases}
-1 & \text{if }i=j, \\
1& \text{if }j = i+N, \\
0 & \text{otherwise, }
\end{cases}
\end{equation*}
namely
\begin{equation*}
A =
\begin{pmatrix}
-I_N & I_N & & \\
& \ddots& \ddots & \\
& & -I_N & I_N
\end{pmatrix}.
\end{equation*}


\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{Figure_5bis.png}
\caption{The simulation of the solution to~\eqref{PB1_algo} at times $t= 0$, $0.2$, $0.4$, $0.5$, $1$, $1.5$, $5$, $8.3$, $8.8$, $9$, $9.5$, $10$ with $V(t,x) = (t-x)^2$ and parameters from Table~\ref{table2}. The blue solid line corresponds to the solution $\rho$ at different times.}\label{fig5}
\end{figure}

\goodbreak
Figure~\ref{fig5} shows the simulation of the solution $\rho$ to the problem~\eqref{PB1_algo} with a given $V$ which is not periodic anymore. No Dirichlet boundary conditions nor penalizations $\psi_t$ at $t=0,T$ are considered. The different times are chosen to show the most significant changes in the profile of $\rho$. Since $V(t,x)$ is minimal at $x=t$, the general behavior is that the solution is transported from the left to the right. Between times $t=1.5$ and $t=8.3$, the solution either follows $c-V$ (where $c$ is a constant to be determined) or it is constant. Close to the time boundaries, the behavior is different.

\begin{rema}
The constant $c$ such that $\rho$ follows $c-V$ depends on $\lambda$. The parameter $\lambda$ can be studied as in Remark~\ref{lambda}. Different values of $\lambda$ can either imply that $\rho$ follows $c-V$ or that it follows $c-V$ and is constant in the middle.
\end{rema}

The next case that we present (Figure~\ref{fig6}) involves Dirichlet conditions in time. Let $m_0\in \R^N$ and $m_T\in \R^N$ be two vectors verifying the boundary conditions, i.e.,
\begin{align*}
m_0 \in (\R_+)^N &\quad\text{and}\quad
\sum_{j=0}^{N-1} m_{0,j} l =1, \\
m_T \in (\R_+)^N &\quad\text{and}\quad
\sum_{j=0}^{N-1} m_{T,j} l =1.
\end{align*}
Let us define the new set of constraints by $C_2 = \bigl\{\rho \in \R^{(K+1)\cdot N};\ \forall j \in \{0,\dots,N-1\},\ \rho(0, x_j) = m_{0,j}$ and $\rho(T, x_j) = m_{T,j}\bigr\} $. The function $f$ is defined as in~\eqref{f_nonperio}, $\mathcal{A}\rho = (A\rho,\rho,\rho,\rho)$ and the function $g\colon \R^{K\cdot N}\times \bigl(\R^{(K+1)\cdot N}\bigr)^3 \to \R$ is
\begin{align*}
g(\rho_0, \rho_1, \rho_2,\rho_3) &=\sum_{i=0}^{K-1} \sum_{j=0}^{N-1} \lambda |\rho_0(t_i,x_j) | + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2) +\delta_{C_2}(\rho_3)\\
&= \lambda \| \rho_0 \|_1 + \delta_{C_0}(\rho_1) + \delta_{C_1}(\rho_2) +\delta_{C_2}(\rho_3).
\end{align*}

\begin{rema}
Instead of adding the constraint $\rho(0) = m_0$ in the problem, one could have discretized the integral with the right-rectangle method in time and directly used that $\rho(0) = m_0$ in the algorithm.
\end{rema}


\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{Figure_6bis.png}
\caption{The simulation of the solution to~\eqref{PB1_algo} at times $t=0$, $0.1$, $1$, $1.7$, $2.4$, $5$, $8.3$, $8.8$, $9.1$, $9.5$, $9.9$, $10$ with $V(t,x) = (t-x)^2$, $m_0(x) = m_T(x)= 1/S$ and parameters from Table~\ref{table2} except for $L$, which is $L=7/h$. The blue solid line corresponds to the solution $\rho$ at different times.}\label{fig6}
\end{figure}

The profile of Figure~\ref{fig6} is quite similar to Figure~\ref{fig5} and mainly differs around $t=0,T$. Again, a jump in time is observed.


The very last example we consider is non-periodic in time and involves both a Dirichlet condition at $t=0$ and penalization at $t=T$. We consider the penalization
\begin{equation*}
\psi_T (\rho(T)) = \int_0^S \Psi_T(x) \rho(T,x) \dx.
\end{equation*}
The left-rectangle method applied to $\psi_T$ gives
\begin{equation*}
\psi_T (\rho(T)) = \sum_{j=0}^{N-1}
\Psi_T(x_j) \rho(T,x_j) l.
\end{equation*}
We use the discretization from~\eqref{Discret_nonperio}. The difference is in the definition of $V$. We shall take $\tilde{V}$ such that
\begin{equation*}
\tilde{V} = V + \Biggl(0,\dots, 0, \frac{\Psi_T}h\Biggr).
\end{equation*}


\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{Figure_7bis.png}
\caption{The simulation of the solution to~\eqref{PB1_algo} at times $t=0$, $0.1$, $0.3$, $0.6$, $1$, $1.5$, $5$, $8.3$, $9$, $9.4$, $9.9$, $10$ with $V(t,x) = (t-x)^2$, $m_0(x) = 1/S$, $\Psi_T(x) = x^2$ and parameters from Table~\ref{table2} except for $L$, which is $7/h$. The blue solid line corresponds to the solution $\rho$ at different times.}\label{fig7}
\end{figure}

\pagebreak
Again, the profile of Figure~\ref{fig7} is similar to Figures~\ref{fig5} and~\ref{fig6} except for the behavior close to the time boundaries. In Figure~\ref{fig7}, we have put penalization at times 0 and $T$ such that it costs less to be close to $0$ in space, hence the concentration of the mass at time $0$ in $x=0$ and the jump at time~$T$.


\section*{Declaration of interests}
The authors do not work for, advise, own shares in, or receive funds from any organization that could benefit from this article, and have declared no affiliations other than their research organizations.

%\FloatBarrier
\printbibliography
\end{document}