% Measures.tex
%
% From Polymath Wiki
% Revision as of 12:13, 8 July 2009 by Ryanworldwide (talk | contribs) (Undo revision 1768 by 67.186.58.92 (Talk))
% Jump to navigation / Jump to search

\section{Passing between probability measures}

\noteryan{Some sentences about this section go here. Point out that the main goal is to prove Lemma~\ref{lem:distributions}, and that the proof of this lemma is sort of boring, and so we wouldn't mind if the reader skipped reading it.}

\subsection{Restrictions and composite distributions}

\begin{definition} A \emph{restriction} on $[k]^n$ is a pair $(J, x_\barJ)$, where $J \subseteq [n]$ and $x_\barJ \in [k]^{\barJ}$, where $\barJ = [n] \setminus J$. It is thought of as a ``partial string,'' where the $\barJ$-coordinates are ``fixed'' according to $x_\barJ$ and the $J$-coordinates are still ``free''. Given a fixing $y_J \in [k]^J$, we write $(x_\barJ, y_J)$ for the complete composite string in $[k]^n$.\noteryan{Although this notation looks awkward, I believe it's useful. Also, I suppose we should point out that a restriction is a special kind of combinatorial subspace.} \end{definition}

\begin{definition} Given a set $A \subseteq [k]^n$ and a restriction $(J,x_\barJ)$, we write $A_{x_\barJ}$ for the subset of $[k]^{J}$ defined by $A_{x_\barJ} = \{y \in [k]^J : (x_{\barJ}, y_J) \in A\}$. \end{definition}

\begin{definition} \label{def:r4r} For $0 \leq \eps \leq 1$, we say that $J$ is an \emph{$\eps$-random subset} of $[n]$ if $J$ is formed by including each coordinate $i \in [n]$ independently with probability $\eps$. Assuming $r \leq n/2$, we say that $J$ is an \emph{$[r,4r]$-random subset} of $[n]$ if $J$ is an $\eps$-random subset of $[n]$ conditioned on $r \leq |J| \leq 4r$, with $\eps = 2r/n$. \end{definition}

Our main goal for this section is to prove a lemma allowing us to shift between various distributions. Before stating the lemma, we give some notation: \begin{definition} A \emph{distribution family} $(\pi^m)_{m \in \N}$ (over $[k]$) is a sequence of probability distributions, where $\pi^m$ is a distribution on $[k]^m$. In this paper the families we consider will either be the equal(-nondegenerate-)slices family $\pi^m = \ens{k}^m$ or $\pi^m = \eqs{k}^m$, or will be the product distributions based on a single distribution $\pi$ on $[k]$, $\pi^m = \pi^{\otimes m}$. \end{definition} \begin{lemma} \label{lem:distributions} Let $(\pi^m)$ and $(\nu^m)$ be distribution families. Assume $2 \ln n \leq r \leq n/2$. Let $J$ be an $[r,4r]$-random subset of $[n]$, let $x$ be drawn from $[k]^{\barJ}$ according to $\pi^{|\barJ|}$, and let $y$ be drawn from $[k]^J$ according to $\nu^{|J|}$. The resulting distribution on $(x,y)$ has total variation distance from $\pi^n$ which can be bounded as follows: \begin{enumerate} \item (Product to equal-slices.) If $\pi^m = \pi^{\otimes m}$ and $\nu^m = \eqs{\ell}^m$ for $\ell \leq k$, the bound is \[ \bigl(2{\textstyle \sqrt{\frac{1}{\min(\pi)}-1}}+2\bigr) \cdot r / \sqrt{n}. \] \item (Equal-slices to product.) If $\pi^m = \eqs{k}^m$ and $\nu^m = \nu^{\otimes m}$, the bound is $4k \cdot r/\sqrt{n}$. \item (Equal-slices to equal-slices.) \label{eqn:distrs-eqs-eqs} If $\pi^m = \eqs{k}^m$ and $\nu^m = \eqs{\ell}^m$ for $\ell \leq k$, the bound is $4k \cdot r/\sqrt{n}$. \end{enumerate} \end{lemma}

Although Lemma~\ref{lem:distributions} involves the equal-slices distribution, one can convert to equal-nondegenerate-slices if desired using Proposition~\ref{prop:degen}.


Since $\eqs{k}^n$ is a mixture of product distributions (Proposition~\ref{prop:eqs-mix}), the main work in proving Lemma~\ref{lem:distributions} involves comparing product distributions.


\subsection{Comparing product distributions} Throughout this subsection we assume $\pi$ and $\nu$ are probability distributions on the same finite set $\Omega$.\noteryan{Probably should change $\Omega$ to $[k]$ for simplicity.}

\begin{definition} The \emph{$\chi^2$ distance} $\dchi{\pi}{\nu}$ is defined by\noteryan{I assume the reader is savvy enough to realize we mean to take the positive square root\dots} \[ \dchi{\pi}{\nu}^2 = \Varx_{x \sim \pi}\left[\frac{\nu[x]}{\pi[x]}\right]. \] Note that $\dchi{\pi}{\nu}$ is \emph{not} symmetric in $\pi$ and $\nu$. \end{definition}

The $\chi^2$ distance is introduced to help us prove the following fact: \begin{proposition} \label{prop:mix-distance} Assume $\min(\pi) \neq 0$; i.e., $\pi$ has full support. Suppose $\pi$ is slightly mixed with~$\nu$, forming $\hat{\pi}$; specifically, $\hat{\pi} = (1-\eps) \pi + \eps \nu$. Then the associated product distributions $\pi^{\otimes n}$, $\hat{\pi}^{\otimes n}$ on $\Omega^{n}$ satisfy \[ \dtv{\pi^{\otimes n}}{\hat{\pi}^{\otimes n}} \leq \dchi{\pi}{\nu} \cdot \eps \sqrt{n}. \] \end{proposition} \begin{proof} It is a straightforward consequence of Cauchy--Schwarz (see, e.g.~\cite[p.\ 101]{Rei89})\noteryan{This is the part using $\min(\pi) \neq 0$, by the way.} that \[ \dtv{\pi^{\otimes n}}{\hat{\pi}^{\otimes n}} \leq \dchi{\pi}{\hat{\pi}} \cdot \sqrt{n}, \] and the identity $\dchi{\pi}{\hat{\pi}} = \eps \dchi{\pi}{\nu}$ follows easily from the definitions. \end{proof} This can be bounded independently of $\nu$, as follows: \begin{corollary} \label{cor:mix-distance} In the setting of Proposition~\ref{prop:mix-distance}, \[ \dtv{\pi^{\otimes n}}{\hat{\pi}^{\otimes n}} \leq \sqrt{{\textstyle \frac{1}{\min(\pi)}} - 1} \cdot \eps \sqrt{n}. \] \end{corollary} \begin{proof} It is easy to check that the distribution $\nu$ maximizing $\dchi{\pi}{\nu}$ is the one putting all its mass on the $x$ minimizing $\pi[x]$. In this case one calculates $\dchi{\pi}{\nu} = \sqrt{\frac{1}{\min(\pi)} - 1}$. \end{proof}



\subsection{Proof of Lemma~\ref{lem:distributions}}


\begin{definition} \label{def:compos-distr} Let $0 \leq \eps \leq 1$ and let $(\pi^m)$, $(\nu^m)$ be distribution families. Drawing from the \emph{$(\eps, \pi, \nu)$-composite distribution} on $[k]^n$ entails the following: $J$ is taken to be an $\eps$-random subset of $[n]$; $x$ is drawn from $[k]^{\barJ}$ according to $\pi^{|\barJ|}$; and, $y$ is drawn from $[k]^J$ according to $\nu^{|J|}$. We sometimes think of this distribution as just being a distribution on composite strings $z = (x, y) \in [k]^n$. \end{definition}

Note that the distribution described in Lemma~\ref{lem:distributions} is very similar to the $(\eps, \pi, \nu)$-composite distribution, except that it uses an $[r, 4r]$-random subset rather than an $\eps$-random subset. We can account for this difference with a standard large-deviation bound:\noteryan{Citation needed?} \begin{fact} \label{fact:dev} If $J$ is an $\eps$-random subset of $[n]$ with $\eps = 2r/n$ as in Definition~\ref{def:r4r}, then $r \leq |J| \leq 4r$ holds except with probability at most $2\exp(-r/4)$. \end{fact}

The utility of using $\eps$-random subsets in Definition~\ref{def:compos-distr} is the following observation: \begin{fact} If $\pi$ and $\nu$ are distributions on $[k]$, thought of also as product distribution families, then the $(\eps, \pi, \nu)$-composite distribution on $[k]^n$ is precisely the product distribution $\hat{\pi}^{\otimes n}$, where $\hat{\pi}$ is the mixture distribution $(1-\eps)\pi + \eps \nu$ on $[k]$. \end{fact}

Because of this, we can use Corollary~\ref{cor:mix-distance} to bound the total variation distance between $\pi^{\otimes n}$ and a composite distribution. We conclude: \begin{proposition} \label{prop:prod-composite} Let $\pi$ and $\nu$ be any distributions on $[k]$. Writing $\wt{\pi}$ for the $(\eps,\pi,\nu)$-composite distribution on strings in $[k]^n$, we have \[ \dtv{\pi^{\otimes n}}{\wt{\pi}} \leq {\textstyle \sqrt{\frac{1}{\min(\pi)}-1}} \cdot \eps \sqrt{n}. \] \end{proposition}

For any $\ell \leq k$, the equal-slices distribution $\eqs{\ell}^{m}$ on $m$ coordinates is a mixture of product distributions $\nu^{\otimes m}$ on $[k]^m$. In light of Fact~\ref{fact:tv-mix}, we can average Proposition~\ref{prop:prod-composite} over $\nu$ to obtain: \begin{proposition} \label{prop:prod-eqs} If $\wt{\pi}$ denotes the $(\eps,\pi,\eqs{\ell})$-composite distribution on strings in $[k]^n$, we have \[ \dtv{\pi^{\otimes n}}{\wt{\pi}} \leq {\textstyle \sqrt{\frac{1}{\min(\pi)}-1}} \cdot \eps \sqrt{n}. \] \end{proposition} If we instead average Proposition~\ref{prop:prod-composite} over $\pi$, we can obtain: \begin{proposition} \label{prop:eqs-prod} Let $\nu$ be any distribution on $[k]$. Writing $\wt{\mu}$ for the $(\eps, \eqs{k}, \nu)$-composite distribution on strings in $[k]^n$, we have \[ \dtv{\eqs{k}^n}{\wt{\mu}} \leq (2k-1)\eps \sqrt{n}. \] \end{proposition} \begin{proof} Thinking of $\eqs{k}^m$ as the mixture of product distributions $\pi^{\otimes m}$, where $\pi$ is a random distribution on $[k]$, Fact~\ref{fact:tv-mix} and Proposition~\ref{prop:prod-composite} imply \[ \dtv{\eqs{k}^n}{\wt{\mu}} \leq \Ex_{\pi}\left[{\textstyle \sqrt{\frac{1}{\min(\pi)}-1}}\right] \cdot \eps \sqrt{n}. \] We can upper-bound the expectation\noteryan{Undoubtedly someone has worked hard on this $-1/2$th moment of the least spacing before (Devroye '81 or '86 perhaps), but I think it's probably okay to do the following simple thing here} by \begin{multline*} \Ex_{\pi}\left[{\textstyle \sqrt{\frac{1}{\min(\pi)}}}\right] \quad=\quad \int_{0}^\infty \Pr_{\pi}\left[{\textstyle \sqrt{\frac{1}{\min(\pi)}}} \geq t\right]\,dt \quad=\quad \int_{0}^\infty \Pr_{\pi}[\min(\pi) \leq 1/t^2]\,dt \\ \leq\quad k + \int_{k}^\infty \Pr_{\pi}[\min(\pi) \leq 1/t^2]\,dt \quad\leq\quad k + \int_{k}^\infty (k(k-1)/t^2) \,dt \quad=\quad 2k-1, \end{multline*} where in the second-to-last step we used Proposition~\ref{prop:rand-min}. 
\end{proof} Averaging now once more in the second component, we obtain the following: \begin{proposition} \label{prop:eqs-eqs} Let $2 \leq \ell \leq k$ and let $\wt{\mu}$ denote the $(\eps, \eqs{k}, \eqs{\ell})$-composite distribution on strings in $[k]^n$. Then \[ \dtv{\eqs{k}^n}{\wt{\mu}} \leq (2k-1) \eps \sqrt{n}. \] \end{proposition}



We can now obtain the proof of Lemma~\ref{lem:distributions}:

\begin{proof} The three statements in Lemma~\ref{lem:distributions} essentially follow from Propositions~\ref{prop:prod-eqs}, \ref{prop:eqs-prod}, and \ref{prop:eqs-eqs}, taking $\eps = 2r/n$. This would give bounds of $2{\textstyle \sqrt{\frac{1}{\min(\pi)}-1}} \cdot r / \sqrt{n}$, $(4k-2) \cdot r/\sqrt{n}$, and $(4k-2) \cdot r/\sqrt{n}$, respectively. However we need to account for conditioning on $r \leq |J| \leq 4r$. By Fact~\ref{fact:dev}, this conditioning increases the total variation distance by at most $2\exp(-r/4)$. Using the lower bound $r \geq 2 \ln n$ from the lemma's hypothesis, this quantity is at most $2r/\sqrt{n}$, completing the proof. \end{proof}