Difference between revisions of "Correlation.tex"

From Polymath1Wiki
Jump to: navigation, search
Line 1: Line 1:
 
\section{Line-free sets correlate with insensitive set intersections}
 
\section{Line-free sets correlate with insensitive set intersections}
 +
In this section we make rigorous the argument described in Section~\ref{sec:outline-corr}; we show that a dense subset $A$ either contains a combinatorial line, or it has slight correlation with a union of $ab$-insensitive sets.
  
\noteryan{I think it would be good to go through the paper and soften explicit constants to $O(\cdot)$'s, for readability.  I want to leave them in now just for: a) care; b) because we don't actually know what the final quantitative bounds are yet.}
+
\noteryan{I think it would be good to go through the paper and soften explicit constants to $O(\cdot)$'s, for readability. }
  
\begin{lemma} \label{lem:32} Suppose $A \subseteq [k]^n$ satisfies $\eqs{k}(A) \geq \delta$, and let $\gamma$ satisfy
+
First we reduce to the case where the set is dense under both $\eqs{k}$ and $\eqs{k-1}$.
 +
 
 +
\begin{lemma} \label{lem:32} Let $k \geq 3$, suppose $A \subseteq [k]^n$ satisfies $\eqs{k}(A) \geq \delta$, and let $\theta$ satisfy
 
\[
 
\[
10 k \frac{\ln n}{\sqrt{n}} \leq \gamma \leq \delta.
+
10 k \frac{\ln n}{\sqrt{n}} \leq \theta \leq \delta.
 
\]
 
\]
Then there exists a restriction $(J, x_\barJ)$ with $|J| \geq (\gamma/5k) \sqrt{n}$ such that one of the following two conditions holds:
+
Then there exists a restriction $(J, x_\barJ)$ with $\abs{J} \geq (\theta/5k) \sqrt{n}$ such that one of the following two conditions holds:
 
\begin{enumerate}
 
\begin{enumerate}
\item \label{eqn:lem32_1} $\eqs{k}^J(A_{x_\barJ}) \geq \delta + \gamma$; or,
+
\item \label{eqn:lem32_1} $\eqs{k}^J(A_{x_\barJ}) \geq \delta + \theta$; or,
\item \label{eqn:lem32_2} both $\eqs{k}^J(A_{x_\barJ}) \geq \delta - 3\gamma^{1/4}\delta^{1/2}$ and $\eqs{k-1}^J(A_{x_\barJ}) \geq \delta - 3\gamma^{1/2}$.
+
\item \label{eqn:lem32_2} both $\eqs{k}^J(A_{x_\barJ}) \geq \delta - 3\theta^{1/4}\delta^{1/2}$ and $\eqs{k-1}^J(A_{x_\barJ}) \geq \delta - 3\theta^{1/2}$.
 
\end{enumerate}
 
\end{enumerate}
 
\end{lemma}
 
\end{lemma}
 
\begin{proof}
 
\begin{proof}
Let $r = \lceil (\gamma/5k) \sqrt{n} \rceil$.  As in Lemma~\ref{lem:distributions}, let $J$ be an $[r,4r]$-random subset of $[n]$ and let $x$ be drawn from $\eqs{k}^{\barJ}$.  If  $\Ex_{J, x}[\eqs{k}(A_{x})^2] \geq (\delta + \gamma)^2$, then there must exist some restriction $(J,x)$ with $|J| \geq r$ and $\eqs{k}^J(A_{x}) \geq \delta + \gamma$.  In this case, conclusion~\ref{eqn:lem32_1} above holds.  We henceforth assume  
+
Let $r = \lceil (\theta/5k) \sqrt{n} \rceil$.  As in Lemma~\ref{lem:distributions}, let $J$ be an $[r,4r]$-random subset of $[n]$ and let $x$ be drawn from $\eqs{k}^{\barJ}$.  If  $\Ex_{J, x}[\eqs{k}(A_{x})^2] \geq (\delta + \theta)^2$, then there must exist some restriction $(J,x)$ with $\abs{J} \geq r$ and $\eqs{k}^J(A_{x}) \geq \delta + \theta$.  In this case, conclusion~\ref{eqn:lem32_1} above holds.  We henceforth assume  
 
\begin{equation} \label{eqn:moment}
 
\begin{equation} \label{eqn:moment}
\Ex_{J, x}[\eqs{k}(A_{x})^2] < (\delta + \gamma)^2
+
\Ex_{J, x}[\eqs{k}(A_{x})^2] < (\delta + \theta)^2
 
\end{equation}
 
\end{equation}
and show that conclusion~\ref{eqn:lem32_2} holds.\\
+
and show that conclusion~\ref{eqn:lem32_2} holds.
  
 
\ignore{Let $y$ be drawn from $\eqs{k}^{J}$, and let $z$ be drawn from $\eqs{k-1}^{J}$.}  Since $\eqs{k}(A) \geq \delta$, two applications of Lemma~\ref{lem:distributions}.\ref{eqn:distrs-eqs-eqs} yield
 
\ignore{Let $y$ be drawn from $\eqs{k}^{J}$, and let $z$ be drawn from $\eqs{k-1}^{J}$.}  Since $\eqs{k}(A) \geq \delta$, two applications of Lemma~\ref{lem:distributions}.\ref{eqn:distrs-eqs-eqs} yield
\begin{eqnarray}
+
\begin{align}
\Ex_{J, x}[\eqs{k}(A_{x})] & \geq & \delta - 4kr/\sqrt{n} \geq \delta - \gamma \label{eqn:mean3} \\
+
\Ex_{J, x}[\eqs{k}(A_{x})] & \geq \delta - 4kr/\sqrt{n} \geq \delta - \theta \label{eqn:mean3} \\
\Ex_{J, x}[\eqs{k-1}(A_{x})] & \geq & \delta - 4kr/\sqrt{n} \geq \delta - \gamma, \label{eqn:mean2}
+
\Ex_{J, x}[\eqs{k-1}(A_{x})] & \geq \delta - 4kr/\sqrt{n} \geq \delta - \theta, \label{eqn:mean2}
\end{eqnarray}
+
\end{align}
where we've used our definition of $r$ (and the hypothesis $\gamma \geq (10 k \ln n)/\sqrt{n}$ ensures that $r \geq 2 \ln n$ as required for the lemma.)  Combining~\eqref{eqn:moment}, \eqref{eqn:mean3} gives
+
where we've used our definition of $r$ (and the hypothesis $\theta \geq (10 k \ln n)/\sqrt{n}$ ensures that $r \geq 2 \ln n$ as required for the lemma.)  Combining~\eqref{eqn:moment}, \eqref{eqn:mean3} gives
 
\[
 
\[
\Varx_{J,x}[\eqs{k}(A_{x})] < (\delta + \gamma)^2 - (\delta - \gamma)^2 = 4 \gamma \delta.
+
\Varx_{J,x}[\eqs{k}(A_{x})] < (\delta + \theta)^2 - (\delta - \theta)^2 = 4 \theta \delta.
 
\]
 
\]
Thus Chebyshev's inequality implies that except with probability at most $\gamma^{1/2}$ over the choice of $(J,x)$, we have that $\eqs{k}(A_x)$ is within $\gamma^{-1/4} \cdot (4 \gamma \delta)^{1/2} = 2 \gamma^{1/4} \delta^{1/2}$ of its expectation.  When this happens,  
+
Thus Chebyshev's inequality implies that except with probability at most $\theta^{1/2}$ over the choice of $(J,x)$, we have that $\eqs{k}(A_x)$ is within $\theta^{-1/4} \cdot (4 \theta \delta)^{1/2} = 2 \theta^{1/4} \delta^{1/2}$ of its expectation.  When this happens,  
 
\begin{equation} \label{eqn:conc2a}
 
\begin{equation} \label{eqn:conc2a}
\eqs{k}(A_x) \geq \delta - \gamma - 2 \gamma^{1/4} \delta^{1/2} \geq \delta - 3 \gamma^{1/4} \delta^{1/2}
+
\eqs{k}(A_x) \geq \delta - \theta - 2 \theta^{1/4} \delta^{1/2} \geq \delta - 3 \theta^{1/4} \delta^{1/2}
 
\end{equation}
 
\end{equation}
(using $\gamma \leq \delta$).  On the other hand, from~\eqref{eqn:mean2} we immediately deduce that with probability at least $2\gamma^{1/2}$ over the choice of $(J,x)$ we have  
+
(using $\theta \leq \delta$).  On the other hand, from~\eqref{eqn:mean2} we immediately deduce that with probability at least $2\theta^{1/2}$ over the choice of $(J,x)$ we have  
 
\begin{equation} \label{eqn:conc2b}
 
\begin{equation} \label{eqn:conc2b}
\eqs{k-1}(A_{x}) \geq \delta - \gamma - 2 \gamma^{1/2} \geq \delta - 3\gamma^{1/2}.
+
\eqs{k-1}(A_{x}) \geq \delta - \theta - 2 \theta^{1/2} \geq \delta - 3\theta^{1/2}.
 
\end{equation}
 
\end{equation}
Thus with probability at least $2\gamma^{1/2} - \gamma^{1/2} > 0$ over the choice of $(J,x)$, we have both~\eqref{eqn:conc2a}, \eqref{eqn:conc2b}, establishing
+
Thus with probability at least $2\theta^{1/2} - \theta^{1/2} > 0$ over the choice of $(J,x)$, we have both~\eqref{eqn:conc2a}, \eqref{eqn:conc2b}, establishing
 
conclusion~\ref{eqn:lem32_2} of the lemma.
 
conclusion~\ref{eqn:lem32_2} of the lemma.
 
\end{proof}
 
\end{proof}
  
\noteryan{For DHJ(3), we'll want to use Theorem~\ref{thm:sperner-v} in the following.}
+
We now come to the main theorem in this section, deducing a key step in the proof of DHJ($k$) from the probabilistic DHJ($k-1$) theorem.
\begin{theorem} \label{thm:correlate} Suppose $A \subseteq [k]^n$ has $\ens{k}(A) \geq \delta_k$ and $\ens{k-1}(A) \geq \delta_{k-1}$.  Assume further that $n \geq n_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})$. Then either $A$ contains a nondegenerate combinatorial line (of length $k$), or there exists $B \subseteq [k]^n$ with $\ens{k}(B) \geq \delta_k$ and  
+
\begin{theorem} \label{thm:correlate} Let $k \geq 3$ and suppose we have established the probabilistic DHJ($k-1$) theorem.  Further suppose $A \subseteq [k]^n$ has $\ens{k}(A) \geq \delta_k$ and $\ens{k-1}(A) \geq \delta_{k-1}$.  Finally, assume $n \geq \Pdhj{k-1}{\delta_{k-1}}$. Then either $A$ contains a combinatorial line (of length $k$), or there exists $B \subseteq [k]^n$ with $\ens{k}(B) \geq \delta_k$ and  
 
\[
 
\[
\frac{\ens{k}(A \cap B)}{\ens{k}(B)} \geq \delta_k + \delta_k \cdot \eta_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})
+
\frac{\ens{k}(A \cap B)}{\ens{k}(B)} \geq \delta_k + \delta_k \cdot \pdhj{k-1}{\delta_{k-1}}
 
\]  
 
\]  
 
such that
 
such that
Line 50: Line 53:
 
B = B_{1} \cup B_2 \cup \cdots \cup B_{k-1},
 
B = B_{1} \cup B_2 \cup \cdots \cup B_{k-1},
 
\]
 
\]
where for each $i \in [k-1]$, $B_i$ is $ik$-insensitive.
+
where for each $a \in [k-1]$, $B_a$ is $ak$-insensitive.
 
\end{theorem}
 
\end{theorem}
 
\begin{proof}
 
\begin{proof}
For $i \in [k-1]$, let $L_i = \{x \in [k]^n : \chg{x}{k}{i} \in A\}$, an $ik$-insensitive set.  Let $L = L_1 \cap L_2 \cap \cdots \cap L_{k-1}$. In other words, $L$ is the set of length-$(k-1)$ lines (possibly degenerate) which are in $A$ when we interpret strings in $[k]^n$ as line templates over $[k-1]$.  Let $L' \subseteq L$ be the nondegenerate such line templates; i.e., $L' = \{x \in L : \exists j \in [n] \text{ s.t.\ } x_j = k\}$.  Note that $\ens{k}(L \setminus L') = 0$.
+
For $a \in [k-1]$, let $L_a = \{x \in [k]^n : \chg{x}{k}{a} \in A\}$, an $ak$-insensitive set.  Let $L = L_1 \cap L_2 \cap \cdots \cap L_{k-1}$. In other words, $L$ is the set line templates over $[k-1]^n$ (possibly degenerate) whose corresponding lines are entirely in $A$.  Let $L' \subseteq L$ be the nondegenerate such line templates; i.e., $L' = \{x \in L : \exists j \in [n] \text{ s.t.\ } x_j = k\}$.  Note that $\ens{k}(L \setminus L') = 0$.
  
The key observation is that if $A \cap L' \neq \emptyset$, we have a nondegenerate combinatorial line (of length $k$) entirely in $A$.  So assume otherwise; hence $A \subseteq \overline{L'} = [k]^n \setminus L'$.  Since $\ens{k-1}(A) \geq \delta_{k-1}$ and $n \geq n_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})$, Theorem~\ref{thm:dhj-dv} implies that $\ens{k}(L') = \ens{k}(L) \geq \eta_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})$.  It follows that
+
The key observation is that if $A \cap L' \neq \emptyset$, we have a (nondegenerate) combinatorial line of length~$k$ entirely in $A$.  So assume otherwise; hence $A \subseteq \overline{L'} = [k]^n \setminus L'$.  Since $\ens{k-1}(A) \geq \delta_{k-1}$ and $n \geq \Pdhj{k-1}{\delta_{k-1}}$, the probabilistic DHJ($k-1$) theorem implies that $\ens{k}(L') = \ens{k}(L) \geq \pdhj{k-1}{\delta_{k-1}}$.  It follows that
 
\[
 
\[
\frac{\ens{k}(A \cap \overline{L})}{\ens{k}(\overline{L})} = \frac{\ens{k}(A \cap \overline{L'})}{\ens{k}(\overline{L'})} = \frac{\eqs{k}(A)}{\eqs{k}(\overline{L'})} \geq \frac{\delta_k}{1 - \eta_{\ref{thm:dhj-dv}}(k,1,\delta_{k-1})} \geq \delta_k + \delta_k \cdot \eta_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1}).
+
\frac{\ens{k}(A \cap \overline{L})}{\ens{k}(\overline{L})} = \frac{\ens{k}(A \cap \overline{L'})}{\ens{k}(\overline{L'})} = \frac{\eqs{k}(A)}{\eqs{k}(\overline{L'})} \geq \frac{\delta_k}{1 - \pdhj{k-1}{\delta_{k-1}}} \geq \delta_k + \delta_k \cdot \pdhj{k-1}{\delta_{k-1}}.
 
\]
 
\]
 
The proof is completed by taking $B = \overline{L}$; this has $\ens{k}(B) = \ens{k}(\overline{L'}) \geq \delta_k$ because $\overline{L'} \supseteq A$.
 
The proof is completed by taking $B = \overline{L}$; this has $\ens{k}(B) = \ens{k}(\overline{L'}) \geq \delta_k$ because $\overline{L'} \supseteq A$.
 
\end{proof}
 
\end{proof}
  
We will prefer to obtain this relative density increment under the uniform distribution, rather than under equal-nondegenerate-slices.
+
We will prefer to obtain this relative density increment under the uniform distribution, rather than under equal-nondegenerate-slices:
 
\begin{lemma} \label{lem:back-to-uniform}
 
\begin{lemma} \label{lem:back-to-uniform}
Suppose $A \subseteq B \subseteq [k]^n$ satisfy $\ens{k}(B) = \gamma$ and $\ens{k}(A) \geq \delta \ens{k}(A)$.  Let $0 < \eta < 3\delta$ be a parameter, define $r = (\gamma \eta /15k) \sqrt{n}$, and assume $2\ln n \leq r \leq n/2$.  Then there exists a restriction $(J,x_{\barJ})$ with $|J| \geq r$ under which $\pi(B_{x_\barJ}) \geq (\eta/3)\gamma$ and $\pi(A_{x_\barJ}) \geq (\delta - \eta) \pi(B_{x_\barJ})$, where $\pi$ denotes the uniform distribution on $[k]^J$.\noteryan{And if $B$ was a union of $ij$-insensitive sets before\dots}
+
Suppose $A \subseteq B \subseteq [k]^n$ satisfy $\ens{k}(B) = \beta$ and $\ens{k}(A) \geq \delta \cdot \ens{k}(B)$.  Let $0 < \eta < 3\delta$ be a parameter, define $r = (\beta \eta /15k) \sqrt{n}$, and assume $2\ln n \leq r \leq n/2$.  Then there exists a restriction $(J,x_{\barJ})$ with $\abs{J} \geq r$ under which $\unif_k(B_{x_\barJ}) \geq (\eta/3) \beta$ and $\unif_k(A_{x_\barJ}) \geq (\delta - \eta) \unif_k(B_{x_\barJ})$.\noteryan{And if $B$ was a union of $ij$-insensitive sets before\dots}
 
\end{lemma}
 
\end{lemma}
 
\begin{proof}
 
\begin{proof}
Let $J$ be an $[r,4r]$-random subset of $[n]$, let $x \sim \eqs{k}^{\barJ}$, and let $y \sim \pi^J$.  From Lemma~\ref{lem:distributions}, the distribution on $(x,y)$ is $(4kr/\sqrt{n})$-close to $\eqs{k}^n$.  Further, from~\eqref{prop:degen} the total variation distance between $\eqs{k}^n$ and $\ens{k}^n$ is at most $k(k-1)/n \leq kr/\sqrt{n}$\noteryan{uses $k \leq \sqrt{n}$ or so}.  By choice of $r$, the combined distance bound $5kr/\sqrt{n}$ is at most $(\eta/3)\gamma$.  We therefore have
+
Let $J$ be an $[r,4r]$-random subset of $[n]$, let $x \sim \eqs{k}^{\barJ}$, and let $y \sim \unif_k^J$.  From Lemma~\ref{lem:distributions}, the distribution on $(x,y)$ is $(4kr/\sqrt{n})$-close to $\eqs{k}^n$.  Further, from~\eqref{prop:degen} the total variation distance between $\eqs{k}^n$ and $\ens{k}^n$ is at most $k(k-1)/n \leq kr/\sqrt{n}$\noteryan{uses $k \leq \sqrt{n}$ or so}.  By choice of $r$, the combined distance bound $5kr/\sqrt{n}$ is at most $(\eta/3)\beta$.  We therefore have
 
\begin{equation} \label{eqn:AxBx}
 
\begin{equation} \label{eqn:AxBx}
\Ex_{J,x}[\pi(A_x)] \geq \delta \gamma - (\eta/3)\gamma, \qquad
+
\Ex_{J,x}[\unif_k(A_x)] \geq \delta \beta - (\eta/3)\beta, \qquad
\Ex_{J,x}[\pi(B_x)] \leq \gamma + (\eta/3)\gamma.
+
\Ex_{J,x}[\unif_k(B_x)] \leq \beta + (\eta/3)\beta.
 
\end{equation}
 
\end{equation}
  
Let $H$ be the event that $\pi(B_x) \geq (\eta/3)\gamma$.  On one hand, since $A_x \subseteq B_x$ always we have
+
Let $H$ be the event that $\unif_k(B_x) \geq (\eta/3)\beta$.  On one hand, since $A_x \subseteq B_x$ always we have
 
\[
 
\[
\Ex_{J,x}[\pi(A_x)] \leq \Ex_{J,x}[\pi(A_x) \mid H] \Pr[H] + (\eta/3)\gamma.
+
\Ex_{J,x}[\unif_k(A_x)] \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H] + (\eta/3)\beta.
 
\]
 
\]
 
On the other hand, we have
 
On the other hand, we have
 
\[
 
\[
\Ex_{J,x}[\pi(B_x)] \geq \Ex_{J,x}[\pi(B_x) \mid H] \Pr[H].
+
\Ex_{J,x}[\unif_k(B_x)] \geq \Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H].
 
\]
 
\]
 
Combining these two deductions with~\eqref{eqn:AxBx} yields
 
Combining these two deductions with~\eqref{eqn:AxBx} yields
 
\[
 
\[
\Ex_{J,x}[\pi(B_x) \mid H] \Pr[H] \leq \gamma(1+\eta/3), \qquad (\delta - 2\eta/3)\gamma \leq \Ex_{J,x}[\pi(A_x) \mid H] \Pr[H].
+
\Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H] \leq \beta(1+\eta/3), \qquad (\delta - 2\eta/3)\beta \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H].
 
\]
 
\]
 
Thus
 
Thus
 
\[
 
\[
\frac{\delta - 2\eta/3}{1+\eta/3} \cdot \Ex_{J,x}[\pi(B_x) \mid H] \Pr[H] \leq \Ex_{J,x}[\pi(A_x) \mid H] \Pr[H],
+
\frac{\delta - 2\eta/3}{1+\eta/3} \cdot \Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H] \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H],
 
\]
 
\]
 
whence
 
whence
 
\[
 
\[
0 \leq \Ex_{J,x}[\pi(A_x) - (\delta - \eta)\pi(B_x) \mid H],
+
0 \leq \Ex_{J,x}[\unif_k(A_x) - (\delta - \eta)\unif_k(B_x) \mid H],
 
\]
 
\]
where we used $(\delta - 2\eta/3)/(1+\eta/3) \geq \delta - \eta$.  Thus there exist some $(J,x)$ where $H$ occurs and also $\pi(A_x) \geq (\delta - \eta) \pi(B_x)$, completing the proof.
+
where we used $(\delta - 2\eta/3)/(1+\eta/3) \geq \delta - \eta$.  Thus there exist some $(J,x)$ where $H$ occurs and also $\unif_k(A_x) \geq (\delta - \eta) \unif_k(B_x)$, completing the proof.
 
\end{proof}
 
\end{proof}
  
 
+
Finally, the set $B$ we get from Theorem~\ref{thm:correlate} is a union of insensitive sets; we would prefer an intersection:
\noteryan{At this point, it's quite tempting to request a notation change so that we don't use $i$ and $j$ both for indices in $[n]$ and symbols in $[k]$\dots}
+
\begin{lemma} \label{lem:switch} Suppose $B = B_1 \cup B_2 \cup \cdots B_{k-1} \subseteq [k]^n$, where $B_a$ is $ak$-insensitive.  Then $B$ is a disjoint union of $k-1$ sets $C_1, \dots, C_{k-1}$, where $C_b$ is an intersection of $ak$-insensitive sets, $a = 1 \dots b$.
 
+
\begin{lemma} Suppose $B = B_1 \cup B_2 \cup \cdots B_{k-1} \subseteq [k]^n$, where $B_i$ is $ik$-insensitive.  Then $B$ is a disjoint union of $k-1$ sets $C_1, \dots, C_{k-1}$, where $C_i$ is an intersection of $jk$-insensitive sets, $j = 1 \dots i$.
+
 
\end{lemma}
 
\end{lemma}
 
\begin{proof}
 
\begin{proof}
Take $C_i = B_i \setminus (B_1 \cup \cdots \cup B_{i-1})$.
+
Take $C_b = B_b \setminus (B_1 \cup \cdots \cup B_{b-1})$.
 
\end{proof}
 
\end{proof}

Revision as of 21:22, 24 June 2009

\section{Line-free sets correlate with insensitive set intersections} In this section we make rigorous the argument described in Section~\ref{sec:outline-corr}; we show that a dense subset $A$ either contains a combinatorial line, or it has slight correlation with a union of $ab$-insensitive sets.

\noteryan{I think it would be good to go through the paper and soften explicit constants to $O(\cdot)$'s, for readability. }

First we reduce to the case where the set is dense under both $\eqs{k}$ and $\eqs{k-1}$.

\begin{lemma} \label{lem:32} Let $k \geq 3$, suppose $A \subseteq [k]^n$ satisfies $\eqs{k}(A) \geq \delta$, and let $\theta$ satisfy \[ 10 k \frac{\ln n}{\sqrt{n}} \leq \theta \leq \delta. \] Then there exists a restriction $(J, x_\barJ)$ with $\abs{J} \geq (\theta/5k) \sqrt{n}$ such that one of the following two conditions holds: \begin{enumerate} \item \label{eqn:lem32_1} $\eqs{k}^J(A_{x_\barJ}) \geq \delta + \theta$; or, \item \label{eqn:lem32_2} both $\eqs{k}^J(A_{x_\barJ}) \geq \delta - 3\theta^{1/4}\delta^{1/2}$ and $\eqs{k-1}^J(A_{x_\barJ}) \geq \delta - 3\theta^{1/2}$. \end{enumerate} \end{lemma} \begin{proof} Let $r = \lceil (\theta/5k) \sqrt{n} \rceil$. As in Lemma~\ref{lem:distributions}, let $J$ be an $[r,4r]$-random subset of $[n]$ and let $x$ be drawn from $\eqs{k}^{\barJ}$. If $\Ex_{J, x}[\eqs{k}(A_{x})^2] \geq (\delta + \theta)^2$, then there must exist some restriction $(J,x)$ with $\abs{J} \geq r$ and $\eqs{k}^J(A_{x}) \geq \delta + \theta$. In this case, conclusion~\ref{eqn:lem32_1} above holds. We henceforth assume \begin{equation} \label{eqn:moment} \Ex_{J, x}[\eqs{k}(A_{x})^2] < (\delta + \theta)^2 \end{equation} and show that conclusion~\ref{eqn:lem32_2} holds.

\ignore{Let $y$ be drawn from $\eqs{k}^{J}$, and let $z$ be drawn from $\eqs{k-1}^{J}$.} Since $\eqs{k}(A) \geq \delta$, two applications of Lemma~\ref{lem:distributions}.\ref{eqn:distrs-eqs-eqs} yield \begin{align} \Ex_{J, x}[\eqs{k}(A_{x})] & \geq \delta - 4kr/\sqrt{n} \geq \delta - \theta \label{eqn:mean3} \\ \Ex_{J, x}[\eqs{k-1}(A_{x})] & \geq \delta - 4kr/\sqrt{n} \geq \delta - \theta, \label{eqn:mean2} \end{align} where we've used our definition of $r$ (and the hypothesis $\theta \geq (10 k \ln n)/\sqrt{n}$ ensures that $r \geq 2 \ln n$ as required for the lemma.) Combining~\eqref{eqn:moment}, \eqref{eqn:mean3} gives \[ \Varx_{J,x}[\eqs{k}(A_{x})] < (\delta + \theta)^2 - (\delta - \theta)^2 = 4 \theta \delta. \] Thus Chebyshev's inequality implies that except with probability at most $\theta^{1/2}$ over the choice of $(J,x)$, we have that $\eqs{k}(A_x)$ is within $\theta^{-1/4} \cdot (4 \theta \delta)^{1/2} = 2 \theta^{1/4} \delta^{1/2}$ of its expectation. When this happens, \begin{equation} \label{eqn:conc2a} \eqs{k}(A_x) \geq \delta - \theta - 2 \theta^{1/4} \delta^{1/2} \geq \delta - 3 \theta^{1/4} \delta^{1/2} \end{equation} (using $\theta \leq \delta$). On the other hand, from~\eqref{eqn:mean2} we immediately deduce that with probability at least $2\theta^{1/2}$ over the choice of $(J,x)$ we have \begin{equation} \label{eqn:conc2b} \eqs{k-1}(A_{x}) \geq \delta - \theta - 2 \theta^{1/2} \geq \delta - 3\theta^{1/2}. \end{equation} Thus with probability at least $2\theta^{1/2} - \theta^{1/2} > 0$ over the choice of $(J,x)$, we have both~\eqref{eqn:conc2a}, \eqref{eqn:conc2b}, establishing conclusion~\ref{eqn:lem32_2} of the lemma. \end{proof}

We now come to the main theorem in this section, deducing a key step in the proof of DHJ($k$) from the probabilistic DHJ($k-1$) theorem. \begin{theorem} \label{thm:correlate} Let $k \geq 3$ and suppose we have established the probabilistic DHJ($k-1$) theorem. Further suppose $A \subseteq [k]^n$ has $\ens{k}(A) \geq \delta_k$ and $\ens{k-1}(A) \geq \delta_{k-1}$. Finally, assume $n \geq \Pdhj{k-1}{\delta_{k-1}}$. Then either $A$ contains a combinatorial line (of length $k$), or there exists $B \subseteq [k]^n$ with $\ens{k}(B) \geq \delta_k$ and \[ \frac{\ens{k}(A \cap B)}{\ens{k}(B)} \geq \delta_k + \delta_k \cdot \pdhj{k-1}{\delta_{k-1}} \] such that \[ B = B_{1} \cup B_2 \cup \cdots \cup B_{k-1}, \] where for each $a \in [k-1]$, $B_a$ is $ak$-insensitive. \end{theorem} \begin{proof} For $a \in [k-1]$, let $L_a = \{x \in [k]^n : \chg{x}{k}{a} \in A\}$, an $ak$-insensitive set. Let $L = L_1 \cap L_2 \cap \cdots \cap L_{k-1}$. In other words, $L$ is the set line templates over $[k-1]^n$ (possibly degenerate) whose corresponding lines are entirely in $A$. Let $L' \subseteq L$ be the nondegenerate such line templates; i.e., $L' = \{x \in L : \exists j \in [n] \text{ s.t.\ } x_j = k\}$. Note that $\ens{k}(L \setminus L') = 0$.

The key observation is that if $A \cap L' \neq \emptyset$, we have a (nondegenerate) combinatorial line of length~$k$ entirely in $A$. So assume otherwise; hence $A \subseteq \overline{L'} = [k]^n \setminus L'$. Since $\ens{k-1}(A) \geq \delta_{k-1}$ and $n \geq \Pdhj{k-1}{\delta_{k-1}}$, the probabilistic DHJ($k-1$) theorem implies that $\ens{k}(L') = \ens{k}(L) \geq \pdhj{k-1}{\delta_{k-1}}$. It follows that \[ \frac{\ens{k}(A \cap \overline{L})}{\ens{k}(\overline{L})} = \frac{\ens{k}(A \cap \overline{L'})}{\ens{k}(\overline{L'})} = \frac{\eqs{k}(A)}{\eqs{k}(\overline{L'})} \geq \frac{\delta_k}{1 - \pdhj{k-1}{\delta_{k-1}}} \geq \delta_k + \delta_k \cdot \pdhj{k-1}{\delta_{k-1}}. \] The proof is completed by taking $B = \overline{L}$; this has $\ens{k}(B) = \ens{k}(\overline{L'}) \geq \delta_k$ because $\overline{L'} \supseteq A$. \end{proof}

We will prefer to obtain this relative density increment under the uniform distribution, rather than under equal-nondegenerate-slices: \begin{lemma} \label{lem:back-to-uniform} Suppose $A \subseteq B \subseteq [k]^n$ satisfy $\ens{k}(B) = \beta$ and $\ens{k}(A) \geq \delta \cdot \ens{k}(B)$. Let $0 < \eta < 3\delta$ be a parameter, define $r = (\beta \eta /15k) \sqrt{n}$, and assume $2\ln n \leq r \leq n/2$. Then there exists a restriction $(J,x_{\barJ})$ with $\abs{J} \geq r$ under which $\unif_k(B_{x_\barJ}) \geq (\eta/3) \beta$ and $\unif_k(A_{x_\barJ}) \geq (\delta - \eta) \unif_k(B_{x_\barJ})$.\noteryan{And if $B$ was a union of $ij$-insensitive sets before\dots} \end{lemma} \begin{proof} Let $J$ be an $[r,4r]$-random subset of $[n]$, let $x \sim \eqs{k}^{\barJ}$, and let $y \sim \unif_k^J$. From Lemma~\ref{lem:distributions}, the distribution on $(x,y)$ is $(4kr/\sqrt{n})$-close to $\eqs{k}^n$. Further, from~\eqref{prop:degen} the total variation distance between $\eqs{k}^n$ and $\ens{k}^n$ is at most $k(k-1)/n \leq kr/\sqrt{n}$\noteryan{uses $k \leq \sqrt{n}$ or so}. By choice of $r$, the combined distance bound $5kr/\sqrt{n}$ is at most $(\eta/3)\beta$. We therefore have \begin{equation} \label{eqn:AxBx} \Ex_{J,x}[\unif_k(A_x)] \geq \delta \beta - (\eta/3)\beta, \qquad \Ex_{J,x}[\unif_k(B_x)] \leq \beta + (\eta/3)\beta. \end{equation}

Let $H$ be the event that $\unif_k(B_x) \geq (\eta/3)\beta$. On one hand, since $A_x \subseteq B_x$ always we have \[ \Ex_{J,x}[\unif_k(A_x)] \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H] + (\eta/3)\beta. \] On the other hand, we have \[ \Ex_{J,x}[\unif_k(B_x)] \geq \Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H]. \] Combining these two deductions with~\eqref{eqn:AxBx} yields \[ \Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H] \leq \beta(1+\eta/3), \qquad (\delta - 2\eta/3)\beta \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H]. \] Thus \[ \frac{\delta - 2\eta/3}{1+\eta/3} \cdot \Ex_{J,x}[\unif_k(B_x) \mid H] \Pr[H] \leq \Ex_{J,x}[\unif_k(A_x) \mid H] \Pr[H], \] whence \[ 0 \leq \Ex_{J,x}[\unif_k(A_x) - (\delta - \eta)\unif_k(B_x) \mid H], \] where we used $(\delta - 2\eta/3)/(1+\eta/3) \geq \delta - \eta$. Thus there exist some $(J,x)$ where $H$ occurs and also $\unif_k(A_x) \geq (\delta - \eta) \unif_k(B_x)$, completing the proof. \end{proof}

Finally, the set $B$ we get from Theorem~\ref{thm:correlate} is a union of insensitive sets; we would prefer an intersection: \begin{lemma} \label{lem:switch} Suppose $B = B_1 \cup B_2 \cup \cdots B_{k-1} \subseteq [k]^n$, where $B_a$ is $ak$-insensitive. Then $B$ is a disjoint union of $k-1$ sets $C_1, \dots, C_{k-1}$, where $C_b$ is an intersection of $ak$-insensitive sets, $a = 1 \dots b$. \end{lemma} \begin{proof} Take $C_b = B_b \setminus (B_1 \cup \cdots \cup B_{b-1})$. \end{proof}