Correlation.tex
\section{Line-free sets correlate with insensitive set intersections}
\noteryan{I think it would be good to go through the paper and soften explicit constants to $O(\cdot)$'s, for readability. I want to leave them in now just for: a) care; b) because we don't actually know what the final quantitative bounds are yet.}
\begin{lemma} \label{lem:32} Suppose $A \subseteq [k]^n$ satisfies $\eqs{k}(A) \geq \delta$, and let $\gamma$ satisfy \[ 10 k \frac{\ln n}{\sqrt{n}} \leq \gamma \leq \delta. \] Then there exists a restriction $(J, x_\barJ)$ with $|J| \geq (\gamma/5k) \sqrt{n}$ such that one of the following two conditions holds: \begin{enumerate} \item \label{eqn:lem32_1} $\eqs{k}^J(A_{x_\barJ}) \geq \delta + \gamma$; or, \item \label{eqn:lem32_2} both $\eqs{k}^J(A_{x_\barJ}) \geq \delta - 3\gamma^{1/4}\delta^{1/2}$ and $\eqs{k-1}^J(A_{x_\barJ}) \geq \delta - 3\gamma^{1/2}$. \end{enumerate} \end{lemma} \begin{proof} Let $r = \lceil (\gamma/5k) \sqrt{n} \rceil$. As in Lemma~\ref{lem:distributions}, let $J$ be an $[r,4r]$-random subset of $[n]$ and let $x$ be drawn from $\eqs{k}^{\barJ}$. If $\Ex_{J, x}[\eqs{k}(A_{x})^2] \geq (\delta + \gamma)^2$, then there must exist some restriction $(J,x)$ with $|J| \geq r$ and $\eqs{k}^J(A_{x}) \geq \delta + \gamma$. In this case, conclusion~\ref{eqn:lem32_1} above holds. We henceforth assume \begin{equation} \label{eqn:moment} \Ex_{J, x}[\eqs{k}(A_{x})^2] < (\delta + \gamma)^2 \end{equation} and show that conclusion~\ref{eqn:lem32_2} holds.\\
\ignore{Let $y$ be drawn from $\eqs{k}^{J}$, and let $z$ be drawn from $\eqs{k-1}^{J}$.} Since $\eqs{k}(A) \geq \delta$, two applications of Lemma~\ref{lem:distributions}.\ref{eqn:distrs-eqs-eqs} yield \begin{eqnarray} \Ex_{J, x}[\eqs{k}(A_{x})] & \geq & \delta - 4kr/\sqrt{n} \geq \delta - \gamma \label{eqn:mean3} \\ \Ex_{J, x}[\eqs{k-1}(A_{x})] & \geq & \delta - 4kr/\sqrt{n} \geq \delta - \gamma, \label{eqn:mean2} \end{eqnarray} where we've used our definition of $r$ (and the hypothesis $\gamma \geq (10 k \ln n)/\sqrt{n}$ ensures that $r \geq 2 \ln n$ as required for the lemma.) Combining~\eqref{eqn:moment}, \eqref{eqn:mean3} gives \[ \Varx_{J,x}[\eqs{k}(A_{x})] < (\delta + \gamma)^2 - (\delta - \gamma)^2 = 4 \gamma \delta. \] Thus Chebyshev's inequality implies: that except with probability at most $\gamma^{1/2}$ over the choice of $(J,x)$, we have that $\eqs{k}(A_x)$ is within $\gamma^{-1/4} \cdot (4 \gamma \delta)^{1/2} = 2 \gamma^{1/4} \delta^{1/2}$ of its expectation. When this happens, \begin{equation} \label{eqn:conc2a} \eqs{k}(A_x) \geq \delta - \gamma - 2 \gamma^{1/4} \delta^{1/2} \geq \delta - 3 \gamma^{1/4} \delta^{1/2} \end{equation} (using $\gamma \leq \delta$). On the other hand, from~\eqref{eqn:mean2} we immediately deduce that with probability at least $2\gamma^{1/2}$ over the choice of $(J,x)$ we have \begin{equation} \label{eqn:conc2b} \eqs{k-1}(A_{x}) \geq \delta - \gamma - 2 \gamma^{1/2} \geq \delta - 3\gamma^{1/2}. \end{equation} Thus with probability at least $2\gamma^{1/2} - \gamma^{1/2} > 0$ over the choice of $(J,x)$, we have both~\eqref{eqn:conc2a}, \eqref{eqn:conc2b}, establishing conclusion~\ref{eqn:lem32_2} of the lemma. \end{proof}
\noteryan{For DHJ(3), we'll want to use Theorem~\ref{thm:sperner-v} in the following.} \begin{theorem} Suppose $A \subseteq [k]^n$ has $\ens{k}(A) \geq \delta_k$ and $\ens{k-1}(A) \geq \delta_{k-1}$. Assume further that $n \geq n_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})$. Then either $A$ contains a nondegenerate combinatorial line (of length $k$), or there exists $B \subseteq [k]^n$ with \[ \frac{\ens{k}(A \cap B)}{\ens{k}(B)} \geq \delta_k + \delta_k \cdot \eta_{\ref{thm:dhj-dv}}(k,1,\delta_{k-1}) \] such that \[ B = B_{1} \cup B_2 \cup \cdots \cup B_{k-1}, \] where $B_i$ is $ik$-insensitive for each $i \in [k-1]$. \end{theorem} \begin{proof} For $i \in [k-1]$, let $L_i = \{x \in [k]^n : \chg{x}{k}{i} \in A\}$, an $ik$-insensitive set. Let $L = L_1 \cap L_2 \cap \cdots \cap L_{k-1}$. In other words, $L$ is the set of length-$(k-1)$ lines (possibly degenerate) which are in $A$ when we interpret strings in $[k]^n$ as line templates over $[k-1]$. Let $L' \subseteq L$ be the nondegenerate such line templates; i.e., $L' = \{x \in L : \exists j \in [n] \text{ s.t.\ } x_j = k\}$. Note that $\ens{k}(L \setminus L') = 0$.
The key observation is that if $A \cap L' \neq \emptyset$, we have a nondegenerate combinatorial line (of length $k$) entirely in $A$. So assume otherwise; hence $A \subseteq \overline{L'} = [k]^n \setminus L'$. Since $\ens{k-1}(A) \geq \delta_{k-1}$ and $n \geq n_{\ref{thm:dhj-dv}}(k-1,1,\delta_{k-1})$, Theorem~\ref{thm:dhj-dv} implies that $\ens{k}(L') = \ens{k}(L) \geq \eta_{\ref{thm:dhj-dv}}(k,1,\delta_{k-1})$. It follows that \[ \frac{\ens{k}(A \cap \overline{L})}{\ens{k}(\overline{L})} = \frac{\ens{k}(A \cap \overline{L'})}{\ens{k}(\overline{L'})} = \frac{\eqs{k}(A)}{\eqs{k}(\overline{L'})} \geq \frac{\delta_k}{1 - \eta_{\ref{thm:dhj-dv}}(k,1,\delta_{k-1})} \geq \delta_k + \delta_k \cdot \eta_{\ref{thm:dhj-dv}}(k,1,\delta_{k-1}). \] The proof is completed by taking $B = \overline{L}$. \end{proof}
\noteryan{At this point, it's quite tempting to request a notation change so that we don't use $i$ and $j$ both for indices in $[n]$ and symbols in $[k]$\dots}
\begin{lemma} Suppose $B = B_1 \cup B_2 \cup \cdots B_{k-1} \subseteq [k]^n$, where $B_i$ is $ik$-insensitive. Then $B$ is a disjoint union of $k-1$ sets $C_1, \dots, C_{k-1}$, where $C_i$ is an intersection of $jk$-insensitive sets, $j = 1 \dots i$. \end{lemma} \begin{proof} Take $C_i = B_i \setminus (B_1 \cup \cdots \cup B_{i-1})$. \end{proof}