\documentclass[12pt]{article}
\usepackage{url}
\usepackage{fullpage}
\usepackage{graphicx}
\usepackage{amsmath,amssymb}
\newcommand{\eps}{\varepsilon}
\newcommand{\R}{\mathbb{R}}
\newcommand{\inprod}[1]{\langle #1 \rangle}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\begin{document}
\thispagestyle{empty}
\begin{center}
{\Large \textsc{CS 226 / 6.889 Sketching Algorithms for Big Data} --- Fall 2017}
\bigskip
{\Large \textsc{Problem Set 2}}
\smallskip
Due: 11:59pm, Monday, October 23rd
Submit to: \url{sketchingbigdata-f17-assignments@seas.harvard.edu}
\bigskip
{\footnotesize See homework policy at \url{http://www.sketchingbigdata.org/fall17/syllabus/}}
\end{center}
\paragraph{Problem 1: Point queries in insertions-only streams.} (10 points)
Recall the algorithm for finding heavy hitters in insertions-only stream mentioned in Lecture 7. This algorithm, due to Misra and Gries, proceeds as follows. Let $C$ be a parameter, defining the number of counters maintained by the algorithm. The algorithm maintains a set $T$ of up to $C$ elements, and for every $i \in T$ maintains a counter $c_i$. Initially $T=\emptyset$. Then, for each stream element $i$:
\begin{itemize}
\item If $i \in T$, then $c_i=c_i+1$
\item Else
\begin{itemize}
\item If $|T|1/\sqrt n$ since otherwise the $1/\eps^2$ term in $m$ is already at least $n$, and there's of course already an simple-to-describe $n\times n$ $(0,k)$-RIP matrix (the $n\times n$ identity matrix).
\end{itemize}
\paragraph{Problem 3: Chaining with limited independence.} Recall in class the toy example of a random walk on a line: at time $t$ we are at position $\inprod{\sigma, x^{(t)}}$, where $x^{(t)} = \sum_{i=1}^t e_i$ and $\sigma\in\{-1,-1\}^n$ is chosen uniformly at random. We defined $v^{(t)} = x^{(t)}/\|x^{(n)}\|_2$ and showed via Dudley's inequality that
\begin{equation}
\E \sup_{1\le t\le n} \inprod{\sigma, v^{(t)}} = O(1) . \label{eqn:walk}
\end{equation}
Our goal is to show that this holds even if the $\sigma_i$ are not fully independent, but rather simply $p$-wise independent for some $p = O(1)$.
\begin{itemize}
\item[(a)] (2 points) Show that for integer $p>1$, $\E_\sigma \inprod{\sigma, x}^p$ is completely determined by $p$-wise independence.
\item[(b)] (2 points) In ``bound 1'' of lecture (the ``union bound'' argument), we showed that if $\sigma$ has independent entries and $T$ is an arbitrary set of vectors in $\R^n$, then $r(T) := \E \sup_{x\in T} \inprod{\sigma, x} \lesssim (\lg^{1/2} |T|)\cdot \rho_{\ell_2}(T)$. Here $\rho_X(T)$ denotes $\sup_{x\in T}\|x\|_X$. It turns out that an equivalent form of Khintchine's inequality is that for all $p\ge 1$, $\|\inprod{\sigma, x}\|_p \le \sqrt{p} \cdot \|x\|_2$ (you may use this fact without proof). Show that as long as the $\sigma_i$ are $p$-wise independent for some $p\ge 2$, then $r(T) \lesssim \sqrt{p}\cdot |T|^{1/p}\cdot \rho_{\ell_2}(T)$ (note we can then recover the $\lg^{1/2}|T|\cdot \rho_{\ell_2}(T)$ bound under full independence by setting $p = \Theta(\lg |T|)$).
\item[(c)] (6 points) Conclude via a modified Dudley chaining argument that Eqn.~\eqref{eqn:walk} holds even if the $\sigma_i$ are only $p$-wise independent for some constant $p = O(1)$. (We know how to show it for $p=4$ --- how small of a $p$ can you get to work?)
\item[(d)] (4 points) Show that if $(x^{(t)})_t$ is not simply the sequence with $x^{(t)} = \sum_{i=1}^t e_i$, but rather is an arbitrary sequence of vectors generated by an insertion-only stream, then Eqn.~\eqref{eqn:walk} still holds.
\end{itemize}
\paragraph{Problem 4: Sparse Johnson-Lindenstrauss analysis is tight.} (8 points)
Show that the analysis in class of the Sparse Johnson-Lindenstrauss Transform is tight. Specifically, consider the distribution over $\Pi\in\R^{m\times n}$ in which the columns are independent, and in each column there are exactly $s$ non-zero entries chosen at uniformly random locations without replacement, and each one is independently $\pm 1/\sqrt{s}$. Show that for all $\eps,\delta \in (0, 1/2)$ and constant $C>0$ there exists constants $c, n_0 >0$ such that for such $\Pi$ with $m \le C \eps^{-2}\log(1/\delta)$ and $s\le c\eps^{-1}\log(1/\delta)$, for all $n>n_0$ there exists $x\in\R^n$ such that
$$
\Pr_\Pi(| \|\Pi x\|_2^2 - 1| > \eps) > \delta .
$$
That is, there is a significant probability of failure unless $s$ is large (larger than $c\eps^{-1}\log(1/\delta)$). \textbf{Hint:} consider $x$ having its first $t$ coordinates equaling $1/\sqrt t$ and the rest $0$ and choose $t$ appropriately.
\medskip
\noindent\textbf{Bonus:} (3 points --- we do not know the answer!) can considering $x$ being a random vector on the sphere also give a tight lower bound?
\paragraph{Problem 5:} (1 point) How much time did you spend on this problem set? If you can remember the breakdown, please report this per problem. (sum of time spent solving problem and typing up your solution)
\end{document}