%%% This is the scribe notes template for CS611
%%% There are several comments preceded by CS611: and boxed in %%%%'s
%%% which indicate where macros should be altered to set up the header
%%% for the paper.  Your Notes should go at the comment SCRIBE NOTES GO HERE!.

%%% In the various .sty files that accompany this .tex file you will
%%% find LaTeX macros that make it easier to typeset inference rules
%%% and programming language constructs.  You must make sure that the
%%% file proof.sty is in a path searched by LaTeX when you try to
%%% use this file.  Take a look to see what macros are available--it
%%% will save you time and make the notes look better.  Feel free to
%%% extend the set of macros--post them to the newsgroup and contact
%%% the course staff if you come up with some good ones so they can be
%%% added to the template.

%%% This template includes examples of how to use some of the macros
%%% to give you an idea of how they work.  (Delete the examples when
%%% you do your scribing.)

\documentclass{article}
\usepackage{611-lecture}
\usepackage{amsmath,amssymb,amsthm,amsfonts,latexsym}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% CS611: Please fill in these macros as appropriate:
\lecture{35} %% Lecture number (\lecture is not defined in this file; presumably provided by 611-lecture.sty)
\title{Abstract Interpretation}   %% Title of lecture
%\author{Asif-ul Haque, Xin Zheng}  %% Names of the scribes (commented out, so no \author is set here)
\date{1 December 2006}    %% Date of lecture
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% See 611.sty for a variety of macros that will be helpful in
% typesetting the lecture
%
% quotes can be used for typewriter font
% <> can be used for angle brackets

% --- Symbol override and sum-type syntax --------------------------------
% \emptyset now typesets as \varnothing.
\renewcommand\emptyset\varnothing
% \inL, \inR: the words "inL" / "inR" in sans serif; \ensuremath makes them
% usable in both text and math mode.
\newcommand{\inL}{\ensuremath{\mathsf{inL}}}
\newcommand{\inR}{\ensuremath{\mathsf{inR}}}
% \caseof{e}{b1}{b2}: typesets "case e of b1 | b2" (math mode only).
\newcommand\caseof[3]{\mathsf{case}~{#1}~\mathsf{of}~{#2}~|~{#3}}

% --- Theorem-like environments -------------------------------------------
% lemma, proposition, corollary and definition all use the counter of
% `theorem', so every such statement is numbered in one shared sequence.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}

% --- Miscellaneous math notation -----------------------------------------
% \Z: blackboard-bold Z.
\newcommand{\Z}{\mathbb{Z}}
% \group{x}: x enclosed in auto-sized angle brackets.
\newcommand{\group}[1]{\left\langle{#1}\right\rangle}

%\theoremstyle{definition}
%\newtheorem*{defn}{Definition}
% \nondet: an (almost) empty pair of square brackets, "[ ]".
\newcommand{\nondet}{\left[\!\kern1pt\right]}
% \phi now typesets as \varphi.
\renewcommand\phi\varphi
% \wp{c}{Q}, \wlp{c}{Q}: the words "wp" / "wlp" applied to two arguments.
% \wp is a \renewcommand, i.e. it replaces an existing definition of \wp.
\renewcommand\wp[2]{\mathsf{wp}~{#1}~{#2}}
\newcommand\wlp[2]{\mathsf{wlp}~{#1}~{#2}}
% NOTE(review): the next two lines replace LaTeX's inline-math delimiters
% \( ... \) with an eqnarray* display.  No use of \( ... \) is visible in
% this file; confirm nothing else relies on the standard meaning.
\renewcommand\({\begin{eqnarray*}}
\renewcommand\){\end{eqnarray*}}

% --- Store operations (upright, upper case) ------------------------------
% \LOOKUP{a}{b}, \UPDATE{a}{b}{c}, \MALLOC{a}{b}: operation name followed by
% its arguments, separated by ties.  \EMPTY prints "EMPTY-STORE".
\newcommand\LOOKUP[2]{\mathrm{LOOKUP}~{#1}~{#2}}
\newcommand\UPDATE[3]{\mathrm{UPDATE}~{#1}~{#2}~{#3}}
\newcommand\MALLOC[2]{\mathrm{MALLOC}~{#1}~{#2}}
\newcommand\EMPTY{\mathrm{EMPTY\mbox{-}STORE}}
% \dom{f}: "dom f".  This is a \renewcommand, so \dom must already be
% defined before this point (presumably by 611-lecture.sty).
\renewcommand\dom[1]{\mathrm{dom}\,{#1}}
% \p{a}{b}: the pair <a, b> in angle brackets.
\newcommand\p[2]{\langle{#1},\,{#2}\rangle}
% --- Evaluation contexts --------------------------------------------------
% \bigcdot: a small raised bullet, spaced as a relation.
% \holed{x}: [ x ];  \hole: [ . ];  \context{x}: E[ x ];  \contextHole: E[ . ].
\newcommand\bigcdot{\mathrel{\raisebox{1pt}{$\scriptscriptstyle\bullet$}}}
\newcommand\holed[1]{[\,#1\,]}
\newcommand\hole{\holed\bigcdot}
\newcommand\context[1]{E\kern1pt\holed{#1}}
\newcommand\contextHole{\context\bigcdot}
% \goesto{above}{below}: an arrow with #1 set above it and #2 set below it.
\newcommand\goesto[2]{\underset{#2}{\overset{#1}\to}}
% --- Program syntax (keywords in sans serif, math mode) -------------------
\newcommand\ifthenelse[3]{\mathsf{if\ }#1\mathsf{\ then\ }#2\mathsf{\ else\ }#3}
\newcommand\ifpthenelse[3]{\mathsf{ifp\ }#1\mathsf{\ then\ }#2\mathsf{\ else\ }#3}
\newcommand\whiledo[2]{\mathsf{while\ }#1\mathsf{\ do\ }#2}
\newcommand\letin[3]{\mathsf{let\ }#1 = #2\mathsf{\ in\ }#3}
% \letrec{f1}{e1}{fn}{en}{body}: "letrec f1 = e1 and ... and fn = en in body".
\newcommand\letrec[5]{\mathsf{letrec\ }#1 = #2\mathsf{\ and\ \ldots\ and\ }#3 = #4\mathsf{\ in\ }#5}
\newcommand\letrecone[3]{\mathsf{letrec\ }#1 = #2\mathsf{\ in\ }#3}
% Constants, usable in text or math mode thanks to \ensuremath.
\newcommand\true{\ensuremath{\mathsf{true}}}
\newcommand\false{\ensuremath{\mathsf{false}}}
\newcommand\error{\ensuremath{\mathsf{error}}}
% \pca{P}{c}{Q}: the triple {P} c {Q}.
\newcommand\pca[3]{\{#1\}\kern1pt{#2}\kern1pt\{#3\}}
% \states: uses \Set, which is not defined in this file (presumably from
% 611-lecture.sty).
\newcommand\states{\Set{St}}
% \rtc: a text-size superscript star, e.g. R\rtc.
\newcommand\rtc{^{\textstyle *}}
% \sat and \force are aliases for the turnstiles \vDash and \vdash.
\newcommand\sat\vDash
\newcommand\force\vdash

% \hyphen: a text hyphen that can be used inside math mode.
\newcommand\hyphen{\mbox{-}}
% \lookup{s}{x}, \update{s}{x}{v}: like \LOOKUP / \UPDATE, but built from \nm
% and \mquote, neither of which is defined in this file (presumably from
% 611-lecture.sty).  The second argument is wrapped in \mquote.
\newcommand\lookup[2]{\nm{LOOKUP}~#1~\mquote{#2}}
\newcommand\update[3]{\nm{UPDATE}~#1~\mquote{#2}~#3}
% \SBk{e}: \SB{e} followed by the letter k (\SB is defined outside this file).
\newcommand\SBk[1]{\SB{#1}k}
% \fix{f}: "fix f".
\newcommand\fix[1]{\mathsf{fix}\,{#1}}

% --- Two-column "formula + justification" layout --------------------------
% \reasonwidth: width reserved for the formula column (initially 3cm).
\newlength\reasonwidth
\setlength\reasonwidth{3cm}
% \reasoning{longest}: stores its argument in \longest and sets \reasonwidth
% to the display-style width of that argument plus 5mm.
\newcommand\reasoning[1]{\def\longest{#1}\settowidth{\reasonwidth}{$\displaystyle\longest$}\addtolength{\reasonwidth}{5mm}}
% \reason{formula}{text}: typesets the formula left-aligned in a box of width
% \reasonwidth, immediately followed by the justification text in an \mbox.
\newcommand\reason[2]{\makebox[\reasonwidth][l]{$\displaystyle{#1}$}\mbox{#2}}

% --- Domain-theoretic notation ---------------------------------------------
% \inj{i}: "in" with subscript i.  This is a \renewcommand, so \inj must
% already exist (presumably from 611-lecture.sty).
\renewcommand\inj[1]{\mathsf{in}_{#1}}
% \proj{i}: pi with subscript i.
\newcommand\proj[1]{\pi_{#1}}
% \dlt: alias for \sqsubseteq.
\newcommand{\dlt}{\sqsubseteq}
% \floor{x}: x between floor brackets.
\newcommand\floor[1]{\lfloor{#1}\rfloor}
% \cf{x}: x in square brackets with 1pt of padding on each side.
\newcommand\cf[1]{[\kern1pt{#1}\kern1pt]}
% \SBpr{e}: \SB{e} followed by phi and rho (\SB is defined outside this file).
\newcommand\SBpr[1]{\SB{#1}\,\phi\,\rho}
% \C{e}{a}{b}: "\Tr C" followed by the three arguments; \Cr{e} is
% \C{e}{\Gamma}{\rho}.  \Tr is defined outside this file.  Both are
% \renewcommand's, so \C and \Cr must already exist at this point.
\renewcommand\C[3]{\Tr C{#1}\kern1pt{#2}\kern1pt{#3}}
\renewcommand\Cr[1]{\C{#1}\Gamma\rho}

% --- Typing judgements ------------------------------------------------------
% \judge{G}{e}{t}: typesets "G |- e : t" (the turnstile comes from \force).
\newcommand\judge[3]{{#1}\force{#2}:{#3}}
% \Gjudge{e}{t}: \judge with the environment fixed to \Gamma.
\newcommand\Gjudge[2]{\judge\Gamma{#1}{#2}}%
% \forceUSN: the turnstile with a tiny italic "USN" set below and to the
% right of it; \judgeUSN is \judge using that turnstile.
\newcommand\forceUSN{\mathrel{\makebox[2pt][l]{$\force$}\raisebox{-3pt}[0pt][0pt]{\tiny{\textit{USN}}}}}
\newcommand\judgeUSN[3]{{#1}\forceUSN{#2}:{#3}}
% (\C and \Cr were redefined a second time here, character for character
% identical to their redefinition a few lines earlier in this preamble; the
% redundant copies have been dropped.)
% \Irred{e}: "Irred(e)", usable in text or math mode.
\newcommand\Irred[1]{\ensuremath{\mathrm{Irred}(#1)}}

% \seq{x}{i}{j}: the sequence "x_i, ..., x_j".
\newcommand\seq[3]{#1_{#2},\ldots,#1_{#3}}
% NOTE(review): \subseteqt is not defined anywhere in this file, and the two
% macros below are not used in these notes.  The name looks like it could be
% a search-and-replace artifact (e.g. of \subst) -- confirm before using them.
\newcommand\subseteqttwo[5]{\subseteqt{#1}{#2}{#3,\,#4/#5}}
\newcommand\subseteqtlist[5]{\subseteqt{#1}{#2}{#3,\ldots,#4/#5}}
% \Unify{x}: "Unify(x)".
\newcommand\Unify[1]{\mathrm{Unify}(#1)}
% \fa{x}{body}: "forall x . body" with tight spacing around the dot.
\newcommand\fa[2]{\forall{#1}\kern1pt.\kern1pt{#2}}%
% \Judge{D}{G}{e}{t}: \judge whose environment is the pair "D; G".
\newcommand\Judge[4]{\judge{#1;\,#2}{#3}{#4}}
%\renewcommand\infer[2]{\ensuremath{\dfrac{#2}{#1}}}%
% \hat now produces the wide accent \widehat.
\renewcommand\hat\widehat

% proofof environment: \begin{proofof}{X} ... \end{proofof}.
% Opens a trivlist (after enlarging \topsep by 1mm), indents by \parindent and
% prints "Proof of X." in emphasized type; the end code emits \qed before
% closing the list.
\newenvironment{proofof}[1]{\addtolength{\topsep}{1mm}\begin{trivlist}\item[]\hspace{\parindent}{\em Proof of #1.}}{\qed\end{trivlist}}

% \lamcal: the phrase "lambda-calculus" for use in running text (it enters
% math mode itself for the lambda).
\newcommand\lamcal{$\lambda$-calculus}
% \mut{X}{body}: "mu X. body".
\newcommand\mut[2]{\mu#1.\,#2}
% \star is redefined to be a text-size superscript star (the same expansion
% as \rtc), so it no longer produces the standard \star symbol.
\renewcommand\star{^{\textstyle *}}

% --- Pre-drawn term-tree fragments ------------------------------------------
% \fterm: a 40x20 (in \unitlength) picture holding a node labelled f with two
% diagonal edges of horizontal extent 10, one going down-left and one going
% down-right.
% \gterm: a zero-width, 20-high picture holding a node labelled g with a single
% vertical edge of length 10 going down, drawn with \thicklines.
% NOTE(review): neither box is \usebox'ed anywhere in this file.  Also, the
% line breaks right after the opening brace and after \end{picture} are not
% commented out, so they presumably insert spaces into the saved boxes --
% check the alignment if these boxes are ever used.
\newsavebox{\fterm}
\newsavebox{\gterm}
\savebox{\fterm}(40,20){
\begin{picture}(40,20)(-20,-20)
\put(0,0){\makebox(0,0){$f$}}
\put(-5,-5){\line(-1,-1){10}}
\put(5,-5){\line(1,-1){10}}
\end{picture}
}

\savebox{\gterm}(0,20){
\begin{picture}(0,20)(0,-20)
\thicklines
\put(0,0){\makebox(0,0){$g$}}
\put(0,-5){\line(0,-1){10}}
\end{picture}
}

% --- Plain-TeX style definitions --------------------------------------------
% These use \def, which silently overwrites any earlier definition of the
% same name (unlike \newcommand).
% \dead: an empty pair of parentheses, "( )".
\def\dead{(\;)}
% \eps: shorthand for \epsilon.
\def\eps{\epsilon}
% \edge{a}{l}{b}: "a --l--> b", with label l stacked above the arrow.
\def\edge#1#2#3{#1\stackrel{#2}{\to}#3}
% \trans{l}: just the arrow with label l stacked above it.
\def\trans#1{\stackrel{#1}{\to}}
% \sem{x}: x between double square brackets built from two overlapping
% single brackets.
\def\sem#1{\mbox{$[\hspace{-0.15em}[$}#1\mbox{$]\hspace{-0.15em}]$}}
%\def\TS{{\mathcal{T}}}
%\def\FS{{\mathcal{F}}}
%\def\RS{{\mathcal{R}}}
% \DA{x}: attaches x as a subscript to whatever precedes the macro.
\newcommand\DA[1]{_{#1}}
% \lAC, \lFIN: the relation "<=" with subscript AC / FIN in roman type.
\def\lAC{\leq_{\rm AC}}
\def\lFIN{\leq_{\rm FIN}}

\begin{document}
\maketitle

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% CS611: SCRIBE NOTES GO HERE!
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Paragraph layout for the notes: no first-line indentation, and 1em of
% vertical space between paragraphs.
\parindent=0pt%
\parskip=1em%

% Single-letter shorthands: \MM -> M, \NN -> N, \LL -> calligraphic L.
\newcommand\MM{M}
\newcommand\NN{N}
\newcommand\LL{\mathcal{L}}

\section{Introduction to Abstract Interpretation}

At this point in the course, we have looked at two aspects of programming languages: dynamic semantics 
and static program analysis. 
\begin{enumerate}
\item \textit{Dynamic Semantics} characterizes the dynamic execution of a program. Examples include operational 
and denotational semantics. 
\item \textit{Static Program Analysis} is a general term that refers to the collection of techniques and methods 
that allow us to reason statically at compile time about the program and extract information that 
is guaranteed to hold during all executions. This information can then be used for optimization and 
correctness. 
\end{enumerate}

\noindent
There are two static analysis techniques which are most prominent: 
\begin{enumerate}
\item \textit{Type Systems}: There are two aspects of type systems to keep in mind as we move 
on to abstract interpretation: 
\begin{enumerate}
\item In type systems, programmers typically annotate the program with type information. We can regard these 
type annotations as global invariants provided by the user. 
\item Types are flow insensitive. A variable or expression has the same type regardless of where it appears 
in the flow of the program. This is because the type annotations represent global properties. 
\end{enumerate}
\item \textit{Abstract Interpretation}: We can think of abstract interpretation as being similar to dataflow analysis, but
also providing a framework that allows us to formally prove the correctness of an analysis.

The idea behind abstract interpretation (and dataflow analysis) is as follows. The execution of a 
program computes a piece of concrete information. The goal of abstract interpretation is to statically 
compute a piece of abstract information that characterizes the concrete information in all possible 
executions of the program. 

A good example of abstract interpretation is Sign Analysis, whose goal is to statically compute the 
possible signs of each variable at each program point. In this example, the concrete information 
represents the values of variables during program execution, i.e., the state. The abstract information
models just the sign of variables. 

In contrast to type systems, abstract interpretation has the following features: 
\begin{enumerate}
\item It is flow sensitive. It computes abstract information at each point in the program. The information at 
different points in the program may be different. 
\item There are no program annotations. The analysis must compute the abstract info by itself, because 
it is unreasonable to ask the user to provide annotations at every point of every program. In 
particular, the compiler must find out the loop invariants; it is not easy to extract this information 
automatically. 
\end{enumerate}
\end{enumerate}

How does the compiler statically figure out the abstract information that we are interested in? It ``executes'' 
the program in the abstract domain, hence the name Abstract Interpretation. Two differences compared to 
the concrete execution are the following: 
\begin{enumerate}
\item The analysis must follow all possible paths through the program (dynamic execution only follows one path).
\item The static analysis must terminate, even if the program doesn't. We expect the compiling process, 
including static analysis, to terminate even if our program has an infinite loop. 
\end{enumerate}

To summarize this introduction to abstract interpretation, we make the following comments. Type systems 
are a lightweight form of static analysis, which give some form of correctness (no errors) without much 
work. Abstract interpretation, on the other hand, is a heavyweight form of static analysis, giving detailed 
information at each point in the program. As a result, it provides a stronger sense of correctness and also 
enables optimizations, but at a larger cost. 

\section{Lattices}
We will formalize both the concrete and the abstract domain using lattices. A complete lattice is a pair $(L, \sqsubseteq)$ such that: 
\begin{itemize}
\item $\sqsubseteq$ is a partial order 
\item Any subset $X \subseteq L$ has a least upper bound (lub) $\sqcup X$ and a greatest lower bound (glb) $\sqcap X$
\end{itemize}

A complete lattice is different from a cpo, in that a cpo only requires a lub for $\omega$-chains.

\subsection{Notation for Lattices}
Regarded as binary operators, the lub and the glb are also referred to as join and meet. In this case, we use 
the infix notation: 
\begin{itemize}
\item Join of two elements: $x \sqcup y$
\item Meet of two elements: $x \sqcap y$
\end{itemize}

From its definition, a complete lattice is guaranteed to have a top and a bottom element: 
\begin{itemize}
\item Top element: $\top = \sqcup L$
\item Bottom element: $\bot = \sqcap L$
\end{itemize}

In the literature, lattices are often denoted as tuples to emphasize all these operators
and values: $(L, \sqsubseteq, \sqcup, \sqcap, \top, \bot)$.

The intuition behind the partial ordering in the abstract lattice is that elements higher in the lattice are 
more precise. The most precise piece of abstract information is $\top$ and the least precise is $\bot$. Note that some 
books put it the other way around. 

\subsection{Properties about Operators}
The operators $\sqcap$, $\sqcup$ and the partial order $\sqsubseteq$ satisfy the following properties: 
\begin{itemize}
\item $x \sqcap y = x \mbox{ iff } x \sqsubseteq y$
\item $x \sqcup y = y \mbox{ iff } x \sqsubseteq y$
\item $\sqcap$ and $\sqcup$ are idempotent, commutative and associative. 
\end{itemize}

\subsection{Properties about Lattices}
A property that not all lattices have, but which will be important in providing a guarantee that the static 
analysis will terminate is the descending chain condition (DCC). A lattice satisfies DCC if any descending
chain stabilizes:
\[
\{x_i\}_{i \geq 0} \mbox{ s.th. } (\forall i.~x_{i+1} \sqsubseteq x_i) ~~\Rightarrow~~ \exists n_0 \mbox{ s.th. } \forall n \geq n_0.~x_n = x_{n_0}
\]
We can also define the height of a lattice as the maximum number of distinct elements in a chain. A finite 
height implies the descending chain condition (DCC). 

\section{Formal Framework}
We are going to study abstract interpretation using an imperative language, in fact IMP. We will extend its 
syntax with labels to model program points: 
\[
c~::=~[ \SKIP ]^l ~|~ [x~:=~a]^l ~|~ c_0; c_1 ~|~ \cond{[b]^l}{c_0}{c_1} ~|~ \while{[b]^l}{c_0}
\]
where the labels $l \in Labels$ model program points. There is a special label $l_{init}$, representing the entry point 
in the program. 

We formalize the result of abstract interpretation using a function Result which assigns two elements in the 
abstract lattice to each program point, before and after the point: 
\[
Result : Labels \rightarrow (L_a \times L_a)
\]

We denote by $Result(\bullet l)$ the result right before the program point represented by $l$; and by $Result(l \bullet)$ the 
result right after $l$. 

We next want to determine how the abstract information changes when a command is executed; in other 
words, how to ``execute'' a command in the abstract domain. For this, we introduce a transfer function for 
each command c, to map a piece of abstract information into another piece of abstract information: 
\[
\SB{c}: L_a \rightarrow L_a
\]

We now formulate the problem as a constraint problem. To compute the $Result$ function, we build the 
following constraint system (the constraints in the system are also known as dataflow equations):
\begin{eqnarray*}
Result(l \bullet) &=& \SB{c} Result(\bullet l) \\
Result(\bullet l) &=& \sqcap \{Result(l' \bullet) ~|~ l' \in pred(l) \}, \mbox{ where } l \neq l_{init} \\
Result(\bullet l_{init}) &=& i_0
\end{eqnarray*}
where $pred(l)$ is the set of immediate predecessors of $l$. Hence, $pred$ describes the flow of control in the program
and can be computed from the nested structure of sequencing commands, if commands, and while loops. 
Also, $i_0$ is the boundary condition -- the abstract information at the entry point in the program. 

We next describe informally the rules in the system. First, we have a constraint to compute the result after 
execution of $c$. Next, in order to combine two branches of execution, we need to use the meet operator to 
go down in the lattice, to a conservative, less precise result. Thus we must add the second constraint rule. 
However, this only holds if $l$ has predecessors, when $l \neq l_{init}$. So finally, we must add an initial value to our 
labels, which give a starting point in order to solve the constraint system. 

In order to find the result, we must solve this system. The result is the greatest fixed point of this system 
(the most precise solution). Can the system be recursive? YES! For programs with while loops, the system 
is recursive. We can solve the system using an iterative algorithm, which repeatedly inspects each rule in 
the system and updates the information in $Result$ accordingly. 

To build an abstract interpretation analysis algorithm, one must define the following: the abstract lattice 
domain, the transfer functions for each command, and the initial dataflow information. To ensure the 
termination of the iterative algorithm that solves the constraints, the following conditions must be satisfied: 
\begin{itemize}
\item The lattice must satisfy the DCC condition 
\item The transfer functions $\SB{c}$ are monotonic for all commands $c$
\end{itemize}

The intuition behind these requirements is that we will only go down in the lattice by monotonicity, and 
then will terminate due to the DCC. 

\section{Example: Sign Analysis}
In this example, we will take a program and statically compute the signs of each variable in the program, at 
each point. We build the abstraction on top of the set of possible signs: 
\[
Sign = \{-, 0, +\} 
\]

We then define the lattice whose elements are sets of signs (subsets of $Sign$), with the partial order given by reverse set inclusion:
\[
L_{a_1} = (2^{Sign}, \supseteq)
\]

This models the possible signs for a single variable, at a given point. The abstract information for all variables is given by the
lattice of functions $L_a = Var \rightarrow L_{a_1}$.

Now that we have defined the lattice, we need to define how the program executes in the abstract domain.
For this, we must define the transfer function $\SB{c} : L_a \rightarrow L_a$. We just need to define this function for $\SKIP$, 
assignments, and test conditions. The other commands (sequences, $\IF$, $\WHILE$) are just control flow constructs 
and their effect is captured in the pred function. The transfer functions are: 
\begin{eqnarray*}
\SB{b}s &=& s, \mbox{ where } s \in L_a \\
\SB{x := a}s &=& s[x \rightarrow sign(a, s)] \\
&& \\
sign(n, s) &=& \left\{ \begin{array}{l} \{+\} \mbox{ if } n > 0 \\
                                             \{0\} \mbox{ if } n = 0 \\
                                             \{-\} \mbox{ if } n < 0 \end{array}\right. \\
sign(x, s) &=& s(x), \mbox{ where } x \mbox{ is a variable and } s(x) \mbox{ is the set of signs for } x  \\
sign(a_1 \oplus a_2, s) &=& sign(a_1 , s) \oplus_{as} sign(a_2 , s) \\
s_1 \oplus_{as} s_2 &=& \bigcup\limits_{x \in s_1, y \in s_2} x \oplus_a y
\end{eqnarray*}

We can define the $\oplus_a$ functions individually in tables: 

\begin{tabular}[t]{c|ccc}
$+_a$ & $-$ & $0$ & $+$ \\
\hline
$-$ & $\{-\}$ & $\{-\}$ & $\{-, 0, +\}$ \\
$0$ & $\{-\}$ & $\{0\}$ & $\{+\}$ \\
$+$ & $\{-, 0, +\}$ & $\{+\}$ & $\{+\}$
\end{tabular}

By using the functions defined above, we can statically compute the signs that each variable may have 
throughout the program.

\section{Correctness}
\subsection{Concrete vs. Abstract Domains}
We can define an abstraction function $\alpha : L_c \rightarrow L_a$, which takes concrete values and returns their signs.
\begin{eqnarray*}
\alpha(S_c) &=& \lambda x \in Var. \{sgn(n) ~|~ n \in S_c(x)\} \\
sgn(n) &=& \left\{\begin{array}{l} + \mbox{ if } n > 0 \\
                                          0 \mbox{ if } n = 0 \\
                                          - \mbox{ if } n < 0 \end{array}\right.
\end{eqnarray*}

We can also define a concretization function $\gamma : L_a \rightarrow L_c$, which returns the possible concrete values
given our abstract (sign) value. We define the function as
\[
\gamma(S_a) = \lambda x \in Var.\{n \in \mathbb{Z} ~|~ sgn(n) \in S_a(x) \}
\]

These two functions have the following properties:
\begin{itemize}
\item $\alpha, \gamma$ are both monotonic
\item $\forall x \in L_c, ~x \sqsupseteq \gamma(\alpha(x))$
\item[ ] $\forall y \in L_a, ~y \sqsubseteq \alpha(\gamma(y))$
\end{itemize}

\subsection{Soundness (Correctness)}
For soundness, we want to verify that the changes in the abstract information correspond to changes in the 
operational/denotational semantics.  If state $\sigma' = \mathcal{C}\SB{c} \sigma$, then we want to verify that 
the abstract information $i_a' = \SB{c} i_a$ does not contradict the state $\sigma'$.

To do this, first we define a function $\beta(\sigma) = \alpha(\lambda x \in Var. \{\sigma(x)\})$.  For soundness,
we must show:
\[
\forall c,\sigma, i_a ~~ \beta(\sigma) \sqsupseteq i_a \Rightarrow \beta(\mathcal{C}\SB{c}\sigma) \sqsupseteq \SB{c}i_a
\]

Alternatively, we could also have:
\[
\beta(\mathcal{C}\SB{c}\sigma) \sqsupseteq \SB{c}(\beta(\sigma))
\]

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}

