%%% This is the scribe notes template for CS611
%%% There are several comments preceded by CS611: and boxed in %%%%'s 
%%% which indicate where macros should be altered to set up the header
%%% for the paper.  Your Notes should go at the comment SCRIBE NOTES GO HERE!.

%%% In the various .sty files that accompany this .tex file you will    
%%% find LaTeX macros that make it easier to typeset inference rules    
%%% and programming language constructs.  You must make sure that the   
%%% file proof.sty is in a path searched by LaTeX when you try to       
%%% use this file.  Take a look to see what macros are available--it    
%%% will save you time and make the notes look better.  Feel free to    
%%% extend the set of macros--post them to the newsgroup and contact    
%%% the course staff if you come up with some good ones so they can be  
%%% added to the template.                                              

%%% This template includes examples of how to use some of the macros
%%% to give you an idea of how they work.  (Delete the examples when
%%% you do your scribing.)

\documentclass{article} 
\usepackage{611-lecture}
\usepackage{amsmath,amssymb,amsthm,amsfonts}

% Notation overrides and sum-type syntax.
% \emptyset now prints the amssymb \varnothing glyph.
\renewcommand{\emptyset}{\varnothing}
% Left/right injection keywords in sans-serif; \ensuremath makes them usable
% in both text and math mode.
\newcommand{\inL}{\ensuremath{\mathsf{inL}}}
\newcommand{\inR}{\ensuremath{\mathsf{inR}}}
% \caseof{e}{b1}{b2}: "case e of b1 | b2" (math mode only).
\newcommand{\caseof}[3]{\mathsf{case}~{#1}~\mathsf{of}~{#2}~|~{#3}}

% Theorem-like environments.  lemma, proposition, corollary and definition
% all share the counter of the theorem environment, so they are numbered in
% a single sequence.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}

% \Z: blackboard-bold Z.
% \group{g}: g enclosed in auto-sized angle brackets.
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\group}[1]{\left\langle{#1}\right\rangle}

% Disabled: an unnumbered "Definition" environment in the definition style.
%\theoremstyle{definition}
%\newtheorem*{defn}{Definition}
% \nondet: an empty pair of auto-sized square brackets, tightened with a
% negative thin space and then opened by a 1pt kern.
\newcommand{\nondet}{\left[\!\kern1pt\right]}
% \phi prints as \varphi throughout.
\renewcommand\phi\varphi
% \wp{a}{b} and \wlp{a}{b}: sans-serif operator name followed by its two
% arguments, separated by ties.  \wp replaces an existing definition.
\renewcommand\wp[2]{\mathsf{wp}~{#1}~{#2}}
\newcommand\wlp[2]{\mathsf{wlp}~{#1}~{#2}}
% NOTE: \( and \) no longer delimit inline math in this file.  They open and
% close an unnumbered eqnarray*, so their contents must use the
% `lhs &=& rhs` column layout; inline math has to be written with $...$.
\renewcommand\({\begin{eqnarray*}}
\renewcommand\){\end{eqnarray*}}

% Store operations: an upright operator name followed by tie-separated arguments.
\newcommand{\LOOKUP}[2]{\mathrm{LOOKUP}~{#1}~{#2}}
\newcommand{\UPDATE}[3]{\mathrm{UPDATE}~{#1}~{#2}~{#3}}
\newcommand{\MALLOC}[2]{\mathrm{MALLOC}~{#1}~{#2}}
\newcommand{\EMPTY}{\mathrm{EMPTY\mbox{-}STORE}}
\renewcommand{\dom}[1]{\mathrm{dom}\,{#1}}
% \p{a}{b}: the pair <a, b> in angle brackets.
\newcommand{\p}[2]{\langle{#1},\,{#2}\rangle}
% \bigcdot: a raised script-script-size bullet, spaced as a relation.
\newcommand{\bigcdot}{\mathrel{\raisebox{1pt}{$\scriptscriptstyle\bullet$}}}
% \holed{x}: [ x ];  \hole: [ . ];  \context{x}: E[ x ];  \contextHole: E[ . ].
\newcommand{\holed}[1]{[\,#1\,]}
\newcommand{\hole}{\holed\bigcdot}
\newcommand{\context}[1]{E\kern1pt\holed{#1}}
\newcommand{\contextHole}{\context\bigcdot}
% \goesto{a}{b}: an arrow with a set above it and b set below it.
\newcommand{\goesto}[2]{\underset{#2}{\overset{#1}\to}}
% Programming-language constructs with sans-serif keywords (math mode).
\newcommand{\ifthenelse}[3]{\mathsf{if\ }#1\mathsf{\ then\ }#2\mathsf{\ else\ }#3}
\newcommand{\ifpthenelse}[3]{\mathsf{ifp\ }#1\mathsf{\ then\ }#2\mathsf{\ else\ }#3}
\newcommand{\whiledo}[2]{\mathsf{while\ }#1\mathsf{\ do\ }#2}
% \letin{x}{e1}{e2}: "let x = e1 in e2".
\newcommand{\letin}[3]{\mathsf{let\ }#1 = #2\mathsf{\ in\ }#3}
% \letrec{x1}{e1}{xn}{en}{e}: "letrec x1 = e1 and ... and xn = en in e".
\newcommand{\letrec}[5]{\mathsf{letrec\ }#1 = #2\mathsf{\ and\ \ldots\ and\ }#3 = #4\mathsf{\ in\ }#5}
\newcommand{\letrecone}[3]{\mathsf{letrec\ }#1 = #2\mathsf{\ in\ }#3}
% Sans-serif constants, usable in both text and math mode.
\newcommand{\true}{\ensuremath{\mathsf{true}}}
\newcommand{\false}{\ensuremath{\mathsf{false}}}
\newcommand{\error}{\ensuremath{\mathsf{error}}}
% \pca{P}{c}{Q}: the triple {P} c {Q} with 1pt kerns around c.
\newcommand{\pca}[3]{\{#1\}\kern1pt{#2}\kern1pt\{#3\}}
\newcommand{\states}{\Set{St}}
% \rtc: a text-style superscript star.
\newcommand{\rtc}{^{\textstyle *}}
% Relation symbols: \sat is \vDash, \force is \vdash.
\newcommand{\sat}{\vDash}
\newcommand{\force}{\vdash}

% \hyphen: a text hyphen boxed so that it can be used inside math.
\newcommand{\hyphen}{\mbox{-}}
% \lookup / \update: operator name via \nm, with the second argument passed
% through \mquote.
\newcommand{\lookup}[2]{\nm{LOOKUP}~#1~\mquote{#2}}
\newcommand{\update}[3]{\nm{UPDATE}~#1~\mquote{#2}~#3}
% \SBk{x}: \SB{x} immediately followed by k.
\newcommand{\SBk}[1]{\SB{#1}k}
% \fix{f}: sans-serif "fix", a thin space, then f.
\newcommand{\fix}[1]{\mathsf{fix}\,{#1}}

% Helpers for laying out a formula next to a textual justification.
% \reasonwidth: width of the formula column; starts at 3cm.
\newlength\reasonwidth
\setlength\reasonwidth{3cm}
% \reasoning{f}: stores f in \longest, sets \reasonwidth to the width of f
% typeset in display style, then adds 5mm of padding.  \longest is created
% with \def, so any earlier meaning of that name is overwritten silently.
\newcommand\reasoning[1]{\def\longest{#1}\settowidth{\reasonwidth}{$\displaystyle\longest$}\addtolength{\reasonwidth}{5mm}}
% \reason{f}{text}: f in display style, left-aligned in a box exactly
% \reasonwidth wide, followed by text set in an \mbox.
\newcommand\reason[2]{\makebox[\reasonwidth][l]{$\displaystyle{#1}$}\mbox{#2}}

% \inj{i}: sans-serif "in" with subscript i (replaces an existing \inj).
\renewcommand{\inj}[1]{\mathsf{in}_{#1}}
% \proj{i}: pi with subscript i.
\newcommand{\proj}[1]{\pi_{#1}}
% \dlt: the square-subset-or-equal relation symbol.
\newcommand{\dlt}{\sqsubseteq}
\newcommand{\floor}[1]{\lfloor{#1}\rfloor}
% \cf{x}: x in square brackets with 1pt kerns inside the brackets.
\newcommand{\cf}[1]{[\kern1pt{#1}\kern1pt]}
% \SBpr{x}: \SB{x} followed by \phi and \rho, separated by thin spaces.
\newcommand{\SBpr}[1]{\SB{#1}\,\phi\,\rho}
% \C{e}{a}{b}: \Tr C{e} followed by a and b, separated by 1pt kerns.
\renewcommand{\C}[3]{\Tr C{#1}\kern1pt{#2}\kern1pt{#3}}
% \Cr{e}: \C with its last two arguments fixed to \Gamma and \rho.
\renewcommand{\Cr}[1]{\C{#1}\Gamma\rho}

% Typing judgments.
% \judge{G}{e}{t}: "G |- e : t", using \force as the turnstile.
\newcommand{\judge}[3]{{#1}\force{#2}:{#3}}
% \Gjudge{e}{t}: \judge with the environment fixed to \Gamma.
\newcommand{\Gjudge}[2]{\judge\Gamma{#1}{#2}}%
% \forceUSN: the turnstile with a tiny italic "USN" lowered 3pt beside it.
% The turnstile sits in a 2pt-wide box, and the whole symbol is spaced as a
% relation.
\newcommand{\forceUSN}{\mathrel{\makebox[2pt][l]{$\force$}\raisebox{-3pt}[0pt][0pt]{\tiny{\textit{USN}}}}}
% \judgeUSN{G}{e}{t}: same layout as \judge, using \forceUSN.
\newcommand{\judgeUSN}[3]{{#1}\forceUSN{#2}:{#3}}
% \C and \Cr are already redefined earlier in the preamble with exactly these
% bodies, so they are not redefined a second time here.
% \Irred{x}: upright "Irred(x)", usable in both text and math mode.
\newcommand{\Irred}[1]{\ensuremath{\mathrm{Irred}(#1)}}

% \seq{s}{1}{n}: the list s_1, ..., s_n.
\newcommand{\seq}[3]{#1_{#2},\ldots,#1_{#3}}
% \substtwo{e}{t1}{x1}{t2}{x2}: a two-binding substitution on e, built by
% handing "x1, t2/x2" to \subst as its last argument.
\newcommand{\substtwo}[5]{\subst{#1}{#2}{#3,\,#4/#5}}
% \substlist: as \substtwo, with an ellipsis between the first and last bindings.
\newcommand{\substlist}[5]{\subst{#1}{#2}{#3,\ldots,#4/#5}}
% \Unify{E}: upright "Unify(E)".
\newcommand{\Unify}[1]{\mathrm{Unify}(#1)}

% proofof environment: \begin{proofof}{Theorem 1} ... \end{proofof}
% Sets an indented, emphasized "Proof of <arg>." lead-in inside a trivlist
% whose \topsep is enlarged by 1mm, and closes with \qed.
% Every line ends in % so that the line breaks add no spaces.
\newenvironment{proofof}[1]{%
  \addtolength{\topsep}{1mm}%
  \begin{trivlist}%
  \item[]\hspace{\parindent}{\em Proof of #1.}%
}{%
  \qed\end{trivlist}%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% CS611: Please fill in these macros as appropriate:
\lecture{27}                  %% Lecture number
\title{Type Inference and Unification}   %% Title of lecture
%\author{Jonathan Kaldor, Ian Kash}       %% name of scribes
\date{6 November 2006}     %% Date of lecture, e.g., 1 January 2001
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% See 611.sty for a variety of macros that will be helpful in
% typesetting the lecture. Here are a few of particular interest:
%
% ``x"	 	x in keyword font (e.g., "if", "#t")
% _x_	 	x in italics
% \nm{n}   	n in slanted font (used for abbreviations)
% <e> 	 	e in angle brackets
% \lt 	 	less-than sign
% \gt 	 	greater-than sign
% \SB{x}	x in semantic brackets
% \Tr x{y} 	x[[y]] with x in calligraphic font
%          	(if x is more than a single character, use \Tr{x}{y})

\begin{document}
\maketitle

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% CS611: SCRIBE NOTES GO HERE!
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Type Inference}

_Type inference_ refers to the process of determining the appropriate types for expressions based on how they are used.  For example, ML knows that in the expression "(f 3)", "f" must be a function (because it is applied to something, not because its name is f!) and that it takes an "int" as input.  It knows nothing about the output type.  Therefore the type inference mechanism of ML would assign "f" the type "int -\gt 'a".
\begin{code}
- fn f =\gt (f 3);
val it = fn : (int -\gt 'a) -\gt 'a
\end{code}

There may be many different occurrences of a symbol in an expression, all leading to different typing constraints, and these constraints must have a common solution, otherwise the expression cannot be typed.
\begin{code}
- fn f =\gt f(f 3);
val it = fn : (int -\gt int) -\gt int
- fn f =\gt f(f "hi");
val it = fn : (string -\gt string) -\gt string
- fn f =\gt f(f 3,f 4);
stdIn:19.9-19.19 Error: operator and operand don't agree [literal]
  operator domain: int
  operand:         'Z * 'Z
  in expression:
    f (f 3,f 4)
\end{code}
In the first example, how does it know that the output type of "f" is "int"? Because the input type of "f" is "int", and the output of "f" is fed into "f" again, so the output type of "f" has to be the same as the input type of "f".

If a program is well-typed, then a type can be inferred.  For example, consider the program
\begin{tabbing}
\hspace{1em}\=\hspace{1em}\=\hspace{1em}\=\hspace{1em}\=\hspace{1em}\=\hspace{1em}\=\kill
$\letin{\mathit{square}}{\lam z{z*z}}{}$\\
\>$\lam f{\lam x{\lam y{}}}$\\
\>\>$"if"~(f~x~y)$\\
\>\>\>$"then"~(f~(\mathit{square}~x)~y)$\\
\>\>\>$"else"~(f~x~(f~x~y))$
\end{tabbing}
We are applying the multiplication operator to $z$, therefore we must have $z:"int"$, thus $\lam z{z*z}:"int"\to"int"$ and $\mathit{square}:"int"\to"int"$.  We know that the type of $f$ must be something of the form $f:\sigma\to\tau\to"bool"$ for some $\sigma$ and $\tau$, since it is applied to two arguments and its return value is used in a conditional test.  Since $f$ is applied to the value of $\mathit{square}~x$ as its first argument, it must be that $\sigma="int"$.  
Since $f$ is applied to the value of $f~x~y$ as its second argument, it must be that $\tau="bool"$.  
The return value is also "bool".  Thus the type of the entire program is $("int"\to"bool"\to"bool")\to"int"\to"bool"\to"bool"$.

\section{Unification}

Both type inference and pattern matching in ML are instances of a very general mechanism called _unification_.  Briefly, unification is the process of finding a substitution that makes two given terms equal.  Pattern matching in ML is done by applying unification to ML expressions, whereas type inference is done by applying unification to type expressions.  It is interesting that both these procedures turn out to be applications of the same general mechanism.  There are many other applications of unification in computer science; for example, the programming language PROLOG is based on it.

The essential task of unification is to find a substitution $S$ that unifies two given terms (that is, makes them equal).  Let's write $s~S$ for the result of applying the substitution $S$ to the term $s$.  For example,
\(
\substtwo{f(x,h(x,y))}{g(y)}xzy &=& f(g(y),h(g(y),z)),
\)
where the substitution operator $\substtwo{}{g(y)}xzy$ applied to a term simultaneously substitutes $g(y)$ for $x$ and $z$ for $y$.  The substitution is simultaneous, not sequential.  Sequential substitution would give a different result:
\(
\subst{\subst{f(x,h(x,y))}{g(y)}x}zy &=& \subst{f(g(y),h(g(y),y))}zy\ \ =\ \ f(g(z),h(g(z),z)).
\)

Thus, given $s$ and $t$, we want to find $S$ such that $s S = t S$.  Such a substitution $S$ is called a _unifier_ for $s$ and $t$.  For example, given the terms
\begin{eqnarray}
f(x,g(y)) &\quad&  f(g(z),w)\label{eqn:unif1}
\end{eqnarray}
the substitution
\begin{eqnarray}
S &=& \substtwo{}{g(z)}x{g(y)}w\label{eqn:substS}
\end{eqnarray}
would be a unifier, since 
\(
\substtwo{f(x,g(y))}{g(z)}x{g(y)}w &=& \substtwo{f(g(z),w)}{g(z)}x{g(y)}w\ \ =\ \ f(g(z),g(y)).
\)
Note that this is a purely syntactic definition; the meaning of expressions is not taken into consideration when computing unifiers.

Unifiers do not necessarily exist.  For example, the terms $x$ and $f(x)$ cannot be unified, since no substitution for $x$ can make the two terms equal.

Even when unifiers exist, they are not unique.  For example, the substitution
\(
T &=& \{g(f(a,b))/x,\,f(b,a)/y,\,f(a,b)/z,\,g(f(b,a))/w\}
\)
is also a unifier for the two terms (\ref{eqn:unif1}):
\(
f(x,g(y))~T &=& f(g(z),w)~T\ \ =\ \ f(g(f(a,b)),g(f(b,a))).
\)
However, when a unifier exists, there is always a _weakest_ or _most general unifier_ (mgu) that is unique up to renaming.  A substitution $S$ is a most general unifier for $s$ and $t$ if
\begin{itemize}
\item
$S$ is a unifier for $s$ and $t$,
\item
any other unifier $T$ for $s$ and $t$ is a _refinement_ of $S$; that is, $T$ can be obtained from $S$ by doing further substitutions.
\end{itemize}

For example, the substitution $S$ in the example above is an mgu for $f(x,g(y))$ and $f(g(z),w)$.  The unifier $T$ is a refinement of $S$, since $T = S~U$, where
\(
U &=& \substtwo{}{f(a,b)}z{f(b,a)}y.
\)
Note that
\(
f(x,g(y))~S~U
&=& \substtwo{\substtwo{f(x,g(y))}{g(z)}x{g(y)}w}{f(a,b)}z{f(b,a)}y\\
&=& \substtwo{f(g(z),g(y))}{f(a,b)}z{f(b,a)}y\\
&=& f(g(f(a,b)),g(f(b,a)))\\
&=& f(x,g(y))~T.
\)

Note that we can compose substitutions, as we did in $S~U$.  This is the substitution that first applies $S$, then applies $U$ to the result.  The composition is also a substitution.

\section{Unification Algorithm}

The unification algorithm is known as Robinson's algorithm (1965).  We need unification not just for a pair of terms, but more generally, for a set of pairs of terms.  We say that a substitution $S$ is a _unifier_ for a set $\{(s_1,t_1),\ldots,(s_n,t_n)\}$ if $s_i~S = t_i~S$ for all $1\leq i\leq n$.

The unification algorithm is given in terms of a function $\Unify{}$ that takes a set of pairs of terms $(s,t)$ and produces their mgu, if it exists.  If $E$ is a set of pairs of terms, then $\subst Etx$ denotes the result of applying the substitution $\subst{}tx$ to all the terms in $E$.
\begin{itemize}
\item
$\Unify{\{(x,t)\}\cup E} \definedas \subst{}tx~\Unify{\subst Etx}$ if $x\notin\FV t$
\item
$\Unify\emptyset \definedas I$ (the identity substitution $x\mapsto x$)
\item
$\Unify{\{(x,x)\}\cup E} \definedas \Unify E$
\item
$\Unify{\{(f(\seq s1n),f(\seq t1n))\}\cup E} \definedas \Unify{\{(s_1,t_1),\ldots,(s_n,t_n)\}\cup E}$.
\end{itemize}
In the first rule, $\subst{}tx$ denotes the substitution that substitutes $t$ for $x$,
and $\subst{}tx~\Unify{\subst Etx}$ denotes the composition of $\subst{}tx$ and $\Unify{\subst Etx}$.  Since we write substitutions on the right, we follow the convention that composition is from left to right; thus $S~T$ means, ``do $S$, then do $T$''.

One circumstance that causes a set of terms not to unify is if it contains a pair $(x,t)$ where $x\neq t$ but $x$ occurs in $t$; then no substitution can make $x$ and $t$ equal.

\section{Type Inference and Unification}

Now we show how to do type inference using unification on type expressions.  This technique gives the most general type (mgt) of any typable term; any other type of this term is a substitution instance of its most general type.  Recall the Curry-style simply typed $\lambda$-calculus with syntax
\(
e\ \ ::=\ \ x \bnf e_1~e_2 \bnf \lam xe &\qquad& \tau\ \ ::=\ \ \alpha \bnf \tau_1\to\tau_2
\)
and typing rules
\(
\frac{\judge\Gamma{e_1}{\sigma\to\tau}\qquad\judge\Gamma{e_2}\sigma}{\judge\Gamma{(e_1~e_2)}\tau}
&\qquad& \frac{\judge{\Gamma,\,x:\sigma}e\tau}{\judge\Gamma{\lam xe}{\sigma\to\tau}}.
\)
For the language of types, the last unification rule translates to
\begin{itemize}
\item
$\Unify{\{(s\to s',t\to t')\}\cup E} \definedas \Unify{\{(s,t),(s',t')\}\cup E}$.
\end{itemize}

The problem is that any type derivation starts with assumptions about the types of the variables in the form of a type environment $\Gamma$, but without a type environment or an annotation as in Church style, we do not know what these are.  However, we can observe that the form of the subterms imposes constraints on the types.  We can write down these constraints and then try to solve them.

Suppose we want to infer the type of a given $\lambda$-term $e$.  Without loss of generality, suppose we have $\alpha$-converted $e$ so that no variable is bound more than once and no variable with a binding occurrence $\lambda x$ also occurs free.

Let $\seq e1m$ be an enumeration of all _occurrences_ of subterms of $e$.  We first assign a unique type variable $\alpha_i$ to each $e_i$, $1\leq i\leq m$, as well as a unique type variable $\beta_x$ to each variable $x$.  Then we take the following constraints:
\begin{itemize}
\item
if $e_i$ is an occurrence of a variable $x$, the constraint $\alpha_i = \beta_x$;
\item
for a subterm $e_i=e_j~e_k$, the constraint $\alpha_j = \alpha_k\to\alpha_i$; and
\item
for a subterm $e_i=\lam x{e_j}$, the constraint $\alpha_i = \beta_x\to\alpha_j$.
\end{itemize}

This gives us a list of pairs of type expressions representing type constraints imposed by the typing rules above.

Now we do unification on the constraints and apply the resulting substitution to the type variable $\alpha_e$.  The result is the most general type (mgt) of $e$.

\subsection{An Example}

Here is an example of the algorithm applied to the $S$ combinator $\lam{xyz}{xz(yz)}$.  Let us mark the second occurrence of $z$ as $z'$ to distinguish it from the first occurrence, although they are occurrences of the same variable $z$.  Thus $S=\lam{xyz}{xz(yz')}$.  Each occurrence of a subterm generates a constraint:
\[
\begin{array}{rcll}
e_1 &=& \lam x{\lam y{\lam z{xz(yz')}}} & \alpha_1=\beta_x\to\alpha_2\\
e_2 &=& \lam y{\lam z{xz(yz')}} & \alpha_2=\beta_y\to\alpha_3\\
e_3 &=& \lam z{xz(yz')} & \alpha_3=\beta_z\to\alpha_4\\
e_4 &=& xz(yz') & \alpha_5=\alpha_8\to\alpha_4\\
e_5 &=& xz & \alpha_6=\alpha_7\to\alpha_5\\
e_6 &=& x & \alpha_6=\beta_x\\
e_7 &=& z & \alpha_7=\beta_z\\
e_8 &=& yz' & \alpha_9=\alpha_{10}\to\alpha_8\\
e_9 &=& y & \alpha_9=\beta_y\\
e_{10} &=& z' & \alpha_{10}=\beta_z.
\end{array}
\]
Solving these constraints using Robinson's algorithm yields
\(
\alpha_1 &=& (\alpha_7\to\alpha_8\to\alpha_4)\to(\alpha_7\to\alpha_8)\to\alpha_7\to\alpha_4\\
\alpha_2 &=& (\alpha_7\to\alpha_8)\to\alpha_7\to\alpha_4\\
\alpha_3 &=& \alpha_7\to\alpha_4\\
\alpha_5 &=& \alpha_8\to\alpha_4\\
\alpha_6 &=& \alpha_7\to\alpha_8\to\alpha_4\\
\alpha_9 &=& \alpha_7\to\alpha_8\\
\alpha_{10} &=& \alpha_7\\
\beta_x &=& \alpha_7\to\alpha_8\to\alpha_4\\
\beta_y &=& \alpha_7\to\alpha_8\\
\beta_z &=& \alpha_7
\)
so we see that the most general type of $e_1$ is $(\alpha_7\to\alpha_8\to\alpha_4)\to(\alpha_7\to\alpha_8)\to\alpha_7\to\alpha_4$.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
