\documentclass[draft,letterpaper,landscape]{seminar}
\usepackage{fancybox}
\usepackage{slidesec}
\usepackage{color}
\usepackage{pstricks}
\input theorem
\input linear
% Math-mode notation macros for the unification slides.
\def\unify{\mathrel{\mathop{=}\limits^.}}% dotted equals: the unification equation M =. N
\def\rtype{\mathop{\mathsf{type}}}% the LF kind "type"
% Upright operator, consistent with \rtype and \rid above/below; the previous
% definition (bare "occurs") typeset in math mode as a product of italic
% letters o c c u r s rather than as an identifier.
\def\roccurs{\mathop{\mathsf{occurs}}\nolimits}
\def\rid{\mathop{\mathsf{id}}\nolimits}% identity substitution
\def\inv{^{-1}}% inverse superscript
\def\rst{\mathop |}% restriction bar
%\def\rpat{\mathop{\mathsf{pat}}}
\def\shft{{\uparrow}}% shift substitution (de Bruijn style)
\def\cn{{:}}% tight colon for typing bindings, e.g. \Pi x \cn A
\def\sfr{\mathsf r}% sans-serif r
\def\sfi{\mathsf i}% sans-serif i
\def\sco#1{[\![#1]\!]}% double square brackets [[...]]
\def\deb#1{\mathsf{#1}}% de Bruijn index, set upright
% Colors and emphasis macros used throughout the slides.
\definecolor{darkblue}{rgb}{0.1,0.1,0.5}
\definecolor{orange}{rgb}{0.7,0.2,0}
\definecolor{lightgray}{rgb}{0.75,0.75,0.75}
% Emphasis declaration, used as {\myem ...}.  \bfseries replaces the
% obsolete LaTeX 2.09 \bf (per l2tabu); as a pure series switch it also
% composes correctly with the surrounding font family.
\def\myem{\bfseries\color{orange}}
\def\myslideheading#1{\slideheading{\color{darkblue}#1}}% slide heading in dark blue
\def\debu{\deb 1}% de Bruijn index 1
\def\cons{\cdot}% cons operator for substitutions
\def\unn{\deb n}% de Bruijn index n
\def\aps{\diamond}% application marker

% PSTricks parameters for the "gah" slide frame (see \newslideframe below):
% no fill, a thin light-gray border with square corners, and a small
% light-gray drop shadow.  The trailing %'s are essential: they suppress
% end-of-line spaces inside the \psset key list and after the definition.
\def\Ops{\psset{%
  fillstyle=none,%
  linewidth=0.5pt,%
  framesep=\slideframesep,%
  cornersize=absolute,%
  linearc=0,%
  shadowsize=2pt,%
  shadowcolor=lightgray,%
  linecolor=lightgray}}%

\newslideframe{gah}[\Ops]{\psframebox{#1}}

% Page style "gur": empty running header, centered page number in light
% gray as the footer; activated immediately.
\newpagestyle{gur}{\hfil}{\hfil\color{lightgray}\thepage\hfil}
\pagestyle{gur}
\begin{document}

\slideframe{gah}
\setcounter{section}{1}
\title{\color{darkblue} Extending Higher-Order Unification to Support Proof Irrelevance}
\author{Jason Reed\\  Carnegie Mellon University}
\date{September 11, 2003}
\begin{slide}
\maketitle
\end{slide}
\begin{slide}
\myslideheading{What is Proof Irrelevance?}
\begin{itemize}
\item The idea that all proofs of a proposition are equal.
\item (The term appears in the literature occasionally meaning `irrelevance everywhere',
i.e.\ all proof equality becoming trivial, especially
in proofs of the form `X and Y imply proof irrelevance' --- this is not what we are talking about)
\item ``Intensionality, Extensionality and Proof Irrelevance in Modal Type Theory'' [Pfenning '01] 
treats irrelevance as a {\myem modality}.
\item Compare with fact that both logic ``linear everywhere'' and logic with linear {\myem and} intuitionistic variables
are possible. 
\end{itemize}
\vfill
\end{slide}

\begin{slide}
\myslideheading{Outline}
\begin{itemize}
\item[I.] Motivation
\item[II.] Type Theory
\item[III.] Unification
\item[IV.] Patterns
\end{itemize}
\vfill
\end{slide}

\begin{slide}
\myslideheading{What good is Proof Irrelevance?}
\begin{itemize}
\item A couple examples, using the dependent type theory {\myem LF} [Harper, Honsell, Plotkin '93]
as a starting point.
\item Examples shaped and motivated throughout by the design choices of
 {\myem twelf} [Pfenning, Sch\"urmann '99], an implementation of LF and associated algorithms.
%using the tried-and-true ideas of {\myem propositions as types} and {\myem higher-order abstract syntax}.
\item Motivation \#1: adequate encodings
\item Motivation \#2: proof compaction
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Motivation \#1: Adequate Encodings}
\begin{itemize}
\item Desirable property for an encoding of a theory into a logic like LF
is {\myem adequacy}, existence of a {\myem compositional bijection} between object-language terms
and (canonical) LF objects.
\item Compositional, i.e. substitution commutes with translation.
\item Proof irrelevance as a modality makes adequate encodings of
certain concepts much easier.
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Adequate Encodings (2)}
\begin{itemize}
\item Take the standard encoding of the untyped $\lambda$-calculus:
{\small
$$tm : \rtype\qquad lam : (tm \to tm) \to tm$$
$$app : tm \to tm \to tm$$
}
\item How to get a `strict lambda calculus', where
each $\lambda$-bound variable must occur at least once? (Historical footnote: Church's original calculus was like this)
\item Easy to code up a definition of occurrence:
{\small\obeylines
$\roccurs : (tm \to tm) \to \rtype$
%$\roccurs\_lam : \roccurs\ (\lambda x . lam\ (M\ x))$
%$\qquad\gets \Pi y \cn tm . \roccurs\ \lambda x.lam\ (M\ x\ y)$
$\roccurs\_app1 : \roccurs\ (\lambda x . app\ (M\ x)\ (N\ x)) \gets \roccurs\ (\lambda x . (M\ x)) $
$\roccurs\_app2 : \roccurs\ (\lambda x . app\ (M\ x)\ (N\ x)) \gets \roccurs\ (\lambda x . (N\ x)) $
$\roccurs\_var : \roccurs\ (\lambda x . x)$
...
}
\item So $\roccurs\ (\lambda x .  M\ x)$ is the type of proofs that $x$ occurs in $M$
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Adequate Encodings (3)}
\begin{itemize}
\item We would try 
$lam : \Pi t\cn (tm \to tm) . (\roccurs\ t) \to tm$
but it doesn't work right.
\item Generally lots of proofs that $x$ occurs, as many as occurrences!
\item $lam\ t\ {P_1} \ne lam\ t\ {P_2}$ for $P_1\ne P_2$
\item {\myem Failure of adequacy!}
\item Don't want to care about which proof of occurrence.
\item That is, we want an `irrelevant arrow'. We'll write brackets around
the argument to suggest:
$$lam : \Pi t\cn (tm \to tm) . [(\roccurs\ t)] \to tm$$
\item Need $lam\ t\ [P_1] = lam\ t\ [P_2]$ for any proofs
$P_1,P_2$ to recover adequacy.
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Motivation \#2: Proof Compaction}
\begin{itemize}
\item Domain: Proof-Carrying Code [Necula, Lee '96] 
\item Problem: proofs are big --- There's a market for ways of making them smaller.
\item Maybe we can omit subterms that can be recovered by the consumer?
\item This is realistic; big proofs of undecidable properties can have lots
of space-consuming little subproofs of (efficiently) decidable properties. 
\item Assert the existence of the little subproofs,
  let the consumer reconstruct them.
\end{itemize}
\vfill\end{slide}
\begin{slide}
\myslideheading{Proof Compaction (2)}
\begin{itemize}
\item But what if the consumer reconstructs a different proof of the same fact?
\item Coordinating reconstruction algorithms at both ends possible, but a headache
\item Instead use {\myem irrelevant} subproof requirements in the signature.
\item This permits the receiver to safely reconstruct {\myem any} valid subproof.
\item There's a result that states that after replacing an irrelevant subterm with another of
the same type, the whole term is still well-typed. 
\item Not true in ordinary LF because of dependent types.
\item Another win: avoiding constructing intermediate proof terms
\end{itemize}
\vfill\end{slide}

%\begin{slide}
%\myslideheading{Proof Compaction (3)}
%\begin{itemize}
%\item {\em This is not true generally!}
%\item Example: in the signature
%$$o : \rtype$$
%$$a : o \to \rtype$$
%$$b : \Pi x \cn o . a\ x$$
%$$c : \Pi x \cn o .  (a\ x) \to o$$
%$$k_1, k_2 : \rtype$$
%The term $c\ k_1\ (b\ k_1)$ is well-typed, but
%$c\ k_1\ (b\ k_2)$ is not.
%\end{itemize}
%\vfill\end{slide}


\begin{slide}
\myslideheading{Extending LF Type Theory}
\begin{itemize}
\item Normally, we can check applications for equality with the rule
$$\begin{prooftree}
\Gamma \prov M = M' : \Pi x \cn A. B \qquad \Gamma\prov N = N': A
\justifies
\Gamma \prov M\ N = M'\ N': \{N/x\}B 
\end{prooftree}$$ 
\item For irrelevant functions, we want the arguments not to matter. So we have:
$$\begin{prooftree}
\Gamma \prov M = M' : \Pi x \cn [A]. B \qquad \Gamma\prov N = N': [A]
\justifies
\Gamma \prov M\ [N] = M'\ [N']: \{N/x\}B 
\end{prooftree}$$ 
and say that any two objects at $[A]$ are equal.
\item (Just as $A \to B$ abbreviates $\Pi x\cn A . B$ where $x$ doesn't occur in $B$,
we'll say $[A] \to B$ means $\Pi x \cn [A] . B$)
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Extending LF (2)}
\begin{itemize}
\item Naturally, we get terms at irrelevant-$\Pi$ type from irrelevant lambdas:
$$\begin{prooftree}
\Gamma, x : [A] \prov M : B
\justifies
\Gamma \prov \lambda x \cn [A] . M : \Pi x \cn [A] . B
\end{prooftree}$$ 
\item Forces us to consider what {\myem irrelevant hypotheses} mean.
\item Answer: $x : [A]$ assumes that some object at type $A$ exists, but
we are not allowed to analyze its structure, only use the bare fact that
its type is inhabited.
\item Knee-jerk reaction to a new kind of hypothesis: what kind of objects
can we substitute for it?
\item New typing judgment: $\Gamma \prov M : [A]$.
Think ``$M$ is an irrelevant object at type $A$''
or ``$M$ is an inhabitation witness for type $A$''
\end{itemize}
\vfill\end{slide}


\begin{slide}
\myslideheading{Irrelevance Rules}
\begin{itemize}
\item Defining inference rule: ($[\Gamma']$ just means $x_1 :[A_1],\ldots, x_n:[A_n]$)
 $$\begin{prooftree}\Gamma,\Gamma' \prov M : A \qquad {\color{lightgray}\Gamma,[\Gamma'] \prov A : \rtype}
\justifies
\Gamma, [\Gamma'] \prov M : [A]
\end{prooftree}$$
Note hypothesis rule is still
merely $\Gamma, x : A \prov x : A$
not anything that would allow 
$\Gamma, x : [A] \prov x : A$.
($\Gamma, x : [A] \prov x : [A]$ is admissible)
\item $x :[A]$ is a weaker hypothesis than $x : A$, and
$M:[A]$ is a weaker assertion than $M:A$; When judging $M:[A]$ one gets to use
irrelevant hypotheses `unbracketed'. 
\item $\Gamma \prov M : A$ implies $\Gamma \prov M : [A]$.
%\item What about that curious side condition? Why not leave it out?
\item See the tech report for why $\Gamma,[\Gamma'] \prov A : \rtype$ needed.
\end{itemize}
\vfill\end{slide}


%\begin{slide}
%\myslideheading{Avoiding Pathology}
%\begin{itemize}
%\item Without it, we could say
%$$o :\rtype \qquad a : o \to \rtype$$
%$$b : \Pi x \cn o . a\ x\qquad c : \Pi x  \cn [o] .  [a\ x] \to o$$
%$$k_1, k_2 : o$$
%\begin{center} and then we get \fbox{$z : [o] \prov c\  [z] \ [b\ z] : o$}\end{center}
%\item Now if we substitute $k_1, k_2$ for $z$ (which are equal at $[o]$)  we
%expect to find that
%$c \ [k_1] \  [b\ k_1] = c \ [k_2] \ [b\ k_2] $
%\item But the respective second arguments to $c$ don't even have the same type.
%(one's $[a\ k_1]$, the other is $[a\ k_2]$) We don't want to identify proofs
%of {\myem different} propositions!
%\item Source of the problem: $[a\ x]$ (in the decl. of $c$) not really a type.
%\end{itemize}
%\vfill\end{slide}


\begin{slide}
\myslideheading{Higher-Order Pattern Unification}
\begin{itemize}
\item How {\myem twelf}, for instance, thinks of unification.
Used for type reconstruction, logic programming queries.
\item {\myem Higher-order}: allow variables to be of function type.
\item Restricted to the {\myem pattern} fragment [Miller '91], because we want
unification to be {\myem decidable} and have unique {\myem most general unifiers}.
\item The fact that type reconstruction relies on unification is a big
motivation for this: don't want type-checking to be undecidable or have
an ambiguous answer.
\item {\small [Dowek, Hardin, Kirchner, Pfenning '96]} worked out an algorithm
for this case; we extended it to cover LF with irrelevance.
\item Just a few interesting corner cases --- see paper for details
\end{itemize}

\vfill\end{slide}

\begin{slide}
\myslideheading{Unification}
\begin{itemize}
\item Stepping back a bit, a unification problem looks like
$$\exists U_1 \ldots \exists U_n . M_1 \unify N_1 \land \cdots M_n \unify N_n$$
\item Find terms for $U_1,\ldots,U_n$ so all equations satisfied, or determine that no such exist.
\item Must allow open (allowing $\exists$-quantified variables to occur) instantiations, or
else immediate undecidability! 
For instance, $\exists U. \exists V . U \unify c\ V$. Answer: $U \gets c\ V$ 
\item Otherwise, we would have to decide whether a closed term exists at $V$'s type ---
undecidable.
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Unification (2)}
\begin{itemize}
\item Irrelevance means that equations that look straightforward
are actually trivial in the same way as the above one.
\item Consider
$$\exists U . c\ [k] \unify c\ [U]\eqno(*)$$
\item If this were
$$\exists U . c\ k \unify c\ U$$
We'd just assign $U\gets k$. 
\item But in $(*)$, the equation holds no matter what
$U$ is set to; to get most general unifier, we {\myem don't} instantiate $U$.
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Unification (3)}
\begin{itemize}
\item Sometimes we need to introduce new variables.
Consider
$$\exists U . (\lambda x . c\ [x] \unify \lambda x . U)$$
Since $U$ is quantified on the outside, it doesn't make sense
to say $U \gets c\ [x]$. 
\item But the argument to $c$ here is irrelevant!
\item We can introduce $V$, instantiate $U \gets c\ [V]$ and the equation
$\lambda x . c\ [x] = \lambda x . c\ [V]$ holds, because of irrelevant application.
\item In fact this is the most general unifier.
\item Compare $\exists U . (\lambda x . c\ x \unify \lambda x . U)$, which fails.
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Unification (4)}
\begin{itemize}
\item $\exists U . U \unify c\ U$ fails.
\item  $U$ appears rigid on the right, `occurs-check' fails.
\item $\exists U . U \unify c\ [U]$? Introduce new variable $V$;
\item  $U \gets c\ [V]$ gives $c\ [V] = c\ [c\ [V]]$. This is the most general unifier!
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{So...}
\begin{itemize}
\item Why not replace {\myem every} term
$M\ [N]$ with $M\ [V]$ for fresh $V$? Def'n of equality lets us.
\item Better yet, why not replace
with $M\ *$, where $*$ is a magic new term such that $* = *$?
\item Answer: We don't just want to solve the question of unifiability, but
{\myem unification}. We mean to {\myem find actual unifiers}, and provide {\myem as much
inhabitation information} as possible to potential algorithms downstream.
\item Overaggressive insertion of variables or placeholders is suboptimal in this respect.
\end{itemize}

\vfill\end{slide}

\begin{slide}
\myslideheading{Patterns}
\begin{itemize}
\item When we come down to an equation like $U\ M_1\ M_2\ M_3 \unify N$, things get
hard. Got to build $N$ out of $M_i$, but $M_i$ may be messy.
\item A {\myem pattern} [Miller '91] is where we restrict variables $U,V,$ etc. to occur only
applied to distinct local (i.e. once bound by $\lambda$) variables.
$$\hbox{Pattern: } \lambda x . \lambda y . \lambda z . U\ z\ x$$
$$\hbox{Not: } \lambda x . \lambda y . \lambda z . U\ x\ x$$
$$\hbox{Not: } \lambda x . \lambda y . \lambda z . U\ (c\ y)$$
$$\hbox{Not: } \lambda x . \lambda y . \lambda z . U\ (V\ x\ y\ z)$$
$$\exists U . U\ z\ x \unify c\ z\ (x\ z) \Longrightarrow U \gets \lambda z . \lambda x .  c\ z\ (x\ z)$$
$$\exists U . U\ x\ x \unify x \Longrightarrow ???\  U \gets \lambda x_1 . \lambda x_2 . x_1? \ U \gets \lambda x_1 . \lambda x_2 . x_2?$$
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Patterns (2)}
\begin{itemize}
\item Pattern restriction makes unification decidable, and most general unifiers always exist.
Current definition is sound with irrelevance, but we can squeeze more patterns out of it.
\item Turns out we can allow {\myem irrelevant} applications of {\myem any argument} at all. Normal args must still
be distinct bound variables.
\item Pattern: $\lambda x. \lambda y. U\ y\ [c\ x\ y]\ x\ [V\ y\ y]$
\item Pattern: $U\ [M]$ for any $M$. e.g.
$$\exists U . U\ [M] \unify c\ [N] \Longrightarrow U \gets \lambda z \cn [A] . c\ [N]$$
%fully general since it doesn't matter whether $z$ gets used, or how it's used:
%$\lambda z \cn [A] . c\ [z] = \lambda z \cn [A].c\ [M]$
\item Any substitution for $U$ that satisfies the eq'n {\myem must} be equal to $\lambda z \cn [A] . c\ [N]$!
\end{itemize}
\vfill\end{slide}

\begin{slide}
\myslideheading{Unification with Irrelevance}
\begin{itemize}
\item Turn all of these intuitions into an algorithm; technical details:
\item Soundness and completeness go as usual, showing that transition
rules maintain unifiers.
\item Termination because they make the problem smaller according to the right metric
\item Pattern unification with irrelevance is {\myem decidable}, has unique {\myem most general unifiers}
\item Extensible to the so-called {\myem dynamic pattern fragment} by postponing constraints.
\item We have a {\myem prototype implementation} based on {\tt twelf}
\end{itemize}
\vfill\end{slide}


\begin{slide}
\myslideheading{Summary}
\begin{itemize}
\item {\myem Proof irrelevance} as a modality is useful for expressing adequate encodings
and guaranteeing the safety of flexible proof reconstruction.
\item Known algorithm for {\myem higher-order pattern unification} modified to work in a type
theory with irrelevance.
\item Known definition of {\myem higher-order pattern} has a simple generalization to irrelevant arguments.
\item Questions?

\end{itemize}
\vfill
\end{slide}

\end{document}
