summaryrefslogtreecommitdiff
path: root/tfm.tex
blob: 946ffa5d10a1a07f99824dfdfdf2de9fb79d2566 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
\documentclass[12pt,a4paper,parskip=full]{scrreprt}
%\usepackage{scrhack} % http://tex.stackexchange.com/questions/51867/koma-warning-about-toc

\usepackage[top=3.5cm,bottom=3.5cm,left=3cm,right=3cm]{geometry}

% \usepackage[spanish]{babel}
\usepackage{lmodern}
\usepackage{fontspec} \setmonofont{DejaVu Sans Mono}
\usepackage{url}
\usepackage{graphicx} \usepackage{cite} \usepackage{amsmath}
\usepackage{mathtools} \usepackage{listings} \usepackage{syntax}
% \usepackage[compact,small]{titlesec}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[backref,colorlinks=true,linkcolor=black,urlcolor=black,citecolor=blue]{hyperref}
\usepackage{perpage} \usepackage{subcaption}

\usepackage{tikz}
\usepackage{amssymb} % arrows
\usepackage{mathpartir} % inference rules

\usepackage{minted}

\usepackage{float}
\floatstyle{boxed}
\newfloat{code}{th}{los}[chapter]
\floatname{code}{Code listing}

\usetikzlibrary{shapes,arrows}

\hypersetup{pageanchor=false}


\input{defs}
\input{ec-defs}
\input{front/front-init}

\MakePerPage{footnote}

\def\emptypage{\newpage\thispagestyle{empty}\mbox{}}

\begin{document}
\input{front/front-body}

\pagenumbering{roman}


\tikzstyle{block} = [rectangle, draw, fill=blue!20,
text width=5em, text centered, rounded corners, minimum height=4em]
\tikzstyle{invisbl} = [text width=5em, text centered, minimum height=4em]
\tikzstyle{line} = [draw, -latex']
\tikzstyle{cloud} = [draw, ellipse,fill=blue!10, node distance=3cm,
minimum height=2em]


\emptypage
\chapter*{Resumen}

La sociedad depende hoy más que nunca de la tecnología, pero la inversión en
seguridad es escasa y los sistemas informáticos siguen estando muy lejos de ser
seguros. La criptografía es una de las piedras angulares de la seguridad en este
ámbito, por lo que recientemente se ha dedicado una cantidad considerable de
recursos al desarrollo de herramientas que ayuden en la evaluación y mejora de
los algoritmos criptográficos. EasyCrypt es uno de estos sistemas, desarrollado
recientemente en el Instituto IMDEA Software en respuesta a la creciente
necesidad de disponer de herramientas fiables de verificación formal de
criptografía.

(TODO: crypto, reescritura de términos, máquinas abstractas, mejoras a EasyCrypt)

\chapter*{Abstract}

Today, society depends more than ever on technology, but the investment in
security is still scarce and computer systems remain far from safe to
use. Cryptography is one of the cornerstones of security, so there has been a
considerable amount of effort devoted recently to the development of tools
oriented to the evaluation and improvement of cryptographic algorithms. One of
these tools is EasyCrypt, developed recently at IMDEA Software Institute in
response to the increasing need of reliable formal verification tools for
cryptography.

(TODO: crypto, term rewriting, abstract machines, EasyCrypt improvements)

\emptypage
\tableofcontents

%% Content begins here
%
%% Level    | Spaces before | '%'s after
%  ---------+---------------+-----------
%  part     | 5             | until EOL
%  chapter  | 4             | 10
%  section  | 3             | 2
%  subs.    | 2             |
%  subsubs. | 1             |




\emptypage
\chapter{INTRODUCTION} %%%%%%%%%%

\pagenumbering{arabic}
\setcounter{page}{1}

In the last years, society is becoming ever more dependent on computer
systems. People manage their bank accounts via web, are constantly in touch with
their contacts thanks to instant messaging applications, and have huge amounts
of personal data stored in the \textit{cloud}. All this personal information
flowing through computer networks needs to be protected by correctly
implementing adequate security measures regarding both information transmission
and
storage. Building strong security systems is not an easy task, because there are
lots of parts that must be studied in order to assure the system as a whole
behaves as intended.




\section{Cryptography}

One of the most fundamental tools used to build secure computer systems is
\textbf{cryptography}. As a relatively low-level layer in the security stack, it
is often the cornerstone over which all the system relies in order to keep being
safe. Due to its heavy mathematical roots, cryptography today is a mature
science that, when correctly implemented, can provide strong security guarantees
to the systems using it.

At this point, one could be tempted to just ``use strong, NIST-approved
cryptography'' and focusing on the security of other parts of the system. The
problem is that correctly implementing cryptography is a pretty difficult task
on its own, mainly because there is not a one-size-fits-all construction that
covers all security requirements. Every cryptographic primitive has its own
security assumptions and guarantees, so one must be exceptionally cautious when
combining them in order to build larger systems. A given cryptographic
construction is usually well suited for some kind of scenarios, and offers
little to no security otherwise. In turn, this can produce a false sense of
security, potentially worse than not having any security at all.




\section{Formal Methods} %%

In order to have the best guarantee that some cryptographic construction meets
its security requirements, we can use formal methods to prove that the
requirements follow from the assumptions (scenario). (TODO: some more explaining.)

While mathematical proofs greatly enhance the confidence we have in that a given
cryptosystem behaves as expected, with the recent increase in complexity it has
become more and more difficult to write and verify the proofs by hand, to the
point of being practically non-viable. In the recent years there has been an
increasing effort in having computers help us write and verify these proofs.

There are various methods and tools for doing this, but one of the most
versatile and powerful are the \textbf{proof assistants}, which are tools
designed to help users develop formal proofs interactively. A proof assistant
usually follows the rules of one or more \textbf{logics} to derive theorems from
previous facts, and the user helps it by giving ``hints'' on how to
proceed. This is in contrast to some other theorem provers that use little or no
help from the user, making them easier to use but fundamentally more limited.
Coq\footnote{\url{http://coq.inria.fr/}} and
Isabelle\footnote{\url{http://isabelle.in.tum.de/}} are examples of widely used
proof assistants.

(TODO: more explaining, screenshots?)

One downside of proof assistants is that they require a considerable amount of
knowledge from the user, making them difficult to use for people who are not
somewhat fluent in theoretical computer science and logic. This is a
significant obstacle to the application of these technologies to other scientific
fields that could benefit from adopting the formal methods approach to
verification.


\section{EasyCrypt}
\label{sec:easycrypt}

EasyCrypt \cite{BartheGHB11} is a toolset conceived to help cryptographers
construct and verify cryptographic proofs. It is an open source
project\footnote{\url{https://www.easycrypt.info}} being developed
currently at IMDEA Software Institute and Inria. It is the evolution of the
previous CertiCrypt system \cite{BartheGB09}.

EasyCrypt works as an interpreter of its own \textbf{programming language}, in
which the programmer can express all that's needed in order to develop the
proofs. At every step of the evaluation, EasyCrypt can output some information
regarding the state of the system so that external tools can parse and show it
to the user. Together with the fact that the evaluation steps can be reversed,
this forms the basis of the interactivity of the EasyCrypt system: the user can
evaluate the program step by step, and if needed, undo it and re-evaluate on
the fly.

\begin{figure}[H]
  \centering
  \includegraphics[width=1\textwidth]{img/easycrypt.png}
  \caption{EasyCrypt}
  \label{fig:easycrypt}
\end{figure}

The preferred way of working with EasyCrypt is using the
\textbf{Emacs}\footnote{\url{http://www.gnu.org/software/emacs/}} text editor,
with the \textbf{Proof
  General}\footnote{\url{http://proofgeneral.inf.ed.ac.uk/}} interface to proof
assistants (figure \ref{fig:easycrypt}). This interface shows both the source
code and the EasyCrypt output at the point of evaluation (the already evaluated
code is displayed in a different color), and offers standard key combinations
for forward/backward code stepping.

As we'll see later (section \ref{sec:verif-easycrypt}), EasyCrypt has different
sub-languages for working with different things, e.g., representing games,
developing the proofs, etc. One of them is specially relevant in this thesis:
the \textbf{expression language}. It is the language EasyCrypt uses to define
typed values, like quantified formulas, arithmetic expressions, functions,
function application and such. (TODO: briefly explain what it is used for and
what can be improved)




\section{Term Rewriting}

(TODO: explain the need to improve the EasyCrypt's one)



\section{Contributions} %%

(TODO:)

\begin{itemize}
\item Study and implement some rewriting engines
\item Improve the EasyCrypt's one
\end{itemize}





\part{STATE OF THE ART} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




\chapter{CRYPTOGRAPHY} %%%%%%%%%%

(TODO: brief introduction to crypto: encryption (signatures?) etc.)



\section{Symmetric Cryptography} %%

(TODO: not sure if this section is really needed)



\section{Asymmetric Cryptography} %%

Here we will introduce some of the most fundamental concepts in asymmetric
cryptography, as they will be useful to understand the next sections on
EasyCrypt's proof system and sequences of games (section
\ref{sec:sequences-games}).

\textbf{Asymmetric cryptography} (also called \textbf{Public Key cryptography}),
refers to cryptographic algorithms that make use of two different keys, $pk$
(public key) and $sk$ (secret key). There must be some mathematical relationship
that allows a specific pair of keys to perform dual operations, e.g., $pk$ to
encrypt and $sk$ to decrypt, $pk$ to verify a signature and $sk$ to create it,
and so on. A pair of (public, secret) keys can be generated using a procedure
called \textbf{key generation} ($\KG$).

The encryption ($\Enc$) and decryption ($\Dec$) functions work in the
following way:

$$\Enc(pk,M) = C$$
$$\Dec(sk,C) = M$$

That is, a message ($M$) can be encrypted using a public key to obtain a
ciphertext ($C$).

(TODO: expand)


\section{Proofs by reduction} %%
\label{sec:proofs-reduction}

(TODO: explain: Shannon + perfect security size problems, need to rely on strong
primitives -RSA, DDH...- and use them in reduction proofs)

\begin{figure}[ht]
  \centering
  \begin{tikzpicture}[node distance = 6cm, auto]
    % Nodes
    \node [block, fill=blue!40] (primitive) {Primitive};
    \node [block, below of=primitive, fill=blue!40] (scheme) {Scheme};
    \node [block, right of=primitive, fill=red!60] (attack1) {Attack};
    \node [block, right of=scheme, fill=red!60] (attack2) {Attack};
    % Edges
    \path [line,dashed] (attack1) -- (primitive);
    \path [line,dashed] (attack2) -- (scheme);
    \path [line]
    (primitive) edge node[left] {\textbf{Construction}} (scheme)
    (attack2) edge node[right] {\textbf{Reduction}} (attack1);
  \end{tikzpicture}
  \caption{Proofs by reduction}
  \label{fig:proofs}
\end{figure}




\section{Sequences of games} %%
\label{sec:sequences-games}

In 2004 \cite{Shoup04}, Victor Shoup introduced the concept of \textbf{sequences
  of games} as a method of taming the complexity of cryptography related
proofs. A \textbf{game} is like a program written in a well-defined,
probabilistic programming language, and a sequence of games is the result of
applying transformations over the initial one. Every game represents the
interaction between a \textbf{challenger} and an \textbf{adversary}, with the
last one being usually encoded as a function (probabilistic program). In the
end, we will want the sequence of games to form a proof by reduction (see
section \ref{sec:proofs-reduction}), where the transition of games proves that
our system can be reduced, under certain conditions, to some well-known
cryptographic primitive (TODO: is this correct?).

(TODO: explain transitions and events (adversary winning))

In order to see a practical example of how sequences of games work, let us
define the following game:

\newpage
\begin{game}[caption=IND-CPA game (from \cite{BartheGB09}),
  label={lst:indcpa}]{}
  $(pk,sk) \leftarrow \KG();$
  $(m_0,m_1) \leftarrow A_1(pk);$
  $b \overset{\$}{\leftarrow} \{0,1\};$
  $c \leftarrow \Enc(pk,m_b);\\$
  $\tilde{b} \leftarrow A_2(c)$
\end{game}

The Game \ref{lst:indcpa} can be used to define the IND-CPA property of public
key encryption schemes. (TODO: explain IND-CPA)

In the game, $\KG$ and $\Enc$ are the key generation and encryption functions
provided by the encryption algorithm, respectively, and $A_1$ and $A_2$ are the
two stages of the encoded adversary.

(TODO)



\section{Verification: EasyCrypt} %%
\label{sec:verif-easycrypt}

EasyCrypt has different languages to perform different tasks:


\subsection{Specification languages}

These are the languages EasyCrypt uses to declare and define types, functions,
axioms, games, oracles, adversaries and other entities involved in the proofs.

\subsubsection{Expressions}

The main specification language of EasyCrypt is the expression language, in
which \textbf{types} are defined together with \textbf{operators} that can be
applied to them (or be constant). EasyCrypt follows the usual colon
notation~\cite{Church40} to denote the typing relationship: <<$a : T$>> means
``$a$ has type $T$''. EasyCrypt has a type system supporting parametric
polymorphism: \rawec{int list} represents a list of integers.

The operators are functions over types, defined with the keyword \rawec{op}
(e.g., \\ \rawec{op even : nat -> bool}). An operator can be applied to some
argument by putting them separated by a space: \rawec{even 4}. Operators can be
abstract, i.e., defined without any actual implementation; with semantics given
by the definition of axioms and lemmas that describe its observational
behavior. Operators are also \textit{curried}, so they support multiple
arguments by returning new functions that consume the next one. For example,
$f : (A \times B\times C) \rightarrow D$ would be encoded as
$f : A \rightarrow (B \rightarrow (C \rightarrow D))$, or, by associativity,
$f : A \rightarrow B \rightarrow C \rightarrow D$.

In this example (from the current EasyCrypt library) we can see how actual
types and operators are defined in the EasyCrypt's expression language:

\begin{easycrypt}[caption=Lists (expression language), label={lst:exprlang}]{}
type 'a list = [
  | "[]"
  | (::) of 'a & 'a list ].

op hd: 'a list -> 'a.
axiom hd_cons (x:'a) xs: hd (x::xs) = x.

op map (f:'a -> 'b) (xs:'a list) =
  with xs = "[]"      => []
  with xs = (::) x xs => (f x)::(map f xs).
\end{easycrypt}

The first line defines the \rawec{list} type as a sum type with two constructors
(cases): the \textit{empty list} and the \textit{construction} of a new list
from an existing one and an element that will appear at its head position. The
rest of the code defines operators working with lists.

The next line abstractly defines the operator \rawec{hd}, together with its
type. The axiom following it partially specifies the behavior of the \rawec{hd} when
applied to some list: if the list has the form \rawec{x::xs} (element \rawec{x}
followed by \rawec{xs}), the return value is \rawec{x}. The other case (empty
list) is left unspecified.

The last line defines the \rawec{map} operator directly, using pattern
matching. This operator receives a function and a list, and returns the list
consisting of the results of evaluating the function over each element of the
list, preserving its order.

\paragraph{Probabilistic expressions}

Additionally, EasyCrypt defines some standard types and operators to work with
probabilistic expressions. The type \rawec{'a distr} represents discrete
sub-distributions over types. The operator \rawec{mu} represents the probability
of some event in a sub-distribution:

\begin{easycrypt}[]{}
op mu : 'a distr -> ('a -> bool) -> real
\end{easycrypt}

For example, the uniform distribution over booleans is defined in the
EasyCrypt's standard library as follows:

\begin{easycrypt}[caption=Uniform distribution over bool,
  label={lst:booldistr}]{}
op dbool : bool distr.
axiom mu_def : forall (p : bool -> bool),
  mu dbool p =
    (1/2) * charfun p true +
    (1/2) * charfun p false.
\end{easycrypt}

\subsubsection{pWhile language}

Expression languages are usually not adequate to define games and other data
structures as cryptographic schemes and oracles, due to the stateful nature of
sequential algorithms. That's why EasyCrypt uses a different language
called \textbf{pWhile} \cite{BartheGB12} (probabilistic while) to define them:

\begin{bnf}[caption=pWhile language, label={lst:pwhile}]{}
  $\mathcal{C} ::=$ skip
    | $\mathcal{V \leftarrow E}$
    | $\mathcal{V \overset{\$}{\leftarrow} DE}$
    | if $\mathcal{E}$ then $\mathcal{C}$ else $\mathcal{C}$
    | while $\mathcal{E}$ do $\mathcal{C}$
    | $\mathcal{V} \leftarrow \mathcal{P}(\mathcal{E}, ..., \mathcal{E})$
    | $\mathcal{C}$; $\mathcal{C}$
\end{bnf}


\subsection{Proof languages}

These are the languages used to write and prove theorems:

\subsubsection{Judgments}

Whenever there is some statement that we want to prove, it must be written as a
judgment in some \textbf{logic}. Apart from the first order logic expressions,
EasyCrypt supports judgments in some logics derived from Hoare logic:

\begin{itemize}
\item Hoare Logic (\textit{HL}). These judgments have the following shape:

  $$c : P \Longrightarrow Q$$

  where $P$ and $Q$ are assertions (predicates) and $c$ is a statement or
  program. $P$ is the \textbf{precondition} and $Q$ is the
  \textbf{postcondition}. The validity of this kind of Hoare judgment implies
  that if $P$ holds before the execution of $c$ and it terminates, then $Q$ must
  also hold.

\item Probabilistic Hoare Logic (\textit{pHL}). This is the logic resulting from
  assigning some probability to the validity of the previously seen Hoare
  judgments. The probability can be a number or an upper/lower bound:

  $$[c : P \Longrightarrow Q] \leq \delta$$
  $$[c : P \Longrightarrow Q] = \delta$$
  $$[c : P \Longrightarrow Q] \geq \delta$$

\item Probabilistic Relational Hoare Logic (\textit{pRHL}). These have the
  following shape:

  $$c_1 \sim c_2 : \Psi \Longrightarrow \Phi$$

  In this case, the pre and postconditions are not standalone predicates, but
  \textbf{relationships} between the memories of the two programs $c_1$ and
  $c_2$. This judgment means that if the precondition $\Psi$ holds before the
  execution of $c_1$ and $c_2$, the postcondition $\Phi$ will also hold after
  finishing its execution.

  This logic is the most complete and useful when developing game-based
  reduction proofs, because it allows to encode each game transition as a
  judgment. The two games are $c_1$ and $c_2$ respectively, and the
  pre/postconditions refer to the probability of the adversary winning the
  games.

\end{itemize}

\subsubsection{Tactics}

If the judgment is declared as an axiom, it is taken as a fact and does not need
to be proven. Lemmas, however, will make EasyCrypt enter in ``proof mode'',
where it stops reading declarations, takes the current judgment as a goal and
starts accepting \textbf{tactics} until the current goal is trivially
true. Tactics are indications on what rules EasyCrypt must apply to transform
the current goal.

This is a simplified example of a proof from the EasyCrypt library:

\begin{easycrypt}[caption=Tactics usage, label={lst:tactics}]{}
lemma cons_hd_tl :
  forall (xs:'a list),
    xs <> [] => (hd xs)::(tl xs) = xs.
proof.
  intros xs.
  elim / list_ind xs.
  simplify.
  intros x xs' IH h {h}.
  rewrite hd_cons.
  rewrite tl_cons.
  reflexivity.
qed.
\end{easycrypt}

(TODO: briefly explain proof)




\chapter{TERM REWRITING} %%%%%%%%%%



\section{Term Rewriting Systems/Theory} %%



\section{Lambda Calculus} %%

The \textbf{Lambda Calculus} \cite{Church36} is a formal system developed by
Alonzo Church in the decade of 1930 as part of his research on the foundations
of mathematics and computation (it was later proven to be equivalent to the
Turing Machine). In its essence, the Lambda Calculus is a simple term rewriting
system that represents computation through the \textbf{application of
  functions}.

Following is the grammar representing \textbf{lambda terms} ($\mathcal{T}$),
also called ``well-formed formulas'':

\begin{bnf}[caption=Lambda Calculus, label={lst:lambda}]{}
  $\mathcal{T} ::= x$               variable
    | $(\lambda x . \mathcal{T})$           abstraction
    | $(\mathcal{T}_1 \: \mathcal{T}_2)$           application
  $x ::= v_1, v_2, v_3, ...$               (infinite variables available)
\end{bnf}

Intuitively, the \textbf{abstraction} rule represents function creation: take an
existing term ($\mathcal{T}$) and parameterize it with an argument ($x$). The
variable $x$ binds every instance of the same variable on the body, which we say
are \textbf{bound} instances. The \textbf{application} rule represents function
evaluation ($\mathcal{T}_1$) with an actual argument ($\mathcal{T}_2)$.

Seen as a term rewriting system, the Lambda Calculus has some reduction rules
that can be applied over terms in order to perform the computation:


\subsection{Reduction rules}

Arguably the most important rule in Lambda Calculus is the \textbf{beta
  reduction}, or $\beta$-reduction. This rule represents function evaluation,
and can be outlined in the following way:

\[
  \inferrule* [left=$\beta$-red]
  { }
  {((\lambda x . \mathcal{T}_1) \: \mathcal{T}_2) \Betared \mathcal{T}_1[x :=
    \mathcal{T}_2]}
\]

An application with an abstraction in the left-hand side is called a
\textbf{redex}, short for ``reducible expression'', because it can be
$\beta$-reduced following the rule. The semantics of the rule match with the
intuition of function application: the result is the body of the function with
the formal parameter replaced by the actual argument.

The \textbf{substitution operation} $\mathcal{T}_1[x := \mathcal{T}_2]$ replaces
$x$ by $\mathcal{T}_2$ in the body of $\mathcal{T}_1$, but we have to be careful
in its definition, because the ``obvious/naïve'' substitution process can lead
to unexpected results. For example, $(\lambda x . y)[y := x]$ would
$\beta$-reduce to $(\lambda x . x)$, which is not the expected result: the new
$x$ in the body has been \textbf{captured} by the argument and the function
behavior is now different.

The solution to this problem comes from the intuitive idea that ``the exact
choice of names for bound variables is not really important''. The functions
$(\lambda x . x)$ and $(\lambda y . y)$ behave in the same way and thus should
be considered equal. The \textbf{alpha equivalence} ($\alpha$-equivalence) is
the equivalence relationship that expresses this idea through another rule: the
\textbf{alpha conversion} ($\alpha$-conversion). The basic definition of this
rule is the following:

\[
  \inferrule* [left=$\alpha$-conv]
  {y \notin \mathcal{T}}
  {(\lambda x . \mathcal{T}) \Alphared (\lambda y . \mathcal{T}[x := y])}
\]

So, to correctly apply a $\beta$-reduction, we will do \textbf{capture-avoiding
  substitutions}: if there is the danger of capturing variables during a
substitution, we will first apply $\alpha$-conversions to change the problematic
variables by fresh ones.

Another equivalence relation over lambda terms is the one defined by the
\textbf{eta conversion} ($\eta$-conversion), and follows by the extensional
equivalence of functions in the calculus:

\[
  \inferrule* [left=$\eta$-conv]
  {x \notin FV(\mathcal{T})}
  {(\lambda x . \mathcal{T} \: x) \Etabired \mathcal{T}}
\]


In general, we will treat $\alpha$-equivalent and $\eta$-equivalent functions as
interchangeable.


\subsection{Types}

Until now, we have been discussing the \textbf{untyped} Lambda Calculus, where
there are no restrictions to the kind of terms we can build. Every formula
resulting from the grammar (\ref{lst:lambda}) is a valid term. There are
alternative interpretations of the Lambda Calculus, the \textbf{Typed Lambda
  Calculi}, which give \textbf{typing rules} to construct ``well-typed terms''
(terms that have a \textbf{type} associated to it). The point is being unable to
construct terms that ``make no sense'', like the ones that encode logical
paradoxes or whose evaluation does not finish.

As previously seen, EasyCrypt uses a statically typed expression language that
in turn is represented internally as a typed Lambda Calculus. (TODO: put in
context)


\subsection{Extensions}

Until now we have seen the ``basic'', or ``pure'' Lambda Calculus, where the
only data primitive is the function. Being Turing-complete, it is all that is
needed to express all the possible computations, but there are extensions that
make it more suitable to be manipulated by human beings.

One of the most typical is having natural numbers as primitive terms, which is
far more convenient than representing them as \textbf{Church numerals}:

$$0 \Longleftrightarrow (\lambda f . (\lambda x . x))$$
$$1 \Longleftrightarrow (\lambda f . (\lambda x . (f \: x)))$$
$$2 \Longleftrightarrow (\lambda f . (\lambda x . (f \: (f \: x))))$$
$$...$$

As with the natural numbers, any data structure can be encoded in basic Lambda
Calculus \cite{Mogensen00}, but the representation is complex and the terms can
rapidly become very difficult to handle. This is why the first major extension
comes in: \textbf{algebraic data types}.

\subsubsection{Algebraic Data Types}

If we want to be able to extend the Lambda Calculus to encode structured data,
we will want to have some way to build new data, instead of extending the
language with every data type we can imagine (naturals, booleans,
etc.). \textbf{Algebraic data types} (ADT's) offer a powerful way to compose
existing terms into more complex ones\footnote{Although ADT's are a
  type-theoretical concept, as we are concerned with the untyped version of the
  Lambda Calculus, we will be only interested in how it allows us to build new
  \textbf{terms}, not \textbf{types}. }. The composition can be made in two
different ways:

\paragraph{Product types}

If we have some terms $T_1, T_2, ..., T_n$, we can produce a new term
that is the ordered \textit{grouping} of them: $T = (T_1, T_2, ..., T_n)$. The
resulting structure is often called an \textbf{n-tuple}.

\paragraph{Sum types}

If we have some terms $T_1, T_2, ..., T_n$, we can produce a new term that can
be \textbf{disjoint union} of them:
$T = C_1(T_1) \: | \: C_2(T_2) \: | \: ... \: | \: C_n(T_n)$. It consists of a
series of \textbf{variants}, one for every previous term, each tagged by a
unique \textbf{constructor} ($C_1-C_n$). A sum type will take the form of one
(and only one) of the variants, and the constructor is used at runtime to
know which of the variants it actually is.



As a special interesting case, it is often useful not to parameterize a variant
(sum type) and just have the constructor as one of the values. This can be done
naturally by parameterizing it over the empty product type $()$, also called
\textbf{unit}.

While the

\subsubsection{Fixpoints}


There's also the problem of recursion: as the lambda
functions are anonymous, they can't simply refer to themselves. As with the
Church numerals, there are ways to encode recursion, e.g., via fixed-point
combinators~\cite{PeytonJones87}.

---

http://adam.chlipala.net/cpdt/html/Equality.html

\begin{itemize}
\item Alpha reduction
\item Beta reduction
\item ...
\end{itemize}



\section{Evaluation Strategies} %%

\cite{Sestoft02}



\section{Normal forms}



\section{Abstract Machines} %%

(TODO: basic intro, motivation, function application implementations
(push-enter, eval-apply))


\subsection{Krivine Machine} %%%%%%%%%%

\cite{Krivine07}
\cite{Douence07}


\subsection{ZAM: Standard version}

\cite{Leroy90}


\subsection{ZAM: Alternative version}

\cite{LeroyG02}





\part{IMPLEMENTATION} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




\chapter{KRIVINE MACHINE} %%%%%%%%%%

To begin our study of the implementation of abstract machines, we will start
with the Krivine Machine. It is a relatively simple and well-known model that
will help us see the steps that we need to take in order to implement a real
abstract machine. We will be using the OCaml language from now on (not only in
this section but also in the next ones).

The Krivine Machine \cite{Krivine07} is an implementation of the weak-head
call-by-name reduction strategy for pure lambda terms. What that means is that:

\begin{itemize}
\item The Krivine Machine reduces pure (untyped) terms in the Lambda Calculus
\item The reduction strategy it implements is call-by-name, reducing first the
  leftmost outermost term in the formula
\item It stops reducing whenever the formula is in weak-head normal form, that
  is:
  \begin{itemize}
  \item does not reduce abstractions: $(\lambda x . \mathcal{T})$
  \item does not reduce applications of non-abstractions: $(x \: \mathcal{T})$
  \end{itemize}
\end{itemize}

The first thing we need to have is an encoding of the language we will be
reducing, in this case the Lambda Calculus. We will define a module,
\textbf{Lambda}, containing the data structure and basic operations over it:

\begin{code}
  \begin{minted}[fontsize=\footnotesize]{ocaml}
(* A symbol carries its display name plus an integer tag (e.g. for freshness). *)
type symbol = string * int
let show_symbol (s, _) = s

module Lambda = struct
    (* Pure untyped lambda terms: variables, applications and abstractions. *)
    type t = Var of symbol | App of t * t | Abs of symbol * t
    (* Pretty-print a term in fully parenthesized form.
       Fix: the original [let rec show = match m with ...] referenced an
       unbound [m]; the parameter must be declared. *)
    let rec show m = match m with
      | Var x -> show_symbol x
      | App (m1, m2) -> "(" ^ show m1 ^ " " ^ show m2 ^ ")"
      | Abs (x, m) -> "(λ" ^ show_symbol x ^ "." ^ show m ^ ")"
end
  \end{minted}  
\end{code}

(From now on, the auxiliary functions (e.g., pretty-printing) will be omitted
for brevity.)

Now we have a representation of the pure Lambda Calculus. As the Krivine Machine
usually works with expressions in \textbf{de Bruijn} notation\footnote{De
  Bruijn notation gets rid of variable names by replacing each occurrence with
  the number of ``lambdas'' between it and the lambda that binds it. For
  example, $(\lambda x . (\lambda y . (x \: y)))$ will be written in de Bruijn
  notation as $(\lambda . (\lambda . (1 \: 0)))$}, we'll need to write an
algorithm to do the conversion. To be sure we do not accidentally build terms
mixing the two notations, we'll create another module, \textbf{DBILambda}, with a
different data type to represent them:

\begin{code}
  \begin{minted}[fontsize=\scriptsize]{ocaml}
(* Position of the first occurrence of [x] in a list; raises [Not_found]
   when [x] does not occur. *)
let rec find_idx_exn x = function
  | [] -> raise Not_found
  | y :: _ when y = x -> 0
  | _ :: rest -> 1 + find_idx_exn x rest

module DBILambda = struct
    (* A de Bruijn variable keeps its index together with the original
       symbol, so terms can still be printed with their source names. *)
    type dbi_symbol = int * symbol
    type t = Var of dbi_symbol | App of t * t | Abs of symbol * t

    (* Pair [x] with its de Bruijn index in the binder stack [dbis]. *)
    let dbi dbis x = (find_idx_exn x dbis, x)

    (* Convert a named term (Lambda.t) into de Bruijn form, threading the
       stack of enclosing binders (innermost first). *)
    let of_lambda =
      let rec convert dbis = function
        | Lambda.Var x -> Var (dbi dbis x)
        | Lambda.App (m1, m2) -> App (convert dbis m1, convert dbis m2)
        | Lambda.Abs (x, body) -> Abs (x, convert (x :: dbis) body)
      in convert []
end
  \end{minted}  
\end{code}

The function \texttt{of\_lambda} accepts traditional lambda terms
(\texttt{Lambda.t}) and returns its representation as a term using de Bruijn
notation (\texttt{DBILambda.t}). Now we are ready to implement the actual
reduction.

The Krivine Machine has a state consisting of the code it is evaluating
(\textbf{C}), a stack (\textbf{S}) and an environment (\textbf{E}). The typical
description of the Krivine Machine \cite{Douence07} is given by the following
set of rules:

\begin{center}
\line(1,0){250}
\end{center}
\begin{table}[H]
\centering
\begin{tabular}{lcl}
$(M \: N, S, E)$ & $\rightsquigarrow$ & $(M, (S,(N,E)), E)$ \\ \\
$(\lambda M, (S,N), E)$ & $\rightsquigarrow$ & $(M, S, (E,N))$ \\ \\
$(i+1, S, (E,N))$ & $\rightsquigarrow$ & $(i, S, E)$ \\ \\
$(0, S, (E_1,(M,E_2)))$ & $\rightsquigarrow$ & $(M, S, E_2)$
\end{tabular}
\end{table}
\begin{center}
\line(1,0){250}
\end{center}

\section{With power} %%



%\section{Compiled} %%




\chapter{ZAM} %%%%%%%%%%






\chapter{REDUCTION IN EASYCRYPT} %%%%%%%%%%



\section{Architecture overview} %%



\section{Data types}



\section{Implementation}





\part{EPILOGUE} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




\chapter{CONCLUSIONS} %%%%%%%%%%

\begin{itemize}
\item Brief wrapping of importance of verified crypto today
\item Survey of available reduction machineries, differences between them
\item Improvements to EasyCrypt: modularity, extensibility, efficiency (?)
\item Problems encountered during the implementation (debugging!!! specially the
  compiled one, progressive replacing of the EasyCrypt's one due to state)
\end{itemize}




\chapter{FUTURE WORK} %%%%%%%%%%

\begin{itemize}
\item Compile code to abstract machine opcodes to improve efficiency
\item Tests
\item Make users able to define their own rules (expand the engine in ``userland'')
\end{itemize}




\chapter{ANNEX} %%%%%%%%%%



\section{Krivine Machine source code} %%

\inputminted[linenos=true,fontsize=\scriptsize]{ocaml}{code/krivine.ml}

\newpage
\section{ZAM source code} %%

%\inputminted[linenos=true,fontsize=\scriptsize]{ocaml}{code/zam.ml}

\pagebreak \bibliography{bib}{} \bibliographystyle{ieeetr}

\end{document}