# Encrypted Learning Intermediate Representation
## Motivation
Goals:
- support natural lowering between different representations
- enable quick iteration and minimal overhead by supporting various entry and exit points
- match mental models of data scientists, machine learning researchers, cryptographers, and systems engineers
- enable [information flow analysis](https://en.wikipedia.org/wiki/Information_flow_(information_theory)) of user-supplied computations
- allow efficient execution for both MPC and HE applications
- follow the functional paradigm and be strongly typed
- leverage existing work when possible, including autograd and execution
Non-goals:
- serve as a framework for cryptographic protocol theory and analysis; we have no notions of ideal functionality, adversary, simulator, nor environment
- support verification of cryptographic protocols and primitives; all of these are assumed to be black boxes that are secure by construction (but can be misused)
## Walk-through example
In the high-level Python eDSL:
```python
inputter0 = ...
inputter1 = ...
aggregator = ...
outputter = ...
@tfe.computation
def comp():
with inputter0:
x = store_in0.load("input")
with inputter1:
y = store_in1.load("input")
with aggregator:
z = x * y
v = x * z
with outputter:
store_out = store_out.save(v, "output")
comp()
```
Express as an MLIR program in the EL dialect:
```
func @main() {
%inputter0 = el.native.functionality()
%inputter1 = el.native.functionality()
%outputter = el.native.functionality()
%server0 = el.pond.server_functionality()
%server1 = el.pond.server_functionality()
%aggregator = el.pond.functionality(
inputters = [%inputter0, %inputter1],
outputter = [%outputter],
server0 = %server0,
server1 = %server1,
)
%cluster = el.functionality.union(
%inputter0, %inputter1, %aggregator, %outputter)
%comp = el.computation { func = %cluster } {
%comp_x = el.computation { func = %inputter0 } (%store) {
%x = el.load(%store, "input")
el.return %x
}
%comp_y = el.computation { func = %inputter1 } (%store) {
%y = el.load(%store, "input")
el.return %y
}
%comp_agg = el.computation { func = %aggregator } (%x, %y) {
%z = el.mul(%x, %y)
%v = el.mul(%x, %z)
el.return %v
}
%comp_output = el.computation { func = %outputter } (%store, %v) {
%updated_store = el.save(%store, "output", %v)
el.return %updated_store
}
}
%x = el.run %comp_x (%store_in0) : Tensor@inputter0
%y = el.run %comp_y (%store_in1) : Tensor@inputter1
%v = el.run %comp_agg (%x, %y) : Tensor@outputter
%updated_store_out = el.run %comp_output %v
el.return %updated_store_out
}
el.run %comp (%store_in0, %store_in1, %store_out)
}
```
Use functionality information to specify computation type (pass over `el.computation` operations):
```
func @main() {
%inputter0 = el.native.functionality()
%inputter1 = el.native.functionality()
%outputter = el.native.functionality()
%server0 = el.pond.server_functionality()
%server1 = el.pond.server_functionality()
%aggregator = el.pond.functionality(
inputters = [%inputter0, %inputter1],
outputter = [%outputter],
server0 = %server0,
server1 = %server1,
)
%cluster = el.functionality.union(
%inputter0, %inputter1, %aggregator, %outputter)
%comp = el.computation { func = %cluster } {
%comp_x = el.native.computation { func = %inputter0 } () {
%x = el.load("input")
el.return %x
}
%comp_y = el.native.computation { func = %inputter1 } () {
%y = el.load("input")
el.return %y
}
%comp_agg = el.pond.computation { func = %aggregator } (%x, %y) {
%z = el.mul(%x, %y)
%v = el.mul(%x, %z)
el.return %v
}
%comp_output = el.native.computation { func = %outputter } (%v) {
el.save(%v, "output")
el.return ()
}
}
%x = el.run %comp_x () : Tensor@inputter0
%y = el.run %comp_y () : Tensor@inputter1
%v = el.run %comp_agg (%x, %y) : Tensor@outputter
el.run %comp_output %v
el.return () // CHAIN TO ENSURE %comp_output IS RUN FIRST?
}
el.run %comp ()
}
```
Make operations more concrete based on computation type:
```
func @main() {
%inputter0 = el.native.functionality()
%inputter1 = el.native.functionality()
%outputter = el.native.functionality()
%server0 = el.pond.server_functionality()
%server1 = el.pond.server_functionality()
%aggregator = el.pond.functionality(
inputters = [%inputter0, %inputter1],
outputter = [%outputter],
server0 = %server0,
server1 = %server1,
)
%cluster = el.functionality.union(
%inputter0, %inputter1, %aggregator, %outputter)
%comp = el.computation { func = %cluster } {
%comp_x = el.native.computation { func = %inputter0 } () {
%x = el.native.load("input")
el.return %x
}
%comp_y = el.native.computation { func = %inputter1 } () {
%y = el.native.load("input")
el.return %y
}
%comp_agg = el.pond.computation { func = %aggregator } (%x, %y) {
%z = el.pond.mul(%x, %y)
%v = el.pond.mul(%x, %z)
el.return %v
}
%comp_output = el.native.computation { func = %outputter } (%v) {
el.native.save(%v, "output")
el.return ()
}
}
%x = el.run %comp_x () : Tensor@inputter0
%y = el.run %comp_y () : Tensor@inputter1
%v = el.run %comp_agg (%x, %y) : Tensor@outputter
el.run %comp_output %v
el.return () // CHAIN TO ENSURE %comp_output IS RUN FIRST?
}
el.run %comp ()
}
```
```
cluster = ...
comp = el.cluster.computation@cluster() {
comp_x = el.native.computation@inputter0(%store) -> Tensor@inputter0 {
x = el.native.load(%store, "input")
el.return(x)
}
comp_y = el.native.computation@inputter1(%store) -> Tensor@inputter1 {
x = el.native.load(%store, "input")
el.return(x)
}
x = comp_x() : Tensor@inputter0
y = comp_y() : Tensor@inputter1
comp_agg = el.pond.computation@aggregator(x, y) {
z = el.pond.mul(x, y) // HOW DO WE KNOW TO MASK?
v = el.pond.mul(x, z) // HOW DO WE REUSE MASKING?
el.return(v)
} : (Tensor@inputter0, Tensor@inputter1) -> Tensor@outputter
v = comp_agg(x, y) : Tensor@outputter
comp_output = el.native.computation@outputter(
v: Tensor@outputter
) -> () {
el.native.save(v, "output")
el.return()
}
comp_output(outputter)(v)
el.return()
}
comp()
```
Expand Pond operations:
```
comp_agg = el.pond.computation@aggregator(
x: Tensor@inputter0,
y: Tensor@inputter1
) -> Tensor@outputter {
x = el.pond.share@inputter0(x)
comp_x = el.pond.computation@inputter0(x) {
x_encoded = el.pond.encode(x) : EncodedTensor@inputter0
x_shared = el.pond.share(x_encoded) : SharedTensor@inputter0
x_masked = el.pond.mask(x_shared) : MaskedTensor@inputter0
el.return(el.pond.send(x_masked))
}
x_masked = comp_x(x) : MaskedTensor@servers
comp_y = el.pond.computation@inputter1(y) {
y_encoded = el.pond.encode(y) : EncodedTensor@inputter1
y_shared = el.pond.share(y_encoded) : SharedTensor@inputter1
y_masked = el.pond.mask(y_shared) : MaskedTensor@inputter1
el.return(el.pond.send(y_masked))
}
y_masked = comp_y(y) : MaskedTensor@servers
comp_mul = el.pond.computation@servers(x_masked, y_masked) {
z = el.pond.mul.masked(x_masked, y_masked) : Tensor@servers
z_masked = el.pond.mask(z) : MaskedTensor@servers
v = el.pond.mul(x_masked, z_masked) : Tensor@servers
el.return(el.pond.send(v))
}
v = comp_mul(x_masked, y_masked) : Tensor@outputter
}
v = comp_agg(x, y) : Tensor@outputter
```
## Abstract computations
The notion of an abstract computation might turn out to be useful in several cases, allowing for reuse by essentially specifying the *what* without the *where*. This would match the mental model of defining a protocol using abstract roles. Note that `%joint` is again a single object containing all parties, e.g.:
```mlir
func @my_protocol(%alice, %bob) {
%joint = el.functionality.union(%alice, %bob)
el.return el.computation { func = %joint } (%x) {
%comp_alice = el.computation { func = %alice } (%x) {
%y = el.mul %x %x
el.return %y
}
%comp_bob = el.computation { func = %bob } (%y) {
%z = el.add %y %y
el.return %z
}
el.return el.apply %comp_bob (el.apply %comp_alice %x)
}
}
%player0 = ...
%player1 = ...
%cluster = el.functionality.union(%player0, %player1, ...)
%main_comp = el.computation { func = %cluster } (%x, %y) {
// run the same protocol twice with roles swapped
%comp0 = el.instantiate @my_protocol(%player0, %player1) : !el.comp
%comp1 = el.instantiate @my_protocol(%player1, %player0) : !el.comp
%y = el.apply %comp0 %x
%z = el.apply %comp1 %y
el.return %z
}
```
As an alternative, we could introduce a special `el.protocol` type and operations to model this.
## Rationale
### Inspiration and mental models
On the cryptographic side, two papers in particular have influenced the current design:
- [A Simpler Variant of Universally Composable Security for Standard Multiparty Computation](https://eprint.iacr.org/2014/553)
- [An Equational Approach to Secure Multi-party Computation](https://cseweb.ucsd.edu/~daniele/papers/ITCS13.pdf)
The former not least for its use of the UC model, but also for its limitation to a fixed set of parties. The latter as a strong indication that thinking functionally about secure computation also has good prospects.
### Computation subset requirement
EL requires that a computation with parties `S` must be run from within a computation with parties `T >= S`, i.e. a smaller set of parties cannot instruct a larger set of parties to run a computation.
The rationale behind this is mostly one of interpretability, namely that we want the mental model to be that "computations are run jointly by a cluster" as opposed to "computations are launched by a party". In light of this, the question is what it would mean for parties `S` to run a computation involving parties `T >= S`:
- `S` is clearly not running the computation since it involves more parties.
- If `S` is instead seen as the parties invoking the computation then that could hint towards `S` being (trusted) coordinators, which is a paradigm we want to avoid.
- If `S` is instead seen as the destination for the return values then we would likely have to add a constraint anyway that ensures that the values only reside on (a subset of) `S`.
One implication of this is that a cryptographic functionality using e.g. Pond must be made up of not only the servers performing the computation, but also all inputters and the outputter, since these will be instructed to perform e.g. sharing and reconstruction.