###### tags: `ebpf`
# region type system inference algorithm
```
/*
language supported (so far):
stmt ::= reg = *(type *)(reg oprtr const)
| *(type *)(reg oprtr const) = reg
| reg = reg
| reg oprtr= val
type ::= u64 | u32 | u16 | u8
oprtr ::= + | -
val ::= reg | const
*/
```
## Constants
```
int STACK_BEGIN = 0;
int CTX_BEGIN = 0;
int PACKET_BEGIN = 0;
int SHARED_BEGIN = 0;
int PACKET_END = -4100;
int PACKET_META = -1;
int MAX_PACKET_SIZE = 0xffff;
```
## Given
```
// assuming packet_begin, packet_end, packet_meta locations in context and context size have been given
extern int PACKET_BEGIN_LOC;
extern int PACKET_END_LOC;
extern int PACKET_META_LOC;
extern int CTX_SIZE;
```
## Definitions
```
using interval_t = std::pair<int, int>;
using region_t = enum {T_PACKET, T_CTX, T_SHARED, T_STACK};
using map_key_size_t = unsigned int;
using map_value_size_t = unsigned int;
using ptr_no_off_t = struct {
region_t m_r;
};
using ptr_with_offset_t = struct {
region_t m_r;
int m_offset;
};
using ptr_t = ptr_no_off_t | ptr_with_off_t;
using mapfd_t = struct {
int m_mapfd_t;
map_key_size_t m_key_size;
map_value_size_t m_value_size;
// also defined EbpfMapValueType m_value_type;
};
using ctx_t = std::unordered_map<int, ptr_no_off_t>;
using ptr_or_mapfd_t = ptr_t | mapfd_t;
using ptr_or_mapfd_cells_t = std::pair<ptr_or_mapfd_t, int>;
using stack_t = std::unordered_map<int, ptr_or_mapfd_types_t>;
using register_types_t = std::array<ptr_or_mapfd_t, 11>
using state_t = struct {
stack_t m_stack;
register_types_t m_registers;
ctx_t m_ctx;
};
```
## Initializations
```
map<cfg_node, state_t> states;
states[0].m_ctx = ctx_t{
PACKET_BEGIN_LOC: ptr_no_off_t{m_r: T_PACKET},
PACKET_META_LOC: ptr_no_off_t{m_r: T_PACKET}
PACKET_END_LOC: ptr_no_off_t{m_r: T_PACKET}
};
states[0].m_registers[1] = ptr_with_off_t{m_r: T_CTX, m_offset: 0};
states[0].m_registers[10] = ptr_with_off_t{m_r: T_STACK, m_offset: 512};
```
## Algorithm
```
def operator||(stack_t stack1, stack_t stack2):
// stack is bottom
stack_t stack;
for (key, value) in stack1:
if key not in stack2 or value is stack2[key]:
stack[key] = value;
else:
type_error("layout of stack regions at join point do not have same fields");
for (key, value) in stack2:
if key not in stack1:
stack[key] = value;
return stack;
def operator||(state_t state1, state_t state2):
// state is bottom
state_t state;
state.stack = state1.stack || state2.stack;
for (key, value) in state1.curr_def:
ptr_t type1 = state1.reg_type_env[value];
if key in state2.curr_def:
ptr_t type2 = state2.reg_type_env[state2.curr_def[key]];
if type1 is type2:
state.reg_type_env[value] = type1;
state.curr_def[key] = value;
return state;
```
```
def join_at(cfg_node bb):
// state is bottom
state_t state;
cfg_node[] all_preds = cfg.get_all_predecessors(bb);
for i in [0..all_preds.length()]:
if i == 0:
state = states[all_preds[i]]
else:
state = state || states[all_preds[i]];
return state;
```
```
state_t state = {
map{};
map{r1@0: {T_CTX, 0}, r10@0: {T_STACK, 512}},
map{r1: r1@0, r10: r10@0}
};
```
```
for bb in cfg.topological_sort():
if cfg.num_of_predecessors(bb) == 1:
state = states[cfg.get_predecessor(bb)];
else if cfg.num_of_predecessors(bb) > 1:
state = join_at(bb);
for instr in bb:
stack_t stack = state.stack;
reg_type_env_t reg_type_env = state.reg_type_env;
curr_def_t curr_def = state.curr_def;
int line_num = get_line_number(instr);
match(instr) with:
```
```
va = *(T *)(vb op vc) ->
if state is bottom:
return;
if vb not in curr_def:
type_error("loading from an unknown pointer, or from number");
vb_curr_def = curr_def[vb];
if vb_curr_def not in reg_type_env or reg_type_env[vb_curr_def] is not ptr_t:
type_error("loading from an unknown pointer, or from number");
ptr_t type = reg_type_env[vb_curr_def];
ptr_t field_type;
if type is ptr_with_offset_t:
int load_at = vc+type.offset;
if type.region is T_STACK:
assert(type.region is T_STACK);
if load_at not in stack:
// forget the type of va
return;
field_type = stack[load_at];
else:
assert(type.region is T_CTX);
if load_at not in ctx:
// forget the type of va
return;
field_type = ctx[load_at];
else:
// forget the information about va
return;
reg_type_env[va@line_num] = field_type;
curr_def[va] = va@line_num;
```
```
*(T *)(va op vb) = vc ->
if state is bottom:
return;
if va not in curr_def:
type_error("storing at an unknown pointer, or from number");
va_curr_def = curr_def[va];
if va_curr_def not in reg_type_env or reg_type_env[va_curr_def] is not ptr_t:
type_error("storing an unknown pointer, or from number");
ptr_t va_type = reg_type_env[va_curr_def];
if va_type is ptr_with_offset_t:
int store_at = va_type.offset+vb;
if va_type.region is T_STACK:
if vc not in curr_def or curr_def[vc] not in reg_type_env:
// forget the offset at the stack
return;
ptr_t vc_type = reg_type_env[curr_def[vc]];
for i in store_at+1..store_at+width(T):
if i in stack:
type_error("type being stored already overlaps with previous type");
stack[store_at] = vc_type;
else if va_type is T_CTX:
if vc in curr_def and curr_def[vc] in reg_type_env:
type_error("we cannot store a pointer into ctx");
else:
if vc_type in reg_type_env and vc_type.region is not T_STACK:
// remember we also allow stack pointers to not have offset but then they should not be used to read any offset later
type_error("we cannot store pointers into packet or shared");
```
```
va = vb ->
if state is bottom:
return;
if vb not in curr_def:
// forget type of va
return;
vb_curr_def = curr_def[vb];
if vb_curr_def not in reg_type_env or reg_type_env[vb_curr_def] is not ptr_t:
// forget type of va
return;
reg_type_env[va@line_num] = reg_type_env[vb_curr_def];
curr_def[va] = va@line_num;
```
```
va oprtr= vb ->
if state is bottom:
return;
if vb is reg:
if vb in curr_def:
type_error("addition of two pointers not allowed");
if va in curr_def and curr_def[va] in reg_type_env:
va_type = reg_type_env[curr_def[va]];
if va_type is ptr_with_offset_t:
if va_type.region is T_STACK:
reg_type_env[va@line_num] = (ptr_t)va_type;
curr_def[va] = va@line_num;
else:
reg_type_env[va@line_num] = va_type;
curr_def[va] = va@line_num;
else:
if va in curr_def and curr_def[va] in reg_type_env:
va_type = reg_type_env[curr_def[va]];
if va_type is ptr_with_offset_t:
int new_off = va_type.offset+vb;
va_type.offset = new_off; // updating in-place
reg_type_env[va@line_num] = va_type;
curr_def[va] = va@line_num;
else:
reg_type_env[va@line_num] = va_type;
curr_def[va] = va@line_num;
state.stack = stack;
state.reg_type_env = reg_type_env;
state.curr_def = curr_def;
states[bb] = state;
```
```
def initial_pointer_checks(val va, val vb, oprtr op, reg_type_env_t reg_type_env):
if va is reg and vb is reg:
if va in reg_type_env and reg_type_env[va] is ptr_t
and vb in reg_type_env and reg_type_env[vb] is ptr_t:
if op is +:
type_error("two pointers cannot be added");
if op is -:
type_error("memory operation cannot use a number");
else:
type_error("memory operation using unknown types");
if va is const and vb is const:
type_error("memory operation cannot use a number");
if va is reg and va in reg_type_env and reg_type_env[va] is ptr_t:
return (va,vb);
else if vb is reg and vb in reg_type_env and reg_type_env[vb] is ptr_t:
return (vb,va);
else:
type_error("memory operation using unknown types");
```
## Extra definitions
```
using reg_at_loc_t = struct {
reg_t name;
int location;
};
// we use a compact way to show this as name@location
using reg_type_env_t = map<reg_at_loc_t, ptr_t>;
using curr_def_t = map<reg_t, reg_at_loc_t>;
```