# HW 1 ECE-GY9243 Optimal and Learning Control for Robotics
for better reading:
https://hackmd.io/VRwB62W7Qhm6dBMyZf9q-Q?view
## ex 1
```python=
def dp_solver(N):
def x_next(x,u):
if -2 <= -x+1+u and -x+1+u <= 2:
return -x+1+u
if -x+1+u > 2:
return 2
return -2
dict_need_control_for = {}
possible_x = [-2, -1, 0, 1, 2]
possible_u = [-1, 0, 1]
for x in possible_x:
for u in possible_u:
if (x, x_next(x,u)) in dict_need_control_for:
if abs(u) < abs(dict_need_control_for[(x, x_next(x,u))]):
dict_need_control_for[(x, x_next(x,u))] = u
else:
dict_need_control_for[(x, x_next(x,u))] = u
def cost_to_go(x_k,u_k):
return 2*abs(x_k) + abs(u_k)
def final_cost(x_N):
return x_N**2
def cost_from_to(this_x, next_x):
if (this_x, next_x) not in dict_need_control_for:
return float('inf')
u = dict_need_control_for[(this_x, next_x)]
return cost_to_go(this_x,u)
control_chart = [x[:] for x in [[None] * 5] * (N)]
dp_cost_chart = [x[:] for x in [[None] * 5] * (N+1)]
for x_N in possible_x:
dp_cost_chart[N][x_N+2] = final_cost(x_N)
for n in range(N-1,-1,-1):
for this_x in possible_x:
cur_min_cost = float('inf')
best_control = None
for next_x in possible_x:
tmp_cost = cost_from_to(this_x, next_x) + dp_cost_chart[n+1][next_x+2]
if cur_min_cost > tmp_cost:
cur_min_cost = tmp_cost
best_control = dict_need_control_for[(this_x, next_x)]
control_chart[n][this_x+2] = best_control
dp_cost_chart[n][this_x+2] = cur_min_cost
return dp_cost_chart, control_chart
N = 3
dp_cost_chart, control_chart = dp_solver(N)
print("cost to go")
for ele in dp_cost_chart:
print(ele)
print("optimal controls")
for ele in control_chart:
print(ele)
print()
for ini in [0,-2,2]:
state=[ini] # initial state
control=[]
i=0
while len(state)<4:
now = state[-1]
state.append(x_next(now,control_chart[i][now+2]))
control.append(control_chart[i][now+2])
i+=1
print(state)
print(control)
```
### a)
cost-to-go
```
x=-2 -1 0 1 2
n=0 [10, 6, 3, 4, 7]
n=1 [ 9, 5, 2, 3, 6]
n=2 [ 8, 4, 1, 2, 5]
n=3 [ 4, 1, 0, 1, 4]
```
optimal control
```
x=-2 -1 0 1 2
n=0 [0, -1, -1, 0, 1]
n=1 [0, -1, -1, 0, 1]
n=2 [0, -1, -1, 0, 0]
```
### b)
#### $x_0 = 0$
```
states: [0, 0, 0, 0]
opt. controls: [-1, -1, -1]
```
#### $x_0 = -2$
```
states: [-2, 2, 0, 0]
opt. controls: [0, 1, -1]
```
#### $x_0 = 2$
```
states: [2, 0, 0, 0]
opt. controls: [1, -1, -1]
```
### c)
```python=
def x_next(x,u):
def x_next_03(x,u):
if -2 <= -x+0+u and -x+1+u <= 2:
return -x+0+u
if -x+0+u > 2:
return 2
return -2
def x_next_07(x,u):
if -2 <= -x+1+u and -x+1+u <= 2:
return -x+1+u
if -x+1+u > 2:
return 2
return -2
return 0.3*x_next_03(x,u)+ 0.7*x_next_03(x,u)
def cost(x_k,u_k):
return 2*abs(x_k) + abs(u_k)
collect = []
for x0 in [2]:
for u0 in [-1, 0, 1]:
for u1 in [-1, 0, 1]:
for u2 in [-1, 0, 1]:
x1 = x_next(x0, u0)
x2 = x_next(x1, u1)
x3 = x_next(x2, u2)
total_cost = cost(x0, u0)+cost(x1, u1)+cost(x2, u2)+x3**2
collect.append([(x0,x1,x2,x3),(u0,u1,u2),total_cost])
print(collect)
```
```
x_0 = -2
[states: (-2, 1.0, 0.0, 0.0),
controls: (-1, 1, 0),
total_cost: 8.0]
x_0 = -1
[(-1, 0.0, 0.0, 0.0),
(-1, 0, 0),
3.0],
x_0 = 0
[(0, 0.0, 0.0, 0.0),
(0, 0, 0),
0.0],
x_0 = 1
[(1, 0.0, 0.0, 0.0),
(1, 0, 0),
3.0],
x_0 = 2
[(2, -1.0, 0.0, 0.0),
(1, -1, 0),
8.0],
```
### d)
## ex 2
### a)
```python=
def dp_solver(N):
def x_next(x,u):
return x+u
def cost_to_go(x_k,u_k):
if abs(x_next(x_k, u_k))>4:
return float('inf')
return (x_k+4)**2+u_k**2
def final_cost(x_N):
if x_N==0:
return 0
return float('inf')
def cost_from_to(this_x, next_x):
u = next_x - this_x
if abs(u)>2:
return float('inf')
return (this_x+4)**2+u**2
possible_x = list(range(-4,5))
control_chart = [x[:] for x in [[None] * 9] * (N)]
dp_cost_chart = [x[:] for x in [[None] * 9] * (N+1)]
for x_N in possible_x:
dp_cost_chart[N][x_N+4] = final_cost(x_N)
for n in range(N-1,-1,-1):
for this_x in possible_x:
cur_min_cost = float('inf')
best_control = None
for next_x in possible_x:
tmp_cost = cost_from_to(this_x, next_x) + dp_cost_chart[n+1][next_x+4]
if cur_min_cost > tmp_cost:
cur_min_cost = tmp_cost
best_control = next_x - this_x
control_chart[n][this_x+4] = best_control
dp_cost_chart[n][this_x+4] = cur_min_cost
return dp_cost_chart, control_chart
```
```python=
N = 15
dp_cost_chart, control_chart = dp_solver(N)
for ele in dp_cost_chart:
print(ele)
for ele in control_chart:
print(ele)
```
```python=
state=[4] # initial state
control=[]
i=0
while len(state)<16:
now = state[-1]
state.append(now + control_chart[i][now+4])
control.append(control_chart[i][now+4])
i+=1
print(state)
print(control)
```
### b)
at this step
$$
(4+4)^2 + (-2)^2=68
$$
until the goal
$$
146
$$
### c)


#### $x_0 = 0$
```
states: `[0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]`
control: `[-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]`
```
#### $x_0 = 1$
```
states: `[1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]`
control: `[-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]`
```
#### $x_0 = -4$
```
states: `[-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]`
control: `[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]`
```
### d)
#### $x_0 = 0$
```
states: `[0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### $x_0 = 1$
```
states: `[1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### $x_0 = -4$
```
states: `[-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### How does the optimal cost change compared to the previous cost function? And the optimal control/state sequence? Can you explain why?
There is no decided goals. Trajectories try to minimize cost-of-each-step. They all go to `-4` because there is no cost to stay on `-4`.
### e)
#### x_0 = 0
```
states: `[0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### x_0 = 1
```
states: `[1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### x_0 = -4
```
states: `[-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]`
control: `[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`
```
#### How does the optimal cost change compared to the previous cost function? And the optimal control/state sequence? Can you explain why?
No control cost and no decided goals. The system will try to minimize the state cost and stays there.