# HW 1 ECE-GY9243 Optimal and Learning Control for Robotics

For better reading: https://hackmd.io/VRwB62W7Qhm6dBMyZf9q-Q?view

## ex 1

```python=
def x_next(x, u):
    # dynamics x_{k+1} = -x_k + 1 + u_k, saturated to [-2, 2]
    if -2 <= -x + 1 + u <= 2:
        return -x + 1 + u
    if -x + 1 + u > 2:
        return 2
    return -2


def dp_solver(N):
    # for every reachable (x_k, x_{k+1}) pair, remember the control with the
    # smallest magnitude that realizes that transition
    dict_need_control_for = {}
    possible_x = [-2, -1, 0, 1, 2]
    possible_u = [-1, 0, 1]
    for x in possible_x:
        for u in possible_u:
            if (x, x_next(x, u)) in dict_need_control_for:
                if abs(u) < abs(dict_need_control_for[(x, x_next(x, u))]):
                    dict_need_control_for[(x, x_next(x, u))] = u
            else:
                dict_need_control_for[(x, x_next(x, u))] = u

    def cost_to_go(x_k, u_k):
        return 2 * abs(x_k) + abs(u_k)

    def final_cost(x_N):
        return x_N ** 2

    def cost_from_to(this_x, next_x):
        if (this_x, next_x) not in dict_need_control_for:
            return float('inf')
        u = dict_need_control_for[(this_x, next_x)]
        return cost_to_go(this_x, u)

    control_chart = [x[:] for x in [[None] * 5] * N]
    dp_cost_chart = [x[:] for x in [[None] * 5] * (N + 1)]
    for x_N in possible_x:
        dp_cost_chart[N][x_N + 2] = final_cost(x_N)

    # backward recursion of the Bellman equation
    for n in range(N - 1, -1, -1):
        for this_x in possible_x:
            cur_min_cost = float('inf')
            best_control = None
            for next_x in possible_x:
                tmp_cost = cost_from_to(this_x, next_x) + dp_cost_chart[n + 1][next_x + 2]
                if cur_min_cost > tmp_cost:
                    cur_min_cost = tmp_cost
                    best_control = dict_need_control_for[(this_x, next_x)]
            control_chart[n][this_x + 2] = best_control
            dp_cost_chart[n][this_x + 2] = cur_min_cost
    return dp_cost_chart, control_chart


N = 3
dp_cost_chart, control_chart = dp_solver(N)
print("cost to go")
for ele in dp_cost_chart:
    print(ele)
print("optimal controls")
for ele in control_chart:
    print(ele)
print()

# forward simulation of the optimal policy from three initial states
for ini in [0, -2, 2]:
    state = [ini]  # initial state
    control = []
    i = 0
    while len(state) < 4:
        now = state[-1]
        state.append(x_next(now, control_chart[i][now + 2]))
        control.append(control_chart[i][now + 2])
        i += 1
    print(state)
    print(control)
```

### a)

cost-to-go

```
x =  -2  -1   0   1   2
n=0 [10,  6,  3,  4,  7]
n=1 [ 9,  5,  2,  3,  6]
n=2 [ 8,  4,  1,  2,  5]
n=3 [ 4,  1,  0,  1,  4]
```

optimal control

```
x = -2  -1   0   1   2
n=0 [0, -1, -1,  0,  1]
n=1 [0, -1, -1,  0,  1]
n=2 [0, -1, -1,  0,  0]
```

### b)

#### $x_0 = 0$

```
states:        [0, 0, 0, 0]
opt. controls: [-1, -1, -1]
```

#### $x_0 = -2$

```
states:        [-2, 2, 0, 0]
opt. controls: [0, 1, -1]
```

#### $x_0 = 2$

```
states:        [2, 0, 0, 0]
opt. controls: [1, -1, -1]
```
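As a sanity check (not part of the assignment code), the trajectories above can be replayed and their accumulated cost compared against the cost-to-go table in a). Below is a minimal sketch, assuming the same saturated dynamics and stage cost $2|x_k| + |u_k|$ with terminal cost $x_N^2$ as in the solver above:

```python=
# Sketch: replay the part-(b) trajectories and check that the accumulated
# stage cost plus terminal cost equals J_0 from the table in part (a).

def x_next(x, u):
    # same saturated dynamics as above
    v = -x + 1 + u
    return max(-2, min(2, v))

def trajectory_cost(x0, controls):
    x, total = x0, 0
    for u in controls:
        total += 2 * abs(x) + abs(u)   # stage cost
        x = x_next(x, u)
    return total + x ** 2              # terminal cost x_N^2

# (initial state, optimal controls from part (b), J_0 from the table)
cases = [(0, [-1, -1, -1], 3), (-2, [0, 1, -1], 10), (2, [1, -1, -1], 7)]
for x0, controls, J0 in cases:
    print(x0, trajectory_cost(x0, controls), "table:", J0)
```

All three rollouts reproduce the tabulated values $J_0(0) = 3$, $J_0(-2) = 10$ and $J_0(2) = 7$.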
### c)

```python=
def x_next(x, u):
    # expected next state: with weight 0.3 the dynamics is -x + 0 + u,
    # with weight 0.7 it is -x + 1 + u, each saturated to [-2, 2]
    def x_next_03(x, u):
        if -2 <= -x + 0 + u <= 2:
            return -x + 0 + u
        if -x + 0 + u > 2:
            return 2
        return -2

    def x_next_07(x, u):
        if -2 <= -x + 1 + u <= 2:
            return -x + 1 + u
        if -x + 1 + u > 2:
            return 2
        return -2

    return 0.3 * x_next_03(x, u) + 0.7 * x_next_07(x, u)


def cost(x_k, u_k):
    return 2 * abs(x_k) + abs(u_k)


# brute-force enumeration of all length-3 control sequences for each x_0
collect = []
for x0 in [-2, -1, 0, 1, 2]:
    for u0 in [-1, 0, 1]:
        for u1 in [-1, 0, 1]:
            for u2 in [-1, 0, 1]:
                x1 = x_next(x0, u0)
                x2 = x_next(x1, u1)
                x3 = x_next(x2, u2)
                total_cost = cost(x0, u0) + cost(x1, u1) + cost(x2, u2) + x3 ** 2
                collect.append([(x0, x1, x2, x3), (u0, u1, u2), total_cost])
print(collect)
```

Minimum-cost sequence for each initial state (selected from `collect`):

```
x_0 = -2: states (-2, 1.0, 0.0, 0.0), controls (-1, 1, 0), total cost 8.0
x_0 = -1: states (-1, 0.0, 0.0, 0.0), controls (-1, 0, 0), total cost 3.0
x_0 =  0: states ( 0, 0.0, 0.0, 0.0), controls ( 0, 0, 0), total cost 0.0
x_0 =  1: states ( 1, 0.0, 0.0, 0.0), controls ( 1, 0, 0), total cost 3.0
x_0 =  2: states ( 2, -1.0, 0.0, 0.0), controls ( 1, -1, 0), total cost 8.0
```

### d)

## ex 2

### a)

```python=
def dp_solver(N):
    def x_next(x, u):
        return x + u

    def cost_to_go(x_k, u_k):
        if abs(x_next(x_k, u_k)) > 4:
            return float('inf')
        return (x_k + 4) ** 2 + u_k ** 2

    def final_cost(x_N):
        # terminal constraint: the trajectory must end at the goal x_N = 0
        if x_N == 0:
            return 0
        return float('inf')

    def cost_from_to(this_x, next_x):
        u = next_x - this_x
        if abs(u) > 2:
            return float('inf')
        return (this_x + 4) ** 2 + u ** 2

    possible_x = list(range(-4, 5))
    control_chart = [x[:] for x in [[None] * 9] * N]
    dp_cost_chart = [x[:] for x in [[None] * 9] * (N + 1)]
    for x_N in possible_x:
        dp_cost_chart[N][x_N + 4] = final_cost(x_N)

    # backward recursion of the Bellman equation
    for n in range(N - 1, -1, -1):
        for this_x in possible_x:
            cur_min_cost = float('inf')
            best_control = None
            for next_x in possible_x:
                tmp_cost = cost_from_to(this_x, next_x) + dp_cost_chart[n + 1][next_x + 4]
                if cur_min_cost > tmp_cost:
                    cur_min_cost = tmp_cost
                    best_control = next_x - this_x
            control_chart[n][this_x + 4] = best_control
            dp_cost_chart[n][this_x + 4] = cur_min_cost
    return dp_cost_chart, control_chart
```

```python=
N = 15
dp_cost_chart, control_chart = dp_solver(N)
for ele in dp_cost_chart:
    print(ele)
for ele in control_chart:
    print(ele)
```

```python=
state = [4]  # initial state
control = []
i = 0
while len(state) < 16:
    now = state[-1]
    state.append(now + control_chart[i][now + 4])
    control.append(control_chart[i][now + 4])
    i += 1
print(state)
print(control)
```

### b)

Cost incurred at this step, starting from $x = 4$ and applying $u = -2$:

$$
(4+4)^2 + (-2)^2 = 68
$$

Optimal cost from $x_0 = 4$ until the goal:

$$
146
$$

### c)

![](https://i.imgur.com/MGo5Dt2.png)
![](https://i.imgur.com/NmNCwNS.png)

#### $x_0 = 0$

```
states:  [0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]
control: [-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]
```

#### $x_0 = 1$

```
states:  [1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]
control: [-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]
```

#### $x_0 = -4$

```
states:  [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -2, 0]
control: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2]
```
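The trajectories above were obtained by rolling the optimal policy forward from each initial state. A minimal sketch of that rollout (my addition), assuming `dp_solver` and `control_chart` come from the code in ex 2 a):

```python=
# Sketch: forward rollout of the optimal policy for several initial states,
# generalizing the single x_0 = 4 rollout from part (a).

def rollout(x0, control_chart, N=15):
    state, control = [x0], []
    for i in range(N):
        now = state[-1]
        u = control_chart[i][now + 4]   # optimal control at stage i, state `now`
        control.append(u)
        state.append(now + u)
    return state, control

dp_cost_chart, control_chart = dp_solver(15)
for x0 in [0, 1, -4]:
    state, control = rollout(x0, control_chart)
    print(x0, state, control)
```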
### d)

#### $x_0 = 0$

```
states:  [0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### $x_0 = 1$

```
states:  [1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### $x_0 = -4$

```
states:  [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### How does the optimal cost change compared to the previous cost function? And the optimal control/state sequence? Can you explain why?

There is no longer a fixed goal state, so each trajectory simply minimizes the per-step cost. All trajectories end up at `-4` and stay there, because there is no cost for remaining at `-4`: the state cost $(x+4)^2$ is zero there and staying requires $u = 0$.

### e)

#### $x_0 = 0$

```
states:  [0, -2, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [-2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### $x_0 = 1$

```
states:  [1, -1, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [-2, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### $x_0 = -4$

```
states:  [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4]
control: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
```

#### How does the optimal cost change compared to the previous cost function? And the optimal control/state sequence? Can you explain why?

With no control cost and no goal state, the system only tries to minimize the state cost $(x+4)^2$, so it moves toward `-4` and stays there once it arrives.
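For reference, a minimal sketch of how the cost terms inside `dp_solver` might be modified for parts (d) and (e). The exact cost functions required by the assignment are not restated above, so these definitions are assumptions based on the explanations given (no terminal goal in (d), additionally no control cost in (e)):

```python=
# Sketch (assumption): cost variants for parts (d) and (e).
# (d): keep the stage cost (x+4)^2 + u^2 but drop the terminal constraint x_N = 0.
# (e): additionally drop the control cost, leaving only the state cost (x+4)^2.

def final_cost_d(x_N):
    # no goal state: every terminal state is free
    return 0

def cost_from_to_d(this_x, next_x):
    u = next_x - this_x
    if abs(u) > 2:
        return float('inf')
    return (this_x + 4) ** 2 + u ** 2

def cost_from_to_e(this_x, next_x):
    u = next_x - this_x
    if abs(u) > 2:
        return float('inf')
    return (this_x + 4) ** 2   # state cost only
```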