---
tags: Q_Learning
---
# 透過Qlearning在網路拓樸上尋徑
強化式學習
## dijkstra.py
### output
```
path= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]
time difference: 202375
```
### init
adjacency

基本上就是讓每個點都可以被兩個以上的點到達和被到達 的一個圖

```python
from collections import defaultdict
import datetime
# 用於graph本體
adjacency=defaultdict(lambda:defaultdict(lambda:None))
src=512
dst=1023
n=1023
# 建立一個 graph
for i in range(1,int(n/2)+1):
if 2*i<=(n-1):
for j in [2*i, 2*i+1]:
#print(i,j)
adjacency[i][j]=1 # 1是指成本還是只是 True 代表可達?
adjacency[j][i]=1 # 構成無向邊
```
### main
```python
start=datetime.datetime.now()
get_path(src,dst)
end=datetime.datetime.now()
diff=end-start
print("time difference:", diff.microseconds)
```
### get_path(src, dst)
```python
def get_path(src,dst):
global adjacency, n
distance = {}
previous = {}
for dpid in range(1,n+1): # 1 ~ 1023
distance[dpid] = float('Inf') # 初始化 distance 1~1023 = Inf
previous[dpid] = None # 初始化 previous 1~1023 = None
distance[src]=0
# distance = {1:inf, 2:inf, ..., 512:0, ... 1023:inf}
# previous = {1:None, 2:None, ... 1023:None}
Q=set(range(1,n+1))
# Q = {1,2,...n}
#print "Q=", Q
while len(Q)>0:
u = minimum_distance(distance, Q) # 0 or min
#print "u=", u
Q.remove(u)
#print "After removing ", u, " Q=", Q
for p in range(1,n+1): # 1 ~ 1023
if adjacency[u][p]!=None: # u 節點 到其他節點 的距離(不可達為None)
#print u, "--------", p
# 第一次進入這邊是u:512, p:256 ,adjacency[512][256]=1 代表u節點第一個可達的點 (p節點)
w = 1 # 成本?
# distance = {1:inf, 2:inf, ..., 512:0, ... 1023:inf}
if distance[u] + w < distance[p]: # 可到達u節點的最小距離 +1 < 可到達p節點的最小距離 ?(判斷是否有更短距離的節點可達p節點)
distance[p] = distance[u] + w # 記錄可達p節點的最短距離 + 1?
previous[p] = u # 記錄距離p節點最短的節點
#print "distance=", distance, " previous=", previous
r=[]
p=dst
r.append(p)
q=previous[p] # 最短路徑可達終點的前一個節點
while q is not None:
if q == src:
r.append(q)
break
p=q
r.append(p)
q=previous[p] # 最短路徑可達p點的前一個節點
r.reverse()
if src==dst:
path=[src] # 當終點等於起點時,避免選擇到環路而不是本身
else:
path=r
print("path=", path)
```
path= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127,
255, 511, 1023]
time difference: 201360
### minimum_distance(distance, Q)
```python
def minimum_distance(distance, Q):
min = float('Inf')
node = 0
for v in Q:
if distance[v] < min: # if distance存在
# distance[v] 預設為 inf
min = distance[v]
node = v #記錄 成立時 (distance中)的位置
return node # distance中最後一個有賦値的 位置
```
---
## dijkstra_random.py
### output
```txt
path= [1, 2, 4, 10]
time difference: 0 0
```
### init
```python
from collections import defaultdict
import datetime
import random
adjacency=defaultdict(lambda:defaultdict(lambda:None))
random.seed(1)
src=1
dst=10
n=10 # 節點數
for i in range(1,n): # 1~9
flag=0
for j in range(i+1, n+1): # 1~10 -> 9~10
if random.uniform(0,1)<0.01: # 0~1間的隨機數 < 0.01 -> 1%
#print(i,j)
adjacency[i][j]=1
adjacency[j][i]=1
flag=1
if flag==0: # 99% 沒有設定無向邊時
choice=random.choice(range(i+1, n+1)) # 在 i+1 ~ n 間隨機選一個數
#print(i,choice)
adjacency[i][choice]=1 # 設定無向邊
adjacency[choice][i]=1
# 1% 99% 設定無向邊 看起來都是隨機設? 那為甚麼不要100% 都走flag==0時 的隨機?
```
### main
```python
start=datetime.datetime.now()
get_path(src,dst)
end=datetime.datetime.now()
diff=end-start
print("time difference:", diff.seconds, diff.microseconds)
```
### get_path(src, dst)
```python
def get_path(src,dst):
global adjacency, n
distance = {}
previous = {}
for dpid in range(1,n+1): # 1 ~ 1023
distance[dpid] = float('Inf') # 初始化 distance 1~1023 = Inf
previous[dpid] = None # 初始化 previous 1~1023 = None
distance[src]=0
# distance = {1:inf, 2:inf, ..., 512:0, ... 1023:inf}
# previous = {1:None, 2:None, ... 1023:None}
Q=set(range(1,n+1))
# Q = {1,2,...n}
#print "Q=", Q
while len(Q)>0:
u = minimum_distance(distance, Q) # 0 or min
#print "u=", u
Q.remove(u)
#print "After removing ", u, " Q=", Q
for p in range(1,n+1): # 1 ~ 1023
if adjacency[u][p]!=None: # u 節點 到其他節點 的距離(不可達為None)
#print u, "--------", p
# 第一次進入這邊是u:512, p:256 ,adjacency[512][256]=1 代表u節點第一個可達的點 (p節點)
w = 1 # 成本?
# distance = {1:inf, 2:inf, ..., 512:0, ... 1023:inf}
if distance[u] + w < distance[p]: # 可到達u節點的最小距離 +1 < 可到達p節點的最小距離 ?(判斷是否有更短距離的節點可達p節點)
distance[p] = distance[u] + w # 記錄可達p節點的最短距離 + 1?
previous[p] = u # 記錄距離p節點最短的節點
#print "distance=", distance, " previous=", previous
r=[]
p=dst
r.append(p)
q=previous[p] # 最短路徑可達終點的前一個節點
while q is not None:
if q == src:
r.append(q)
break
p=q
r.append(p)
q=previous[p] # 最短路徑可達p點的前一個節點
r.reverse()
if src==dst:
path=[src] # 當終點等於起點時,避免選擇到環路而不是本身
else:
path=r
print("path=", path)
```
path= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127,
255, 511, 1023]
time difference: 201360
### minimum_distance(distance, Q)
```python
def minimum_distance(distance, Q):
min = float('Inf')
node = 0
for v in Q:
if distance[v] < min: # if distance存在
# distance[v] 預設為 inf
min = distance[v]
node = v #記錄 成立時 (distance中)的位置
return node # distance中最後一個有賦値的 位置
```
---
## qlearning_tree.py
### output
```txt
A_Z_dict==A_Z_dict={1: [2, 3], 2: [1, 4, 5], ..., 1023: [511]}
R={1: {2: 1, 3: 1}, 2: {1: 1, 4: 1, 5: 1}, ..., 1023: {511: 1}}
Q: {1: {2: 100, 3: 100}, 2: {1: 100, 4: 100, 5: 100}, 1023: {511: 100}}
in get_result(), start= 512 end= [1023]
================================================== e= 30
Q= {1: {2: 110.987043, 3: 109.0}, 2: {1: 109.999998, 4: 111.789482, 5: 110.751853}, ..., , 1023: {511: 100}}
node_use= [512, 256] next_level= [256]
node_use= [512, 256, 128] next_level= [128]
node_use= [512, 256, 128, 64] next_level= [64]
node_use= [512, 256, 128, 64, 32] next_level= [32]
node_use= [512, 256, 128, 64, 32, 16] next_level= [16]
node_use= [512, 256, 128, 64, 32, 16, 8] next_level= [8]
node_use= [512, 256, 128, 64, 32, 16, 8, 4] next_level= [4]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2] next_level= [2]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1] next_level= [1]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3] next_level= [3]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7] next_level= [7]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15] next_level= [15]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31] next_level= [31]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63] next_level= [63]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127] next_level= [127]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255] next_level= [255]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511] next_level= [511]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] next_level= [1023]
nodes: [512, 256, 128, 2, 4, 1, 3, 7, 8, 511, 15, 16, 1023, 31, 32, 63, 64, 127, 255]
================================================== e= 35
Q= {1: {2: 110.996113, 3: 109.0}, 2: {1: 110.0, 4: 111.936844, 5: 110.751853}, ..., 1023: {511: 100}}
node_use= [512, 256] next_level= [256]
node_use= [512, 256, 128] next_level= [128]
node_use= [512, 256, 128, 64] next_level= [64]
node_use= [512, 256, 128, 64, 32] next_level= [32]
node_use= [512, 256, 128, 64, 32, 16] next_level= [16]
node_use= [512, 256, 128, 64, 32, 16, 8] next_level= [8]
node_use= [512, 256, 128, 64, 32, 16, 8, 4] next_level= [4]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2] next_level= [2]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1] next_level= [1]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3] next_level= [3]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7] next_level= [7]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15] next_level= [15]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31] next_level= [31]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63] next_level= [63]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127] next_level= [127]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255] next_level= [255]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511] next_level= [511]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] next_level= [1023]
nodes: [512, 256, 128, 2, 4, 1, 3, 7, 8, 511, 15, 16, 1023, 31, 32, 63, 64, 127, 255]
================================================== e= 40
Q= {1: {2: 110.996113, 3: 109.0}, 2: {1: 110.0, 4: 111.936844, 5: 110.751853}, ..., 1023: {511: 100}}
node_use= [512, 256] next_level= [256]
node_use= [512, 256, 128] next_level= [128]
node_use= [512, 256, 128, 64] next_level= [64]
node_use= [512, 256, 128, 64, 32] next_level= [32]
node_use= [512, 256, 128, 64, 32, 16] next_level= [16]
node_use= [512, 256, 128, 64, 32, 16, 8] next_level= [8]
node_use= [512, 256, 128, 64, 32, 16, 8, 4] next_level= [4]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2] next_level= [2]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1] next_level= [1]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3] next_level= [3]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7] next_level= [7]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15] next_level= [15]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31] next_level= [31]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63] next_level= [63]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127] next_level= [127]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255] next_level= [255]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511] next_level= [511]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] next_level= [1023]
nodes: [512, 256, 128, 2, 4, 1, 3, 7, 8, 511, 15, 16, 1023, 31, 32, 63, 64, 127, 255]
in get_result(),
Q= {1: {2: 110.996113, 3: 109.0}, 2: {1: 110.0, 4: 111.936844, 5: 110.751853}, ..., 1023: {511: 100}}
node_use= [512, 256] next_level= [256]
node_use= [512, 256, 128] next_level= [128]
node_use= [512, 256, 128, 64] next_level= [64]
node_use= [512, 256, 128, 64, 32] next_level= [32]
node_use= [512, 256, 128, 64, 32, 16] next_level= [16]
node_use= [512, 256, 128, 64, 32, 16, 8] next_level= [8]
node_use= [512, 256, 128, 64, 32, 16, 8, 4] next_level= [4]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2] next_level= [2]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1] next_level= [1]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3] next_level= [3]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7] next_level= [7]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15] next_level= [15]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31] next_level= [31]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63] next_level= [63]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127] next_level= [127]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255] next_level= [255]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511] next_level= [511]
node_use= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] next_level= [1023]
nodes= [512, 256, 128, 2, 4, 1, 3, 7, 8, 511, 15, 16, 1023, 31, 32, 63, 64, 127, 255]
graph= {512: [256], 256: [128], 128: [64], 2: [1], 4: [2], 1: [3], 3: [7], 7: [15], 8: [4], 511: [1023], 15: [31], 16: [8], 1023: [511], 31: [63], 32: [16], 63: [127], 64: [32], 127: [255], 255: [511]}
routes= [[512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]]
result= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]
path= [512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]
time difference: 268082
```
### init
```python
from collections import defaultdict
import datetime
import copy
import random
from collections import Counter
adjacency=defaultdict(lambda:defaultdict(lambda:None))
src=512
dst=1023
n=1023
for i in range(1,int(n/2)+1):
if 2*i<=(n-1):
for j in [2*i, 2*i+1]:
#print(i,j)
adjacency[i][j]=1
adjacency[j][i]=1
```
### main
```python
start=datetime.datetime.now()
get_path2(src,dst)
end=datetime.datetime.now()
diff=end-start
print("time difference:", diff.microseconds)
```
### get_path2(src,dst)
```python
def get_path2(src,dst):
A_Z_dict={}
for p in range(1,n+1):
A_Z_dict[p] = [] # 初始化
for q in range(1,n+1):
if adjacency[p][q]!=None:
A_Z_dict[p].append(q) # 將adjacency設定好的有向邊 複製給 A_Z_dict
print(f"A_Z_dict=={A_Z_dict=}")
R={}
net = copy.deepcopy(A_Z_dict) # 建立一份完全獨立的變數
for i in net.keys(): #{key:value} -> {節點:[所有可達節點]}
sub_key=net[i] # [所有可達節點]
sub_dic={}
for j in sub_key:
sub_dic[j]=adjacency[i][j] # 節點1:{可達節點1:1, 可達節點2:1, ...,可達節點n:1}
R[i] = sub_dic # R is for Reward
print(f"R={R}")
Q = copy.deepcopy(R) # 建立一份完全獨立的變數
for i in Q.keys():
for j in Q[i].keys():
Q[i][j] = 100 # 把R可達節點的值全部從1變成100 -> 節點1:{可達節點1:100, 可達節點2:100, ...,可達節點n:100}
print("Q:",Q)
alpha = 0.7 # learning rate # 決定這次結果的誤差有多少會被學習
epsilon = 0.1 #greedy policy # 10% 按照Q表選擇最佳路徑, 90% 隨機選則 # 隨著learn的次數提高
n_episodes = 1000
result = get_result(R,Q,alpha,epsilon,n_episodes,src,[dst])
print(f"result= {result[0]}")
if src==dst:
path=[src]
else:
path=result[0]
print(f"path= {path}")
```
### get_result(R,Q,alpha,epsilon,n_episodes,src,[dst])
```python
def get_result(R,Q,alpha,epsilon,n_episodes,start,end):
print("in get_result(), start=", start, " end=", end)
Q = Q_routing(R,Q,alpha,epsilon,n_episodes,start,end)
print("in get_result(), Q=", Q)
nodes = get_best_nodes(Q,start,end)
print("nodes=", nodes)
graph = get_best_net(Q,nodes)
print("graph=", graph)
route_len = len(get_route(Q,start,end))
routes = get_all_best_routes(graph,start,end,route_len+1)
print("routes=", routes)
result = count_routes(routes)
return routes
```
### Q_routing(R,Q,alpha,epsilon,n_episodes,start,end)
```python
def Q_routing(T,Q,alpha,epsilon,n_episodes,start,end):
nodes_number = [0,0]
for e in range(n_episodes):
#print("="*50, "e=", e)
#if e in range(0,n_episodes,1000):
# print("loop:",e)
current_state = start
#current_route = [start]
goal = False
while not goal:
valid_moves = list(Q[current_state].keys()) #Q[current_state] = 節點1:{可達節點1:100, 可達節點2:100, ...,可達節點n:100}, keys = [可達節點n...可達節點n]
if len(valid_moves) <= 1: # 設計上 每個節點都有至少一條路可以走
next_state = valid_moves[0]
else:
best_action = random.choice(get_key_of_min_value(Q[current_state])) # 隨機選一個 Q值最小的的可達節點
if random.random() < epsilon: # if epsilon = 0.1 -> 10%
valid_moves.pop(valid_moves.index(best_action))
next_state = random.choice(valid_moves) #除了最短路徑以外的路
else: # if epsilon = 0.1 -> 90%
next_state = best_action
Q = update_Q(T,Q,current_state, next_state, alpha)
#print("Q=",Q)
#print("current_state=", current_state, "next_state=", next_state)
if next_state in end:
goal = True
current_state = next_state
#current_route.append(next_state)
#print "current:",current_route
#print get_route(Q,start,end)
# check stop standard
# 離開while後
# e : 0~999 (for e in range(n_episodes):)
if e in range(30,1000,5): # 30、35、...998
print("="*50, "e=", e)
for i in Q.keys():
for j in Q[i].keys():
Q[i][j] = round(Q[i][j],6) # 只取小數點後6位
print("Q=", Q)
nodes = get_best_nodes(Q,start,end) # 取得start 到 end 的所有路徑上的節點 + 可能可以到end的節點(透過遍歷所有路徑 直到找到終點 所以還沒到終點的路徑也會被記錄?)
print("nodes:",nodes)
nodes_number.append(len(nodes)) # 紀錄 目前的最佳路徑 步數
#print("nodes_number:",nodes_number)
if len(set(nodes_number[-3:])) == 1: # set去除重複元素 當set(nodes_number[-3::]) = set([0]) 時 break -> 當 紀錄的最佳路徑步數 連續3次都一樣 則停止尋找
break
return Q
```
### get_key_of_min_value(Q[current_state])
```python
def get_key_of_min_value(dic):
min_val = min(dic.values())
return [k for k, v in dic.items() if v == min_val] # 取得value為min_val的key
```
### update_Q(T,Q,current_state, next_state, alpha)
```python
def update_Q(T,Q,current_state, next_state, alpha):
current_t = T[current_state][next_state] # 最初 全為1
current_q = Q[current_state][next_state] # 最初 全為100
new_q = current_q + alpha * (current_t + min(Q[next_state].values()) - current_q)
# current_t 永遠=1, 代表每走一步 距離+1 (reward (最終的score越小越好))
#new_q = current_q + alpha * (current_t + max(Q[next_state].values()) - current_q)
Q[current_state][next_state] = new_q
return Q
```
### get_best_nodes(Q,start,end)
```python
def get_best_nodes(Q,start,end):
next_level = [start]
node_use = [start]
while list(set(next_level) & set(end)) == []: # set(next_level) & set(end) 取交集 -> 判斷是否到達終點
temp_level = []
for i in next_level: # 不是尋找路徑 是遍歷從起點出發的所有路徑
temp_level += get_single_dict(Q[i]) # 取得節點Q[i] 所有可達節點中Q值最小的
next_level = list(set(temp_level))
node_use += next_level
print("node_use=", node_use, "next_level=", next_level)
return list(set(node_use))
```
### get_single_dict(Q[i]) # 初始Q[i] = 可達節點1:100, 可達節點2:100, ..., 可達節點n:100
```python
def get_single_dict(dic):
single_link = {}
min_value = min(dic.values()) # 取得 所有可達節點中 值最小的 值
for key in dic.keys(): # dic.keys = 所有可達節點
if dic[key] == min_value:
single_link[key] = dic[key] # single_link[值最小的節點] = 值
return single_link.keys() # 回傳"一個"最小Q值的節點(可能不唯一 但只回傳節點名稱最小的) # 值 可代表 距離
```
### get_best_net(Q,nodes)
```python
def get_best_net(Q,nodes): # nodes= [512, 256, 128, 2, 4, 1, 3, 7, 8, 511, 15, 16, 1023, 31, 32, 63, 64, 127, 255]
best_net = {}
for i in nodes:
best_net[i] = list(set( get_single_dict(Q[i]) ) & set(nodes))
return best_net
```
### get_route(Q,start,end)
```python
def get_route(Q,start,end):
""" input is Q-table is like:{1: {2: 0.5, 3: 3.8},
2: {1: 5.9, 5: 10}} """
single_route = [start]
while single_route[-1] not in end:
next_step = min(Q[single_route[-1]],key=Q[single_route[-1]].get)
# min(,key=...) key設定比較的方式 -> Q[single_route[-1]].get 代表 取得Q[single_route[-1]]字典中 值(Q值)最小的 "值" 最後next_step會得到最小"值"的鍵 -> 最小Q值節點
single_route.append(next_step)
if len(single_route) > 2 and single_route[-1] in single_route[:-1]:
# single_route[-1] in single_route[:-1] 代表出現重複元素(single_route[-1])
break
return single_route
```
### get_all_best_routes(graph,start,end,route_len+1)
```python
# graph = {512: [256], 256: [128], 128: [64], 2: [1], 4: [2], 1: [3], 3: [7], 7: [15], 8: [4], 511: [1023], 15: [31], 16: [8], 1023: [511], 31: [63], 32: [16], 63: [127], 64: [32], 127: [255], 255: [511]}
# start= 512 end= [1023]
# route_len+1 = 19 + 1 # len([512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]) = 19
def get_all_best_routes(graph,start,end,max_depth):
past_path = []
# maintain a queue of paths
queue = []
# push the first path into the queue
queue.append([start]) # 二維陣列
while queue:
# get the first path from the queue
path = queue.pop(0) # 一維陣列
# get the last node from the path
node = path[-1]
# path found
# print "*"
# enumerate all adjacent nodes, construct a new path and push it into the queue
for adjacent in graph.get(node, []): # graph.get(node,[]) 回傳 graph中node對應的值 若node沒有對應的值回傳[] # ex: graph.get(512,[]) = [256]
new_path = list(path) # ex: [512] (一維)
## end the current loop if we already reach the point
if adjacent in end:
new_path.append(adjacent)
past_path.append(new_path)
continue
if adjacent in new_path: # 避免重複
continue
new_path.append(adjacent)
if len(new_path) >= max_depth and new_path[-1] not in end:
break # 跳出for loop
# print new_path
queue.append(new_path)
past_path.append(new_path)
best_paths = []
for l in past_path:
if l[-1] in end:
best_paths.append(l)
return best_paths
```
### 實驗數據
```
在 10 個 節點
di: *100 (執行100次後 取10次平均)
996+1003+1595+996+1004+996+1008+2169+1007+1981 = *12,755*
12.755 # 最終結果 單位(microsecond)
ql: *100
21098+21049+22564+22375+23049+22133+22691+22325+22642+21590 = *221,516*
221.516 # 最終結果 單位(microsecond)
------------------------------------------------------------------------
在 50 個 節點
di:*100
19962+20103+20015+19177+18990+20936+18978+18510+19052+20259 = *195,982*
195.982 # 最終結果 單位(microsecond)
ql:*100
87753+91628+84637+86088+86100+89192+88802+93270+88635+93979 = *890,084*
890.084 # 最終結果 單位(microsecond)
------------------------------------------------------------------------
在 100 個節點
di:*100
74740+85027+74699+77784+75551+70264+73045+68555+76592+75287 = *751,544*
751.544 # 最終結果 單位(microsecond)
ql:*100
181232+192641+181541+184840+207016+198307+187308+190106+197272+189659 = *1,909,922*
1909.922 # 最終結果 單位(microsecond)
------------------------------------------------------------------------
在 500 個節點
di:*1
48413+47036+48090+46205+48617+46103+46063+49696+49538+46514 = *476,275*
476.275 # 最終結果 單位(microsecond)
ql:*1
47931+47070+43957+49026+45667+45019+44293+48721+47991+45318 = *464,993*
46499.3 # 最終結果 單位(microsecond)
------------------------------------------------------------------------
在 1000 個節點
di:*1
185386+200810+205391+191985+189766+204043+185073+212553+194596+196716 = *1,966,319*
196631.9 # 最終結果 單位(microsecond)
ql:*1
175255+180451+183513+174370+178361+182982+192534+188779+186701+185487 = *1,828,433*
182843.3 # 最終結果 單位(microsecond)
------------------------------------------------------------------------
在 5000 個節點
di:
(409858+431286+560219+550301+600138+584622+530526+690033+607939+530798) / 10 = *549572.0* # 最終結果 單位(microsecond)
ql:
(349916+464350+409767+310032+335007+329855+335007+383069+454963+383605) / 10 = *375557.1* # 最終結果 單位(microsecond)
```