# shared stuff
[TOC]
```python=
from collections import Counter
def get_hs_tags(lirds, hs):
    """Return the set of read tags seen more than once across a sorter's order.

    Counts, over every node in hs.order, the tag of every read mapped to that
    node (node and read ids taken as absolute values, i.e. orientation is
    ignored) and returns the tags with total count > 1.
    """
    tag_counts = Counter()
    for nid in hs.order.as_signed_nodes():
        for rid in lirds.mapper.paths_in_node[abs(nid)]:
            tag_counts[lirds.get_read_tag(abs(rid))] += 1
    # Set comprehension over items(): the original shadowed the Counter name
    # `c` with the per-item count inside the comprehension and wrapped a list
    # in set(); most_common()'s ordering is irrelevant when building a set.
    return {tag for tag, n in tag_counts.items() if n > 1}
def node_tags_in_tagset(lirds, nid, tagset):
    """Return the fraction of reads on node |nid| whose tag is in `tagset`.

    Returns 0 when the node carries no reads. One pass over the node's reads;
    read ids are taken as absolute values (orientation ignored).
    """
    hits = 0
    total = 0
    for rid in lirds.mapper.paths_in_node[abs(nid)]:
        total += 1
        if lirds.get_read_tag(abs(rid)) in tagset:
            hits += 1
    return hits / total if total else 0
# Build the recurrent tag set of each of the two orders (hs1/hs2 and lirds
# are defined in earlier cells), then, for every node of each order, print
# the fraction of its reads tagged within order 1's set and order 2's set.
tagset1=get_hs_tags(lirds, hs1)
tagset2=get_hs_tags(lirds, hs2)
print("order 1")
for n in hs1.order.as_signed_nodes():
    print("%20d\t%.2f\t%.2f"%(n,node_tags_in_tagset(lirds,n,tagset1 ),node_tags_in_tagset(lirds,n,tagset2)))
print("\n\n\norder 2")
for n in hs2.order.as_signed_nodes():
    print("%20d\t%.2f\t%.2f"%(n,node_tags_in_tagset(lirds,n,tagset1 ),node_tags_in_tagset(lirds,n,tagset2)))
```
# Thread plots vs. node orders:
```python=
def thread_plot(rtg, nodes, tids=None, min_nodes=4):
    """Plot threads as polylines against a node order.

    Each node in `nodes` gets an x position equal to its rank in the order;
    every selected thread is drawn on its own row(s) of the y axis, dropping
    one extra row each time the thread changes orientation or jumps backwards
    relative to the order.

    rtg       -- thread graph providing node_threads() and get_thread()
    nodes     -- signed node ids defining the x order
    tids      -- thread ids to draw; if falsy, auto-select every thread that
                 touches at least `min_nodes` of `nodes`
    min_nodes -- threshold for the auto-selection above

    Relies on pylab-style globals (figure/plot/grid/xticks/yticks) and on the
    global `hs_fail` for the cross count shown in the y labels.
    """
    # Fixed the mutable-default-argument antipattern (was tids=[]); `if not
    # tids` keeps the original behavior for both omitted and empty arguments.
    if not tids:
        tc = Counter()
        for nid in nodes:
            tc.update(rtg.node_threads(nid, True))
        tids = [tid for tid, c in tc.items() if c >= min_nodes]
    figure(figsize=(40, 40))
    # each node gets an x position.
    y = 0
    node_x = {}
    for xp, nid in enumerate(nodes):
        node_x[nid] = xp
    # reorder the tids by the x position of their first node within the order
    tf = []
    for tid in tids:
        for np in rtg.get_thread(tid):
            if np.node in node_x:
                tf.append([node_x[np.node], tid])
                break
    tids = [x[1] for x in sorted(tf)]
    yticks_y = []
    yticks_l = []
    for tid in tids:
        y -= 1
        yticks_y.append(y)
        yticks_l.append("%d (%d crosses)" % (tid, hs_fail.order.thread_order_crosses(rtg.get_thread(tid))))
        t_x = []
        t_y = []
        for np in rtg.get_thread(tid):
            # Signed position: negative when the thread traverses the node
            # against the order's orientation; nodes outside the order are
            # skipped. NOTE(review): a reversed node at x position 0 yields
            # pos == -0 == 0, losing its orientation — confirm if intended.
            if np.node in node_x: pos = node_x[np.node]
            elif -np.node in node_x: pos = -node_x[-np.node]
            else: continue
            # drop one row on orientation change or on a backwards jump
            if len(t_x) and (((last_pos > 0) != (pos > 0)) or (last_pos > pos)):
                y -= 1
            last_pos = pos
            t_x.append(abs(pos))
            t_y.append(y)
        plot(t_x, t_y, 'o-', linewidth=3)
    grid(color='black', axis='x', which='both', linestyle=':', linewidth=1)
    grid(color='black', axis='y', which='both', linestyle=':', linewidth=1, alpha=.5)
    xticks(range(len(node_x)), labels=[str(x[0]) for x in sorted(node_x.items(), key=lambda v: v[1])], rotation=90)
    yticks(yticks_y, labels=yticks_l)
```
# Checking LR + 10 integration:
```python=
# Build one HappySorter per seed node in `nodos` and collect each resulting
# order's recurrent tag set. rtg, nodos, lirds and get_hs_tags come from
# earlier cells; the hss/tags dicts are consumed by the clustering helpers
# defined next.
hss={}
tags={}
for i, nid in enumerate(nodos):
    print(i, nid)
    hss[nid]=SDG.HappySorter(rtg,.5,.7)   # NOTE(review): .5/.7 thresholds — presumably happiness parameters; confirm against SDG's HappySorter signature
    hss[nid].start_from_node(nid)
    hss[nid].grow_loop(10)
    tags[nid]=get_hs_tags(lirds, hss[nid])
def greedy_cluster_nodes(nodos, nid):
    """Greedy transitive closure of `nodos` around seed `nid`.

    A node joins the cluster when it shares more than 10 tags (from the
    module-level `tags` dict) with any node already in the cluster; passes
    repeat until a full pass adds nothing.
    """
    cluster = {nid}
    previous_size = 0
    while previous_size < len(cluster):
        previous_size = len(cluster)
        for candidate in nodos:
            if candidate in cluster:
                continue
            # any() short-circuits exactly like the original's inner break
            if any(len(tags[member] & tags[candidate]) > 10 for member in cluster):
                cluster.add(candidate)
    return cluster
def all_nodes_touched_by(nodos, nid):
    """Return the union of signed nodes across the orders of every sorter
    (module-level `hss`) whose seed clusters with `nid` via
    greedy_cluster_nodes.

    Bug fix: the original ended with `return (n)`, which returned only the
    last node iterated instead of the accumulated set.
    """
    nodes = set()
    for o in greedy_cluster_nodes(nodos, nid):
        for n in hss[o].order.as_signed_nodes():
            nodes.add(n)
    return nodes
```
# creating the reference positions for nodes:
```shell=
# Map the anchor sequences against the F. vesca v4.0.a1 reference with nucmer
# (16 threads, min cluster 2, min match 15), filter the alignments on the
# query side (-q) keeping only those of length >= 20, and dump the coordinates
# without header (-H), query-sorted (-q), for load_npos() to parse.
~/software/mummer-4.0.0rc1/nucmer -t16 -c2 -l15 -p anchors_to_fv Fragaria_vesca_v4.0.a1.fasta sdg_raw.fasta
~/software/mummer-4.0.0rc1/delta-filter -q -l20 anchors_to_fv.delta > anchor_positions.delta
~/software/mummer-4.0.0rc1/show-coords -qH anchor_positions.delta > anchor_positions.coords
```
```python=
def load_npos(path='anchor_positions.coords'):
    """Load the best reference position per anchor node from a coords dump.

    path -- whitespace-delimited file as produced by `show-coords -qH`
            (default kept for backward compatibility with the original).

    For each line: reference start/end are columns 0-1, query start/end are
    columns 3-4, the target name is column 11 (only 'Fvb<chr>' targets kept)
    and the query/node name is column 12 ('seqNNN' -> NNN). The chromosome id
    is negated once per reversed coordinate pair, so reference- and
    query-reversal cancel out. For every node only the hit with the longest
    query span is kept.

    Returns {node_id: (signed_chr_id, ref_start, query_span_length)}.
    """
    npos = {}
    # `with` closes the file handle (the original leaked it)
    with open(path) as coords:
        for line in coords:
            sl = line.split()
            rstart = int(sl[0])
            rend = int(sl[1])
            qstart = int(sl[3])
            qend = int(sl[4])
            tname = sl[11]
            if tname[:3] != "Fvb":
                continue
            chrid = int(tname[3:])
            if qstart > qend:
                chrid = -chrid
            if rstart > rend:
                chrid = -chrid
            qlen = abs(qend - qstart) + 1
            nid = int(sl[12][3:])
            if nid not in npos or npos[nid][2] < qlen:
                npos[nid] = (chrid, rstart, qlen)
    return npos
```
# Problems to solve
## 1 - Nodes with haplotype collapsing/switch
```python=
%%time
# Reproduce the failing case: seed a sorter from node 6736641 and grow it;
# the resulting order exhibits the haplotype switch discussed below
# (visualized by plot_F1 with the seed node highlighted).
hs_fail=SDG.HappySorter(rtg,.5,.7)
hs_fail.start_from_node(6736641)
hs_fail.fast_grow_loop(verbose=True)
print(hs_fail.order.size())
plot_F1(hs_fail.order,[6736641])
```

```python=
def plot_F1(order, mark_nodes=None, exclude_happil_shares=True):
    """Plot the per-node classifier patterns of an order as an image.

    Rows are nodes (in order), columns the digits of cf.get_pattern() for the
    node's view. A right-hand twin axis labels ~20 evenly spaced rows plus the
    rows of `mark_nodes`, which are shown in bold red.

    order                 -- node order providing as_signed_nodes()
    mark_nodes            -- node ids to highlight (default: none)
    exclude_happil_shares -- when True, skip nodes whose pattern ends in '1'

    Relies on globals cf, rtg and pylab-style figure/subplot/show.
    Fixes vs. original: mutable default argument; with fewer than 20 rows the
    original's step was 0 and range() raised ValueError (and ni[-1] raised
    IndexError on an empty row set) — both now guarded.
    """
    if mark_nodes is None:
        mark_nodes = []
    patterns = []
    to_mark = []
    ni = []
    for i, nid in enumerate(order.as_signed_nodes()):
        if cf.get_pattern(rtg.get_nodeview(nid))[-1] != '1' or not exclude_happil_shares:
            patterns.append([int(x) for x in cf.get_pattern(rtg.get_nodeview(nid))])
            ni.append((len(patterns) - 1, nid))
            if nid in mark_nodes:
                to_mark.append((len(patterns) - 1, ' %d' % nid))
    if not ni:
        return  # nothing to plot
    # label ~20 evenly spaced rows plus the last one
    step = max(1, len(ni) // 20)
    for i in range(0, 20 * step, step):
        if i < len(ni):
            to_mark.append(ni[i])
    to_mark.append(ni[-1])
    to_mark.sort()
    figure(figsize=(4, 10))
    ax = subplot(facecolor='w')
    ax.imshow(patterns, aspect='auto', interpolation='none')
    # twin axis carries the row labels on the right
    rax = ax.axes.twinx()
    rax.set_yticks([x[0] for x in to_mark])
    rax.set_yticklabels([x[1] for x in to_mark])
    rax.imshow(patterns, aspect='auto', interpolation='none')
    for i, tm in enumerate(to_mark):
        # marked entries carry ' %d' strings; int() parses both forms
        if int(tm[1]) in mark_nodes:
            rax.get_yticklabels()[i].set_color('red')
            rax.get_yticklabels()[i].set_weight('bold')
    show()
```
### Conclusion
For a haplotype to switch there must be recruitment of threads from the alternative haplotype. Since taking into account a larger section of the haplotype for node happiness solved all those problems for nodes (with only occasional switches now caused by the nodes), we need to implement something similar for thread recruitment: a more stringent happiness condition that computes hits against the full potential length of the thread that could fall into the order. The identity threshold for recruitment will then need to be lowered, but it will still be more precise.
## 2 - Node update creates "drifts" on some nodes
```python=
# Reproduce the position-drift problem: grow a sort for two steps from the
# seed node, apply update_positions() three times, then plot each node's
# coordinate in order — drifting nodes appear as spikes/steps in the curve.
START_NODE=694137
hs_fail=SDG.HappySorter(rtg,.5,.7)
hs_fail.start_from_node(START_NODE)
hs_fail.fast_grow_loop(verbose=True,steps=2)
for i in range(3):
    hs_fail.update_positions()
nc=dict(hs_fail.order.node_coordinates)
figure()
plot([nc[nid] for nid in hs_fail.order.as_signed_nodes()])
```

# Finding complementary haplotypes in a region of chromosome
```python=
def sdist(s1, s2):
    """Hamming distance over the first len(s1) positions of the two strings.

    Extra characters in s2 are ignored; raises IndexError if s2 is shorter
    than s1 (same contract as the original).
    """
    return sum(1 for i in range(len(s1)) if s1[i] != s2[i])
from collections import Counter
# Count occurrences of each anchor pattern (cf.get_pattern with the last two
# characters dropped) over the selected anchors, skipping nodes of size < 70
# and patterns ending in '1'.
c=Counter()
for x in selected_anchors:
    if ws.sdg.get_nodeview(x).size()<70: continue
    p=cf.get_pattern(ws.sdg.get_nodeview(x))
    if p[-1]=='1': continue
    #print(p)
    c[p[:-2]]+=1
# For each of the 20 most frequent patterns, find its closest and furthest
# pattern by Hamming distance (sdist). When the furthest differs at all 19
# positions the pair is printed (larger pattern first, with both counts) and
# both patterns are retired.
# NOTE(review): 19 presumably equals the trimmed pattern length, making the
# furthest pattern an exact complement — confirm against cf.get_pattern.
used=set()
for x in c.most_common()[:20]:
    if x[0] in used: continue
    #print("\n",x)
    closest=""
    closest_score=30   # upper bound: any observed distance below this wins
    furthest=""
    furthest_score=0
    for x2 in c.keys():
        if x2 !=x[0]:
            s=sdist(x[0],x2)
            if s<closest_score:
                closest_score=s
                closest=x2
            if s>furthest_score:
                furthest_score=s
                furthest=x2
    #print("closest:",closest,c[closest],"s:",closest_score)
    #print("furthest",furthest,c[furthest],"s:",furthest_score)
    if furthest_score==19:
        #print("furthest",furthest,c[furthest],"s:",furthest_score)
        print(max(x[0],furthest),min(x[0],furthest),c[max(x[0],furthest)],c[min(x[0],furthest)])
        used.add(x[0])
        used.add(furthest)