# Gonza notepad
```python
from collections import Counter
def tangle_stats(s):
    """Print tangle statistics for the global workspace ``ws`` at size ``s``.

    Tallies, over all tangle views returned by ``ws.sdg.get_all_tangleviews(s)``:
    the total tangle count, a histogram of frontier counts, and the sets of
    forward-frontier / backward-frontier / internal node ids.  Then classifies
    every node in the graph as internal, single-frontier, double-frontier or
    untangled, reporting node counts and total bp per class.

    NOTE(review): the original source had its indentation stripped; the
    structure below is reconstructed from the obvious control flow.
    """
    t_fw = set()      # node ids seen as forward frontiers (stored positive)
    internal = set()  # node ids internal to at least one tangle
    t_bw = set()      # node ids seen as backward frontiers
    fc = Counter()    # frontier count -> number of tangles with that count
    tc = 0            # total number of tangles
    for t in ws.sdg.get_all_tangleviews(s):
        tc += 1
        fc[len(t.frontiers)] += 1
        for f in t.frontiers:
            if f.node_id() < 0:
                t_fw.add(-f.node_id())
            else:
                t_bw.add(f.node_id())
        for n in t.internals:
            internal.add(n.node_id())
    print("Tangle stats for %d tangles\n" % tc)
    for frontiers, count in fc.most_common():
        print("%d with %d frontiers" % (count, frontiers))
    # Classify every graph node by its tangle role, accumulating counts and bp.
    internal_count = 0
    internal_bp = 0
    single_frontier_count = 0
    single_frontier_bp = 0
    double_frontier_count = 0
    double_frontier_bp = 0
    untangled_count = 0
    untangled_bp = 0
    for nv in ws.sdg.get_all_nodeviews():
        nid = nv.node_id()  # hoisted: used up to four times below
        if nid in internal:
            internal_count += 1
            internal_bp += nv.size()
        elif nid in t_fw and nid in t_bw:
            double_frontier_count += 1
            double_frontier_bp += nv.size()
        elif nid in t_fw or nid in t_bw:
            single_frontier_count += 1
            single_frontier_bp += nv.size()
        else:
            untangled_count += 1
            untangled_bp += nv.size()
    print("\n\nStats for graph nodes:\n")
    print("Untangled: %d nodes, %d bp" % (untangled_count, untangled_bp))
    print("Internal: %d nodes, %d bp" % (internal_count, internal_bp))
    print("Single frontier: %d nodes, %d bp" % (single_frontier_count, single_frontier_bp))
    print("Double frontier: %d nodes, %d bp" % (double_frontier_count, double_frontier_bp))

tangle_stats(300)
```
```python
import sys
# Local build of the SDG (Sequence Distance Graph) Python bindings.
sys.path.append('/Users/ggarcia/Documents/git_sources/sdg/cmake-build-relwithdebinfo/SDGpython/')
import SDGpython as SDG
from collections import Counter
from datetime import datetime
# DBG-construction parameters: kmer size, minimum kmer coverage, batch count.
K=63
MIN_CVG=2
NUM_BATCHES=1
# Build a workspace and attach the paired-end read datastore.
ws=SDG.WorkSpace()
peds=ws.add_paired_reads_datastore('./pseudo_triploid_octoploid_new/octoploid_pe.prseq', 'pe')
# Assemble a graph from the paired datastore, then clip short tips (<200 bp).
gm=SDG.GraphMaker(ws.sdg)
gm.new_graph_from_paired_datastore(peds,K,MIN_CVG,NUM_BATCHES);
gc=SDG.GraphContigger(ws)
gc.clip_tips(200)
print("Remaining nodes: %s" %(len(ws.sdg.get_all_nodeviews())))
ws.sdg.write_to_gfa1('./bob.gfa')
# Count 31-mers of the 'pe' reads against the graph for coverage analysis.
kc = ws.add_kmer_counter('pek31', 31)
kc.add_count('pe', peds)
# NOTE(review): `plt` is never imported in this file — this line needs
# `import matplotlib.pyplot as plt` (presumably available in the notebook
# session) or it raises NameError. Confirm before running as a script.
plt.plot(kc.count_spectra('pe')[10:500])
```
```python
# Coverage band: only kmers with count strictly inside (lower_limit, upper_limit)
# are considered "in band" for segment extraction.
lower_limit = 50
upper_limit = 90

def extract_segments(nv, lower_limit, upper_limit, k=31):
    """Return the sequence segments of node view ``nv`` covered by in-band kmers.

    A kmer is in band when ``lower_limit < coverage < upper_limit`` (strict,
    per the original condition).  Each maximal run of in-band kmers
    ``[start, stop)`` yields the sequence slice ``[start, stop - 1 + k)`` —
    i.e. from the first kmer's start through the last kmer's end.

    nv: node view exposing ``kmer_coverage('pek31', 'pe')`` and ``sequence()``.
    k:  kmer size of the 'pek31' counter (default 31, matching its setup).

    Fixes vs. the original flat-pasted code: the scanner no longer skips the
    kmer immediately after a run (which merged/dropped runs separated by one
    out-of-band kmer), and the segment end no longer overshoots by two bases.
    """
    coverage = nv.kmer_coverage('pek31', 'pe')
    seq = nv.sequence()
    segments = []
    start = None  # start index of the current in-band run, or None
    for i, c in enumerate(coverage):
        in_band = lower_limit < c < upper_limit
        if in_band:
            if start is None:
                start = i
        elif start is not None:
            # Run [start, i): last in-band kmer starts at i-1, ends at i-1+k.
            segments.append(seq[start:i - 1 + k])
            start = None
    if start is not None:  # run extends to the end of the coverage vector
        segments.append(seq[start:len(coverage) - 1 + k])
    return segments
def extract_sequence():
    """Rebuild graph content from coverage-band segments.

    For every node view in ``ws.sdg``: extracts the segments whose kmer
    coverage is inside (lower_limit, upper_limit) via ``extract_segments``,
    writes each segment to ./extraction.fasta (FASTA header = source node id),
    adds it to the graph as a new node, and removes the original node.
    Prints a summary and returns the list of newly added node ids.

    NOTE(review): the original source had its indentation stripped.  This
    reconstruction removes every visited node; if only nodes that yielded
    segments were meant to be removed, move ``remove_node`` under the
    ``if segments:`` guard.  Also confirm SDG tolerates add/remove while
    iterating ``get_all_nodeviews()``.
    """
    new_nodes = []
    nodes_with_content = 0
    total_nodes = 0
    # `with` guarantees the FASTA file is closed (the original leaked it).
    with open('./extraction.fasta', 'w') as out_fasta:
        for nv in ws.sdg.get_all_nodeviews():
            total_nodes += 1
            segments = extract_segments(nv, lower_limit, upper_limit)
            for s in segments:
                out_fasta.write(">%s\n%s\n" % (nv.node_id(), s))
                new_nodes.append(ws.sdg.add_node(s))
            if segments:
                nodes_with_content += 1
            ws.sdg.remove_node(abs(nv.node_id()))
    print("Content extracted from %s/%s nodes" % (nodes_with_content, total_nodes))
    return new_nodes

extract_sequence()
ws.sdg.write_to_gfa1('./susan.gfa')
```
```python
```