Add notes to the HackMD during the class so we can collaborate :)
CyVerse link
Genomics data carpentry: https://datacarpentry.org/lessons/#genomics-workshop
General Coding
Bioinformatics
Help
Go to
https://mybinder.org/v2/gh/AnnaFeitzinger/BioCoding2022.git/HEAD
Make a new notebook. Name it "setup"
Execute:
!pip3 install ipykernel bash_kernel nbgitpuller && python3 -m bash_kernel.install
# Build a random protein of `length` residues that starts with 'M'.
# Drawing the stop symbol '_' part-way through discards the attempt and
# counts one trial; the outer loop then starts over.
# NOTE(review): indentation was reconstructed from the flattened notes -- confirm.
length = 10
aa_string = ''
trials = 0
while len(aa_string) < length:
    # Pick a random amino acid; only an 'M' may open the chain.
    aa = random.choice(list(amino_acids.values()))
    if aa == 'M':
        print('Start with M!')
        # Add M to the aa_string.
        aa_string += aa
        for i in range(length - 1):
            # Pick new amino acids.
            aa = random.choice(list(amino_acids.values()))
            if aa != '_':
                # Not a stop: extend the string.
                aa_string += aa
                print(aa_string)
            else:
                # Stop ('_'): reset the string, add 1 to trials, and
                # break out of the for loop so the while loop retries.
                aa_string = ''
                trials += 1
                break
print(aa_string)
print(trials)
# Single attempt at a random protein of `length` residues, with the first
# residue forced to 'M'. Drawing the stop symbol '_' abandons the attempt.
# NOTE(review): indentation was reconstructed from the flattened notes -- confirm.
length = 10
aa_string = ''
trials = 0
# Force amino acid to start with a value.
aa = 'M'
if aa == 'M':
    print('Start with M!')
    # Add M to the aa_string.
    aa_string += aa
    for i in range(length - 1):
        # Pick new amino acids.
        aa = random.choice(list(amino_acids.values()))
        if aa != '_':
            # Not a stop: extend the string.
            aa_string += aa
            print(aa_string)
        else:
            # Stop ('_'): reset the string, add 1 to trials, and break
            # out of the for loop.
            aa_string = ''
            trials += 1
            print('Stop codon!')
            break
#print('didnt start with m!')
print(aa_string)
print(trials)
Jake
Ethan
Connor
Kari
Logan
emma
Asvin
cat command- c.u.p.s
df -shows space used, remaining on the hard drive
ls --help -
if you don't understand a command- type the command followed by --help (two dashes, then "help"), e.g. ls --help
mkdir- make a directory (then use ls to see it)
directory- kind of like documents where files are stored (where they live)
cd- change directory
head - type head, then press Tab (Tab fills in the file name if there's only one file in the directory); head gives you the first 10 lines of the file
––python–
print- print("what you want to print") ex: print("hello world"); could use single or double quotes; print is always with a lowercase p
variables- can assign values to variables; ex: variable=3; when printing a variable, you don't need quotes
comment- #
string- a sequence of characters, has quotes around it, shown in red
integer- doesn't have any quotes, shown in green
print(type(example)) -prints the datatype
subset strings- print(string[index]);
print(string[begin:end:step]); index starts at 0
to make a new line - use '\n'
counting- print(string.count('thing you want to count'))
print(my_string.count('character')) -gives the number of a character in the string
lists- go between square brackets, index starts at 0,
append- adds an item to the end of a list; ex: mylist.append("gag")
subset lists- same syntax as subset string
for loops- ex: for dummy_variable in iterable_data_type: print(dummy_variable); print has to be indented; basically, you name the data you want to loop over and then, indented below, write the action you want to do: allows you to perform the same action on each value in a list
while loops– ex:
i=0
j=10
while i < j:
print("cool")
i=i+1
the last line of code ensures that the loop isn't infinite, because at some point i is no longer less than j (i reaches j) and the loop stops
defining (making) a function–- have to start w/ def, then name the function, then whatever you want your function to do is indented below;
ex1:
def prints_dna_len(): <-function name
dna = 'gatgcattatcgtgagc' <- what is does
print(len(dna))
prints_dna_len() <-now you can j type this
ex2:
def gc_content(dna):
    """Return the fraction of characters in ``dna`` that are 'g' or 'c'.

    Counting is case-sensitive: only lowercase 'g' and 'c' are counted.

    Args:
        dna: Nucleotide sequence as a string.

    Returns:
        (number of 'g' + number of 'c') / len(dna) as a float, or 0.0 for
        an empty sequence.
    """
    dna_length = len(dna)
    if dna_length == 0:
        # An empty sequence has no bases; avoid dividing by zero.
        return 0.0
    gc_count = dna.count('g') + dna.count('c')
    # Divide by this sequence's own length, not a global `length`.
    return gc_count / dna_length
gc_content('atgcgcgtac')
getting random nucleotides– with numpy
nucleotides=['A','T','G','C']
probabilities= [.25,.25,.25,.25]
random.choice(nucleotides,p=probabilities )
getting random lengths–
random.randint(0,100)
to save values from a loop to a string/list–
use +=; make an open variable
ex:
random_dna=''
for n in range(length):
nucleotide=(random.choice(nucleotides,p=probabilities ))
random_dna +=nucleotide
dictionaries- data type, enclosed with {}
git clone https://github.com/AnnaFeitzinger/BioCoding2022.git
Jake = print('ST4V2MBB011VHC001 ' * 77)
hiv_genome_list=list(hiv_genome)
for mutation_result in mutation_results:
if mutation_result == 'Mutation':
random_postion=random.randint(0,len(hiv_genome_list))
print(hiv_genome_list[random_postion])
Asvin
mydict = {83: 83}
txt = "Hello Sam!"
print(txt.translate(mydict))
Kari- determines if all characters are upper or lowercase
a= 'HELLO WORLD'
b= 'Hello World'
c= 'hello world'
print(a.isupper())
print(b.isupper())
print(c.isupper())
Logan–-lowercase the string
txt = "Hello my FRIENDS"
x = txt.lower()
print(x)
variable for the avg weight
variable for the number of mice
emma– average_weight numberof_mice
logan– avg_weight_ amnt_mice_
Jake = average_mass_group, number_group_mice
Asvin= avgweight amountofmice
Connor = avg_weight_groupname, num_mice_group
Ethan = avg_groupname_weight / num_groupname_mice
Asvin
print(alpha_id[7])
print(alpha_id[6])
print(alpha_id[5])
print(alpha_id[4])
print(alpha_id[3])
print(alpha_id[2])
print(alpha_id[1])
print(alpha_id[0])
emma–-
alpha_id='CGJ28371'
print(alpha_id[7])
print(alpha_id[6])
print(alpha_id[5])
print(alpha_id[4])
print(alpha_id[3])
print(alpha_id[2])
print(alpha_id[1])
print(alpha_id[0])
Connor -
x = len(alpha_id) - 1
backwards = ''
while x > -1:
backwards = backwards + (alpha_id[x])
x-=1
print(backwards)
Ethan -
# The ID is text, so it must be quoted; a negative-step slice reverses it.
alpha_id = 'CGJ28371'
print(alpha_id[::-1])
Logan–-
print (alpha_id[7])
print (alpha_id[6])
print (alpha_id[5])
print (alpha_id[4])
print (alpha_id[3])
print (alpha_id[2])
print (alpha_id[1])
print (alpha_id[0])
emma–
alpha_initials = 'CGJ'
beta_initials = 'SJW'
gamma_initials = 'PWS'
print(alpha_initials)
print(beta_initials)
print(gamma_initials)
ethan -
alpha_id = 'CGJ28371'
beta_id = 'SJW99399'
gamma_id = 'PWS29382'

# Initials: the first three characters of each ID.
# Store the slice itself; print() returns None, so `x = print(...)`
# would leave x empty.
alpha_init = alpha_id[:3]
beta_init = beta_id[:3]
gamma_init = gamma_id[:3]
print(alpha_init)
print(beta_init)
print(gamma_init)

# Experimenter ID: everything after the first three characters.
alpha_expid = alpha_id[3:]
beta_expid = beta_id[3:]
gamma_expid = gamma_id[3:]
print(alpha_expid)
print(beta_expid)
print(gamma_expid)
Asvin
alpha='CGJ'
beta='SJW'
gmma='PWS'
print(alpha+beta+gmma)
alpha_id = 'CGJ28371'
beta_id = 'SJW99399'
gamma_id ='PWS29382'
print(alpha[0:3:])
print(beta[0:3:])
print(gmma[0:3:])
Logan–
Kari
print(alpha_id[0:3])
print(beta_id[0:3])
print(gamma_id[0:3])
print(alpha_id[3:8])
print(beta_id[3:8])
print(gamma_id[3:8])
Connor
alpha_id = 'CGJ28371'
beta_id = 'SJW99399'
gamma_id = 'PWS29382'
alpha_initial = alpha_id[0:3:1]
beta_initial = beta_id[0:3:1]
gamma_initial = gamma_id[0:3:1]
print(alpha_initial)
print(beta_initial)
print(gamma_initial)
alpha_end = alpha_id[3:9:1]
beta_end = beta_id[3:9:1]
gamma_end = gamma_id[3:9:1]
print(alpha_end)
print(beta_end)
print(gamma_end)
emma-
sequencename= '>sequence 001'
sequencestring= 'ATTCGAGGATCGATTTCGATCGATGCTTAGCTTTAGCTTTTTTAGATCTCCCA'
print(sequencename + "\n" + sequencestring)
ethan:
# Header line (ends with a newline) followed by the sequence.
name = ">sequence 003\n"
sequence_string = "AAGTCGATCGAAGTCTTCC"
print(name + sequence_string)
Asvin
name='>sequence 001'
Code='ATTCGAGGATCGATTTCGATCGATGCTTAGCTTTAGCTTTTTTAGATCTCCCA'
print(name+"\n"+Code)
Kari
seqname = '>sequence 001'
seqnum ='ATTCGAGGATCGATTTCGATCGATGCTTAGCTTTAGCTTTTTTAGATCTCCCA'
print(seqname+"\n"+seqnum)
Connor
sequence_name = '>sequence 001'
sequence_string = 'ATTCGAGGATCGATTTCGATCGATGCTTAGCTTTAGCTTTTTTAGATCTCCCA'
print(sequence_name + '\n' + sequence_string)
Kari
b = 'QWERTY'
print(b.lower())
ethan:
#gag
gag_name = ">gag sequence \n"
gag_gene = hiv_genome[789:2292]
#pol
pol_name = ">pol sequence \n"
pol_gene = hiv_genome[2084:5096]
#vif
vif_name = ">vif sequence \n"
vif_gene = hiv_genome[5040:5619]
#vpr
vpr_name = ">vpr sequence \n"
vpr_gene = hiv_genome[5558:5850]
#env
env_name = ">env sequence \n"
env_gene = hiv_genome[6224:8795]
#command
full_sequence = gag_name+gag_gene+"\n"+pol_name+ pol_gene + "\n" + vif_name+vif_gene + "\n" +vpr_name+ vpr_gene + "\n" +env_name+ env_gene
print(full_sequence)
#rna sequence
rna_sequence = full_sequence.replace('t', 'u')
print(rna_sequence)
A = 'a'
# Count each base in the genome. hiv_genome is written as DNA (it uses
# 't'), so thymine is counted with 't'; 'u' would only appear after the
# sequence has been converted to RNA.
print(hiv_genome.count('a'))
print(hiv_genome.count('c'))
print(hiv_genome.count('t'))
print(hiv_genome.count('g'))
# GC content = (number of g + number of c) / gene length.
# The parentheses are required: `/` binds tighter than `+`, so without
# them only the g count would be divided by the length.
sum_of_g_gag = gag_gene.count('g')
sum_of_c_gag = gag_gene.count('c')
sum_of_gag = len(gag_gene)
print("GC content of gag gene:")
print((sum_of_c_gag + sum_of_g_gag) / sum_of_gag)

sum_of_g_pol = pol_gene.count('g')
sum_of_c_pol = pol_gene.count('c')
sum_of_pol = len(pol_gene)
print("GC content of pol gene:")
print((sum_of_c_pol + sum_of_g_pol) / sum_of_pol)
hiv_genome = 'tggaagggctaattcactcccaacgaagacaagatatccttgatctgtggatctaccacacacaaggctacttccctgattagcagaactacacaccagggccagggatcagatatccactgacctttggatggtgctacaagctagtaccagttgagccagagaagttagaagaagccaacaaaggagagaacaccagcttgttacaccctgtgagcctgcatggaatggatgacccggagagagaagtgttagagtggaggtttgacagccgcctagcatttcatcacatggcccgagagctgcatccggagtacttcaagaactgctgacatcgagcttgctacaagggactttccgctggggactttccagggaggcgtggcctgggcgggactggggagtggcgagccctcagatcctgcatataagcagctgctttttgcctgtactgggtctctctggttagaccagatctgagcctgggagctctctggctaactagggaacccactgcttaagcctcaataaagcttgccttgagtgcttcaagtagtgtgtgcccgtctgttgtgtgactctggtaactagagatccctcagacccttttagtcagtgtggaaaatctctagcagtggcgcccgaacagggacctgaaagcgaaagggaaaccagaggagctctctcgacgcaggactcggcttgctgaagcgcgcacggcaagaggcgaggggcggcgactggtgagtacgccaaaaattttgactagcggaggctagaaggagagagatgggtgcgagagcgtcagtattaagcgggggagaattagatcgatgggaaaaaattcggttaaggccagggggaaagaaaaaatataaattaaaacatatagtatgggcaagcagggagctagaacgattcgcagttaatcctggcctgttagaaacatcagaaggctgtagacaaatactgggacagctacaaccatcccttcagacaggatcagaagaacttagatcattatataatacagtagcaaccctctattgtgtgcatcaaaggatagagataaaagacaccaaggaagctttagacaagatagaggaagagcaaaacaaaagtaagaaaaaagcacagcaagcagcagctgacacaggacacagcaatcaggtcagccaaaattaccctatagtgcagaacatccaggggcaaatggtacatcaggccatatcacctagaactttaaatgcatgggtaaaagtagtagaagagaaggctttcagcccagaagtgatacccatgttttcagcattatcagaaggagccaccccacaagatttaaacaccatgctaaacacagtggggggacatcaagcagccatgcaaatgttaaaagagaccatcaatgaggaagctgcagaatgggatagagtgcatccagtgcatgcagggcctattgcaccaggccagatgagagaaccaaggggaagtgacatagcaggaactactagtacccttcaggaacaaataggatggatgacaaataatccacctatcccagtaggagaaatttataaaagatggataatcctgggattaaataaaatagtaagaatgtatagccctaccagcattctggacataagacaaggaccaaaggaaccctttagagactatgtagaccggttctataaaactctaagagccgagcaagcttcacaggaggtaaaaaattggatgacagaaaccttgttggtccaaaatgcgaacccagattgtaagactattttaaaagcattgggaccagcggctacactagaagaaatgatgacagcatgtcagggagtaggaggacccggccataaggcaagagttttggctgaagcaatgagccaagtaacaaattcagctaccataatgatgcagagaggcaattttaggaaccaaagaaagattgttaagtgtttcaattgtggcaaagaaggg
cacacagccagaaattgcagggcccctaggaaaaagggctgttggaaatgtggaaaggaaggacaccaaatgaaagattgtactgagagacaggctaattttttagggaagatctggccttcctacaagggaaggccagggaattttcttcagagcagaccagagccaacagccccaccagaagagagcttcaggtctggggtagagacaacaactccccctcagaagcaggagccgatagacaaggaactgtatcctttaacttccctcaggtcactctttggcaacgacccctcgtcacaataaagataggggggcaactaaaggaagctctattagatacaggagcagatgatacagtattagaagaaatgagtttgccaggaagatggaaaccaaaaatgatagggggaattggaggttttatcaaagtaagacagtatgatcagatactcatagaaatctgtggacataaagctataggtacagtattagtaggacctacacctgtcaacataattggaagaaatctgttgactcagattggttgcactttaaattttcccattagccctattgagactgtaccagtaaaattaaagccaggaatggatggcccaaaagttaaacaatggccattgacagaagaaaaaataaaagcattagtagaaatttgtacagagatggaaaaggaagggaaaatttcaaaaattgggcctgaaaatccatacaatactccagtatttgccataaagaaaaaagacagtactaaatggagaaaattagtagatttcagagaacttaataagagaactcaagacttctgggaagttcaattaggaataccacatcccgcagggttaaaaaagaaaaaatcagtaacagtactggatgtgggtgatgcatatttttcagttcccttagatgaagacttcaggaagtatactgcatttaccatacctagtataaacaatgagacaccagggattagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatattccaaagtagcatgacaaaaatcttagagccttttagaaaacaaaatccagacatagttatctatcaatacatggatgatttgtatgtaggatctgacttagaaatagggcagcatagaacaaaaatagaggagctgagacaacatctgttgaggtggggacttaccacaccagacaaaaaacatcagaaagaacctccattcctttggatgggttatgaactccatcctgataaatggacagtacagcctatagtgctgccagaaaaagacagctggactgtcaatgacatacagaagttagtggggaaattgaattgggcaagtcagatttacccagggattaaagtaaggcaattatgtaaactccttagaggaaccaaagcactaacagaagtaataccactaacagaagaagcagagctagaactggcagaaaacagagagattctaaaagaaccagtacatggagtgtattatgacccatcaaaagacttaatagcagaaatacagaagcaggggcaaggccaatggacatatcaaatttatcaagagccatttaaaaatctgaaaacaggaaaatatgcaagaatgaggggtgcccacactaatgatgtaaaacaattaacagaggcagtgcaaaaaataaccacagaaagcatagtaatatggggaaagactcctaaatttaaactgcccatacaaaaggaaacatgggaaacatggtggacagagtattggcaagccacctggattcctgagtgggagtttgttaatacccctcccttagtgaaattatggtaccagttagagaaagaacccatagtaggagcagaaaccttctatgtagatggggcagctaacagggagactaaattaggaaaagcaggatatgttactaatagaggaagacaaaaagttgtcaccctaactgacacaacaaatcagaagactgagtta
caagcaatttatctagctttgcaggattcgggattagaagtaaacatagtaacagactcacaatatgcattaggaatcattcaagcacaaccagatcaaagtgaatcagagttagtcaatcaaataatagagcagttaataaaaaaggaaaaggtctatctggcatgggtaccagcacacaaaggaattggaggaaatgaacaagtagataaattagtcagtgctggaatcaggaaagtactatttttagatggaatagataaggcccaagatgaacatgagaaatatcacagtaattggagagcaatggctagtgattttaacctgccacctgtagtagcaaaagaaatagtagccagctgtgataaatgtcagctaaaaggagaagccatgcatggacaagtagactgtagtccaggaatatggcaactagattgtacacatttagaaggaaaagttatcctggtagcagttcatgtagccagtggatatatagaagcagaagttattccagcagaaacagggcaggaaacagcatattttcttttaaaattagcaggaagatggccagtaaaaacaatacatactgacaatggcagcaatttcaccggtgctacggttagggccgcctgttggtgggcgggaatcaagcaggaatttggaattccctacaatccccaaagtcaaggagtagtagaatctatgaataaagaattaaagaaaattataggacaggtaagagatcaggctgaacatcttaagacagcagtacaaatggcagtattcatccacaattttaaaagaaaaggggggattggggggtacagtgcaggggaaagaatagtagacataatagcaacagacatacaaactaaagaattacaaaaacaaattacaaaaattcaaaattttcgggtttattacagggacagcagaaatccactttggaaaggaccagcaaagctcctctggaaaggtgaaggggcagtagtaatacaagataatagtgacataaaagtagtgccaagaagaaaagcaaagatcattagggattatggaaaacagatggcaggtgatgattgtgtggcaagtagacaggatgaggattagaacatggaaaagtttagtaaaacaccatatgtatgtttcagggaaagctaggggatggttttatagacatcactatgaaagccctcatccaagaataagttcagaagtacacatcccactaggggatgctagattggtaataacaacatattggggtctgcatacaggagaaagagactggcatttgggtcagggagtctccatagaatggaggaaaaagagatatagcacacaagtagaccctgaactagcagaccaactaattcatctgtattactttgactgtttttcagactctgctataagaaaggccttattaggacacatagttagccctaggtgtgaatatcaagcaggacataacaaggtaggatctctacaatacttggcactagcagcattaataacaccaaaaaagataaagccacctttgcctagtgttacgaaactgacagaggatagatggaacaagccccagaagaccaagggccacagagggagccacacaatgaatggacactagagcttttagaggagcttaagaatgaagctgttagacattttcctaggatttggctccatggcttagggcaacatatctatgaaacttatggggatacttgggcaggagtggaagccataataagaattctgcaacaactgctgtttatccattttcagaattgggtgtcgacatagcagaataggcgttactcgacagaggagagcaagaaatggagccagtagatcctagactagagccctggaagcatccaggaagtcagcctaaaactgcttgtaccaattgctattgtaaaaagtgttgctttcattgccaagtttgtttcataacaaaagccttaggcatctcctatggcaggaagaagcgg
agacagcgacgaagagctcatcagaacagtcagactcatcaagcttctctatcaaagcagtaagtagtacatgtaacgcaacctataccaatagtagcaatagtagcattagtagtagcaataataatagcaatagttgtgtggtccatagtaatcatagaatataggaaaatattaagacaaagaaaaatagacaggttaattgatagactaatagaaagagcagaagacagtggcaatgagagtgaaggagaaatatcagcacttgtggagatgggggtggagatggggcaccatgctccttgggatgttgatgatctgtagtgctacagaaaaattgtgggtcacagtctattatggggtacctgtgtggaaggaagcaaccaccactctattttgtgcatcagatgctaaagcatatgatacagaggtacataatgtttgggccacacatgcctgtgtacccacagaccccaacccacaagaagtagtattggtaaatgtgacagaaaattttaacatgtggaaaaatgacatggtagaacagatgcatgaggatataatcagtttatgggatcaaagcctaaagccatgtgtaaaattaaccccactctgtgttagtttaaagtgcactgatttgaagaatgatactaataccaatagtagtagcgggagaatgataatggagaaaggagagataaaaaactgctctttcaatatcagcacaagcataagaggtaaggtgcagaaagaatatgcatttttttataaacttgatataataccaatagataatgatactaccagctataagttgacaagttgtaacacctcagtcattacacaggcctgtccaaaggtatcctttgagccaattcccatacattattgtgccccggctggttttgcgattctaaaatgtaataataagacgttcaatggaacaggaccatgtacaaatgtcagcacagtacaatgtacacatggaattaggccagtagtatcaactcaactgctgttaaatggcagtctagcagaagaagaggtagtaattagatctgtcaatttcacggacaatgctaaaaccataatagtacagctgaacacatctgtagaaattaattgtacaagacccaacaacaatacaagaaaaagaatccgtatccagagaggaccagggagagcatttgttacaataggaaaaataggaaatatgagacaagcacattgtaacattagtagagcaaaatggaataacactttaaaacagatagctagcaaattaagagaacaatttggaaataataaaacaataatctttaagcaatcctcaggaggggacccagaaattgtaacgcacagttttaattgtggaggggaatttttctactgtaattcaacacaactgtttaatagtacttggtttaatagtacttggagtactgaagggtcaaataacactgaaggaagtgacacaatcaccctcccatgcagaataaaacaaattataaacatgtggcagaaagtaggaaaagcaatgtatgcccctcccatcagtggacaaattagatgttcatcaaatattacagggctgctattaacaagagatggtggtaatagcaacaatgagtccgagatcttcagacctggaggaggagatatgagggacaattggagaagtgaattatataaatataaagtagtaaaaattgaaccattaggagtagcacccaccaaggcaaagagaagagtggtgcagagagaaaaaagagcagtgggaataggagctttgttccttgggttcttgggagcagcaggaagcactatgggcgcagcctcaatgacgctgacggtacaggccagacaattattgtctggtatagtgcagcagcagaacaatttgctgagggctattgaggcgcaacagcatctgttgcaactcacagtctggggcatcaagcagctccaggcaagaatcctggctgtggaaagatacctaa
aggatcaacagctcctggggatttggggttgctctggaaaactcatttgcaccactgctgtgccttggaatgctagttggagtaataaatctctggaacagatttggaatcacacgacctggatggagtgggacagagaaattaacaattacacaagcttaatacactccttaattgaagaatcgcaaaaccagcaagaaaagaatgaacaagaattattggaattagataaatgggcaagtttgtggaattggtttaacataacaaattggctgtggtatataaaattattcataatgatagtaggaggcttggtaggtttaagaatagtttttgctgtactttctatagtgaatagagttaggcagggatattcaccattatcgtttcagacccacctcccaaccccgaggggacccgacaggcccgaaggaatagaagaagaaggtggagagagagacagagacagatccattcgattagtgaacggatccttggcacttatctgggacgatctgcggagcctgtgcctcttcagctaccaccgcttgagagacttactcttgattgtaacgaggattgtggaacttctgggacgcagggggtgggaagccctcaaatattggtggaatctcctacagtattggagtcaggaactaaagaatagtgctgttagcttgctcaatgccacagccatagcagtagctgaggggacagatagggttatagaagtagtacaaggagcttgtagagctattcgccacatacctagaagaataagacagggcttggaaaggattttgctataagatgggtggcaagtggtcaaaaagtagtgtgattggatggcctactgtaagggaaagaatgagacgagctgagccagcagcagatagggtgggagcagcatctcgagacctggaaaaacatggagcaatcacaagtagcaatacagcagctaccaatgctgcttgtgcctggctagaagcacaagaggaggaggaggtgggttttccagtcacacctcaggtacctttaagaccaatgacttacaaggcagctgtagatcttagccactttttaaaagaaaaggggggactggaagggctaattcactcccaaagaagacaagatatccttgatctgtggatctaccacacacaaggctacttccctgattagcagaactacacaccagggccaggggtcagatatccactgacctttggatggtgctacaagctagtaccagttgagccagataagatagaagaggccaataaaggagagaacaccagcttgttacaccctgtgagcctgcatgggatggatgacccggagagagaagtgttagagtggaggtttgacagccgcctagcatttcatcacgtggcccgagagctgcatccggagtacttcaagaactgctgacatcgagcttgctacaagggactttccgctggggactttccagggaggcgtggcctgggcgggactggggagtggcgagccctcagatcctgcatataagcagctgctttttgcctgtactgggtctctctggttagaccagatctgagcctgggagctctctggctaactagggaacccactgcttaagcctcaataaagcttgccttgagtgcttcaagtagtgtgtgcccgtctgttgtgtgactctggtaactagagatccctcagacccttttagtcagtgtggaaaatctctagca'
gag = hiv_genome[789:2292]
pol = hiv_genome[2084:5096]
vif = hiv_genome[5040:5619]
vpr = hiv_genome[5558:5970]
env = hiv_genome[6224:8795]
#percent GC = (sum of G + sum of C) / total number of nucleotides in a given gene
Kari
#gag
gaglen = (len(gag))
g_gag = (gag.count('g'))
c_gag = (gag.count('c'))
gag_percent = ((g_gag + c_gag)/gaglen)
print('gag percent')
print(gag_percent)
#pol
pollen = (len(pol))
g_pol = (pol.count('g'))
c_pol = (pol.count('c'))
pol_percent = ((g_pol + c_pol)/pollen)
print('pol percent')
print(pol_percent)
#vif
viflen = (len(vif))
g_vif = (vif.count('g'))
c_vif = (vif.count('c'))
vif_percent = ((g_vif + c_vif)/viflen)
print('vif percent')
print(vif_percent)
#vpr
vprlen = (len(vpr))
g_vpr = (vpr.count('g'))
c_vpr = (vpr.count('c'))
vpr_percent = ((g_vpr + c_vpr)/vprlen)
print('vpr percent')
print(vpr_percent)
#env
envlen = (len(env))
g_env = (env.count('g'))
c_env = (env.count('c'))
env_percent = ((g_env + c_env)/envlen)
print('env percent')
print(env_percent)
Jake:
RNA_gag = 'ugggugcgagagcgucaguauuaagcgggggagaauuagaucgaugggaaaaaauucgguuaaggccagggggaaagaaaaaauauaaauuaaaacauauaguaugggcaagcagggagcuagaacgauucgcaguuaauccuggccuguuagaaacaucagaaggcuguagacaaauacugggacagcuacaaccaucccuucagacaggaucagaagaacuuagaucauuauauaauacaguagcaacccucuauugugugcaucaaaggauagagauaaaagacaccaaggaagcuuuagacaagauagaggaagagcaaaacaaaaguaagaaaaaagcacagcaagcagcagcugacacaggacacagcaaucaggucagccaaaauuacccuauagugcagaacauccaggggcaaaugguacaucaggccauaucaccuagaacuuuaaaugcauggguaaaaguaguagaagagaaggcuuucagcccagaagugauacccauguuuucagcauuaucagaaggagccaccccacaagauuuaaacaccaugcuaaacacaguggggggacaucaagcagccaugcaaauguuaaaagagaccaucaaugaggaagcugcagaaugggauagagugcauccagugcaugcagggccuauugcaccaggccagaugagagaaccaaggggaagugacauagcaggaacuacuaguacccuucaggaacaaauaggauggaugacaaauaauccaccuaucccaguaggagaaauuuauaaaagauggauaauccugggauuaaauaaaauaguaagaauguauagcccuaccagcauucuggacauaagacaaggaccaaaggaacccuuuagagacuauguagaccgguucuauaaaacucuaagagccgagcaagcuucacaggagguaaaaaauuggaugacagaaaccuuguugguccaaaaugcgaacccagauuguaagacuauuuuaaaagcauugggaccagcggcuacacuagaagaaaugaugacagcaugucagggaguaggaggacccggccauaaggcaagaguuuuggcugaagcaaugagccaaguaacaaauucagcuaccauaaugaugcagagaggcaauuuuaggaaccaaagaaagauuguuaaguguuucaauuguggcaaagaagggcacacagccagaaauugcagggccccuaggaaaaagggcuguuggaaauguggaaaggaaggacaccaaaugaaagauuguacugagagacaggcuaauuuuuuagggaagaucuggccuuccuacaagggaaggccagggaauuuucuucagagcagaccagagccaacagccccaccagaagagagcuucaggucugggguagagacaacaacucccccucagaagcaggagccgauagacaaggaacuguauccuuuaacuucccucaggucacucuuuggcaacgaccccucgucacaauaa'
GC_gag = (RNA_gag.count('c') + RNA_gag.count('g')) / len(RNA_gag) * 100
RNA_pol = 'uuuuuagggaagaucuggccuuccuacaagggaaggccagggaauuuucuucagagcagaccagagccaacagccccaccagaagagagcuucaggucugggguagagacaacaacucccccucagaagcaggagccgauagacaaggaacuguauccuuuaacuucccucaggucacucuuuggcaacgaccccucgucacaauaaagauaggggggcaacuaaaggaagcucuauuagauacaggagcagaugauacaguauuagaagaaaugaguuugccaggaagauggaaaccaaaaaugauagggggaauuggagguuuuaucaaaguaagacaguaugaucagauacucauagaaaucuguggacauaaagcuauagguacaguauuaguaggaccuacaccugucaacauaauuggaagaaaucuguugacucagauugguugcacuuuaaauuuucccauuagcccuauugagacuguaccaguaaaauuaaagccaggaauggauggcccaaaaguuaaacaauggccauugacagaagaaaaaauaaaagcauuaguagaaauuuguacagagauggaaaaggaagggaaaauuucaaaaauugggccugaaaauccauacaauacuccaguauuugccauaaagaaaaaagacaguacuaaauggagaaaauuaguagauuucagagaacuuaauaagagaacucaagacuucugggaaguucaauuaggaauaccacaucccgcaggguuaaaaaagaaaaaaucaguaacaguacuggaugugggugaugcauauuuuucaguucccuuagaugaagacuucaggaaguauacugcauuuaccauaccuaguauaaacaaugagacaccagggauuagauaucaguacaaugugcuuccacagggauggaaaggaucaccagcaauauuccaaaguagcaugacaaaaaucuuagagccuuuuagaaaacaaaauccagacauaguuaucuaucaauacauggaugauuuguauguaggaucugacuuagaaauagggcagcauagaacaaaaauagaggagcugagacaacaucuguugagguggggacuuaccacaccagacaaaaaacaucagaaagaaccuccauuccuuuggauggguuaugaacuccauccugauaaauggacaguacagccuauagugcugccagaaaaagacagcuggacugucaaugacauacagaaguuaguggggaaauugaauugggcaagucagauuuacccagggauuaaaguaaggcaauuauguaaacuccuuagaggaaccaaagcacuaacagaaguaauaccacuaacagaagaagcagagcuagaacuggcagaaaacagagagauucuaaaagaaccaguacauggaguguauuaugacccaucaaaagacuuaauagcagaaauacagaagcaggggcaaggccaauggacauaucaaauuuaucaagagccauuuaaaaaucugaaaacaggaaaauaugcaagaaugaggggugcccacacuaaugauguaaaacaauuaacagaggcagugcaaaaaauaaccacagaaagcauaguaauauggggaaagacuccuaaauuuaaacugcccauacaaaaggaaacaugggaaacaugguggacagaguauuggcaagccaccuggauuccugagugggaguuuguuaauaccccucccuuagugaaauuaugguaccaguuagagaaagaacccauaguaggagcagaaaccuucuauguagauggggcagcuaacagggagacuaaauuaggaaaagcaggauauguuacuaauagaggaagacaaaaaguugucacccuaacugacacaacaaaucagaagacugaguuacaagcaauuuaucuagcuuugcaggauucgggauuagaaguaaacauaguaacagacucacaauaugcauuaggaaucauucaagcac
aaccagaucaaagugaaucagaguuagucaaucaaauaauagagcaguuaauaaaaaaggaaaaggucuaucuggcauggguaccagcacacaaaggaauuggaggaaaugaacaaguagauaaauuagucagugcuggaaucaggaaaguacuauuuuuagauggaauagauaaggcccaagaugaacaugagaaauaucacaguaauuggagagcaauggcuagugauuuuaaccugccaccuguaguagcaaaagaaauaguagccagcugugauaaaugucagcuaaaaggagaagccaugcauggacaaguagacuguaguccaggaauauggcaacuagauuguacacauuuagaaggaaaaguuauccugguagcaguucauguagccaguggauauauagaagcagaaguuauuccagcagaaacagggcaggaaacagcauauuuucuuuuaaaauuagcaggaagauggccaguaaaaacaauacauacugacaauggcagcaauuucaccggugcuacgguuagggccgccuguuggugggcgggaaucaagcaggaauuuggaauucccuacaauccccaaagucaaggaguaguagaaucuaugaauaaagaauuaaagaaaauuauaggacagguaagagaucaggcugaacaucuuaagacagcaguacaaauggcaguauucauccacaauuuuaaaagaaaaggggggauugggggguacagugcaggggaaagaauaguagacauaauagcaacagacauacaaacuaaagaauuacaaaaacaaauuacaaaaauucaaaauuuucggguuuauuacagggacagcagaaauccacuuuggaaaggaccagcaaagcuccucuggaaaggugaaggggcaguaguaauacaagauaauagugacauaaaaguagugccaagaagaaaagcaaagaucauuagggauuauggaaaacagauggcaggugaugauuguguggcaaguagacaggaugaggauuag'
GC_pol = (RNA_pol.count('c') + RNA_pol.count('g')) / len(RNA_pol) * 100
RNA_vif = 'uggaaaacagauggcaggugaugauuguguggcaaguagacaggaugaggauuagaacauggaaaaguuuaguaaaacaccauauguauguuucagggaaagcuaggggaugguuuuauagacaucacuaugaaagcccucauccaagaauaaguucagaaguacacaucccacuaggggaugcuagauugguaauaacaacauauuggggucugcauacaggagaaagagacuggcauuugggucagggagucuccauagaauggaggaaaaagagauauagcacacaaguagacccugaacuagcagaccaacuaauucaucuguauuacuuugacuguuuuucagacucugcuauaagaaaggccuuauuaggacacauaguuagcccuaggugugaauaucaagcaggacauaacaagguaggaucucuacaauacuuggcacuagcagcauuaauaacaccaaaaaagauaaagccaccuuugccuaguguuacgaaacugacagaggauagauggaacaagccccagaagaccaagggccacagagggagccacacaaugaauggacacuag'
GC_vif = (RNA_vif.count('c') + RNA_vif.count('g')) / len(RNA_vif) * 100
RNA_vpr = 'uggaacaagccccagaagaccaagggccacagagggagccacacaaugaauggacacuagagcuuuuagaggagcuuaagaaugaagcuguuagacauuuuccuaggauuuggcuccauggcuuagggcaacauaucuaugaaacuuauggggauacuugggcaggaguggaagccauaauaagaauucugcaacaacugcuguuuauccauuuucagaauugggugucgacauagcagaauaggcguuacucgacagaggagagcaagaaauggagccaguagauccuag'
GC_vpr = (RNA_vpr.count('c') + RNA_vpr.count('g')) / len(RNA_vpr) * 100
RNA_env = 'ugagagugaaggagaaauaucagcacuuguggagauggggguggagauggggcaccaugcuccuugggauguugaugaucuguagugcuacagaaaaauugugggucacagucuauuaugggguaccuguguggaaggaagcaaccaccacucuauuuugugcaucagaugcuaaagcauaugauacagagguacauaauguuugggccacacaugccuguguacccacagaccccaacccacaagaaguaguauugguaaaugugacagaaaauuuuaacauguggaaaaaugacaugguagaacagaugcaugaggauauaaucaguuuaugggaucaaagccuaaagccauguguaaaauuaaccccacucuguguuaguuuaaagugcacugauuugaagaaugauacuaauaccaauaguaguagcgggagaaugauaauggagaaaggagagauaaaaaacugcucuuucaauaucagcacaagcauaagagguaaggugcagaaagaauaugcauuuuuuuauaaacuugauauaauaccaauagauaaugauacuaccagcuauaaguugacaaguuguaacaccucagucauuacacaggccuguccaaagguauccuuugagccaauucccauacauuauugugccccggcugguuuugcgauucuaaaauguaauaauaagacguucaauggaacaggaccauguacaaaugucagcacaguacaauguacacauggaauuaggccaguaguaucaacucaacugcuguuaaauggcagucuagcagaagaagagguaguaauuagaucugucaauuucacggacaaugcuaaaaccauaauaguacagcugaacacaucuguagaaauuaauuguacaagacccaacaacaauacaagaaaaagaauccguauccagagaggaccagggagagcauuuguuacaauaggaaaaauaggaaauaugagacaagcacauuguaacauuaguagagcaaaauggaauaacacuuuaaaacagauagcuagcaaauuaagagaacaauuuggaaauaauaaaacaauaaucuuuaagcaauccucaggaggggacccagaaauuguaacgcacaguuuuaauuguggaggggaauuuuucuacuguaauucaacacaacuguuuaauaguacuugguuuaauaguacuuggaguacugaagggucaaauaacacugaaggaagugacacaaucacccucccaugcagaauaaaacaaauuauaaacauguggcagaaaguaggaaaagcaauguaugccccucccaucaguggacaaauuagauguucaucaaauauuacagggcugcuauuaacaagagauggugguaauagcaacaaugaguccgagaucuucagaccuggaggaggagauaugagggacaauuggagaagugaauuauauaaauauaaaguaguaaaaauugaaccauuaggaguagcacccaccaaggcaaagagaagaguggugcagagagaaaaaagagcagugggaauaggagcuuuguuccuuggguucuugggagcagcaggaagcacuaugggcgcagccucaaugacgcugacgguacaggccagacaauuauugucugguauagugcagcagcagaacaauuugcugagggcuauugaggcgcaacagcaucuguugcaacucacagucuggggcaucaagcagcuccaggcaagaauccuggcuguggaaagauaccuaaaggaucaacagcuccuggggauuugggguugcucuggaaaacucauuugcaccacugcugugccuuggaaugcuaguuggaguaauaaaucucuggaacagauuuggaaucacacgaccuggauggagugggacagagaaauuaacaauuacacaagcuuaauacacuccuuaauugaagaaucgcaaaaccagcaagaaaagaaugaacaagaauuauuggaauuag
auaaaugggcaaguuuguggaauugguuuaacauaacaaauuggcugugguauauaaaauuauucauaaugauaguaggaggcuugguagguuuaagaauaguuuuugcuguacuuucuauagugaauagaguuaggcagggauauucaccauuaucguuucagacccaccucccaaccccgaggggacccgacaggcccgaaggaauagaagaagaagguggagagagagacagagacagauccauucgauuagugaacggauccuuggcacuuaucugggacgaucugcggagccugugccucuucagcuaccaccgcuugagagacuuacucuugauuguaacgaggauuguggaacuucugggacgcagggggugggaagcccucaaauauugguggaaucuccuacaguauuggagucaggaacuaaagaauagugcuguuagcuugcucaaugccacagccauagcaguagcugaggggacagauaggguuauagaaguaguacaaggagcuuguagagcuauucgccacauaccuagaagaauaagacagggcuuggaaaggauuuugcuauaa'
GC_env = (RNA_env.count('c') + RNA_env.count('g')) / len(RNA_env) * 100
print(GC_gag,GC_pol,GC_vif,GC_vpr,GC_env)
hiv_gene_names = ['env',
'gag',
'vif',
'pol',
'vpr',
'vpu',
'nef']
print(hiv_gene_names[1:2],hiv_gene_names[3:4],hiv_gene_names[2:3],hiv_gene_names[4:5],hiv_gene_names[5:6],hiv_gene_names[0:1],hiv_gene_names[6:7])
hiv_gene_names = ['env',
'gag',
'vif',
'pol',
'vpr',
'vpu',
'nef']
hiv_genes_ordered = [hiv_gene_names[1],
hiv_gene_names[3],
hiv_gene_names[2],
hiv_gene_names[4],
hiv_gene_names[5],
hiv_gene_names[0],
hiv_gene_names[6]]
print(hiv_genes_ordered)
cool= "pizza"
if 3==6:
print(cool[0])
else:
print("3 doesn't equal 6")
if 1<=5:
print('1 is less than or equal to 5.')
if 3 != 5:
print('3 is not equal to 5')
if 5+6==11:
print("ture")
a = 1
b = 2
if a > b:
print('a is greater than b')
else:
print('a is less than b')
x = random.randint(1,10)
if (x*2)>=10:
print("%d is greater than or equal to 5"%x)
x=6
y=9
z=100
if y+z>=x:
print('true statement')
#if/else statement:
x = random.randint(1,10)
if (x*2)>=10:
print("%d is greater than or equal to 5"%x)
else:
print("%d is less than 5"%x)
if 5+6==10:
print("ture")
else:
print("false")
a=4
x=5
if a>=x:
print ('a is greater than or equal to x')
else:
print ('a is less than x')
x = 3
if x == 1:
print('1 is equal to 1')
elif x==2:
print('2 is equal to 2')
else:
print(x, '=', x)
my_random_float = random.ranf()
print('My random float is %f' % my_random_float)
if my_random_float<= 0.5:
print("Tails")
elif my_random_float>= 0.5:
print("Heads")
hiv_state = ['mutated','did not mutate']
hiv_probability = [0.44, 0.56]
mutation_results = []
for flip in range(1,21):
mutation_rate = random.choice(hiv_state, p = hiv_probability)
mutation_results.append(mutation_rate)
print(mutation_results)
# Simulate 20 replication events with an unfair coin:
# 44% 'mutation', 56% 'no mutation'.
M = ['mutation', 'no mutation']
unfair_coin_probabilities = [0.44, 0.56]
M_list = []
for flip in range(1, 21):
    # Weight the draw with the unfair probabilities defined above.
    unfair_flip = random.choice(M, p=unfair_coin_probabilities)
    print(unfair_flip)
    M_list.append(unfair_flip)
print(M_list)
mutation = ['mutation', 'no mutation']
mutation_probabilities = [0.44, 0.56]
results = []
for i in range(20):
mutation_flip = random.choice(mutation,p=mutation_probabilities)
results.append(mutation_flip)
print(results)
emma-
mutation_state = ['mutation', 'no_mutation']
mutation_probabilities = [.44, .56]
mutation_results =[]
for mutation in range (1,21):
mutation= random.choice(mutation_state,p = mutation_probabilities)
mutation_results.append(mutation)
print(mutation)
genome_state = ['Mutation', 'No mutation']
mut_probability = [0.44, 0.56]
rep_results = []
for flip in range (20):
replication = random.choice(genome_state, p=mut_probability)
rep_results.append(replication)
print(rep_results)
** Logan––-
def rand_protein_sequence():
len =(15)
rand_protein=''
for n in range(len):
aa=random.choice(list(amino_acids.values()))
rand_protein += aa
return(rand_protein) **
# This assignment had been pasted into the middle of the `def` line below;
# it is kept here on its own line.
# NOTE(review): presumably the six observation counts for tube 11 -- confirm.
Tube_11 = [17, 3, 6, 10, 7, 4]


def build_random_protein(length=15):
    """Return a random protein string of ``length`` residues.

    Each residue is drawn independently with ``random.choice`` from the
    values of the ``amino_acids`` dictionary, which is defined elsewhere.

    Args:
        length: Number of residues to draw (default 15).

    Returns:
        The concatenated residues as a string.
    """
    # Build the candidate list once instead of on every iteration.
    residues = list(amino_acids.values())
    rand_protein = ''
    for _ in range(length):
        rand_protein += random.choice(residues)
    return rand_protein
build_random_protein()
emma-
amino_acids_list=['I','T','N','S','L','P','H','R','V','A','D','G','F','Y','C','K','Q','E','_','W','M']
length=random.randint(0,100)
random_protein=''
for x in range (length):
protein=random.choice(amino_acids_list)
random_protein+=protein
print(random_protein)
ethan-
amino_acids_list=list(amino_acids.values())
def make_protein(length=16):
protein2 = ''
for n in range(length):
sequence=random.choice(amino_acids_list)
protein2+=sequence
return protein2
make_protein()
for a in Tubes:
observations = a
n = len(observations)
index = np.arange(n)
colors = ['blue','brown','green','orange','red','yellow']
plot_1 = plot.bar(index,observations,color=colors,tick_label=colors,align='center')
plot.show(plot_1)
print(observations)
tubez=[Tube_0,Tube_1,Tube_2,Tube_4,Tube_6,Tube_8,Tube_11]
for m in tubez:
observations = m
print(m)
n = len(observations)
print(n)
index = np.arange(n)
colors = ['blue',
'brown',
'green',
'orange',
'red',
'yellow']
plot_3 = plot.bar(index,
observations,
color=colors,
tick_label=colors,
align='center')
plot.show(plot_3)
for i in all_tubes:
observations = i
n = len(observations)
index = np.arange(n)
colors = ['blue',
'brown',
'green',
'orange',
'red',
'yellow']
plot_1 = plot.bar(index,
observations,
color=colors,
tick_label=colors,
align='center')
plot.show(plot_1)