## Notes for the files
## [Link to the Google Drive](https://drive.google.com/drive/folders/1q3ueIZ0e4HkVE3tntkhMzLnR3J50JTt8?usp=sharing)
### the 1st symbol that we can agree to remove
totals=check(od,symbols=['!'],show=True)
### the 2nd symbol that we can agree to replace (with e)
totals=check(od,symbols=['ẽ'],show=True)
### the 3rd symbol that we can agree to replace (with u)
totals=check(od,symbols=['ù'],show=True)
### I think it's safe to revert this back to regular e, just like in Balinese
totals=check(od,symbols=['é'],show=True)
### the 4th symbol that we can agree to replace (with a)
totals=check(od,symbols=['à'],show=True)
### ivatan_Ichbayatan cleaned file OCSEAN-IVV_20240316-WORDLIST_ITBAYATEN_1TO1228.xlsx has been edited and saved in version 1.1
### several other cleaned files edited into version 1.1 (taken from)
totals=check(od,symbols=['('],show=True)
totals=check(od,symbols=['?'],show=True)
### several concepts need to be checked and replaced
- ň (found in Ivatan Isabtangen) is changed into ŋ
- x (found in Ivatan Itbayaten) is changed into hɤ
- 2x (found in Ata language) is a sign of repetition, so 'polo2x' is 'polo-polo'
### renamed the Hiligaynon_20220813 column to Hiligaynon and uploaded the edited file to version 1.1
import pandas as pd

### Load the post-QC Hiligaynon wordlist, drop the date suffix from its
### language column name, and save the result under an "edited-" file name.
hiligaynon_source = "CleanedFiles/OCSEAN-HIL_20220813-WORDLIST_1TO1228_PostQC.xlsx"
hiligaynon_output = "edited-OCSEAN-HIL_20220813-WORDLIST_1TO1228_PostQC.xlsx"

df = pd.read_excel(hiligaynon_source, engine='openpyxl')
df = df.rename(columns={"Hiligaynon_20220813": "Hiligaynon"})
df.to_excel(hiligaynon_output, index=False, engine='openpyxl')
### added 2 columns (Indonesian and Tagalog) to OCSEAN_initial_joineddata and saved the result as edited-OCSEAN_initial_joineddata
import pandas as pd
import numpy as np

### Copy the Indonesian and Tagalog columns from the wordlist into the joined
### data, cycling each column so that it fills every row of the destination,
### then save the result under an "edited-" file name.
source_df = pd.read_excel("CleanedFiles/Wordlist.xlsx", engine='openpyxl')
destination_df = pd.read_excel("OCSEAN_initial_joineddata.xlsx", engine='openpyxl')

### An empty source sheet cannot be tiled (the repeat count below would divide
### by zero), so stop with an explicit message instead.
if len(source_df) == 0:
    raise ValueError("CleanedFiles/Wordlist.xlsx has no rows to copy")

### Number of whole repetitions of the source needed to cover the destination;
### the surplus rows are trimmed off after tiling.
repeats = int(np.ceil(len(destination_df) / len(source_df)))

### Read each language from its own source column so the two destination
### columns do not end up holding the same data.
for language in ("Indonesian", "Tagalog"):
    column_to_add = source_df[language]
    repeated_column = pd.Series(
        np.tile(column_to_add.values, repeats)[:len(destination_df)]
    )
    destination_df[language] = repeated_column

destination_df.to_excel("edited-OCSEAN_initial_joineddata.xlsx", index=False, engine='openpyxl')