```
!pip3 install Faker
!pip3 install us
#from dataclasses import fields
from faker import Faker
import pandas as pd
import random
import string
import time
import us
from concurrent.futures import ThreadPoolExecutor, as_completed
fake = Faker('en_US')
# print(fake.name())
# print(fake.profile())
# print(fake.profile(fields="current_location").get("current_location")[0])
# print(fake.current_country_code())
# print(fake.street_address())
# print("state",fake.state())
# print(fake.state_abbr())
#state_val = "Indiana"
# state = us.states.lookup('{}'.format(fake.state()))
# print(state.abbr)
# print(fake.postalcode_in_state(state_abbr=state.abbr))
# print(state)
# state = fake.state()
# print(state)
# #states.append(state)
# state_code = us.states.lookup('{}'.format(state))
# print(state_code)
# newstatecode = us.states.lookup('{}'.format(fake.state()))
# print(newstatecode.abbr)
def random_phone_num_generator():
    """Build a random phone-number string shaped like ``XXX-XXX-XXXX``.

    The first group is drawn from 100-999, the second from 1-888 and the
    last from 1-9998; the second and last groups are left-padded with zeros
    to three and four digits. The last group is redrawn for as long as it is
    one of the repeated-digit values ``1111`` through ``8888``.

    Returns:
        str: the three groups joined with hyphens.
    """
    repeated_digit_endings = ['1111', '2222', '3333', '4444', '5555', '6666', '7777', '8888']

    area = random.randint(100, 999)
    exchange = random.randint(1, 888)
    line = random.randint(1, 9998)
    # Keep redrawing until the final group is not a single repeated digit.
    while '{:04d}'.format(line) in repeated_digit_endings:
        line = random.randint(1, 9998)

    return '{}-{:03d}-{:04d}'.format(area, exchange, line)
def id_generator(sizeS=2, sizeN=5, charsS=string.ascii_uppercase, charsN=string.digits):
    """Return a random identifier made of a prefix followed by a suffix.

    Args:
        sizeS: number of characters to draw from ``charsS`` for the prefix.
        sizeN: number of characters to draw from ``charsN`` for the suffix.
        charsS: character pool for the prefix (uppercase ASCII letters by default).
        charsN: character pool for the suffix (decimal digits by default).

    Returns:
        str: prefix and suffix concatenated; with the defaults, two letters
        followed by five digits.
    """
    pieces = []
    # Prefix first, then suffix, so characters are drawn in output order.
    for pool, count in ((charsS, sizeS), (charsN, sizeN)):
        for _ in range(count):
            pieces.append(random.choice(pool))
    return ''.join(pieces)
# Number of synthetic customer rows to generate.
num_records = 250000

# Column buffers: one independent list per field. generate_random_values()
# appends exactly one value to each of them per call.
customer_id, name, gender, national_id = [], [], [], []
marital_status, education, occupation, company_name = [], [], [], []
country, states, state_codes, city = [], [], [], []
street_address, zip_code, lat, longitude = [], [], [], []
email, phone, customer_lifetime_value = [], [], []
def generate_random_values():
    """Generate one synthetic customer and append it to the module-level column lists.

    Every value is computed first and only then appended, so a failure while
    generating a value (for example an unknown state) cannot leave the column
    lists with different lengths.

    The function appends one value to each of: ``name``, ``marital_status``,
    ``education``, ``lat``, ``longitude``, ``street_address``, ``city``,
    ``country``, ``gender``, ``occupation``, ``national_id``, ``states``,
    ``state_codes``, ``zip_code``, ``company_name``, ``email``, ``phone``,
    ``customer_id`` and ``customer_lifetime_value``.

    Returns:
        None. The result is the side effect on the module-level lists.

    Raises:
        ValueError: if ``us.states.lookup`` finds no state for the generated
            state name.

    Note:
        The appends to the individual lists are separate operations, so
        concurrent calls from several threads could interleave and mix values
        of different records at the same index.
    """
    # A single profile() call yields all profile-derived fields. Each call
    # builds a complete profile internally, so asking once instead of once per
    # field avoids repeated work. `fields` is passed as a list of keys.
    profile = fake.profile(fields=["sex", "job", "ssn", "company", "mail"])

    # Resolve the state abbreviation up front: the zip code must be generated
    # for the same state that is stored in the `states` column.
    state = fake.state()
    state_info = us.states.lookup(state)
    if state_info is None:
        raise ValueError("No US state matches Faker state name {!r}".format(state))
    state_code = state_info.abbr

    record = (
        (name, fake.name()),
        (marital_status, fake.random_element(elements=("married", "single"))),
        (education, fake.random_element(
            elements=("College", "Masters", "Doctor", "High School", "Bachelor"))),
        (lat, fake.latitude()),
        (longitude, fake.longitude()),
        (street_address, fake.street_address()),
        (city, fake.city()),
        (country, fake.current_country_code()),
        (gender, profile.get('sex')),
        (occupation, profile.get('job')),
        (national_id, profile.get('ssn')),
        (states, state),
        (state_codes, state_code),
        (zip_code, fake.postalcode_in_state(state_abbr=state_code)),
        (company_name, profile.get('company')),
        (email, profile.get('mail')),
        (phone, random_phone_num_generator()),  # US-style phone number string
        (customer_id, id_generator()),
        (customer_lifetime_value, random.uniform(2000, 61000)),
    )

    # All values exist at this point; append them together as one row.
    for column, value in record:
        column.append(value)
# Driver: generate `num_records` synthetic customers, assemble them into a
# pandas DataFrame and report the elapsed wall-clock time in seconds.
start_time = time.time()
print(start_time)

# Rows are generated sequentially. The earlier form,
# `executor.submit(generate_random_values())`, called the function in the main
# thread and handed its None result to the pool, so no work ever ran in a
# worker thread. generate_random_values() appends to several module-level
# lists in turn, so truly concurrent calls could also interleave values from
# different records at the same index.
for _ in range(num_records):
    generate_random_values()

# Column name -> column buffer. "state" and "state_code" must point at the
# module-level lists `states` and `state_codes`; `state` and `state_code` only
# exist as locals inside generate_random_values().
# NOTE(review): the `occupation` and `company_name` lists are populated but
# not included as columns here; confirm whether that omission is intended.
customer_columns = {
    "customer_id": customer_id,
    "full_name": name,
    "country": country,
    "lat": lat,
    "longitude": longitude,
    "street_address": street_address,
    "city": city,
    "state": states,
    "state_code": state_codes,
    "zip_code": zip_code,
    "email": email,
    "phone": phone,
    "gender": gender,
    "education": education,
    "national_id": national_id,
    "marital_status": marital_status,
    "customer_lifetime_value": customer_lifetime_value,
}
df = pd.DataFrame(customer_columns)
print("---%s---" % (time.time() - start_time))
df.head()
# print(df.columns)
# print(df.shape[0])
# 5 workers: 9.138494491577148
# 3 workers: 8.327600717544556
# 10 workers: 8.334825038909912
```