RDD 2
==
## Sprint 146
import argparse
from collections import namedtuple
import csv
from itertools import groupby
from operator import attrgetter
# One row of a PII extract plus the derived ZIP_PADDED column.
# Field names are passed to Record(**row) by load_records, so they must stay
# spelled exactly like the input file's header columns (including HOSEHOLD_KEY).
Record = namedtuple(
    "Record",
    "INDIVIDUAL_KEY ADDRESS_KEY HOSEHOLD_KEY ZIP ZIP_PADDED",
)

# Input files compared by main(): the previous extract and the new one.
OLD_PATH, NEW_PATH = "out_pii_file.txt.old", "out_pii_file.txt.new"
def with_padded_zip(record):
    """Return a copy of *record* with a ``ZIP_PADDED`` entry added.

    ``ZIP_PADDED`` is the ``ZIP`` value left-padded with zeros to at least
    five characters. The mapping passed in is left unmodified.
    """
    padded = record.copy()
    padded.update(ZIP_PADDED=padded["ZIP"].zfill(5))
    return padded
def load_records(lines):
    """Parse pipe-delimited *lines* into a list of ``Record`` tuples.

    The first line is used as the header by ``csv.DictReader``; every
    following row gets a ``ZIP_PADDED`` value via ``with_padded_zip`` before
    being unpacked into ``Record``.
    """
    reader = csv.DictReader(lines, delimiter="|")
    records = []
    for row in reader:
        records.append(Record(**with_padded_zip(row)))
    return records
def group_records(records):
    """Group *records* by their ``ZIP_PADDED`` attribute.

    Returns a dict mapping each padded ZIP to the set of records carrying it.
    Keys are inserted in sorted order.
    """
    padded_zip_of = attrgetter("ZIP_PADDED")
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(records, key=padded_zip_of)
    buckets = {}
    for padded_zip, members in groupby(ordered, key=padded_zip_of):
        buckets[padded_zip] = set(members)
    return buckets
def _print_records(label, records):
    """Print one labelled section of *records*, one tab-separated row each.

    Each row shows the raw ZIP (left-justified to 5 characters), then the
    individual, address and household keys.
    """
    print(f"\t{label}:")
    # Sets iterate in arbitrary order; sort on the stringified fields so that
    # repeated runs produce identical output.
    for record in sorted(records, key=lambda r: tuple(map(str, r))):
        print(
            f"\t\t{record.ZIP.ljust(5)}\t{record.INDIVIDUAL_KEY}"
            f"\t{record.ADDRESS_KEY}\t{record.HOSEHOLD_KEY}"
        )


def main():
    """Print the old and new records side by side for every padded ZIP.

    Reads NEW_PATH and OLD_PATH, groups both by padded ZIP, and for each
    padded ZIP found in either file prints the matching old records followed
    by the matching new records.
    """
    # newline="" is what the csv module asks for on files it reads.
    with open(NEW_PATH, newline="") as fr:
        new_records = group_records(load_records(fr))
    with open(OLD_PATH, newline="") as fr:
        old_records = group_records(load_records(fr))

    # Union of keys from both files, sorted for a stable report order.
    for key in sorted(new_records.keys() | old_records.keys()):
        print(f"Padded key: {key}")
        _print_records("Old records", old_records.get(key, ()))
        _print_records("New records", new_records.get(key, ()))
        print("\n\n")
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
3/16/22
No, not part of this story. That is why I got the other story ready for you.
Just testing that all 3 phone jobs work OK with this new file; updating unit tests is part of this story.