Before you turn this problem in, make sure everything runs as expected. This is a combination of restarting the kernel and then running all cells (in the menubar, select Kernel$\rightarrow$Restart And Run All).
Make sure you fill in any place that says YOUR CODE HERE
or "YOUR ANSWER HERE".
import csv
import os
import os.path
# Directory that later cells join with a CSV file name (e.g. "tn10.csv").
datadir = "publicdata"
Q1 Assume a CSV file with the same format used in our variations of topnames
with a header line containing the four column names, and then $x$ rows of comma-separated field values. Write a function
readTopNamesLoD(path)
that reads the file and creates a LoD representation and returns the structure.
def readTopNamesLoD(path):
    """Read a topnames-style CSV file into a list-of-dictionaries (LoD).

    The first line of the file is the header; its field values become the
    keys of every row dictionary.  Each following non-empty line becomes
    one dictionary mapping column name to field value.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.  Values in the
        ``year`` and ``count`` columns are converted to ``int``; every
        other value is kept as the string read from the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a ``year`` or ``count`` field is not an integer.

    NOTE(review): the int conversion mirrors the sample LoD literals in
    this file, where year and count are integers -- confirm that this is
    the representation the graders expect.
    """
    int_columns = ("year", "count")
    LoD = []
    # newline="" is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(path, newline="", encoding="utf-8") as csvfile:
        for record in csv.DictReader(csvfile):
            rowD = dict(record)
            for column in int_columns:
                # Skip columns the file does not have (or short rows,
                # which DictReader pads with None).
                if rowD.get(column) is not None:
                    rowD[column] = int(rowD[column])
            LoD.append(rowD)
    return LoD
# Try the reader on tn10.csv inside datadir and print the resulting structure.
tn10data = readTopNamesLoD(os.path.join(datadir, "tn10.csv"))
print(tn10data)
# Checks for Q1: re-read tn10.csv and verify the shape of the result.
tn10data = readTopNamesLoD(os.path.join(datadir, "tn10.csv"))
# The result is a list holding 10 rows ...
assert isinstance(tn10data, list)
assert len(tn10data) == 10
# ... and the first row is a dict with four entries, one of them 'year'.
assert len(tn10data[0]) == 4
assert isinstance(tn10data[0], dict)
assert 'year' in tn10data[0]
# Field values that are not numeric stay as the text read from the file.
assert tn10data[0]['sex'] == 'Female'
# hidden tests here
assert True
Q2 Write a function
addCatColumnLoD(tnLoD, threshold1, threshold2)
that adds a categorical column to a LoD representation in parameter tnLoD
with the new column named category
whose values are the strings "small"
when count is below threshold1
, is "medium"
when count is greater than or equal to threshold1
and less than threshold2
, and "large"
when count is greater than or equal to threshold2
. The function should perform its modifications in place.
def addCatColumnLoD(tnLoD, threshold1, threshold2):
    """Add a ``category`` column to every row of a list of dictionaries.

    The category is derived from each row's ``count`` value:

    * ``"small"``  when ``count < threshold1``
    * ``"medium"`` when ``threshold1 <= count < threshold2``
    * ``"large"``  when ``count >= threshold2``

    The rows are modified in place; nothing is returned.

    Args:
        tnLoD: List of row dictionaries, each having a ``count`` entry.
        threshold1: Lower bound (inclusive) of the "medium" range.
        threshold2: Lower bound (inclusive) of the "large" range.

    Raises:
        KeyError: If a row has no ``count`` entry.
    """
    for rowD in tnLoD:
        count = rowD['count']
        # Test the lower threshold first so the elif only needs the upper one.
        if count < threshold1:
            rowD['category'] = "small"
        elif count < threshold2:
            rowD['category'] = "medium"
        else:
            rowD['category'] = "large"
# Checks for Q2: a ten-row sample table with integer year and count values.
LoD = [{'year': 2014, 'sex': 'Female', 'name': 'Emma', 'count': 20936},
{'year': 2014, 'sex': 'Male', 'name': 'Noah', 'count': 19305},
{'year': 2015, 'sex': 'Female', 'name': 'Emma', 'count': 20455},
{'year': 2015, 'sex': 'Male', 'name': 'Noah', 'count': 19635},
{'year': 2016, 'sex': 'Female', 'name': 'Emma', 'count': 19496},
{'year': 2016, 'sex': 'Male', 'name': 'Noah', 'count': 19117},
{'year': 2017, 'sex': 'Female', 'name': 'Emma', 'count': 19800},
{'year': 2017, 'sex': 'Male', 'name': 'Liam', 'count': 18798},
{'year': 2018, 'sex': 'Female', 'name': 'Emma', 'count': 18688},
{'year': 2018, 'sex': 'Male', 'name': 'Liam', 'count': 19837}]
# Counts below 19000 are "small", 19000-19499 "medium", 19500 and up "large".
addCatColumnLoD(LoD, 19000, 19500)
# The call works in place: same ten rows, each now with a fifth entry.
assert len(LoD) == 10
assert isinstance(LoD[0], dict)
assert len(LoD[0]) == 5
assert 'category' in LoD[0]
# Expected category of every row, in row order.
assert [ rowD['category'] for rowD in LoD ] == ['large', 'medium', 'large',
'large', 'medium', 'medium', 'large', 'small', 'small', 'large']
Q3 Write a function
dropColumnLoD(LoD, columnname)
that drops the column specified by columnname
from the list of dictionary representation given in LoD
. This should be done "in place" and should not assume any particular columns. If the specified columnname
is not present, calling this function should raise no error and have no effect.
def dropColumnLoD(LoD, columnname):
    """Remove one column from every row of a list of dictionaries.

    The rows are modified in place; nothing is returned.  No particular
    set of columns is assumed, and rows that do not have ``columnname``
    are left untouched, so asking for an absent column is a no-op.

    Args:
        LoD: List of row dictionaries to modify.
        columnname: Key to delete from each row.
    """
    for rowD in LoD:
        # pop() with a default never raises, which gives the required
        # "no error, no effect" behaviour for a missing column.
        rowD.pop(columnname, None)
# Demonstration for Q3: build a fresh sample table, drop its 'sex' column,
# then evaluate LoD as the last expression so its value can be inspected.
LoD = [{'year': 2014, 'sex': 'Female', 'name': 'Emma', 'count': 20936},
{'year': 2014, 'sex': 'Male', 'name': 'Noah', 'count': 19305},
{'year': 2015, 'sex': 'Female', 'name': 'Emma', 'count': 20455},
{'year': 2015, 'sex': 'Male', 'name': 'Noah', 'count': 19635},
{'year': 2016, 'sex': 'Female', 'name': 'Emma', 'count': 19496},
{'year': 2016, 'sex': 'Male', 'name': 'Noah', 'count': 19117},
{'year': 2017, 'sex': 'Female', 'name': 'Emma', 'count': 19800},
{'year': 2017, 'sex': 'Male', 'name': 'Liam', 'count': 18798},
{'year': 2018, 'sex': 'Female', 'name': 'Emma', 'count': 18688},
{'year': 2018, 'sex': 'Male', 'name': 'Liam', 'count': 19837}]
dropColumnLoD(LoD, 'sex')
LoD
# Checks for Q3: rebuild the four-column sample table so the test starts
# from unmodified rows.
LoD = [{'year': 2014, 'sex': 'Female', 'name': 'Emma', 'count': 20936},
{'year': 2014, 'sex': 'Male', 'name': 'Noah', 'count': 19305},
{'year': 2015, 'sex': 'Female', 'name': 'Emma', 'count': 20455},
{'year': 2015, 'sex': 'Male', 'name': 'Noah', 'count': 19635},
{'year': 2016, 'sex': 'Female', 'name': 'Emma', 'count': 19496},
{'year': 2016, 'sex': 'Male', 'name': 'Noah', 'count': 19117},
{'year': 2017, 'sex': 'Female', 'name': 'Emma', 'count': 19800},
{'year': 2017, 'sex': 'Male', 'name': 'Liam', 'count': 18798},
{'year': 2018, 'sex': 'Female', 'name': 'Emma', 'count': 18688},
{'year': 2018, 'sex': 'Male', 'name': 'Liam', 'count': 19837}]
dropColumnLoD(LoD, 'count')
# Dropping a column keeps all ten rows and removes exactly one entry per row.
assert len(LoD) == 10
assert isinstance(LoD[0], dict)
assert len(LoD[0]) == 3
assert 'count' not in LoD[0]