In [1]:
import os
import os.path
import pandas as pd

datadir = "publicdata"
In [2]:
ind_pivot_LoL = [['CAN', 'pop', 36.26],
               ['CAN', 'gdp', 1535.77],
               ['CAN', 'life', 82.30],
               ['USA', 'pop', 323.13],
               ['USA', 'gdp', 18624.47],
               ['USA', 'life', 76.25]]
dataset_columns = ['code', 'ind', 'value']

ind_to_pivot = pd.DataFrame(ind_pivot_LoL, columns=dataset_columns)
In [ ]:
# Pivot with index of code, and pivot column of ind
In [3]:
row_labels = ['A1', 'A2', 'B1', 'B2', 'C1', 'C2']
column_labels = ['I', 'J', 'K', 'V']
dataset = [[ 'A.I', 'A.J', 'K.1', 'A1.V'],
           [ 'A.I', 'A.J', 'K.2', 'A2.V'],
           [ 'B.I', 'B.J', 'K.1', 'B1.V'],
           [ 'B.I', 'B.J', 'K.2', 'B2.V'],
           [ 'C.I', 'C.J', 'K.1', 'C1.V'],
           [ 'C.I', 'C.J', 'K.2', 'C2.V'],
]
pivot_ex2 = pd.DataFrame(dataset, columns=column_labels, index=row_labels)
pivot_ex1 = pivot_ex2.drop('J', axis=1)
In [4]:
pivot_ex1
Out[4]:
I K V
A1 A.I K.1 A1.V
A2 A.I K.2 A2.V
B1 B.I K.1 B1.V
B2 B.I K.2 B2.V
C1 C.I K.1 C1.V
C2 C.I K.2 C2.V
In [5]:
# Pivot with index of I and pivot column of K (with explicit value column of V)
In [6]:
row_labels = ['A1', 'A2', 'B1', 'B2', 'C1', 'C2']
column_labels = ['I', 'K', 'V1', 'V2']
dataset = [[ 'A.I', 'K.1', 'A1.V1', 'A1.V2'],
           [ 'A.I', 'K.2', 'A2.V1', 'A2.V2'],
           [ 'B.I', 'K.1', 'B1.V1', 'B1.V2'],
           [ 'B.I', 'K.2', 'B2.V1', 'B2.V2'],
           [ 'C.I', 'K.1', 'C1.V1', 'C1.V2'],
           [ 'C.I', 'K.2', 'C2.V1', 'C2.V2']
]
pivot_ex2 = pd.DataFrame(dataset, columns=column_labels, index=row_labels)
In [7]:
# Pivot with index of I, pivot column of K, and *two* value columns of V1 and V2
In [9]:
melted_names = pd.read_csv(os.path.join(datadir, "melted_names.csv"))
melted_names
Out[9]:
sex year name
0 Female 2015 Emma
1 Male 2015 Noah
2 Female 2016 Emma
3 Male 2016 Noah
4 Female 2017 Emma
5 Male 2017 Liam
In [10]:
# Pivot with index of sex, pivot column of year, and value of name
In [11]:
# Pivot with index of year, pivot column of sex, and value of name
In [12]:
indicators = pd.read_csv(os.path.join(datadir, "indicators_ex.csv"))
indicators.head()
Out[12]:
code year pop gdp
0 CHN 2005 1303.72 2285.97
1 CHN 2010 1337.70 6087.16
2 CHN 2015 1371.22 11015.54
3 GBR 2005 60.40 2525.01
4 GBR 2010 62.77 2452.90
In [13]:
# Pivot with index of code, and pivot column of year
In [14]:
# Pivot with index of year, and pivot column of code
In [15]:
datarows = [
['CHN',  2010,  1337.70,   6087.16],
['CHN',  2015,  1371.22,  11015.54],
['CHN',  2018,  1392.73,  13608.15],
['GBR',  2010,    62.77,   2452.90],
['GBR',  2015,    65.13,   2896.42],
['IND',  2010,  1234.28,   1675.62],
['IND',  2015,  1310.15,   2103.59]]
indicators = pd.DataFrame(datarows, columns=['code','year','pop','gdp'])
In [ ]:
# Pivot with index of code and pivot column of year
In [16]:
column_labels = ['I', 'P', 'V1', 'V2']
dataset = [['A', 'w', 15, 23.5],
           ['A', 'x', 10, 42.5],
           ['A', 'x', 5, 18.0],
           ['B', 'w', 8, 10.2],
           ['B', 'w', 4, 14.3],
           ['B', 'x', 6, 12.5]]
table = pd.DataFrame(dataset, columns=column_labels)
In [17]:
# Attempt at pivot with index of I and pivot column of P
In [18]:
# pivot table with index of I and pivot column of P
In [ ]:
pivoted = table.pivot_table(index='I', columns='P', 
                        aggfunc={'V1': 'mean', 'V2': 'sum'})