In [4]:
import os
import os.path
import pandas as pd

# Directory holding the CSV files read by the loading cells; it is a relative
# path, so it is resolved against the kernel's current working directory.
datadir = "publicdata"
In [5]:
# Read topnames.csv into a flat frame (topnames0), then derive a second view
# of the same data indexed by the ('year', 'sex') column pair (topnames).
path = os.path.join(datadir, "topnames.csv")
topnames0 = pd.read_csv(path)
topnames = topnames0.set_index(['year', 'sex'])

# First ten rows of the flat frame and of the indexed frame, respectively.
names0, names = topnames0.head(10), topnames.head(10)
In [6]:
# Read indicators2016.csv. `ind0` keeps the frame exactly as read_csv returns
# it; `ind` is the same data with the 'code' column (CAN, CHN, ...) promoted
# to the row index, so rows can be addressed by country code.
path = os.path.join(datadir, "indicators2016.csv")
ind0 = pd.read_csv(path)
ind = ind0.set_index('code')
In [7]:
# Boolean Series with one entry per row of `ind`: True where 'pop' exceeds 1000.
# NOTE(review): 'pop' appears to be in millions (China shows 1378.66) - confirm.
large_country = ind['pop'] > 1000
# Bare last expression so the notebook displays the mask.
large_country
Out[7]:
code
CAN    False
CHN     True
IND     True
RUS    False
USA    False
VNM    False
Name: pop, dtype: bool

Row Selection by Condition

In [8]:
# Row selection with a stored boolean mask: keep only the rows of `ind`
# whose corresponding entry in `large_country` is True.
ind.loc[large_country]
Out[8]:
country pop gdp life cell
code
CHN China 1378.66 11199.15 76.25 1364.93
IND India 1324.17 2263.79 68.56 1127.81
In [9]:
# Row selection with the boolean condition written inline instead of being
# stored in a variable first.
ind.loc[ind['pop'] > 1000]
Out[9]:
country pop gdp life cell
code
CHN China 1378.66 11199.15 76.25 1364.93
IND India 1324.17 2263.79 68.56 1127.81
In [10]:
# Boolean Series: True for the rows whose 'life' value is greater than 77.
ind['life'] > 77
Out[10]:
code
CAN     True
CHN    False
IND    False
RUS    False
USA     True
VNM    False
Name: life, dtype: bool
In [12]:
# Element-wise OR of two boolean Series: True for rows where either the 'pop'
# condition or the 'life' condition holds.
(ind['pop'] > 1000) | (ind['life'] > 77)
Out[12]:
code
CAN     True
CHN     True
IND     True
RUS    False
USA     True
VNM    False
dtype: bool

What happens if we omit the parentheses? Why?

Other Subsets and Sorting

  • nlargest
  • nsmallest
  • sort_index
  • sort_values

Combinations of selecting rows and projecting columns

In [ ]: