import pandas as pd
import os
import os.path
import json
import sys
import importlib
module_dir = "../../modules"
module_path = os.path.abspath(module_dir)
if not module_path in sys.path:
sys.path.append(module_path)
import dbutil
importlib.reload(dbutil)
%load_ext sql
Edit
creds.json
to reflect your mysql user and passwordThis must be done prior to executing the following cell
In general, you will be able to choose whether you are using the remote MySQL database or the SQLite database(s) by setting the dbsource
variable to "mysql"
or "sqlite"
respectively. The function dbutil.db_cstring
function computes a connection string for the chosen dbsource
using the information in the creds.json
file. If the last argument to this function is present, the generated connection string uses that datbase as superceding the name of the database in creds.json
.
dbsource = "sqlite"
db = "book"
cstring = dbutil.db_cstring(dbsource, "creds.json", ".", db)
%sql $cstring
%%sql
SELECT year, code, MAX(pop)
FROM indicators
GROUP BY code
LIMIT 15
%%sql
SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code
LIMIT 7
%%sql
SELECT I.code, I.year, maxTable.max_pop FROM
(SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code) AS maxTable
INNER JOIN indicators AS I
ON (I.pop = maxTable.max_pop) AND (I.code = maxTable.code)
ORDER BY I.code
LIMIT 14
%%sql
SELECT code, MIN(year) AS year, ROUND(AVG(max_pop),2) AS max_pop FROM
(SELECT I.code, I.year, maxTable.max_pop FROM
(SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code) AS maxTable
INNER JOIN indicators AS I
ON (I.pop = maxTable.max_pop) AND (I.code = maxTable.code)) AS inner
GROUP BY code
ORDER BY code
LIMIT 10
%%sql
SELECT region, ROUND(AVG(life),2) AS avg_life,
COUNT(*) AS num_country, COUNT(life) AS num_life
FROM indicators LEFT JOIN countries USING (code)
WHERE year = 2017
GROUP BY region
ORDER BY avg_life DESC
from IPython.display import Image
Image("classimages/school_schema.jpg", width=600, height=600)