import pandas as pd
import os
import os.path
import json
import sys
import importlib
module_dir = "../../modules"
module_path = os.path.abspath(module_dir)
if not module_path in sys.path:
sys.path.append(module_path)
import dbutil
importlib.reload(dbutil)
%load_ext sql
Edit
creds.json
to reflect your mysql user and passwordThis must be done prior to executing the following cell
In general, you will be able to choose whether you are using the remote MySQL database or the SQLite database(s) by setting the dbsource
variable to "mysql"
or "sqlite"
respectively. The function dbutil.db_cstring
function computes a connection string for the chosen dbsource
using the information in the creds.json
file. If the last argument to this function is present, the generated connection string uses that datbase as superceding the name of the database in creds.json
.
dbsource = "sqlite"
db = "book"
cstring = dbutil.db_cstring(dbsource, "creds.json", ".", db)
%sql $cstring
%%sql
SELECT year, code, MAX(pop)
FROM indicators
GROUP BY code
LIMIT 15
%%sql
SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code
LIMIT 7
%%sql
SELECT I.code, I.year, maxTable.max_pop FROM
(SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code) AS maxTable
INNER JOIN indicators AS I
ON (I.pop = maxTable.max_pop) AND (I.code = maxTable.code)
ORDER BY I.code
LIMIT 14
%%sql
SELECT code, MIN(year) AS year, ROUND(AVG(max_pop),2) AS max_pop FROM
(SELECT I.code, I.year, maxTable.max_pop FROM
(SELECT code, MAX(pop) AS max_pop
FROM indicators
GROUP BY code) AS maxTable
INNER JOIN indicators AS I
ON (I.pop = maxTable.max_pop) AND (I.code = maxTable.code)) AS inner
GROUP BY code
ORDER BY code
LIMIT 10
%%sql
SELECT region, ROUND(AVG(life),2) AS avg_life,
COUNT(*) AS num_country, COUNT(life) AS num_life
FROM indicators LEFT JOIN countries USING (code)
WHERE year = 2017
GROUP BY region
ORDER BY avg_life DESC
from IPython.display import Image
Image("classimages/school_schema.jpg", width=600, height=600)
dbsource = "sqlite"
db = "school"
cstring = dbutil.db_cstring(dbsource, "creds.json", ".", db)
%sql $cstring
%%sql
SELECT *
FROM (students AS ST INNER JOIN subjects AS SU
ON ST.studentmajor = SU.subjectid)
INNER JOIN departments AS D
ON D.departmentid = SU.departmentid
LIMIT 10
%%sql
SELECT D.division, COUNT(*) AS majors
FROM (students AS ST INNER JOIN subjects AS SU
ON ST.studentmajor = SU.subjectid)
INNER JOIN departments AS D
ON D.departmentid = SU.departmentid
GROUP BY division
ORDER BY majors DESC
dbsource = "sqlite"
db = "book"
cstring = dbutil.db_cstring(dbsource, "creds.json", ".", db)
%sql $cstring
%%sql
SELECT AVG(gdp) * 5 AS avg_gdp
FROM indicators
WHERE year = 2017
%%sql
SELECT code, gdp, life
FROM indicators
WHERE year=2017 AND
gdp > (SELECT AVG(gdp) * 5 AS avg_gdp
FROM indicators
WHERE year = 2017)
%%sql
SELECT DISTINCT code FROM indicators WHERE life > 83
%%sql
SELECT AVG(gdp) AS longlife_gdp
FROM indicators
WHERE year = 2017 AND
code IN (SELECT DISTINCT code
FROM indicators
WHERE life > 83)
dbsource = "sqlite"
db = "school"
cstring = dbutil.db_cstring(dbsource, "creds.json", ".", db)
%sql $cstring
%%sql
SELECT studentmajor, COUNT(*) AS nummajor
FROM students
WHERE studentmajor IS NOT NULL
GROUP BY studentmajor
LIMIT 10
%%sql
SELECT studentmajor, nummajor, departmentid
FROM ( SELECT studentmajor, COUNT(*) AS nummajor
FROM students
WHERE studentmajor IS NOT NULL
GROUP BY studentmajor ) AS majors
INNER JOIN subjects AS S
ON S.subjectid = majors.studentmajor
LIMIT 10