import pandas as pd
import os
import os.path
import json
import sys
import importlib
import sqlalchemy as sa
if os.path.isdir(os.path.join("../../..", "modules")):
module_dir = os.path.join("../../..", "modules")
else:
module_dir = os.path.join("../..", "modules")
module_path = os.path.abspath(module_dir)
if not module_path in sys.path:
sys.path.append(module_path)
import dbutil
importlib.reload(dbutil)
def sqlite_cstring(database, dbdir=".", driver="pysqlite", protocol="sqlite"):
if driver is None:
cstring = f"{protocol}:///{os.path.join(dbdir, database+'.db')}"
else:
cstring = f"{protocol}+{driver}:///{os.path.join(dbdir, database+'.db')}"
return cstring
def db_connect(cstring):
engine = sa.create_engine(cstring)
connection = engine.connect()
return engine, connection
def db_removedatabase(cstring):
if cstring == "no cstring":
return
if cstring[:6] != "sqlite":
print("Connection string is expected to be for sqlite. Yours:", cstring)
return
i = cstring.find(":///")
if i == -1:
print("Connection string may be malformed. No ':///' found in", cstring)
return
dbpath = cstring[i+4:]
if not os.path.exists(dbpath):
print(f"Database path specified in cstring ({dbpath}) does not exist")
return
if not os.path.isfile(dbpath):
print(f"Database path specified in cstring ({dbpath}) does not resolve to a file")
print(f"About to remove database file {dbpath}")
answer = input("Continue? ([Y]/n)")
if len(answer) < 1 or answer[0].lower() != 'y':
print(f"Aborting removal of database {dbpath}")
return
os.remove(dbpath)
print()
dbfiledir = "../../dbfiles"
if not os.path.isdir(dbfiledir):
dbfiledir = "../" + dbfiledir
assert os.path.isdir(dbfiledir)
cstring = "no cstring"
%load_ext sql
# Note that this cell is an interactive cell, that requests a confimation
# from the user prior to deleting the database file specified as a part
# of cstring.
# On first run of this notebook, the special cstring set in the first
# cell will
db_removedatabase(cstring)
Edit database
below to be your own Denison login (not including the @denison.edu
). If the value you pick is not unique, and your database "collides" with another student's database, my running of your notebook will fail.
After execution of this cell, there will be a database created in the dbfiledir
directory. You should be able to create a connection using TablePlus to this new empty database. If this cell is executed and the database already exists, it will still simply establish an engine and connection to the database (named e
and c
, respectively)
database = "nostudent"
cstring = sqlite_cstring(database, dbfiledir)
%sql $cstring
# e, c = db_connect(cstring) # Non-SQL-magic to establish the connection
Refer to your book (section 14.5) and/or one of the following for the syntax of table creation:
and you can use the following as a reference of data types you can choose for the data type constraint on the acceptable values for a field
Then, fill in a string value for sql_statement
below (just like we did with query
in previous exercises), with the syntax for creating a table named topnames
with fields of year
, sex
, name
, and count
. The data type for year and for count should be INT, and for sex and name should be VARCHAR where, in parenthesis, you specify the maximum length of the variable length character string.
Start with a version that omits a primary key.
What happens if you execute the cell twice? On an error, be sure and look for the error message (typically at the bottom of a traceback) and determine exactly what the error is.
sql_statment = """
"""
# BEGIN SOLUTION
sql_statement = """
CREATE TABLE topnames (
year INT,
sex VARCHAR(6),
name VARCHAR(20),
count INT
)
"""
# END SOLUTION
%sql $sql_statement
Q Use TablePlus
to create a connection to your database. When connected, and following a successful creation, you should see the table and the "Data" spreadsheet view will show no contents in the table. At the bottom of the window, there are two buttons, by which you can toggle back and forth between the data spreadsheet view and a view that shows the Structure
of the selected table.
Q Find the SQL syntax for dropping a table that currently exists in a database. Then fill in sql_statement
with that SQL, adapted to drop the topnames
table.
sql_statment = """
"""
# BEGIN SOLUTION
sql_statement = """
DROP TABLE topnames
"""
# END SOLUTION
%sql $sql_statement
Q Next, copy your create-table SQL from earlier, and modify it to constrain it to have a composite primary key consisting of both year
and sex
. Also, add constraints to these two fields to specify that they cannot be null. You can do these two steps separately (with the requisite drop of the table in between.
sql_statment = """
"""
# BEGIN SOLUTION
sql_statement = """
CREATE TABLE topnames (
year INT NOT NULL,
sex VARCHAR(6) NOT NULL,
name VARCHAR(20),
count INT,
PRIMARY KEY (year, sex)
)
"""
# END SOLUTION
%sql $sql_statement
Q In the cell that follows, I have given you an appropriate drop SQL command. Your job is to add a CONSTRAINT that checks whether or not the count value is >= 0 (to keep negative values from becomming part of your database).
drop_table = "DROP TABLE IF EXISTS topnames"
sql_statment = """
"""
# BEGIN SOLUTION
sql_statement = """
CREATE TABLE IF NOT EXISTS topnames (
year INT NOT NULL,
sex VARCHAR(6) NOT NULL,
name VARCHAR(20),
count INT,
PRIMARY KEY (year, sex),
CONSTRAINT countvalid CHECK (count >= 0)
)
"""
# END SOLUTION
%sql $drop_table
%sql $sql_statement
Q With this database table, we are now going to practice some inserts. The instructor will lead you through those steps. To begin, look at the following link: https://www.w3schools.com/sql/sql_insert.asp, and see if you can write the SQL to "hard code" entering a set of values as a row in your topnames
table.
onerow = [2018, 'Female', 'Emma', 18688]
setofrows = [
[2017, 'Female', 'Emma', 19800],
[2017, 'Male', 'Liam', 19798],
[2018, 'Female', 'Emma', 18688],
[2018, 'Male', 'Liam', 19836]
]
sql_statment = """
"""
# BEGIN SOLUTION
sql_statement = """
INSERT INTO topnames (year, sex, name, count)
VALUES (2018, 'Female', 'Emma', 18688)
"""
# END SOLUTION
%sql $sql_statement
Q Next, we want to construct an SQL statement that incorporates Python program variables into the SQL, so that the data need not be hard-coded.
testrow = [2016, 'Male', "Thomas", 5]
sql_statement = """
"""
### BEGIN SOLUTION
template = """
INSERT INTO topnames (year, sex, name, count)
VALUES ({}, '{}', '{}', {})
"""
sql_statement = template.format(testrow[0], testrow[1], testrow[2], testrow[3])
sql_statement = f"""
INSERT INTO topnames (year, sex, name, count)
VALUES ({testrow[0]}, '{testrow[1]}', '{testrow[2]}', {testrow[3]})
"""
### END SOLUTIOH
print(sql_statement)
%sql $sql_statement