import os.path
from lxml import etree

# Folder holding the XML data files; passed to localXML() as `datafolder` below.
datadir = "publicdata"

def localXML(filename, datafolder=".", parser=None):
    """Parse a local XML file and return its root element.

    Args:
        filename: Name of the XML file, relative to ``datafolder``.
        datafolder: Directory that contains the file; defaults to the
            current directory.
        parser: Optional lxml parser.  When omitted, an ``etree.XMLParser``
            with ``remove_blank_text=True`` is created.

    Returns:
        The root element of the parsed tree, or ``None`` when the file does
        not exist or cannot be parsed.  A short message is printed in both
        failure cases so the caller can tell them apart.
    """
    filepath = os.path.join(datafolder, filename)
    if not os.path.isfile(filepath):
        print("File not found: {}".format(filepath))
        return None

    if parser is None:
        parser = etree.XMLParser(remove_blank_text=True)

    try:
        mytree = etree.parse(filepath, parser=parser)
    except Exception as e:
        # Keep the "return None on failure" contract that callers rely on,
        # but report the reason instead of discarding it silently.
        print("Could not parse {}: {}".format(filepath, e))
        return None

    return mytree.getroot()

# Load the three sample documents from the data folder, in a fixed order.
book_root, ind_root, widom_root = (
    localXML(xml_name, datadir)
    for xml_name in ("bookstore.xml", "ind0.xml", "widombooks.xml")
)

# localXML returns None on failure, so stop here if any document is missing.
assert all(root is not None for root in (book_root, ind_root, widom_root))

Quiz Question 4 Examples

First child of root

# An lxml element behaves like a list of its children, so index it directly
# instead of calling the deprecated getchildren().
firstchild = book_root[0]
print(firstchild)
# Alternatives: next(iter(book_root)), or an XPath with a position predicate.

<Element book at 0x10ed89dc0>

# Get every match for the last location step, like findall("book"):
# the path is relative to book_root and selects all of its <book> children.

xp = """./book"""
node_set = book_root.xpath(xp)

# Show the whole node set (one entry per matching element).
node_set

[<Element book at 0x10ed89dc0>,
 <Element book at 0x10ece50f0>,
 <Element book at 0x10ed8c9b0>,
 <Element book at 0x10ed8c690>,
 <Element book at 0x10ed8cb90>,
 <Element book at 0x10ed8caf0>,
 <Element book at 0x10ed8cc80>,
 <Element book at 0x10ed8ccd0>,
 <Element book at 0x10ed8cd70>,
 <Element book at 0x10ed8ce10>,
 <Element book at 0x10ed8ce60>,
 <Element book at 0x10ed8cf00>]

# Get a single node by adding a position predicate.
# XPath positions are 1-based, so [1] is the first <book> child.

xp = """./book[1]"""
node_set = book_root.xpath(xp)

# xpath() still returns a list; take its only entry.
node_set[0]

<Element book at 0x10ed89dc0>

Value of attribute of first child of root

# Read the "id" attribute from the first child's attribute mapping.
first_book_attrs = book_root[0].attrib
firstid = first_book_attrs["id"]
firstid

'bk101'

# Use the position-predicate XPath and take one more step along the
# attribute axis (@) to reach the "id" attribute of the first <book>.

xp = """./book[1]/@id"""
node_set = book_root.xpath(xp)

# Attribute steps yield strings; the list holds the single id value.
node_set[0]

'bk101'

Children (tags) of first child of root

# Iterating an element yields its children in document order; book_root[0]
# replaces the deprecated getchildren()[0].
tag_list = [child.tag for child in book_root[0]]
tag_list

['author', 'title', 'genre', 'price', 'publish_date', 'description']

# Use the position-predicate XPath and take one more step with the
# wildcard (*) to select every child element of the first <book>.

xp = """./book[1]/*"""
node_set = book_root.xpath(xp)

[e.tag for e in node_set]

['author', 'title', 'genre', 'price', 'publish_date', 'description']

List of prices

# Collect the text of each child's <price> element.  Iterating book_root
# directly replaces the deprecated getchildren().
price_list = [book_node.find("price").text for book_node in book_root]
price_list

['44.95',
 '5.95',
 '5.95',
 '5.95',
 '5.95',
 '4.95',
 '4.95',
 '4.95',
 '6.95',
 '36.95',
 '36.95',
 '49.95']

# Traversal to each price element and its text: step from the context node
# to <book>, then to <price>, then select the text node with text().

xp = """./book/price/text()"""
node_set = book_root.xpath(xp)

# text() steps yield strings, one per <price>.
node_set

['44.95',
 '5.95',
 '5.95',
 '5.95',
 '5.95',
 '4.95',
 '4.95',
 '4.95',
 '6.95',
 '36.95',
 '36.95',
 '49.95']

# Traversal to price element and its text -- shortcut: the abbreviated
# descendant step (//) finds <price> at any depth below the context node,
# so the intermediate <book> step does not have to be spelled out.

xp = """.//price/text()"""
node_set = book_root.xpath(xp)

node_set

['44.95',
 '5.95',
 '5.95',
 '5.95',
 '5.95',
 '4.95',
 '4.95',
 '4.95',
 '6.95',
 '36.95',
 '36.95',
 '49.95']