import os.path
from lxml import etree
# Folder holding the XML data files; passed to localXML as datafolder below.
datadir = "publicdata"
def localXML(filename, datafolder=".", parser=None):
    """Parse a local XML file and return its root element.

    Parameters:
        filename: Name of the XML file, joined onto ``datafolder``.
        datafolder: Folder containing the file (defaults to the current
            folder).
        parser: Optional parser handed to ``etree.parse``. When omitted, an
            ``etree.XMLParser`` with ``remove_blank_text=True`` is created.

    Returns:
        The root element of the parsed tree, or None when the file does not
        exist or could not be parsed. A message is printed in both failure
        cases so the caller can tell them apart.
    """
    filepath = os.path.join(datafolder, filename)
    if not os.path.isfile(filepath):
        print("File not found: {}".format(filepath))
        return None
    try:
        if parser is None:
            parser = etree.XMLParser(remove_blank_text=True)
        mytree = etree.parse(filepath, parser=parser)
    except Exception as e:
        # Loading stays best-effort (None on any failure), but the reason is
        # reported instead of being discarded silently.
        print("Could not parse {}: {}".format(filepath, e))
        return None
    return mytree.getroot()
# Load the three sample documents from datadir. Each name is bound to the
# root element of its document, or to None when localXML could not load it.
book_root, ind_root, widom_root = (
    localXML(xml_name, datadir)
    for xml_name in ("bookstore.xml", "ind0.xml", "widombooks.xml")
)
# Stop right here if any of the documents failed to load.
assert all(
    loaded is not None for loaded in (book_root, ind_root, widom_root)
)
# First child of the root by plain indexing. getchildren() is deprecated in
# lxml; indexing the element directly yields the same child.
firstchild = book_root[0]
print(firstchild)
# XPath alternatives to the indexing above.
# Select every element child of the root -- a multi-node result, much like
# findall() on the last location step -- and then index the first one.
xp = """./*"""
node_set = book_root.xpath(xp)
node_set[0]
# Select only the first element child by adding a position predicate
# (XPath positions are 1-based).
xp = """./*[1]"""
node_set = book_root.xpath(xp)
node_set[0]
# The id attribute of the root's first child, read through the attrib mapping.
firstid = book_root[0].attrib["id"]
firstid
# Same value via XPath: the first-child path plus one more "step" that
# selects the id attribute.
xp = """./*[1]/@id"""
node_set = book_root.xpath(xp)
node_set[0]
# Tags of the children of the root's first child. Iterating an element walks
# its children directly, so the deprecated getchildren() call is not needed.
tag_list = [child.tag for child in book_root[0]]
tag_list
# Same tags via XPath: the first-child path plus one more "step" that selects
# all of its element children.
xp = """./*[1]/*"""
node_set = book_root.xpath(xp)
[e.tag for e in node_set]
# Text of the first price element under each child of the root. Iterating the
# root walks its children directly (getchildren() is deprecated in lxml).
price_list = [book_node.find("price").text for book_node in book_root]
price_list
# Traversal to price element and its text: root's children, then their price
# children, then the text nodes.
xp = """./*/price/text()"""
node_set = book_root.xpath(xp)
node_set
# Traversal to price element and its text -- shortcut. '//' reaches price
# elements at any depth of the document, so this matches the explicit path
# only if price elements occur nowhere else -- TODO confirm against the data.
xp = """//price/text()"""
node_set = book_root.xpath(xp)
node_set