In [2]:
from lxml import etree
import os.path
In [3]:
# Parser configured to drop whitespace-only text between tags, so that
# container elements report text of None instead of indentation strings.
myparser = etree.XMLParser(remove_blank_text=True)
# Relative path to the data file, joined with the platform's separator.
path1 = os.path.join("publicdata", "ind0.xml")
# Parse the file into an element tree and keep a handle on its root element;
# the cells below navigate from ind_root.
ind_tree = etree.parse(path1, parser=myparser)
ind_root = ind_tree.getroot()
In [4]:
def nodeInfo(node):
    """Print a three-line summary of an element-like object.

    Writes the node's tag, the repr of its text (so None and whitespace
    are distinguishable), and its attribute mapping to standard output.

    Parameters
    ----------
    node : object exposing ``tag``, ``text`` and ``attrib`` attributes,
        such as an lxml element.

    Returns
    -------
    None
    """
    print(f"Tag: {node.tag}")
    # !r keeps the quoting/None distinction that a plain str() would lose.
    print(f"Text: {node.text!r}")
    print(f"XML Attributes: {node.attrib}")
In [5]:
# Inspect the root element: its tag, text and XML attributes.
nodeInfo(ind_root)
Tag: indicators
Text: None
XML Attributes: {}
In [18]:
# Summarize each direct child of the root. An lxml element is iterable over
# its children in document order, so iterate it directly; getchildren() is
# deprecated.
for country in ind_root:
    nodeInfo(country)
Tag: country
Text: None
XML Attributes: {'code': 'FRA', 'name': 'France'}
Tag: country
Text: None
XML Attributes: {'code': 'GBR', 'name': 'United Kingdom'}
Tag: country
Text: None
XML Attributes: {'code': 'USA', 'name': 'United States'}
In [8]:
# Elements support positional indexing of their children; index 1 is the
# second child of the root.
nodeInfo(ind_root[1])
Tag: country
Text: None
XML Attributes: {'code': 'GBR', 'name': 'United Kingdom'}

Path investigation

  1. Path for root
  2. Path with multiple tag-steps to get single Element result
  3. Path with multiple tag-steps to get multiple Element results
  4. Path with no tag match
  5. Path with text match
  6. Path with attribute match (but not predicate)
  7. Path with single level wildcard
  8. Path with multi-level wildcard
In [15]:
# Absolute path: the leading "/" starts at the document root, so this
# selects the top-level `indicators` element. xpath() returns a list.
ind_root.xpath("/indicators")
Out[15]:
[<Element indicators at 0x10d7d93c0>]
In [17]:
# Relative path evaluated from the second child of the root: a bare tag
# name selects that element's direct `timedata` children.
ind_root[1].xpath("timedata")
Out[17]:
[<Element timedata at 0x10d86f500>, <Element timedata at 0x10d86f550>]
In [ ]: