"""Exploratory script: inspect the XML files under ``cam_jp_xml`` with lxml.

For every ``*.xml`` file in the directory this prints the file path, the
root element's namespace map, a pretty-printed dump of the document, and
the paragraph text found under each ``physDesc``/``bindingDesc``.
"""

import pathlib

from lxml import etree

# Prefix map used by the XPath queries below; XPath 1.0 has no notion of a
# default namespace, so every namespaced step needs an explicit prefix.
# NOTE(review): assumes the documents use the standard TEI P5 namespace --
# confirm against the ``root.nsmap`` output printed for each file.
ns = {"tei": "http://www.tei-c.org/ns/1.0"}

# recover=True asks libxml2 to keep parsing through malformed markup
# instead of aborting on the first error.
parser = etree.XMLParser(recover=True)

# Walk every XML file in the directory (the original parsed ``p`` before
# this loop had bound it).
for p in pathlib.Path("cam_jp_xml").glob("*.xml"):
    print(str(p))

    # str() keeps this working on lxml versions that predate PathLike support.
    tree = etree.parse(str(p), parser)
    root = tree.getroot()

    # Check the namespace mapping declared on the root element.
    print(root.nsmap)

    # Check the tags by dumping the whole document.
    print(etree.tostring(root, pretty_print=True, encoding="utf-8").decode())

    # Extract the binding description text of each physical description.
    for phys_desc in root.xpath("//tei:physDesc", namespaces=ns):
        # The <p> step carries the tei: prefix too: an unprefixed step only
        # matches elements in no namespace and would always come back empty
        # for a document whose elements inherit the TEI default namespace.
        print(phys_desc.xpath(".//tei:bindingDesc/tei:p/text()", namespaces=ns))