incremental_tree.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
# Code modified from cPython's Lib/xml/etree/ElementTree.py
# The write() code is modified to allow specifying a particular namespace
# uri -> prefix mapping.
#
# ---------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See https://www.python.org/psf/license for licensing details.
#
# ElementTree
# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
  41. import contextlib
  42. import io
  43. import xml.etree.ElementTree as ET
  44. def current_global_nsmap():
  45. return {
  46. prefix: uri for uri, prefix in ET._namespace_map.items()
  47. }
  48. class IncrementalTree(ET.ElementTree):
  49. def write(
  50. self,
  51. file_or_filename,
  52. encoding=None,
  53. xml_declaration=None,
  54. default_namespace=None,
  55. method=None,
  56. *,
  57. short_empty_elements=True,
  58. nsmap=None,
  59. root_ns_only=False,
  60. minimal_ns_only=False,
  61. ):
  62. """Write element tree to a file as XML.
  63. Arguments:
  64. *file_or_filename* -- file name or a file object opened for writing
  65. *encoding* -- the output encoding (default: US-ASCII)
  66. *xml_declaration* -- bool indicating if an XML declaration should be
  67. added to the output. If None, an XML declaration
  68. is added if encoding IS NOT either of:
  69. US-ASCII, UTF-8, or Unicode
  70. *default_namespace* -- sets the default XML namespace (for "xmlns").
  71. Takes precedence over any default namespace
  72. provided in nsmap or
  73. xml.etree.ElementTree.register_namespace().
  74. *method* -- either "xml" (default), "html, "text", or "c14n"
  75. *short_empty_elements* -- controls the formatting of elements
  76. that contain no content. If True (default)
  77. they are emitted as a single self-closed
  78. tag, otherwise they are emitted as a pair
  79. of start/end tags
  80. *nsmap* -- a mapping of namespace prefixes to URIs. These take
  81. precedence over any mappings registered using
  82. xml.etree.ElementTree.register_namespace(). The
  83. default_namespace argument, if supplied, takes precedence
  84. over any default namespace supplied in nsmap. All supplied
  85. namespaces will be declared on the root element, even if
  86. unused in the document.
  87. *root_ns_only* -- bool indicating namespace declrations should only
  88. be written on the root element. This requires two
  89. passes of the xml tree adding additional time to
  90. the writing process. This is primarily meant to
  91. mimic xml.etree.ElementTree's behaviour.
  92. *minimal_ns_only* -- bool indicating only namespaces that were used
  93. to qualify elements or attributes should be
  94. declared. All namespace declarations will be
  95. written on the root element regardless of the
  96. value of the root_ns_only arg. Requires two
  97. passes of the xml tree adding additional time to
  98. the writing process.
  99. """
  100. if not method:
  101. method = "xml"
  102. elif method not in ("text", "xml", "html"):
  103. raise ValueError("unknown method %r" % method)
  104. if not encoding:
  105. encoding = "us-ascii"
  106. with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
  107. if method == "xml" and (
  108. xml_declaration
  109. or (
  110. xml_declaration is None
  111. and encoding.lower() != "unicode"
  112. and declared_encoding.lower() not in ("utf-8", "us-ascii")
  113. )
  114. ):
  115. write("<?xml version='1.0' encoding='%s'?>\n" % (declared_encoding,))
  116. if method == "text":
  117. ET._serialize_text(write, self._root)
  118. else:
  119. if method == "xml":
  120. is_html = False
  121. else:
  122. is_html = True
  123. if nsmap:
  124. if None in nsmap:
  125. raise ValueError(
  126. 'Found None as default nsmap prefix in nsmap. '
  127. 'Use "" as the default namespace prefix.'
  128. )
  129. new_nsmap = nsmap.copy()
  130. else:
  131. new_nsmap = {}
  132. if default_namespace:
  133. new_nsmap[""] = default_namespace
  134. if root_ns_only or minimal_ns_only:
  135. # _namespaces returns a mapping of only the namespaces that
  136. # were used.
  137. new_nsmap = _namespaces(
  138. self._root,
  139. default_namespace,
  140. new_nsmap,
  141. )
  142. if not minimal_ns_only:
  143. if nsmap:
  144. # We want all namespaces defined in the provided
  145. # nsmap to be declared regardless of whether
  146. # they've been used.
  147. new_nsmap.update(nsmap)
  148. if default_namespace:
  149. new_nsmap[""] = default_namespace
  150. global_nsmap = {
  151. prefix: uri for uri, prefix in ET._namespace_map.items()
  152. }
  153. if None in global_nsmap:
  154. raise ValueError(
  155. 'Found None as default nsmap prefix in nsmap registered with '
  156. 'register_namespace. Use "" for the default namespace prefix.'
  157. )
  158. nsmap_scope = {}
  159. _serialize_ns_xml(
  160. write,
  161. self._root,
  162. nsmap_scope,
  163. global_nsmap,
  164. is_html=is_html,
  165. is_root=True,
  166. short_empty_elements=short_empty_elements,
  167. new_nsmap=new_nsmap,
  168. )
  169. def _make_new_ns_prefix(
  170. nsmap_scope,
  171. global_prefixes,
  172. local_nsmap=None,
  173. default_namespace=None,
  174. ):
  175. i = len(nsmap_scope)
  176. if default_namespace is not None and "" not in nsmap_scope:
  177. # Keep the same numbering scheme as python which assumes the default
  178. # namespace is present if supplied.
  179. i += 1
  180. while True:
  181. prefix = f"ns{i}"
  182. if (
  183. prefix not in nsmap_scope
  184. and prefix not in global_prefixes
  185. and (
  186. not local_nsmap or prefix not in local_nsmap
  187. )
  188. ):
  189. return prefix
  190. i += 1
  191. def _get_or_create_prefix(
  192. uri,
  193. nsmap_scope,
  194. global_nsmap,
  195. new_namespace_prefixes,
  196. uri_to_prefix,
  197. for_default_namespace_attr_prefix=False,
  198. ):
  199. """Find a prefix that doesn't conflict with the ns scope or create a new prefix
  200. This function mutates nsmap_scope, global_nsmap, new_namespace_prefixes and
  201. uri_to_prefix. It is intended to keep state in _serialize_ns_xml consistent
  202. while deduplicating the house keeping code or updating these dictionaries.
  203. """
  204. # Check if we can reuse an existing (global) prefix within the current
  205. # namespace scope. There maybe many prefixes pointing to a single URI by
  206. # this point and we need to select a prefix that is not in use in the
  207. # current scope.
  208. for global_prefix, global_uri in global_nsmap.items():
  209. if uri == global_uri and global_prefix not in nsmap_scope:
  210. prefix = global_prefix
  211. break
  212. else: # no break
  213. # We couldn't find a suitable existing prefix for this namespace scope,
  214. # let's create a new one.
  215. prefix = _make_new_ns_prefix(nsmap_scope, global_prefixes=global_nsmap)
  216. global_nsmap[prefix] = uri
  217. nsmap_scope[prefix] = uri
  218. if not for_default_namespace_attr_prefix:
  219. # Don't override the actual default namespace prefix
  220. uri_to_prefix[uri] = prefix
  221. if prefix != "xml":
  222. new_namespace_prefixes.add(prefix)
  223. return prefix
  224. def _find_default_namespace_attr_prefix(
  225. default_namespace,
  226. nsmap,
  227. local_nsmap,
  228. global_prefixes,
  229. provided_default_namespace=None,
  230. ):
  231. # Search the provided nsmap for any prefixes for this uri that aren't the
  232. # default namespace ""
  233. for prefix, uri in nsmap.items():
  234. if uri == default_namespace and prefix != "":
  235. return prefix
  236. for prefix, uri in local_nsmap.items():
  237. if uri == default_namespace and prefix != "":
  238. return prefix
  239. # _namespace_map is a 1:1 mapping of uri -> prefix
  240. prefix = ET._namespace_map.get(default_namespace)
  241. if prefix and prefix not in nsmap:
  242. return prefix
  243. return _make_new_ns_prefix(
  244. nsmap,
  245. global_prefixes,
  246. local_nsmap,
  247. provided_default_namespace,
  248. )
  249. def process_attribs(
  250. elem,
  251. is_nsmap_scope_changed,
  252. default_ns_attr_prefix,
  253. nsmap_scope,
  254. global_nsmap,
  255. new_namespace_prefixes,
  256. uri_to_prefix,
  257. ):
  258. item_parts = []
  259. for k, v in elem.items():
  260. if isinstance(k, ET.QName):
  261. k = k.text
  262. try:
  263. if k[:1] == "{":
  264. uri_and_name = k[1:].rsplit("}", 1)
  265. try:
  266. prefix = uri_to_prefix[uri_and_name[0]]
  267. except KeyError:
  268. if not is_nsmap_scope_changed:
  269. # We're about to mutate the these dicts so
  270. # let's copy them first. We don't have to
  271. # recompute other mappings as we're looking up
  272. # or creating a new prefix
  273. nsmap_scope = nsmap_scope.copy()
  274. uri_to_prefix = uri_to_prefix.copy()
  275. is_nsmap_scope_changed = True
  276. prefix = _get_or_create_prefix(
  277. uri_and_name[0],
  278. nsmap_scope,
  279. global_nsmap,
  280. new_namespace_prefixes,
  281. uri_to_prefix,
  282. )
  283. if not prefix:
  284. if default_ns_attr_prefix:
  285. prefix = default_ns_attr_prefix
  286. else:
  287. for prefix, known_uri in nsmap_scope.items():
  288. if known_uri == uri_and_name[0] and prefix != "":
  289. default_ns_attr_prefix = prefix
  290. break
  291. else: # no break
  292. if not is_nsmap_scope_changed:
  293. # We're about to mutate the these dicts so
  294. # let's copy them first. We don't have to
  295. # recompute other mappings as we're looking up
  296. # or creating a new prefix
  297. nsmap_scope = nsmap_scope.copy()
  298. uri_to_prefix = uri_to_prefix.copy()
  299. is_nsmap_scope_changed = True
  300. prefix = _get_or_create_prefix(
  301. uri_and_name[0],
  302. nsmap_scope,
  303. global_nsmap,
  304. new_namespace_prefixes,
  305. uri_to_prefix,
  306. for_default_namespace_attr_prefix=True,
  307. )
  308. default_ns_attr_prefix = prefix
  309. k = f"{prefix}:{uri_and_name[1]}"
  310. except TypeError:
  311. ET._raise_serialization_error(k)
  312. if isinstance(v, ET.QName):
  313. if v.text[:1] != "{":
  314. v = v.text
  315. else:
  316. uri_and_name = v.text[1:].rsplit("}", 1)
  317. try:
  318. prefix = uri_to_prefix[uri_and_name[0]]
  319. except KeyError:
  320. if not is_nsmap_scope_changed:
  321. # We're about to mutate the these dicts so
  322. # let's copy them first. We don't have to
  323. # recompute other mappings as we're looking up
  324. # or creating a new prefix
  325. nsmap_scope = nsmap_scope.copy()
  326. uri_to_prefix = uri_to_prefix.copy()
  327. is_nsmap_scope_changed = True
  328. prefix = _get_or_create_prefix(
  329. uri_and_name[0],
  330. nsmap_scope,
  331. global_nsmap,
  332. new_namespace_prefixes,
  333. uri_to_prefix,
  334. )
  335. v = f"{prefix}:{uri_and_name[1]}"
  336. item_parts.append((k, v))
  337. return item_parts, default_ns_attr_prefix, nsmap_scope
def write_elem_start(
    write,
    elem,
    nsmap_scope,
    global_nsmap,
    short_empty_elements,
    is_html,
    is_root=False,
    uri_to_prefix=None,
    default_ns_attr_prefix=None,
    new_nsmap=None,
    **kwargs,
):
    """Write the opening tag (including self closing) and element text.

    Refer to _serialize_ns_xml for description of arguments. Any extra
    keyword arguments are accepted and ignored.

    nsmap_scope should be an empty dictionary on first call. All nsmap prefixes
    must be strings with the default namespace prefix represented by "".

    eg.
        - <foo attr1="one">      (returns tag = 'foo')
        - <foo attr1="one">text  (returns tag = 'foo')
        - <foo attr1="one" />    (returns tag = None)

    Comments and processing instructions are written out completely and
    return tag = None. An element whose tag is None writes only its escaped
    text (no markup).

    Returns:
        tag:
            The tag name to be closed or None if no closing required.
        nsmap_scope:
            The current nsmap after any prefix to uri additions from this
            element. This is the input dict if unmodified or an updated copy.
        default_ns_attr_prefix:
            The prefix for the default namespace to use with attrs.
        uri_to_prefix:
            The current uri to prefix map after any uri to prefix additions
            from this element. This is the input dict if unmodified or an
            updated copy.
        next_remains_root:
            A bool indicating if the child element(s) should be treated as
            their own roots. Only True when this element's tag is None and
            is_root was True.

    Raises:
        ValueError: if None is used as a namespace prefix, or if an
            unqualified tag is used while a default namespace ("") is in
            scope.
        TypeError: via ET._raise_serialization_error when the tag cannot be
            sliced like a string.
    """
    tag = elem.tag
    text = elem.text

    if tag is ET.Comment:
        write("<!--%s-->" % text)
        tag = None
        next_remains_root = False
    elif tag is ET.ProcessingInstruction:
        write("<?%s?>" % text)
        tag = None
        next_remains_root = False
    else:
        if new_nsmap:
            # New declarations for this element: work on a private copy of the
            # scope so the caller's dict is left untouched.
            is_nsmap_scope_changed = True
            nsmap_scope = nsmap_scope.copy()
            nsmap_scope.update(new_nsmap)
            new_namespace_prefixes = set(new_nsmap.keys())
            # The "xml" prefix never needs to be declared.
            new_namespace_prefixes.discard("xml")
            # We need to recompute the uri to prefixes
            uri_to_prefix = None
            default_ns_attr_prefix = None
        else:
            is_nsmap_scope_changed = False
            new_namespace_prefixes = set()

        if uri_to_prefix is None:
            if None in nsmap_scope:
                raise ValueError(
                    'Found None as a namespace prefix. Use "" as the default namespace prefix.'
                )
            # Invert the scope; when several prefixes share a uri the last one
            # iterated wins.
            uri_to_prefix = {uri: prefix for prefix, uri in nsmap_scope.items()}
            if "" in nsmap_scope:
                # There may be multiple prefixes for the default namespace but
                # we want to make sure we preferentially use "" (for elements)
                uri_to_prefix[nsmap_scope[""]] = ""

        if tag is None:
            # tag suppression where tag is set to None
            # Don't change is_root so namespaces can be passed down
            next_remains_root = is_root
            if text:
                write(ET._escape_cdata(text))
        else:
            next_remains_root = False
            if isinstance(tag, ET.QName):
                tag = tag.text
            try:
                # These splits / fully qualified tag creation are the
                # bottleneck in this implementation vs the python
                # implementation.
                # The following split takes ~42ns with no uri and ~85ns if a
                # prefix is present. If the uri was present, we then need to
                # look up a prefix (~14ns) and create the fully qualified
                # string (~41ns). This gives a total of ~140ns where a uri is
                # present.
                # Python's implementation needs to preprocess the tree to
                # create a dict of qname -> tag by traversing the tree which
                # takes a bit of extra time but it quickly makes that back by
                # only having to do a dictionary look up (~14ns) for each tag /
                # attrname vs our splitting (~140ns).
                # So here we have the flexibility of being able to redefine the
                # uri a prefix points to midway through serialisation at the
                # expense of performance (~10% slower for a 1mb file on my
                # machine).
                if tag[:1] == "{":
                    uri_and_name = tag[1:].rsplit("}", 1)
                    try:
                        prefix = uri_to_prefix[uri_and_name[0]]
                    except KeyError:
                        if not is_nsmap_scope_changed:
                            # We're about to mutate these dicts so let's
                            # copy them first. We don't have to recompute other
                            # mappings as we're looking up or creating a new
                            # prefix
                            nsmap_scope = nsmap_scope.copy()
                            uri_to_prefix = uri_to_prefix.copy()
                            is_nsmap_scope_changed = True
                        prefix = _get_or_create_prefix(
                            uri_and_name[0],
                            nsmap_scope,
                            global_nsmap,
                            new_namespace_prefixes,
                            uri_to_prefix,
                        )
                    if prefix:
                        tag = f"{prefix}:{uri_and_name[1]}"
                    else:
                        # Default namespace: the tag is written unprefixed.
                        tag = uri_and_name[1]
                elif "" in nsmap_scope:
                    raise ValueError(
                        "cannot use non-qualified names with default_namespace option"
                    )
            except TypeError:
                ET._raise_serialization_error(tag)

            write("<" + tag)

            # Attributes are resolved before anything else is written because
            # doing so may add prefixes to new_namespace_prefixes, which must
            # be declared on this same start tag.
            if elem.attrib:
                item_parts, default_ns_attr_prefix, nsmap_scope = process_attribs(
                    elem,
                    is_nsmap_scope_changed,
                    default_ns_attr_prefix,
                    nsmap_scope,
                    global_nsmap,
                    new_namespace_prefixes,
                    uri_to_prefix,
                )
            else:
                item_parts = []
            if new_namespace_prefixes:
                # Namespace declarations are written first, sorted by prefix.
                ns_attrs = []
                for k in sorted(new_namespace_prefixes):
                    v = nsmap_scope[k]
                    if k:
                        k = "xmlns:" + k
                    else:
                        k = "xmlns"
                    ns_attrs.append((k, v))
                if is_html:
                    write("".join([f' {k}="{ET._escape_attrib_html(v)}"' for k, v in ns_attrs]))
                else:
                    write("".join([f' {k}="{ET._escape_attrib(v)}"' for k, v in ns_attrs]))
            if item_parts:
                if is_html:
                    write("".join([f' {k}="{ET._escape_attrib_html(v)}"' for k, v in item_parts]))
                else:
                    write("".join([f' {k}="{ET._escape_attrib(v)}"' for k, v in item_parts]))
            if is_html:
                write(">")
                ltag = tag.lower()
                if text:
                    # script / style content is written without escaping.
                    if ltag == "script" or ltag == "style":
                        write(text)
                    else:
                        write(ET._escape_cdata(text))
                if ltag in ET.HTML_EMPTY:
                    # Tags in ET.HTML_EMPTY get no closing tag.
                    tag = None
            elif text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(ET._escape_cdata(text))
            else:
                # Empty element written in self-closed form; nothing to close.
                tag = None
                write(" />")
    return (
        tag,
        nsmap_scope,
        default_ns_attr_prefix,
        uri_to_prefix,
        next_remains_root,
    )
  521. def _serialize_ns_xml(
  522. write,
  523. elem,
  524. nsmap_scope,
  525. global_nsmap,
  526. short_empty_elements,
  527. is_html,
  528. is_root=False,
  529. uri_to_prefix=None,
  530. default_ns_attr_prefix=None,
  531. new_nsmap=None,
  532. **kwargs,
  533. ):
  534. """Serialize an element or tree using 'write' for output.
  535. Args:
  536. write:
  537. A function to write the xml to its destination.
  538. elem:
  539. The element to serialize.
  540. nsmap_scope:
  541. The current prefix to uri mapping for this element. This should be
  542. an empty dictionary for the root element. Additional namespaces are
  543. progressively added using the new_nsmap arg.
  544. global_nsmap:
  545. A dict copy of the globally registered _namespace_map in uri to
  546. prefix form
  547. short_empty_elements:
  548. Controls the formatting of elements that contain no content. If True
  549. (default) they are emitted as a single self-closed tag, otherwise
  550. they are emitted as a pair of start/end tags.
  551. is_html:
  552. Set to True to serialize as HTML otherwise XML.
  553. is_root:
  554. Boolean indicating if this is a root element.
  555. uri_to_prefix:
  556. Current state of the mapping of uri to prefix.
  557. default_ns_attr_prefix:
  558. new_nsmap:
  559. New prefix -> uri mapping to be applied to this element.
  560. """
  561. (
  562. tag,
  563. nsmap_scope,
  564. default_ns_attr_prefix,
  565. uri_to_prefix,
  566. next_remains_root,
  567. ) = write_elem_start(
  568. write,
  569. elem,
  570. nsmap_scope,
  571. global_nsmap,
  572. short_empty_elements,
  573. is_html,
  574. is_root,
  575. uri_to_prefix,
  576. default_ns_attr_prefix,
  577. new_nsmap=new_nsmap,
  578. )
  579. for e in elem:
  580. _serialize_ns_xml(
  581. write,
  582. e,
  583. nsmap_scope,
  584. global_nsmap,
  585. short_empty_elements,
  586. is_html,
  587. next_remains_root,
  588. uri_to_prefix,
  589. default_ns_attr_prefix,
  590. new_nsmap=None,
  591. )
  592. if tag:
  593. write(f"</{tag}>")
  594. if elem.tail:
  595. write(ET._escape_cdata(elem.tail))
  596. def _qnames_iter(elem):
  597. """Iterate through all the qualified names in elem"""
  598. seen_el_qnames = set()
  599. seen_other_qnames = set()
  600. for this_elem in elem.iter():
  601. tag = this_elem.tag
  602. if isinstance(tag, str):
  603. if tag not in seen_el_qnames:
  604. seen_el_qnames.add(tag)
  605. yield tag, True
  606. elif isinstance(tag, ET.QName):
  607. tag = tag.text
  608. if tag not in seen_el_qnames:
  609. seen_el_qnames.add(tag)
  610. yield tag, True
  611. elif (
  612. tag is not None
  613. and tag is not ET.ProcessingInstruction
  614. and tag is not ET.Comment
  615. ):
  616. ET._raise_serialization_error(tag)
  617. for key, value in this_elem.items():
  618. if isinstance(key, ET.QName):
  619. key = key.text
  620. if key not in seen_other_qnames:
  621. seen_other_qnames.add(key)
  622. yield key, False
  623. if isinstance(value, ET.QName):
  624. if value.text not in seen_other_qnames:
  625. seen_other_qnames.add(value.text)
  626. yield value.text, False
  627. text = this_elem.text
  628. if isinstance(text, ET.QName):
  629. if text.text not in seen_other_qnames:
  630. seen_other_qnames.add(text.text)
  631. yield text.text, False
def _namespaces(
    elem,
    default_namespace=None,
    nsmap=None,
):
    """Find all namespaces used in the document and return a prefix to uri map

    Args:
        elem: Root of the tree to scan (every name yielded by _qnames_iter).
        default_namespace: uri to map to the "" prefix, if any. Takes
            precedence over a "" entry in nsmap.
        nsmap: Preferred prefix -> uri mapping. Treated as empty when None.

    Returns:
        A new dict of prefix -> uri containing only the namespaces that were
        encountered in the tree. The "xml" prefix is never included.

    Raises:
        ValueError: if the "" prefix ended up in use and the tree also
            contains an unqualified element name.
        TypeError: via ET._raise_serialization_error when a name cannot be
            sliced like a string.
    """
    if nsmap is None:
        nsmap = {}
    out_nsmap = {}

    seen_uri_to_prefix = {}
    # Multiple prefixes may be present for a single uri. This will select the
    # last prefix found in nsmap for a given uri.
    local_prefix_map = {uri: prefix for prefix, uri in nsmap.items()}
    if default_namespace is not None:
        local_prefix_map[default_namespace] = ""
    elif "" in nsmap:
        # but we make sure the default prefix always take precedence
        local_prefix_map[nsmap[""]] = ""

    global_prefixes = set(ET._namespace_map.values())
    has_unqual_el = False
    default_namespace_attr_prefix = None
    for qname, is_el in _qnames_iter(elem):
        try:
            if qname[:1] == "{":
                uri_and_name = qname[1:].rsplit("}", 1)

                prefix = seen_uri_to_prefix.get(uri_and_name[0])
                if prefix is None:
                    # First time this uri is recorded. Try, in order: the
                    # caller's mapping, the global registry, a generated
                    # prefix. A candidate already used in out_nsmap is
                    # skipped.
                    prefix = local_prefix_map.get(uri_and_name[0])
                    if prefix is None or prefix in out_nsmap:
                        prefix = ET._namespace_map.get(uri_and_name[0])
                        if prefix is None or prefix in out_nsmap:
                            prefix = _make_new_ns_prefix(
                                out_nsmap,
                                global_prefixes,
                                nsmap,
                                default_namespace,
                            )
                    # The "" prefix is only recorded when an element name
                    # uses it; non-element names in the default namespace are
                    # handled below.
                    if prefix or is_el:
                        out_nsmap[prefix] = uri_and_name[0]
                        seen_uri_to_prefix[uri_and_name[0]] = prefix

                if not is_el and not prefix and not default_namespace_attr_prefix:
                    # Find the alternative prefix to use with non-element
                    # names
                    default_namespace_attr_prefix = _find_default_namespace_attr_prefix(
                        uri_and_name[0],
                        out_nsmap,
                        nsmap,
                        global_prefixes,
                        default_namespace,
                    )
                    out_nsmap[default_namespace_attr_prefix] = uri_and_name[0]
                    # Don't add this uri to prefix mapping as it might override
                    # the uri -> "" default mapping. We'll fix this up at the
                    # end of the fn.
                    # local_prefix_map[uri_and_name[0]] = default_namespace_attr_prefix
            else:
                if is_el:
                    has_unqual_el = True
        except TypeError:
            ET._raise_serialization_error(qname)

    if "" in out_nsmap and has_unqual_el:
        # FIXME: can this be handled in XML 1.0?
        raise ValueError(
            "cannot use non-qualified names with default_namespace option"
        )

    # The xml prefix doesn't need to be declared but may have been used to
    # prefix names. Let's remove it if it has been used
    out_nsmap.pop("xml", None)
    return out_nsmap
  701. def tostring(
  702. element,
  703. encoding=None,
  704. method=None,
  705. *,
  706. xml_declaration=None,
  707. default_namespace=None,
  708. short_empty_elements=True,
  709. nsmap=None,
  710. root_ns_only=False,
  711. minimal_ns_only=False,
  712. tree_cls=IncrementalTree,
  713. ):
  714. """Generate string representation of XML element.
  715. All subelements are included. If encoding is "unicode", a string
  716. is returned. Otherwise a bytestring is returned.
  717. *element* is an Element instance, *encoding* is an optional output
  718. encoding defaulting to US-ASCII, *method* is an optional output which can
  719. be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
  720. sets the default XML namespace (for "xmlns").
  721. Returns an (optionally) encoded string containing the XML data.
  722. """
  723. stream = io.StringIO() if encoding == "unicode" else io.BytesIO()
  724. tree_cls(element).write(
  725. stream,
  726. encoding,
  727. xml_declaration=xml_declaration,
  728. default_namespace=default_namespace,
  729. method=method,
  730. short_empty_elements=short_empty_elements,
  731. nsmap=nsmap,
  732. root_ns_only=root_ns_only,
  733. minimal_ns_only=minimal_ns_only,
  734. )
  735. return stream.getvalue()
  736. def tostringlist(
  737. element,
  738. encoding=None,
  739. method=None,
  740. *,
  741. xml_declaration=None,
  742. default_namespace=None,
  743. short_empty_elements=True,
  744. nsmap=None,
  745. root_ns_only=False,
  746. minimal_ns_only=False,
  747. tree_cls=IncrementalTree,
  748. ):
  749. lst = []
  750. stream = ET._ListDataStream(lst)
  751. tree_cls(element).write(
  752. stream,
  753. encoding,
  754. xml_declaration=xml_declaration,
  755. default_namespace=default_namespace,
  756. method=method,
  757. short_empty_elements=short_empty_elements,
  758. nsmap=nsmap,
  759. root_ns_only=root_ns_only,
  760. minimal_ns_only=minimal_ns_only,
  761. )
  762. return lst
  763. def compat_tostring(
  764. element,
  765. encoding=None,
  766. method=None,
  767. *,
  768. xml_declaration=None,
  769. default_namespace=None,
  770. short_empty_elements=True,
  771. nsmap=None,
  772. root_ns_only=True,
  773. minimal_ns_only=False,
  774. tree_cls=IncrementalTree,
  775. ):
  776. """tostring with options that produce the same results as xml.etree.ElementTree.tostring
  777. root_ns_only=True is a bit slower than False as it needs to traverse the
  778. tree one more time to collect all the namespaces.
  779. """
  780. return tostring(
  781. element,
  782. encoding=encoding,
  783. method=method,
  784. xml_declaration=xml_declaration,
  785. default_namespace=default_namespace,
  786. short_empty_elements=short_empty_elements,
  787. nsmap=nsmap,
  788. root_ns_only=root_ns_only,
  789. minimal_ns_only=minimal_ns_only,
  790. tree_cls=tree_cls,
  791. )
# --------------------------------------------------------------------
# serialization support
  794. @contextlib.contextmanager
  795. def _get_writer(file_or_filename, encoding):
  796. # Copied from Python 3.12
  797. # returns text write method and release all resources after using
  798. try:
  799. write = file_or_filename.write
  800. except AttributeError:
  801. # file_or_filename is a file name
  802. if encoding.lower() == "unicode":
  803. encoding = "utf-8"
  804. with open(file_or_filename, "w", encoding=encoding,
  805. errors="xmlcharrefreplace") as file:
  806. yield file.write, encoding
  807. else:
  808. # file_or_filename is a file-like object
  809. # encoding determines if it is a text or binary writer
  810. if encoding.lower() == "unicode":
  811. # use a text writer as is
  812. yield write, getattr(file_or_filename, "encoding", None) or "utf-8"
  813. else:
  814. # wrap a binary writer with TextIOWrapper
  815. with contextlib.ExitStack() as stack:
  816. if isinstance(file_or_filename, io.BufferedIOBase):
  817. file = file_or_filename
  818. elif isinstance(file_or_filename, io.RawIOBase):
  819. file = io.BufferedWriter(file_or_filename)
  820. # Keep the original file open when the BufferedWriter is
  821. # destroyed
  822. stack.callback(file.detach)
  823. else:
  824. # This is to handle passed objects that aren't in the
  825. # IOBase hierarchy, but just have a write method
  826. file = io.BufferedIOBase()
  827. file.writable = lambda: True
  828. file.write = write
  829. try:
  830. # TextIOWrapper uses this methods to determine
  831. # if BOM (for UTF-16, etc) should be added
  832. file.seekable = file_or_filename.seekable
  833. file.tell = file_or_filename.tell
  834. except AttributeError:
  835. pass
  836. file = io.TextIOWrapper(file,
  837. encoding=encoding,
  838. errors="xmlcharrefreplace",
  839. newline="\n")
  840. # Keep the original file open when the TextIOWrapper is
  841. # destroyed
  842. stack.callback(file.detach)
  843. yield file.write, encoding