etree_defs.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. #ifndef HAS_ETREE_DEFS_H
  2. #define HAS_ETREE_DEFS_H
  3. /* quick check for Python/libxml2/libxslt devel setup */
  4. #include "Python.h"
  5. #ifndef PY_VERSION_HEX
  6. # error the development package of Python (header files etc.) is not installed correctly
  7. #elif PY_VERSION_HEX < 0x03060000
  8. # error this version of lxml requires Python 3.6 or later
  9. #endif
  10. #include "libxml/xmlversion.h"
  11. #ifndef LIBXML_VERSION
  12. # error the development package of libxml2 (header files etc.) is not installed correctly
  13. #elif LIBXML_VERSION < 20700
  14. # error minimum required version of libxml2 is 2.7.0
  15. #endif
  16. #include "libxslt/xsltconfig.h"
  17. #ifndef LIBXSLT_VERSION
  18. # error the development package of libxslt (header files etc.) is not installed correctly
  19. #elif LIBXSLT_VERSION < 10123
  20. # error minimum required version of libxslt is 1.1.23
  21. #endif
  /* Shorthands around va_arg() for fetching an int or a C string from a
   * variable argument list. */
  #define va_int(ap)      va_arg(ap, int)
  #define va_charptr(ap)  va_arg(ap, char *)

  /* IS_PYPY: 1 when PYPY_VERSION is defined by the included headers,
   * 0 otherwise. */
  #if defined(PYPY_VERSION)
  #  define IS_PYPY 1
  #else
  #  define IS_PYPY 0
  #endif

  /* Fixed legacy switches (marked as unused). */
  #define IS_PYTHON2 0
  #define IS_PYTHON3 1

  /* Always build with unicode strings, overriding any earlier setting. */
  #undef  LXML_UNICODE_STRINGS
  #define LXML_UNICODE_STRINGS 1
  #if IS_PYPY
  /* Threading is not currently supported by PyPy */
  #  if !defined(WITHOUT_THREADING)
  #    define WITHOUT_THREADING
  #  endif
  #else
  /* Outside PyPy, PyWeakref_LockObject() is stubbed out to NULL. */
  #  define PyWeakref_LockObject(obj)  (NULL)
  #endif
  /* PyPy only: make sure PyUnicode_FromFormat() and PyBytes_FromFormat()
   * are available, either by aliasing PyString_FromFormat or through the
   * fallback function below. */
  #if IS_PYPY
  #  ifndef PyUnicode_FromFormat
  #    define PyUnicode_FromFormat  PyString_FromFormat
  #  endif
  #  if !defined(PyBytes_FromFormat)
  #    ifdef PyString_FromFormat
  #      define PyBytes_FromFormat  PyString_FromFormat
  #    else
  #include <stdarg.h>
  /* Fallback implementation of PyBytes_FromFormat().
   *
   * Formats the arguments with PyUnicode_FromFormatV() and encodes the
   * result as UTF-8.
   *
   * format: format string understood by PyUnicode_FromFormatV()
   * ...:    values consumed by the format string
   *
   * Returns the resulting bytes object, or NULL if formatting or encoding
   * failed.  A TypeError is set explicitly when the final object is not an
   * exact bytes instance.
   */
  static PyObject* PyBytes_FromFormat(const char* format, ...) {
      PyObject *string;
      va_list vargs;

      /* The function is declared with a "..." prototype and <stdarg.h> is
       * included above, so the two-argument form is always the right one.
       * The one-argument va_start(vargs) of pre-ANSI <varargs.h> does not
       * compile with <stdarg.h> in C99-C17. */
      va_start(vargs, format);
      string = PyUnicode_FromFormatV(format, vargs);
      va_end(vargs);

      if (string && PyUnicode_Check(string)) {
          /* swap the unicode result for its UTF-8 encoded form */
          PyObject *bstring = PyUnicode_AsUTF8String(string);
          Py_DECREF(string);
          string = bstring;
      }
      if (string && !PyBytes_CheckExact(string)) {
          Py_DECREF(string);
          string = NULL;
          PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object");
      }
      return string;
  }
  #    endif
  #  endif
  #endif
  /* The wstr representation of Unicode strings is gone in newer Python
   * versions (the original note names Python 3.12).  Starting with the
   * version check below (3.11.0a1), the legacy accessors are replaced by
   * constant stand-ins. */
  #if PY_VERSION_HEX >= 0x030B00A1
  #  undef  PyUnicode_IS_READY
  #  define PyUnicode_IS_READY(ustr)       (1)
  #  undef  PyUnicode_AS_DATA
  #  define PyUnicode_AS_DATA(ustr)        (NULL)
  #  undef  PyUnicode_GET_DATA_SIZE
  #  define PyUnicode_GET_DATA_SIZE(ustr)  (0)
  #endif
  /* Threading switch.
   *
   * With WITHOUT_THREADING defined, ENABLE_THREADING is 0 and the GIL /
   * thread-state API is replaced by no-op stand-ins.  Otherwise
   * ENABLE_THREADING is 1 and the real API is left untouched.
   *
   * The function-like stand-ins expand to plain expressions (a void cast
   * that still references the argument), so they behave like the functions
   * they replace: they can be used in an unbraced if/else and do not
   * capture a following 'else'.
   *
   * Py_UNBLOCK_THREADS and Py_BLOCK_THREADS are complete statements,
   * including the terminating semicolon, and rely on a variable named
   * '_save' being in scope at the point of use.
   */
  #ifdef WITHOUT_THREADING
  #  define ENABLE_THREADING 0
  #  undef  PyEval_SaveThread
  #  define PyEval_SaveThread()          (NULL)
  #  undef  PyEval_RestoreThread
  #  define PyEval_RestoreThread(state)  ((void)(state))
  #  undef  PyGILState_Ensure
  #  define PyGILState_Ensure()          (PyGILState_UNLOCKED)
  #  undef  PyGILState_Release
  #  define PyGILState_Release(state)    ((void)(state))
  #  undef  Py_UNBLOCK_THREADS
  #  define Py_UNBLOCK_THREADS           _save = NULL;
  #  undef  Py_BLOCK_THREADS
  #  define Py_BLOCK_THREADS             ((void)(_save));
  #else
  #  define ENABLE_THREADING 1
  #endif
  /* FIXME: hack to make new error reporting compile in old libxml2 versions */
  #if LIBXML_VERSION < 20704
  /* no structured error context: use NULL in its place */
  #  define xmlStructuredErrorContext  NULL
  /* forward to the variant without user data; 'data' is dropped */
  #  define xmlXIncludeProcessTreeFlagsData(node, flags, data) \
            xmlXIncludeProcessTreeFlags(node, flags)
  #endif
  /* schematron was added in libxml2 2.6.21 */
  #ifndef LIBXML_SCHEMATRON_ENABLED
  /* Not available: provide dummy constants, opaque types and no-op calls
   * so that code using the schematron API still compiles. */
  #  define ENABLE_SCHEMATRON 0

  #  define XML_SCHEMATRON_OUT_QUIET  0
  #  define XML_SCHEMATRON_OUT_XML    0
  #  define XML_SCHEMATRON_OUT_ERROR  0

  typedef void xmlSchematron;
  typedef void xmlSchematronParserCtxt;
  typedef void xmlSchematronValidCtxt;

  /* constructors and the parser yield NULL, validation yields 0 */
  #  define xmlSchematronNewParserCtxt(file)            NULL
  #  define xmlSchematronNewDocParserCtxt(doc)          NULL
  #  define xmlSchematronParse(ctxt)                    NULL
  #  define xmlSchematronNewValidCtxt(schema, options)  NULL
  #  define xmlSchematronValidateDoc(ctxt, doc)         0

  /* cleanup and configuration calls expand to nothing */
  #  define xmlSchematronFreeParserCtxt(ctxt)
  #  define xmlSchematronFree(schema)
  #  define xmlSchematronFreeValidCtxt(ctxt)
  #  define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
  #else
  #  define ENABLE_SCHEMATRON 1
  #endif
  /* Fallback values for parser option flags, each supplied only when
   * building against a libxml2 version below the stated threshold
   * (presumably the release that introduced the flag - confirm against the
   * libxml2 headers).  Every flag is a single bit. */
  #if LIBXML_VERSION < 20708
  #  define HTML_PARSE_NODEFDTD       (1 << 2)   /* 4 */
  #endif

  #if LIBXML_VERSION < 20900
  #  define XML_PARSE_BIG_LINES       (1 << 22)  /* 0x400000 */
  #endif

  #if LIBXML_VERSION < 21300
  #  define XML_PARSE_NO_XXE          (1 << 23)  /* 0x800000 */
  #endif

  #if LIBXML_VERSION < 21400
  #  define XML_PARSE_UNZIP           (1 << 24)  /* 0x1000000 */
  #  define XML_PARSE_NO_SYS_CATALOG  (1 << 25)  /* 0x2000000 */
  #  define XML_PARSE_CATALOG_PI      (1 << 26)  /* 0x4000000 */
  #endif

  #if LIBXML_VERSION < 21500
  #  define XML_PARSE_SKIP_IDS        (1 << 27)  /* 0x8000000 */
  #endif
  149. #include "libxml/tree.h"
  150. #ifndef LIBXML2_NEW_BUFFER
  151. typedef xmlBuffer xmlBuf;
  152. # define xmlBufContent(buf) xmlBufferContent(buf)
  153. # define xmlBufUse(buf) xmlBufferLength(buf)
  154. #endif
  #if LIBXML_VERSION < 21500
  /* Fallback for libxml2 below 2.15: a parser context counts as stopped
   * as soon as its disableSAX field is non-zero. */
  #  define xmlCtxtIsStopped(p_ctxt)  (0 != (p_ctxt)->disableSAX)
  #endif
  /* libexslt 1.1.25+ support EXSLT functions in XPath.
   * For older versions the registration calls expand to nothing. */
  #if LIBXSLT_VERSION < 10125
  #  define exsltDateXpathCtxtRegister(ctxt, prefix)
  #  define exsltMathXpathCtxtRegister(ctxt, prefix)
  #  define exsltSetsXpathCtxtRegister(ctxt, prefix)
  #  define exsltStrXpathCtxtRegister(ctxt, prefix)
  #endif

  /* Look up the 'encoding' field of a stylesheet through libxslt's
   * XSLT_GET_IMPORT_PTR() and store it in result_var. */
  #define LXML_GET_XSLT_ENCODING(result_var, style) \
          XSLT_GET_IMPORT_PTR(result_var, style, encoding)
  /* work around MSDEV 6.0 */
  #if defined(_MSC_VER) && (_MSC_VER == 1200) && (WINVER < 0x0500)
  long _ftol( double ); /* defined by VC6 C libs */
  long _ftol2( double dblSource ) { return _ftol( dblSource ); }
  #endif

  /* unlikely_condition(x): evaluates to x.  On GCC newer than 2.95 the
   * expression is additionally passed through __builtin_expect() with an
   * expected value of 0; every other compiler gets the plain expression. */
  #if defined(__GNUC__) && \
      (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
  #  define unlikely_condition(x)  __builtin_expect((x), 0)
  #else
  #  define unlikely_condition(x)  (x)
  #endif
  /* Fallback Py_TYPE() for headers that do not define it as a macro:
   * reads ob_type after casting the argument to PyObject*. */
  #if !defined(Py_TYPE)
  #  define Py_TYPE(ob)  (((PyObject*)(ob))->ob_type)
  #endif

  /* tp_name of the type of object 'o' */
  #define _fqtypename(o)  ((Py_TYPE(o))->tp_name)
  /* Overflow-checked array allocation on top of the PyMem_* allocator.
   *
   * lxml_malloc(count, item_size)
   *     Evaluates to NULL when 'count' (converted to size_t) exceeds
   *     PY_SSIZE_T_MAX / item_size, otherwise to
   *     PyMem_Malloc(count * item_size).
   * lxml_realloc(mem, count, item_size)
   *     Same check, then PyMem_Realloc(mem, count * item_size).
   * lxml_free(mem)
   *     PyMem_Free(mem).
   *
   * 'item_size' is parenthesized in every expansion so that an expression
   * argument such as 'a + b' is used as a whole in both the limit check and
   * the size computation.  'count' and 'item_size' are each expanded twice;
   * avoid arguments with side effects.  'item_size' must not be 0.
   */
  #define lxml_malloc(count, item_size) \
      (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
       (PyMem_Malloc((count) * (item_size))))

  #define lxml_realloc(mem, count, item_size) \
      (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
       (PyMem_Realloc((mem), (count) * (item_size))))

  #define lxml_free(mem)  PyMem_Free(mem)
  /* true for Python unicode and bytes objects */
  #define _isString(obj)  (PyUnicode_Check(obj) || PyBytes_Check(obj))

  /* true for the node types treated as elements here:
   * element, comment, entity reference and processing instruction */
  #define _isElement(c_node)                            \
          (((c_node)->type == XML_ELEMENT_NODE)    ||   \
           ((c_node)->type == XML_COMMENT_NODE)    ||   \
           ((c_node)->type == XML_ENTITY_REF_NODE) ||   \
           ((c_node)->type == XML_PI_NODE))

  /* like _isElement(), but also accepts XInclude start/end marker nodes */
  #define _isElementOrXInclude(c_node)                  \
          (_isElement(c_node)                      ||   \
           ((c_node)->type == XML_XINCLUDE_START)  ||   \
           ((c_node)->type == XML_XINCLUDE_END))

  /* namespace href of a node, or 0 if the node has no namespace */
  #define _getNs(c_node) \
          (((c_node)->ns != 0) ? ((c_node)->ns->href) : 0)
  204. #include "string.h"
  205. static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
  206. xmlDoc *c_doc;
  207. void *context;
  208. *is_owned = 0;
  209. if (unlikely_condition(!PyCapsule_IsValid(capsule, (const char*)"libxml2:xmlDoc"))) {
  210. PyErr_SetString(
  211. PyExc_TypeError,
  212. "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc");
  213. return NULL;
  214. }
  215. c_doc = (xmlDoc*) PyCapsule_GetPointer(capsule, (const char*)"libxml2:xmlDoc");
  216. if (unlikely_condition(!c_doc)) return NULL;
  217. if (unlikely_condition(c_doc->type != XML_DOCUMENT_NODE && c_doc->type != XML_HTML_DOCUMENT_NODE)) {
  218. PyErr_Format(
  219. PyExc_ValueError,
  220. "Illegal document provided: expected XML or HTML, found %d", (int)c_doc->type);
  221. return NULL;
  222. }
  223. context = PyCapsule_GetContext(capsule);
  224. if (unlikely_condition(!context && PyErr_Occurred())) return NULL;
  225. if (context && strcmp((const char*) context, "destructor:xmlFreeDoc") == 0) {
  226. /* take ownership by setting destructor to NULL */
  227. if (PyCapsule_SetDestructor(capsule, NULL) == 0) {
  228. /* ownership transferred => invalidate capsule by clearing its name */
  229. if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) {
  230. /* this should never happen since everything above succeeded */
  231. xmlFreeDoc(c_doc);
  232. return NULL;
  233. }
  234. *is_owned = 1;
  235. }
  236. }
  237. return c_doc;
  238. }
  239. /* Macro pair implementation of a depth first tree walker
  240. *
  241. * Calls the code block between the BEGIN and END macros for all elements
  242. * below c_tree_top (exclusively), starting at c_node (inclusively iff
  243. * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
  244. * that match _isElement(), the normal variant will stop on every node
  245. * except text nodes.
  246. *
  247. * To traverse the node and all of its children and siblings in Pyrex, call
  248. * cdef xmlNode* some_node
  249. * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
  250. * # do something with some_node
  251. * END_FOR_EACH_ELEMENT_FROM(some_node)
  252. *
  253. * To traverse only the children and siblings of a node, call
  254. * cdef xmlNode* some_node
  255. * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
  256. * # do something with some_node
  257. * END_FOR_EACH_ELEMENT_FROM(some_node)
  258. *
  259. * To traverse only the children, do:
  260. * cdef xmlNode* some_node
  261. * some_node = parent_node.children
  262. * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
  263. * # do something with some_node
  264. * END_FOR_EACH_ELEMENT_FROM(some_node)
  265. *
  266. * NOTE: 'some_node' MUST be a plain 'xmlNode*' !
  267. *
  268. * NOTE: parent modification during the walk can divert the iterator, but
  269. * should not segfault !
  270. */
  /* 1 if the node is accepted by the walk: every node when only_elements
   * is 0, otherwise only nodes matching _isElement() */
  #define _LX__ELEMENT_MATCH(c_node, only_elements)  \
      (!(only_elements) || (_isElement(c_node)))

  /* move c_node along its ->next chain until it is accepted or becomes 0 */
  #define _LX__ADVANCE_TO_NEXT(c_node, only_elements)                         \
      while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) { \
          c_node = c_node->next;                                              \
      }

  /* one depth-first step: children first, then siblings, then the siblings
   * of the ancestors; c_node becomes 0 when the walk is over */
  #define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements)           \
  {                                                                           \
      xmlNode* _lx__next = 0;                                                 \
      /* walk through children first, but never descend into entity */       \
      /* references or DTD nodes */                                           \
      if (c_node->type != XML_ENTITY_REF_NODE &&                              \
              c_node->type != XML_DTD_NODE) {                                 \
          _lx__next = c_node->children;                                       \
          _LX__ADVANCE_TO_NEXT(_lx__next, only_elements)                      \
      }                                                                       \
      if ((_lx__next == 0) && (c_node != c_stop_node)) {                      \
          for (;;) {                                                          \
              /* try siblings */                                              \
              _lx__next = c_node->next;                                       \
              _LX__ADVANCE_TO_NEXT(_lx__next, only_elements)                  \
              if (_lx__next != 0)                                             \
                  break;                                                      \
              /* back off through parents; their own subtree was already */   \
              /* traversed, so only their siblings are looked at next */      \
              c_node = c_node->parent;                                        \
              if ((c_node == 0) || (c_node == c_stop_node) ||                 \
                      ((only_elements) && !_isElement(c_node)))               \
                  break;                                                      \
          }                                                                   \
      }                                                                       \
      c_node = _lx__next;                                                     \
  }

  /* opens three scopes that _LX__END_FOR_EACH_FROM closes again */
  #define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
  {                                                                           \
      if (c_node != 0) {                                                      \
          const xmlNode* _lx__tree_top = (c_tree_top);                        \
          const int _lx__only_elements = (only_elements);                     \
          /* make sure we start at an accepted node */                        \
          if (_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) {               \
              if (! (inclusive)) {                                            \
                  /* skip the first node */                                   \
                  _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
              }                                                               \
          } else if (c_node == _lx__tree_top) {                               \
              /* the start node is skipped, so 'inclusive' is irrelevant */   \
              c_node = 0; /* nothing to traverse */                           \
          } else {                                                            \
              c_node = c_node->next;                                          \
              _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements)                \
          }                                                                   \
                                                                              \
          /* now run the user code on the nodes we find */                    \
          while (c_node != 0) {
              /* here goes the code to be run for each node */

  #define _LX__END_FOR_EACH_FROM(c_node)                                      \
              _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
          }                                                                   \
      }                                                                       \
  }

  /* walk restricted to nodes matching _isElement() */
  #define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive)   \
      _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)

  #define END_FOR_EACH_ELEMENT_FROM(c_node)   \
      _LX__END_FOR_EACH_FROM(c_node)

  /* walk without the element filter */
  #define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive)   \
      _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)

  #define END_FOR_EACH_FROM(c_node)   \
      _LX__END_FOR_EACH_FROM(c_node)
  342. #endif /* HAS_ETREE_DEFS_H */