| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- # support for Schematron validation
- from lxml.includes cimport schematron
cdef class SchematronError(LxmlError):
    """Root of the Schematron exception hierarchy.

    Catch this class to handle any error raised by Schematron support.
    """
cdef class SchematronParseError(SchematronError):
    """Raised when an XML document cannot be parsed as a Schematron schema.
    """
cdef class SchematronValidateError(SchematronError):
    """Raised when validating an XML document against a Schematron schema
    fails with an error.
    """
- ################################################################################
- # Schematron
cdef class Schematron(_Validator):
    """Schematron(self, etree=None, file=None)
    A Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.  The main
    idea is to use the capabilities of XPath to put restrictions on the structure
    and the content of XML documents.  Here is a simple example::

      >>> schematron = Schematron(XML('''
      ... <schema xmlns="http://www.ascc.net/xml/schematron" >
      ...   <pattern name="id is the only permitted attribute name">
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>
      ... '''))

      >>> xml = XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      0

      >>> xml = XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      1

    Schematron was added to libxml2 in version 2.6.21.  Before version 2.6.32,
    however, Schematron lacked support for error reporting other than to stderr.
    This version is therefore required to retrieve validation warnings and
    errors in lxml.

    Note that instantiating this class emits a DeprecationWarning that points
    to 'lxml.isoschematron' as the replacement.
    """
    # Compiled schema returned by xmlSchematronParse(); released in __dealloc__.
    cdef schematron.xmlSchematron* _c_schema
    # Private copy of the schema document.  Only set when the validator is
    # built from 'etree'; stays NULL when the schema is loaded from a file.
    cdef xmlDoc* _c_schema_doc

    def __init__(self, etree=None, *, file=None):
        """Build the validator from a tree or from a schema file.

        'etree' takes precedence: 'file' is only consulted when 'etree'
        is None.

        Raises SchematronError if lxml was compiled without Schematron
        support, SchematronParseError if neither argument is given or the
        input is not a valid Schematron schema, and MemoryError if libxml2
        fails to create a parser context.
        """
        cdef _Document doc
        cdef _Element root_node
        cdef schematron.xmlSchematronParserCtxt* parser_ctxt = NULL
        _Validator.__init__(self)
        if not config.ENABLE_SCHEMATRON:
            raise SchematronError(
                "lxml.etree was compiled without Schematron support.")

        import warnings
        warnings.warn(
            "The (non-ISO) Schematron feature is deprecated and will be removed from libxml2 and lxml. "
            "Use 'lxml.isoschematron' instead.",
            DeprecationWarning,
        )

        if etree is not None:
            doc = _documentOrRaise(etree)
            root_node = _rootNodeOrRaise(etree)
            # The parser context is created on a copy rooted at root_node,
            # not on the caller's document.  The copy is owned by this
            # object and freed in __dealloc__.
            self._c_schema_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
            parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_schema_doc)
        elif file is not None:
            filename = _getFilenameForFile(file)
            if filename is None:
                # XXX assume a string object
                filename = file
            filename = _encodeFilename(filename)
            with self._error_log:
                # Register a document loader for the duration of the call
                # and restore the previously active one afterwards.
                orig_loader = _register_document_loader()
                parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
                _reset_document_loader(orig_loader)
        else:
            raise SchematronParseError("No tree or file given")

        if parser_ctxt is NULL:
            # No context means no schema will be built, so drop the
            # document copy right away instead of waiting for __dealloc__.
            if self._c_schema_doc is not NULL:
                tree.xmlFreeDoc(self._c_schema_doc)
                self._c_schema_doc = NULL
            raise MemoryError()

        try:
            with self._error_log:
                orig_loader = _register_document_loader()
                self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
                _reset_document_loader(orig_loader)
        finally:
            # The parser context is only needed while parsing.
            schematron.xmlSchematronFreeParserCtxt(parser_ctxt)

        if self._c_schema is NULL:
            raise SchematronParseError(
                "Document is not a valid Schematron schema",
                self._error_log)

    def __dealloc__(self):
        # Free the compiled schema first, then the document copy (if any).
        # NOTE(review): the copy is presumably still referenced by the
        # compiled schema, hence this order -- confirm against libxml2.
        schematron.xmlSchematronFree(self._c_schema)
        if self._c_schema_doc is not NULL:
            tree.xmlFreeDoc(self._c_schema_doc)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using Schematron.

        Returns true if document is valid, false if not.

        Raises MemoryError if libxml2 cannot create a validation context
        and SchematronValidateError if the validation run itself reports
        an internal error (return code -1).
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef schematron.xmlSchematronValidCtxt* valid_ctxt
        cdef int ret

        assert self._c_schema is not NULL, "Schematron instance not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)
        valid_ctxt = schematron.xmlSchematronNewValidCtxt(
            self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR)
        if valid_ctxt is NULL:
            raise MemoryError()

        try:
            # Start from an empty log so it only holds messages of this run.
            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            schematron.xmlSchematronSetValidStructuredErrors(
                valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
            # Validate a temporary document built for root_node; it is
            # torn down again right after the validation call.
            c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
            with nogil:
                # Pure C call, so the GIL is released while it runs.
                ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
            _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            schematron.xmlSchematronFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise SchematronValidateError(
                "Internal error in Schematron validation",
                self._error_log)
        # 0 means the document is valid; any other non-error code means
        # it is not.
        return ret == 0
|