manifest.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. # Copyright (c) 2010-2024 openpyxl
  2. """
  3. File manifest
  4. """
  5. from mimetypes import MimeTypes
  6. import os.path
  7. from openpyxl.descriptors.serialisable import Serialisable
  8. from openpyxl.descriptors import String, Sequence
  9. from openpyxl.xml.functions import fromstring
  10. from openpyxl.xml.constants import (
  11. ARC_CONTENT_TYPES,
  12. ARC_THEME,
  13. ARC_STYLE,
  14. THEME_TYPE,
  15. STYLES_TYPE,
  16. CONTYPES_NS,
  17. ACTIVEX,
  18. CTRL,
  19. VBA,
  20. )
  21. from openpyxl.xml.functions import tostring
  22. # initialise mime-types
  23. mimetypes = MimeTypes()
  24. mimetypes.add_type('application/xml', ".xml")
  25. mimetypes.add_type('application/vnd.openxmlformats-package.relationships+xml', ".rels")
  26. mimetypes.add_type("application/vnd.ms-office.vbaProject", ".bin")
  27. mimetypes.add_type("application/vnd.openxmlformats-officedocument.vmlDrawing", ".vml")
  28. mimetypes.add_type("image/x-emf", ".emf")
  29. class FileExtension(Serialisable):
  30. tagname = "Default"
  31. Extension = String()
  32. ContentType = String()
  33. def __init__(self, Extension, ContentType):
  34. self.Extension = Extension
  35. self.ContentType = ContentType
  36. class Override(Serialisable):
  37. tagname = "Override"
  38. PartName = String()
  39. ContentType = String()
  40. def __init__(self, PartName, ContentType):
  41. self.PartName = PartName
  42. self.ContentType = ContentType
  43. DEFAULT_TYPES = [
  44. FileExtension("rels", "application/vnd.openxmlformats-package.relationships+xml"),
  45. FileExtension("xml", "application/xml"),
  46. ]
  47. DEFAULT_OVERRIDE = [
  48. Override("/" + ARC_STYLE, STYLES_TYPE), # Styles
  49. Override("/" + ARC_THEME, THEME_TYPE), # Theme
  50. Override("/docProps/core.xml", "application/vnd.openxmlformats-package.core-properties+xml"),
  51. Override("/docProps/app.xml", "application/vnd.openxmlformats-officedocument.extended-properties+xml")
  52. ]
  53. class Manifest(Serialisable):
  54. tagname = "Types"
  55. Default = Sequence(expected_type=FileExtension, unique=True)
  56. Override = Sequence(expected_type=Override, unique=True)
  57. path = "[Content_Types].xml"
  58. __elements__ = ("Default", "Override")
  59. def __init__(self,
  60. Default=(),
  61. Override=(),
  62. ):
  63. if not Default:
  64. Default = DEFAULT_TYPES
  65. self.Default = Default
  66. if not Override:
  67. Override = DEFAULT_OVERRIDE
  68. self.Override = Override
  69. @property
  70. def filenames(self):
  71. return [part.PartName for part in self.Override]
  72. @property
  73. def extensions(self):
  74. """
  75. Map content types to file extensions
  76. Skip parts without extensions
  77. """
  78. exts = {os.path.splitext(part.PartName)[-1] for part in self.Override}
  79. return [(ext[1:], mimetypes.types_map[True][ext]) for ext in sorted(exts) if ext]
  80. def to_tree(self):
  81. """
  82. Custom serialisation method to allow setting a default namespace
  83. """
  84. defaults = [t.Extension for t in self.Default]
  85. for ext, mime in self.extensions:
  86. if ext not in defaults:
  87. mime = FileExtension(ext, mime)
  88. self.Default.append(mime)
  89. tree = super().to_tree()
  90. tree.set("xmlns", CONTYPES_NS)
  91. return tree
  92. def __contains__(self, content_type):
  93. """
  94. Check whether a particular content type is contained
  95. """
  96. for t in self.Override:
  97. if t.ContentType == content_type:
  98. return True
  99. def find(self, content_type):
  100. """
  101. Find specific content-type
  102. """
  103. try:
  104. return next(self.findall(content_type))
  105. except StopIteration:
  106. return
  107. def findall(self, content_type):
  108. """
  109. Find all elements of a specific content-type
  110. """
  111. for t in self.Override:
  112. if t.ContentType == content_type:
  113. yield t
  114. def append(self, obj):
  115. """
  116. Add content object to the package manifest
  117. # needs a contract...
  118. """
  119. ct = Override(PartName=obj.path, ContentType=obj.mime_type)
  120. self.Override.append(ct)
  121. def _write(self, archive, workbook):
  122. """
  123. Write manifest to the archive
  124. """
  125. self.append(workbook)
  126. self._write_vba(workbook)
  127. self._register_mimetypes(filenames=archive.namelist())
  128. archive.writestr(self.path, tostring(self.to_tree()))
  129. def _register_mimetypes(self, filenames):
  130. """
  131. Make sure that the mime type for all file extensions is registered
  132. """
  133. for fn in filenames:
  134. ext = os.path.splitext(fn)[-1]
  135. if not ext:
  136. continue
  137. mime = mimetypes.types_map[True][ext]
  138. fe = FileExtension(ext[1:], mime)
  139. self.Default.append(fe)
  140. def _write_vba(self, workbook):
  141. """
  142. Add content types from cached workbook when keeping VBA
  143. """
  144. if workbook.vba_archive:
  145. node = fromstring(workbook.vba_archive.read(ARC_CONTENT_TYPES))
  146. mf = Manifest.from_tree(node)
  147. filenames = self.filenames
  148. for override in mf.Override:
  149. if override.PartName not in (ACTIVEX, CTRL, VBA):
  150. continue
  151. if override.PartName not in filenames:
  152. self.Override.append(override)