excel.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. # Copyright (c) 2010-2024 openpyxl
  2. # Python stdlib imports
  3. import datetime
  4. import re
  5. from zipfile import ZipFile, ZIP_DEFLATED
  6. # package imports
  7. from openpyxl.utils.exceptions import InvalidFileException
  8. from openpyxl.xml.constants import (
  9. ARC_ROOT_RELS,
  10. ARC_WORKBOOK_RELS,
  11. ARC_APP,
  12. ARC_CORE,
  13. ARC_CUSTOM,
  14. CPROPS_TYPE,
  15. ARC_THEME,
  16. ARC_STYLE,
  17. ARC_WORKBOOK,
  18. )
  19. from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
  20. from openpyxl.xml.functions import tostring, fromstring
  21. from openpyxl.packaging.manifest import Manifest
  22. from openpyxl.packaging.relationship import (
  23. get_rels_path,
  24. RelationshipList,
  25. Relationship,
  26. )
  27. from openpyxl.comments.comment_sheet import CommentSheet
  28. from openpyxl.styles.stylesheet import write_stylesheet
  29. from openpyxl.worksheet._writer import WorksheetWriter
  30. from openpyxl.workbook._writer import WorkbookWriter
  31. from .theme import theme_xml
  32. class ExcelWriter:
  33. """Write a workbook object to an Excel file."""
  34. def __init__(self, workbook, archive):
  35. self._archive = archive
  36. self.workbook = workbook
  37. self.manifest = Manifest()
  38. self.vba_modified = set()
  39. self._tables = []
  40. self._charts = []
  41. self._images = []
  42. self._drawings = []
  43. self._comments = []
  44. self._pivots = []
  45. def write_data(self):
  46. from openpyxl.packaging.extended import ExtendedProperties
  47. """Write the various xml files into the zip archive."""
  48. # cleanup all worksheets
  49. archive = self._archive
  50. props = ExtendedProperties()
  51. archive.writestr(ARC_APP, tostring(props.to_tree()))
  52. archive.writestr(ARC_CORE, tostring(self.workbook.properties.to_tree()))
  53. if self.workbook.loaded_theme:
  54. archive.writestr(ARC_THEME, self.workbook.loaded_theme)
  55. else:
  56. archive.writestr(ARC_THEME, theme_xml)
  57. if len(self.workbook.custom_doc_props) >= 1:
  58. archive.writestr(ARC_CUSTOM, tostring(self.workbook.custom_doc_props.to_tree()))
  59. class CustomOverride():
  60. path = "/" + ARC_CUSTOM #PartName
  61. mime_type = CPROPS_TYPE #ContentType
  62. custom_override = CustomOverride()
  63. self.manifest.append(custom_override)
  64. self._write_worksheets()
  65. self._write_chartsheets()
  66. self._write_images()
  67. self._write_charts()
  68. self._write_external_links()
  69. stylesheet = write_stylesheet(self.workbook)
  70. archive.writestr(ARC_STYLE, tostring(stylesheet))
  71. writer = WorkbookWriter(self.workbook)
  72. archive.writestr(ARC_ROOT_RELS, writer.write_root_rels())
  73. archive.writestr(ARC_WORKBOOK, writer.write())
  74. archive.writestr(ARC_WORKBOOK_RELS, writer.write_rels())
  75. self._merge_vba()
  76. self.manifest._write(archive, self.workbook)
  77. def _merge_vba(self):
  78. """
  79. If workbook contains macros then extract associated files from cache
  80. of old file and add to archive
  81. """
  82. ARC_VBA = re.compile("|".join(
  83. ('xl/vba', r'xl/drawings/.*vmlDrawing\d\.vml',
  84. 'xl/ctrlProps', 'customUI', 'xl/activeX', r'xl/media/.*\.emf')
  85. )
  86. )
  87. if self.workbook.vba_archive:
  88. for name in set(self.workbook.vba_archive.namelist()) - self.vba_modified:
  89. if ARC_VBA.match(name):
  90. self._archive.writestr(name, self.workbook.vba_archive.read(name))
  91. def _write_images(self):
  92. # delegate to object
  93. for img in self._images:
  94. self._archive.writestr(img.path[1:], img._data())
  95. def _write_charts(self):
  96. # delegate to object
  97. if len(self._charts) != len(set(self._charts)):
  98. raise InvalidFileException("The same chart cannot be used in more than one worksheet")
  99. for chart in self._charts:
  100. self._archive.writestr(chart.path[1:], tostring(chart._write()))
  101. self.manifest.append(chart)
  102. def _write_drawing(self, drawing):
  103. """
  104. Write a drawing
  105. """
  106. self._drawings.append(drawing)
  107. drawing._id = len(self._drawings)
  108. for chart in drawing.charts:
  109. self._charts.append(chart)
  110. chart._id = len(self._charts)
  111. for img in drawing.images:
  112. self._images.append(img)
  113. img._id = len(self._images)
  114. rels_path = get_rels_path(drawing.path)[1:]
  115. self._archive.writestr(drawing.path[1:], tostring(drawing._write()))
  116. self._archive.writestr(rels_path, tostring(drawing._write_rels()))
  117. self.manifest.append(drawing)
  118. def _write_chartsheets(self):
  119. for idx, sheet in enumerate(self.workbook.chartsheets, 1):
  120. sheet._id = idx
  121. xml = tostring(sheet.to_tree())
  122. self._archive.writestr(sheet.path[1:], xml)
  123. self.manifest.append(sheet)
  124. if sheet._drawing:
  125. self._write_drawing(sheet._drawing)
  126. rel = Relationship(type="drawing", Target=sheet._drawing.path)
  127. rels = RelationshipList()
  128. rels.append(rel)
  129. tree = rels.to_tree()
  130. rels_path = get_rels_path(sheet.path[1:])
  131. self._archive.writestr(rels_path, tostring(tree))
  132. def _write_comment(self, ws):
  133. cs = CommentSheet.from_comments(ws._comments)
  134. self._comments.append(cs)
  135. cs._id = len(self._comments)
  136. self._archive.writestr(cs.path[1:], tostring(cs.to_tree()))
  137. self.manifest.append(cs)
  138. if ws.legacy_drawing is None or self.workbook.vba_archive is None:
  139. ws.legacy_drawing = 'xl/drawings/commentsDrawing{0}.vml'.format(cs._id)
  140. vml = None
  141. else:
  142. vml = fromstring(self.workbook.vba_archive.read(ws.legacy_drawing))
  143. vml = cs.write_shapes(vml)
  144. self._archive.writestr(ws.legacy_drawing, vml)
  145. self.vba_modified.add(ws.legacy_drawing)
  146. comment_rel = Relationship(Id="comments", type=cs._rel_type, Target=cs.path)
  147. ws._rels.append(comment_rel)
  148. def write_worksheet(self, ws):
  149. ws._drawing = SpreadsheetDrawing()
  150. ws._drawing.charts = ws._charts
  151. ws._drawing.images = ws._images
  152. if self.workbook.write_only:
  153. if not ws.closed:
  154. ws.close()
  155. writer = ws._writer
  156. else:
  157. writer = WorksheetWriter(ws)
  158. writer.write()
  159. ws._rels = writer._rels
  160. self._archive.write(writer.out, ws.path[1:])
  161. self.manifest.append(ws)
  162. writer.cleanup()
  163. def _write_worksheets(self):
  164. pivot_caches = set()
  165. for idx, ws in enumerate(self.workbook.worksheets, 1):
  166. ws._id = idx
  167. self.write_worksheet(ws)
  168. if ws._drawing:
  169. self._write_drawing(ws._drawing)
  170. for r in ws._rels:
  171. if "drawing" in r.Type:
  172. r.Target = ws._drawing.path
  173. if ws._comments:
  174. self._write_comment(ws)
  175. if ws.legacy_drawing is not None:
  176. shape_rel = Relationship(type="vmlDrawing", Id="anysvml",
  177. Target="/" + ws.legacy_drawing)
  178. ws._rels.append(shape_rel)
  179. for t in ws._tables.values():
  180. self._tables.append(t)
  181. t.id = len(self._tables)
  182. t._write(self._archive)
  183. self.manifest.append(t)
  184. ws._rels.get(t._rel_id).Target = t.path
  185. for p in ws._pivots:
  186. if p.cache not in pivot_caches:
  187. pivot_caches.add(p.cache)
  188. p.cache._id = len(pivot_caches)
  189. self._pivots.append(p)
  190. p._id = len(self._pivots)
  191. p._write(self._archive, self.manifest)
  192. self.workbook._pivots.append(p)
  193. r = Relationship(Type=p.rel_type, Target=p.path)
  194. ws._rels.append(r)
  195. if ws._rels:
  196. tree = ws._rels.to_tree()
  197. rels_path = get_rels_path(ws.path)[1:]
  198. self._archive.writestr(rels_path, tostring(tree))
  199. def _write_external_links(self):
  200. # delegate to object
  201. """Write links to external workbooks"""
  202. wb = self.workbook
  203. for idx, link in enumerate(wb._external_links, 1):
  204. link._id = idx
  205. rels_path = get_rels_path(link.path[1:])
  206. xml = link.to_tree()
  207. self._archive.writestr(link.path[1:], tostring(xml))
  208. rels = RelationshipList()
  209. rels.append(link.file_link)
  210. self._archive.writestr(rels_path, tostring(rels.to_tree()))
  211. self.manifest.append(link)
  212. def save(self):
  213. """Write data into the archive."""
  214. self.write_data()
  215. self._archive.close()
  216. def save_workbook(workbook, filename):
  217. """Save the given workbook on the filesystem under the name filename.
  218. :param workbook: the workbook to save
  219. :type workbook: :class:`openpyxl.workbook.Workbook`
  220. :param filename: the path to which save the workbook
  221. :type filename: string
  222. :rtype: bool
  223. """
  224. archive = ZipFile(filename, 'w', ZIP_DEFLATED, allowZip64=True)
  225. workbook.properties.modified = datetime.datetime.now(tz=datetime.timezone.utc).replace(tzinfo=None)
  226. writer = ExcelWriter(workbook, archive)
  227. writer.save()
  228. return True