rich_text.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. # Copyright (c) 2010-2024 openpyxl
  2. """
  3. RichText definition
  4. """
  5. from copy import copy
  6. from openpyxl.compat import NUMERIC_TYPES
  7. from openpyxl.cell.text import InlineFont, Text
  8. from openpyxl.descriptors import (
  9. Strict,
  10. String,
  11. Typed
  12. )
  13. from openpyxl.xml.functions import Element, whitespace
  14. class TextBlock(Strict):
  15. """ Represents text string in a specific format
  16. This class is used as part of constructing a rich text strings.
  17. """
  18. font = Typed(expected_type=InlineFont)
  19. text = String()
  20. def __init__(self, font, text):
  21. self.font = font
  22. self.text = text
  23. def __eq__(self, other):
  24. return self.text == other.text and self.font == other.font
  25. def __str__(self):
  26. """Just retun the text"""
  27. return self.text
  28. def __repr__(self):
  29. font = self.font != InlineFont() and self.font or "default"
  30. return f"{self.__class__.__name__} text={self.text}, font={font}"
  31. def to_tree(self):
  32. el = Element("r")
  33. el.append(self.font.to_tree(tagname="rPr"))
  34. t = Element("t")
  35. t.text = self.text
  36. whitespace(t)
  37. el.append(t)
  38. return el
#
# Rich Text class.
# This class behaves just like a list whose members are either simple strings, or TextBlock() instances.
# In addition, it can be initialized in several ways:
# t = CellRichText([...]) # initialize with a list.
# t = CellRichText((...)) # initialize with a tuple.
# t = CellRichText.from_tree(node) # where node is an Element() from either lxml or xml.etree (has a 'tag' element)
  46. class CellRichText(list):
  47. """Represents a rich text string.
  48. Initialize with a list made of pure strings or :class:`TextBlock` elements
  49. Can index object to access or modify individual rich text elements
  50. it also supports the + and += operators between rich text strings
  51. There are no user methods for this class
  52. operations which modify the string will generally call an optimization pass afterwards,
  53. that merges text blocks with identical formats, consecutive pure text strings,
  54. and remove empty strings and empty text blocks
  55. """
  56. def __init__(self, *args):
  57. if len(args) == 1:
  58. args = args[0]
  59. if isinstance(args, (list, tuple)):
  60. CellRichText._check_rich_text(args)
  61. else:
  62. CellRichText._check_element(args)
  63. args = [args]
  64. else:
  65. CellRichText._check_rich_text(args)
  66. super().__init__(args)
  67. @classmethod
  68. def _check_element(cls, value):
  69. if not isinstance(value, (str, TextBlock, NUMERIC_TYPES)):
  70. raise TypeError(f"Illegal CellRichText element {value}")
  71. @classmethod
  72. def _check_rich_text(cls, rich_text):
  73. for t in rich_text:
  74. CellRichText._check_element(t)
  75. @classmethod
  76. def from_tree(cls, node):
  77. text = Text.from_tree(node)
  78. if text.t:
  79. return (text.t.replace('x005F_', ''),)
  80. s = []
  81. for r in text.r:
  82. t = ""
  83. if r.t:
  84. t = r.t.replace('x005F_', '')
  85. if r.rPr:
  86. s.append(TextBlock(r.rPr, t))
  87. else:
  88. s.append(t)
  89. return cls(s)
  90. # Merge TextBlocks with identical formatting
  91. # remove empty elements
  92. def _opt(self):
  93. last_t = None
  94. l = CellRichText(tuple())
  95. for t in self:
  96. if isinstance(t, str):
  97. if not t:
  98. continue
  99. elif not t.text:
  100. continue
  101. if type(last_t) == type(t):
  102. if isinstance(t, str):
  103. last_t += t
  104. continue
  105. elif last_t.font == t.font:
  106. last_t.text += t.text
  107. continue
  108. if last_t:
  109. l.append(last_t)
  110. last_t = t
  111. if last_t:
  112. # Add remaining TextBlock at end of rich text
  113. l.append(last_t)
  114. super().__setitem__(slice(None), l)
  115. return self
  116. def __iadd__(self, arg):
  117. # copy used here to create new TextBlock() so we don't modify the right hand side in _opt()
  118. CellRichText._check_rich_text(arg)
  119. super().__iadd__([copy(e) for e in list(arg)])
  120. return self._opt()
  121. def __add__(self, arg):
  122. return CellRichText([copy(e) for e in list(self) + list(arg)])._opt()
  123. def __setitem__(self, indx, val):
  124. CellRichText._check_element(val)
  125. super().__setitem__(indx, val)
  126. self._opt()
  127. def append(self, arg):
  128. CellRichText._check_element(arg)
  129. super().append(arg)
  130. def extend(self, arg):
  131. CellRichText._check_rich_text(arg)
  132. super().extend(arg)
  133. def __repr__(self):
  134. return "CellRichText([{}])".format(', '.join((repr(s) for s in self)))
  135. def __str__(self):
  136. return ''.join([str(s) for s in self])
  137. def as_list(self):
  138. """
  139. Returns a list of the strings contained.
  140. The main reason for this is to make editing easier.
  141. """
  142. return [str(s) for s in self]
  143. def to_tree(self):
  144. """
  145. Return the full XML representation
  146. """
  147. container = Element("is")
  148. for obj in self:
  149. if isinstance(obj, TextBlock):
  150. container.append(obj.to_tree())
  151. else:
  152. el = Element("r")
  153. t = Element("t")
  154. t.text = obj
  155. whitespace(t)
  156. el.append(t)
  157. container.append(el)
  158. return container