cell.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. # Copyright (c) 2010-2024 openpyxl
  2. """
  3. Collection of utilities used within the package and also available for client code
  4. """
  5. from functools import lru_cache
  6. from itertools import chain, product
  7. from string import ascii_uppercase, digits
  8. import re
  9. from .exceptions import CellCoordinatesException
  10. # constants
  11. COORD_RE = re.compile(r'^[$]?([A-Za-z]{1,3})[$]?(\d+)$')
  12. COL_RANGE = """[A-Z]{1,3}:[A-Z]{1,3}:"""
  13. ROW_RANGE = r"""\d+:\d+:"""
  14. RANGE_EXPR = r"""
  15. [$]?(?P<min_col>[A-Za-z]{1,3})?
  16. [$]?(?P<min_row>\d+)?
  17. (:[$]?(?P<max_col>[A-Za-z]{1,3})?
  18. [$]?(?P<max_row>\d+)?)?
  19. """
  20. ABSOLUTE_RE = re.compile('^' + RANGE_EXPR +'$', re.VERBOSE)
  21. SHEET_TITLE = r"""
  22. (('(?P<quoted>([^']|'')*)')|(?P<notquoted>[^'^ ^!]*))!"""
  23. SHEETRANGE_RE = re.compile("""{0}(?P<cells>{1})(?=,?)""".format(
  24. SHEET_TITLE, RANGE_EXPR), re.VERBOSE)
  25. def get_column_interval(start, end):
  26. """
  27. Given the start and end columns, return all the columns in the series.
  28. The start and end columns can be either column letters or 1-based
  29. indexes.
  30. """
  31. if isinstance(start, str):
  32. start = column_index_from_string(start)
  33. if isinstance(end, str):
  34. end = column_index_from_string(end)
  35. return [get_column_letter(x) for x in range(start, end + 1)]
  36. def coordinate_from_string(coord_string):
  37. """Convert a coordinate string like 'B12' to a tuple ('B', 12)"""
  38. match = COORD_RE.match(coord_string)
  39. if not match:
  40. msg = f"Invalid cell coordinates ({coord_string})"
  41. raise CellCoordinatesException(msg)
  42. column, row = match.groups()
  43. row = int(row)
  44. if not row:
  45. msg = f"There is no row 0 ({coord_string})"
  46. raise CellCoordinatesException(msg)
  47. return column, row
  48. def absolute_coordinate(coord_string):
  49. """Convert a coordinate to an absolute coordinate string (B12 -> $B$12)"""
  50. m = ABSOLUTE_RE.match(coord_string)
  51. if not m:
  52. raise ValueError(f"{coord_string} is not a valid coordinate range")
  53. d = m.groupdict('')
  54. for k, v in d.items():
  55. if v:
  56. d[k] = f"${v}"
  57. if d['max_col'] or d['max_row']:
  58. fmt = "{min_col}{min_row}:{max_col}{max_row}"
  59. else:
  60. fmt = "{min_col}{min_row}"
  61. return fmt.format(**d)
  62. __decimal_to_alpha = [""] + list(ascii_uppercase)
  63. @lru_cache(maxsize=None)
  64. def get_column_letter(col_idx):
  65. """
  66. Convert decimal column position to its ASCII (base 26) form.
  67. Because column indices are 1-based, strides are actually pow(26, n) + 26
  68. Hence, a correction is applied between pow(26, n) and pow(26, 2) + 26 to
  69. prevent and additional column letter being prepended
  70. "A" == 1 == pow(26, 0)
  71. "Z" == 26 == pow(26, 0) + 26 // decimal equivalent 10
  72. "AA" == 27 == pow(26, 1) + 1
  73. "ZZ" == 702 == pow(26, 2) + 26 // decimal equivalent 100
  74. """
  75. if not 1 <= col_idx <= 18278:
  76. raise ValueError("Invalid column index {0}".format(col_idx))
  77. result = []
  78. if col_idx < 26:
  79. return __decimal_to_alpha[col_idx]
  80. while col_idx:
  81. col_idx, remainder = divmod(col_idx, 26)
  82. result.insert(0, __decimal_to_alpha[remainder])
  83. if not remainder:
  84. col_idx -= 1
  85. result.insert(0, "Z")
  86. return "".join(result)
  87. __alpha_to_decimal = {letter:pos for pos, letter in enumerate(ascii_uppercase, 1)}
  88. __powers = (1, 26, 676)
  89. @lru_cache(maxsize=None)
  90. def column_index_from_string(col):
  91. """
  92. Convert ASCII column name (base 26) to decimal with 1-based index
  93. Characters represent descending multiples of powers of 26
  94. "AFZ" == 26 * pow(26, 0) + 6 * pow(26, 1) + 1 * pow(26, 2)
  95. """
  96. error_msg = f"'{col}' is not a valid column name. Column names are from A to ZZZ"
  97. if len(col) > 3:
  98. raise ValueError(error_msg)
  99. idx = 0
  100. col = reversed(col.upper())
  101. for letter, power in zip(col, __powers):
  102. try:
  103. pos = __alpha_to_decimal[letter]
  104. except KeyError:
  105. raise ValueError(error_msg)
  106. idx += pos * power
  107. if not 0 < idx < 18279:
  108. raise ValueError(error_msg)
  109. return idx
  110. def range_boundaries(range_string):
  111. """
  112. Convert a range string into a tuple of boundaries:
  113. (min_col, min_row, max_col, max_row)
  114. Cell coordinates will be converted into a range with the cell at both end
  115. """
  116. msg = "{0} is not a valid coordinate or range".format(range_string)
  117. m = ABSOLUTE_RE.match(range_string)
  118. if not m:
  119. raise ValueError(msg)
  120. min_col, min_row, sep, max_col, max_row = m.groups()
  121. if sep:
  122. cols = min_col, max_col
  123. rows = min_row, max_row
  124. if not (
  125. all(cols + rows) or
  126. all(cols) and not any(rows) or
  127. all(rows) and not any(cols)
  128. ):
  129. raise ValueError(msg)
  130. if min_col is not None:
  131. min_col = column_index_from_string(min_col)
  132. if min_row is not None:
  133. min_row = int(min_row)
  134. if max_col is not None:
  135. max_col = column_index_from_string(max_col)
  136. else:
  137. max_col = min_col
  138. if max_row is not None:
  139. max_row = int(max_row)
  140. else:
  141. max_row = min_row
  142. return min_col, min_row, max_col, max_row
  143. def rows_from_range(range_string):
  144. """
  145. Get individual addresses for every cell in a range.
  146. Yields one row at a time.
  147. """
  148. min_col, min_row, max_col, max_row = range_boundaries(range_string)
  149. rows = range(min_row, max_row + 1)
  150. cols = [get_column_letter(col) for col in range(min_col, max_col + 1)]
  151. for row in rows:
  152. yield tuple('{0}{1}'.format(col, row) for col in cols)
  153. def cols_from_range(range_string):
  154. """
  155. Get individual addresses for every cell in a range.
  156. Yields one row at a time.
  157. """
  158. min_col, min_row, max_col, max_row = range_boundaries(range_string)
  159. rows = range(min_row, max_row+1)
  160. cols = (get_column_letter(col) for col in range(min_col, max_col+1))
  161. for col in cols:
  162. yield tuple('{0}{1}'.format(col, row) for row in rows)
  163. def coordinate_to_tuple(coordinate):
  164. """
  165. Convert an Excel style coordinate to (row, column) tuple
  166. """
  167. for idx, c in enumerate(coordinate):
  168. if c in digits:
  169. break
  170. col = coordinate[:idx]
  171. row = coordinate[idx:]
  172. return int(row), column_index_from_string(col)
  173. def range_to_tuple(range_string):
  174. """
  175. Convert a worksheet range to the sheetname and maximum and minimum
  176. coordinate indices
  177. """
  178. m = SHEETRANGE_RE.match(range_string)
  179. if m is None:
  180. raise ValueError("Value must be of the form sheetname!A1:E4")
  181. sheetname = m.group("quoted") or m.group("notquoted")
  182. cells = m.group("cells")
  183. boundaries = range_boundaries(cells)
  184. return sheetname, boundaries
  185. def quote_sheetname(sheetname):
  186. """
  187. Add quotes around sheetnames if they contain spaces.
  188. """
  189. if "'" in sheetname:
  190. sheetname = sheetname.replace("'", "''")
  191. sheetname = u"'{0}'".format(sheetname)
  192. return sheetname