dataframe.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # Copyright (c) 2010-2024 openpyxl
  2. from itertools import accumulate
  3. import operator
  4. import numpy
  5. from openpyxl.compat.product import prod
  6. def dataframe_to_rows(df, index=True, header=True):
  7. """
  8. Convert a Pandas dataframe into something suitable for passing into a worksheet.
  9. If index is True then the index will be included, starting one row below the header.
  10. If header is True then column headers will be included starting one column to the right.
  11. Formatting should be done by client code.
  12. """
  13. from pandas import Timestamp
  14. if header:
  15. if df.columns.nlevels > 1:
  16. rows = expand_index(df.columns, header)
  17. else:
  18. rows = [list(df.columns.values)]
  19. for row in rows:
  20. n = []
  21. for v in row:
  22. if isinstance(v, numpy.datetime64):
  23. v = Timestamp(v)
  24. n.append(v)
  25. row = n
  26. if index:
  27. row = [None]*df.index.nlevels + row
  28. yield row
  29. if index:
  30. yield df.index.names
  31. expanded = ([v] for v in df.index)
  32. if df.index.nlevels > 1:
  33. expanded = expand_index(df.index)
  34. # Using the expanded index is preferable to df.itertuples(index=True) so that we have 'None' inserted where applicable
  35. for (df_index, row) in zip(expanded, df.itertuples(index=False)):
  36. row = list(row)
  37. if index:
  38. row = df_index + row
  39. yield row
  40. def expand_index(index, header=False):
  41. """
  42. Expand axis or column Multiindex
  43. For columns use header = True
  44. For axes use header = False (default)
  45. """
  46. # For each element of the index, zip the members with the previous row
  47. # If the 2 elements of the zipped list do not match, we can insert the new value into the row
  48. # or if an earlier member was different, all later members should be added to the row
  49. values = list(index.values)
  50. previous_value = [None] * len(values[0])
  51. result = []
  52. for value in values:
  53. row = [None] * len(value)
  54. # Once there's a difference in member of an index with the prior index, we need to store all subsequent members in the row
  55. prior_change = False
  56. for idx, (current_index_member, previous_index_member) in enumerate(zip(value, previous_value)):
  57. if current_index_member != previous_index_member or prior_change:
  58. row[idx] = current_index_member
  59. prior_change = True
  60. previous_value = value
  61. # If this is for a row index, we're already returning a row so just yield
  62. if not header:
  63. yield row
  64. else:
  65. result.append(row)
  66. # If it's for a header, we need to transpose to get it in row order
  67. # Example: result = [['A', 'A'], [None, 'B']] -> [['A', None], ['A', 'B']]
  68. if header:
  69. result = numpy.array(result).transpose().tolist()
  70. for row in result:
  71. yield row