| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- # Copyright (c) 2010-2024 openpyxl
- from itertools import accumulate
- import operator
- import numpy
- from openpyxl.compat.product import prod
- def dataframe_to_rows(df, index=True, header=True):
- """
- Convert a Pandas dataframe into something suitable for passing into a worksheet.
- If index is True then the index will be included, starting one row below the header.
- If header is True then column headers will be included starting one column to the right.
- Formatting should be done by client code.
- """
- from pandas import Timestamp
- if header:
- if df.columns.nlevels > 1:
- rows = expand_index(df.columns, header)
- else:
- rows = [list(df.columns.values)]
- for row in rows:
- n = []
- for v in row:
- if isinstance(v, numpy.datetime64):
- v = Timestamp(v)
- n.append(v)
- row = n
- if index:
- row = [None]*df.index.nlevels + row
- yield row
- if index:
- yield df.index.names
- expanded = ([v] for v in df.index)
- if df.index.nlevels > 1:
- expanded = expand_index(df.index)
- # Using the expanded index is preferable to df.itertuples(index=True) so that we have 'None' inserted where applicable
- for (df_index, row) in zip(expanded, df.itertuples(index=False)):
- row = list(row)
- if index:
- row = df_index + row
- yield row
- def expand_index(index, header=False):
- """
- Expand axis or column Multiindex
- For columns use header = True
- For axes use header = False (default)
- """
- # For each element of the index, zip the members with the previous row
- # If the 2 elements of the zipped list do not match, we can insert the new value into the row
- # or if an earlier member was different, all later members should be added to the row
- values = list(index.values)
- previous_value = [None] * len(values[0])
- result = []
- for value in values:
- row = [None] * len(value)
- # Once there's a difference in member of an index with the prior index, we need to store all subsequent members in the row
- prior_change = False
- for idx, (current_index_member, previous_index_member) in enumerate(zip(value, previous_value)):
- if current_index_member != previous_index_member or prior_change:
- row[idx] = current_index_member
- prior_change = True
- previous_value = value
- # If this is for a row index, we're already returning a row so just yield
- if not header:
- yield row
- else:
- result.append(row)
- # If it's for a header, we need to transpose to get it in row order
- # Example: result = [['A', 'A'], [None, 'B']] -> [['A', None], ['A', 'B']]
- if header:
- result = numpy.array(result).transpose().tolist()
- for row in result:
- yield row
|