"""
A script to combine all XLSX files in a given folder into a single file.
Each worksheet of each original XLSX file becomes a worksheet in the final file.
"""
##The Basics
import xlrd
import csv
import os
import re
import pandas as pd
def dfs_from_excels(folder, v = True):
    """Load every worksheet of every XLSX workbook found directly in ``folder``.

    Args:
        folder: Path of the directory to scan (sub-directories are not visited).
        v: When true, print progress messages while parsing.

    Returns:
        A dict mapping ``"<file name without extension>-<sheet name>"`` to the
        DataFrame read from that sheet, or ``None`` when no worksheet was found.
    """
    if v: print("Parsing ", folder)
    output = {}
    # os.listdir() returns entries in arbitrary order; sort so that the order
    # of the collected sheets is reproducible between runs.
    for file in sorted(os.listdir(folder)):
        # Compare the extension case-insensitively (".xlsx", ".XLSX", ".Xlsx").
        if not file.lower().endswith(".xlsx"):
            continue
        # "~$<name>.xlsx" is the lock file Excel keeps next to a workbook that
        # is currently open; it is not a workbook and cannot be parsed.
        if file.startswith("~$"):
            continue
        if v: print("Parsing:", file)
        name = os.path.splitext(file)[0]
        # Open each workbook once and let the context manager close it, rather
        # than re-opening the file (and leaking the handle) for every sheet.
        with pd.ExcelFile(os.path.join(folder, file)) as workbook:
            for sheet in workbook.sheet_names:
                if v: print('Sheet:', sheet)
                output[name + "-" + sheet] = workbook.parse(sheet_name=sheet)
    if output:
        return output
    return None
## Characters Excel does not allow in a worksheet name, and its length limit
_INVALID_SHEET_CHARS = re.compile(r"[\[\]:*?/\\]")
_MAX_SHEET_NAME_LENGTH = 31


def _safe_sheet_name(name, used):
    """Return a worksheet name Excel accepts that is not yet in ``used``.

    Forbidden characters are replaced by ``_``, leading/trailing apostrophes
    are dropped, and the name is cut to 31 characters. If the result collides
    (case-insensitively) with a name already in ``used``, a ``~<n>`` suffix is
    appended. The chosen name is recorded in ``used`` (lower-cased).
    """
    base = _INVALID_SHEET_CHARS.sub("_", str(name)).strip("'")
    base = base[:_MAX_SHEET_NAME_LENGTH] or "Sheet"
    candidate = base
    counter = 0
    while candidate.lower() in used:
        counter += 1
        suffix = "~" + str(counter)
        # Shorten the base so that the suffixed name still fits the limit.
        candidate = base[:_MAX_SHEET_NAME_LENGTH - len(suffix)] + suffix
    used.add(candidate.lower())
    return candidate


def dfs_to_excel(sheets, output, v = True):
    """Write a dict of DataFrames to one XLSX workbook, one worksheet each.

    Args:
        sheets: Dict mapping the desired worksheet name to a DataFrame.
            DataFrames without columns are skipped.
        output: Path (``str``) of the workbook to create.
        v: When true, print each sheet key as it is processed.

    Returns:
        ``True`` when the workbook was written. ``False`` when the arguments
        have the wrong types or when there is no DataFrame with at least one
        column to write; in that case no file is created.
    """
    if not (isinstance(sheets, dict) and isinstance(output, str) and isinstance(v, bool)):
        return False
    # A workbook must contain at least one worksheet; bail out before opening
    # the writer instead of failing while the empty workbook is being saved.
    if not any(frame.shape[1] > 0 for frame in sheets.values()):
        return False
    used_names = set()
    with pd.ExcelWriter(output) as writer:
        for sheet, content in sheets.items():
            if v: print("Outputting", sheet)
            if content.shape[1] > 0:
                # Keys such as "<file>-<sheet>" can exceed Excel's limits, so
                # map each key to a valid, unique worksheet name. Keys that
                # are already valid and unique are written unchanged.
                content.to_excel(
                    writer,
                    sheet_name=_safe_sheet_name(sheet, used_names),
                    index=False,
                )
    return True
## Run the interactive part only when executed as a script, so that importing
## this module for its helper functions does not prompt for input.
if __name__ == "__main__":
    ## Where are we talking about?
    Location = {}
    ## Folder to scan; the combined XLSX workbook is written into that same folder
    Location['input'] = input('Folder: ')
    Location['output'] = os.path.join(Location['input'], 'Combined.xlsx')
    ## NOTE(review): because the output lives in the input folder, a later run
    ## on the same folder also reads the previous Combined.xlsx as an input.
    sheets = dfs_from_excels(Location['input'])
    ## dfs_to_excel returns False when it did not write anything (for example
    ## when no worksheet was found and sheets is None); say so instead of
    ## finishing silently.
    if dfs_to_excel(sheets, Location['output']):
        print("Wrote", Location['output'])
    else:
        print("Nothing written: no worksheets to combine in", Location['input'])
    input("Press Enter to Continue")
