Python script to combine xlsx files - dishplate/blog GitHub Wiki


import os

import pandas as pd




# Path to the folder containing XLSX files
folder_path = r'D:\New-query-14-day-and-older-published-cves\OUTPUT'


def combine_xlsx_files(folder_path, output_name='combined_data.xlsx'):
    """Combine every XLSX workbook in *folder_path* into a single workbook.

    Each workbook directly inside ``folder_path`` is read with
    ``pd.read_excel`` and gets an extra ``file_name`` column holding the
    source file's name without its extension. The frames are concatenated
    with a fresh index and written to ``<folder_path>/OUTPUT/<output_name>``.
    The ``OUTPUT`` sub-folder is created if it does not already exist.

    Args:
        folder_path: Directory that contains the ``.xlsx`` files to combine.
        output_name: File name of the combined workbook inside ``OUTPUT``.

    Returns:
        The path of the written workbook, or ``None`` when the folder holds
        no readable ``.xlsx`` files (nothing is written in that case).
    """
    # Create a new folder named "OUTPUT" within the folder_path
    output_folder_path = os.path.join(folder_path, 'OUTPUT')
    os.makedirs(output_folder_path, exist_ok=True)

    dfs = []

    # Sort the listing so the row order of the result is reproducible;
    # os.listdir() returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder_path)):
        # Excel drops a "~$<name>.xlsx" owner/lock file next to any workbook
        # that is currently open; it is not a real workbook and cannot be read.
        if filename.startswith('~$'):
            continue

        # Compare the extension case-insensitively so "REPORT.XLSX" is
        # picked up as well.
        if not filename.lower().endswith('.xlsx'):
            continue

        file_path = os.path.join(folder_path, filename)

        # Ignore directories that merely happen to be named like a workbook.
        if not os.path.isfile(file_path):
            continue

        df = pd.read_excel(file_path)

        # Tag every row with the file it came from (minus the extension)
        df['file_name'] = os.path.splitext(filename)[0]
        dfs.append(df)

    # pd.concat() raises ValueError on an empty list, so bail out early.
    if not dfs:
        return None

    combined_df = pd.concat(dfs, ignore_index=True)

    # Save the combined dataframe to a new XLSX file in the OUTPUT folder
    output_file_path = os.path.join(output_folder_path, output_name)
    combined_df.to_excel(output_file_path, index=False)
    return output_file_path


if __name__ == "__main__":
    combined_path = combine_xlsx_files(folder_path)
    if combined_path is None:
        print(f"No .xlsx files found in {folder_path}; nothing was combined.")
    else:
        print(f"Combined workbook written to {combined_path}")

    print("\nScript is done!")