Nexpose ‐ process export from query builder with Pandas - dishplate/blog GitHub Wiki
import pandas as pd
import os
from datetime import datetime
# Get the current date
current_date = datetime.now().strftime('%Y-%m-%d')
# Path to the folder containing CSV files, this will process all files in the folder that is csv
folder_path = r'D:\new folder'
# Create a new folder named "OUTPUT" within the folder_path
output_folder_path = os.path.join(folder_path, 'OUTPUT')
os.makedirs(output_folder_path, exist_ok=True)
# Grab csv files in the folder_path
csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]
# Loop through each file in the directory
for file in csv_files:
file_path = os.path.join(folder_path, file)
df = pd.read_csv(file_path)
# Count the number of cells in 'cntCritical' that are greater than zero
count_greater_than_zero = (df['cntCritical'] > 0).sum()
print(f"Number of 'cntCritical' cells greater than zero in {file}: {count_greater_than_zero}")
# Convert the 'hostName' column to lowercase
df['hostName'] = df['hostName'].str.lower()
# Split the 'hostName' column based on periods and create new columns dynamically
max_splits = df['hostName'].str.count('.').max()
split_columns = [f'hostName Split_{i+1}' for i in range(max_splits)]
# Split the 'hostName' column and keep all split parts
split_data = df['hostName'].str.split('.', expand=True)
split_data.columns = split_columns[:len(split_data.columns)]
# Combine the split columns with the original DataFrame
df = pd.concat([df, split_data], axis=1)
# Deduplicate based on the first split column (Should be 'hostName Split_1)
df = df.drop_duplicates(subset=split_columns[0])
# Prevent the removal of rows that have a blank 'agentKey' cell
df_rem_agent = df[(~df['agentKey'].duplicated()) | df['agentKey'].isna()]
# Rearrange columns in the desired order, cntFindings is the total vulnerabilities, obeying the vuln filter in the query builder
column_order = ['hostName Split_1', 'hostName', 'cntCritical', 'cntSevere', 'cntModerate', 'exploitCount', 'malwareCount', 'osDescription', 'ip', 'osCertainty', 'lastScanTimeIso']
# Create a new DataFrame with the specified column order and all other columns
new_df = df_rem_agent[column_order + [col for col in df_rem_agent.columns if col not in column_order]]
# Sort the DataFrame by the 'Critical vulns' column in descending order
df = new_df.sort_values(by='cntCritical', ascending=False)
# Assuming df is your DataFrame containing all columns
columns_to_keep = ['hostName', 'cntCritical', 'cntSevere',
'cntModerate', 'exploitCount', 'malwareCount',
'osDescription', 'ip',
'lastScanTimeIso']
# Drop columns that are not in the list
df = df.drop(columns=df.columns.difference(columns_to_keep))
# Construct the output file path with xlsx extension
output_file_name = os.path.splitext(file)[0] + '_' + current_date + '.xlsx'
xlsx_file_path = os.path.join(output_folder_path, output_file_name)
# Write the DataFrame to an Excel file
writer = pd.ExcelWriter(xlsx_file_path, engine='xlsxwriter')
df.to_excel(writer, index=False)
# Get the xlsxwriter workbook and worksheet objects
workbook = writer.book
worksheet = writer.sheets['Sheet1'] # Assuming the sheet name is 'Sheet1'
# Define the format for bold and red font
bold_red_format = workbook.add_format({'bold': True, 'font_color': 'red'})
# # Apply the format to the rows that meet the condition
# for row_num, crit_vuln in enumerate(df['cntCritical'], start=2):
# if 2 <= row_num <= 11 and crit_vuln >= 1:
# worksheet.set_row(row_num - 1, None, bold_red_format)
writer._save()
print(f"Processed data written to: {xlsx_file_path}")
print("\nSCRIPT COMPLETE")