Python ‐ Pandas ‐ vulnerability diff - dishplate/blog GitHub Wiki

import pandas as pd

# Define file paths
new_data_file = "new_data.csv"
old_data_file = "old_data.csv"
output_file = "output_matches.csv"

# Read the CSV files
new_data_df = pd.read_csv(new_data_file)
old_data_df = pd.read_csv(old_data_file)

# Extract relevant columns (Column 1 from new_data, Column 3 from old_data)
new_data_column = new_data_df.iloc[:, 0].astype(str).str.strip()
old_data_column = old_data_df.iloc[:, 2].astype(str).str.strip()

# Check if each value in Column 3 of old_data exists in Column 1 of new_data
old_data_df['ExistsInNewData'] = old_data_column.isin(new_data_column)

# Merge the old_data with new_data based on the matching values
merged_df = pd.merge(
    old_data_df,
    new_data_df[new_data_df.columns[0](/dishplate/blog/wiki/new_data_df.columns[0)],  # Select only Column 1 from new_data
    left_on=old_data_column.name,
    right_on=new_data_df.columns[0],
    how='left'
)

# Rename columns for clarity
merged_df.rename(
    columns={
        old_data_column.name: 'OldData_Column3',
        new_data_df.columns[0]: 'NewData_Column1'
    },
    inplace=True
)

# Dynamically reorder the columns: Keep all old_data columns first, then the match from new_data
existing_columns = list(old_data_df.columns)
if 'NewData_Column1' in merged_df.columns:
    existing_columns.append('NewData_Column1')

# Reorder columns based on their actual presence
final_df = merged_df[existing_columns]

# Save the result to a CSV file
final_df.to_csv(output_file, index=False)

print(f"Output written to {output_file}")