Python ‐ Pandas ‐ vulnerability diff - dishplate/blog GitHub Wiki
import pandas as pd
# Define file paths
new_data_file = "new_data.csv"
old_data_file = "old_data.csv"
output_file = "output_matches.csv"
# Read the CSV files
new_data_df = pd.read_csv(new_data_file)
old_data_df = pd.read_csv(old_data_file)
# Extract relevant columns (Column 1 from new_data, Column 3 from old_data)
new_data_column = new_data_df.iloc[:, 0].astype(str).str.strip()
old_data_column = old_data_df.iloc[:, 2].astype(str).str.strip()
# Check if each value in Column 3 of old_data exists in Column 1 of new_data
old_data_df['ExistsInNewData'] = old_data_column.isin(new_data_column)
# Merge the old_data with new_data based on the matching values
merged_df = pd.merge(
old_data_df,
new_data_df[new_data_df.columns[0](/dishplate/blog/wiki/new_data_df.columns[0)], # Select only Column 1 from new_data
left_on=old_data_column.name,
right_on=new_data_df.columns[0],
how='left'
)
# Rename columns for clarity
merged_df.rename(
columns={
old_data_column.name: 'OldData_Column3',
new_data_df.columns[0]: 'NewData_Column1'
},
inplace=True
)
# Dynamically reorder the columns: Keep all old_data columns first, then the match from new_data
existing_columns = list(old_data_df.columns)
if 'NewData_Column1' in merged_df.columns:
existing_columns.append('NewData_Column1')
# Reorder columns based on their actual presence
final_df = merged_df[existing_columns]
# Save the result to a CSV file
final_df.to_csv(output_file, index=False)
print(f"Output written to {output_file}")