Pandas - keshavbaweja-git/guides GitHub Wiki
# Load one worksheet of an Excel workbook into a DataFrame.
# 'path-to-excel' and sheet_name are placeholders to fill in.
df = pd.read_excel(
    'path-to-excel',
    sheet_name='',
    skiprows=0,
)
# Rename a column (maps the current name to the new name)
df = df.rename(columns={'current_col_name': 'new_col_name'})
# Drop the listed columns
df = df.drop(columns=['col_1', 'col_2'])
# Keep only the listed columns (every other column is dropped).
# Index.intersection ignores listed names that are not present in df.
df = df[df.columns.intersection(['col_1', 'col_2'])]
# Drop all columns whose name contains a value
# (remove the `~` to instead keep only the columns whose name contains it)
df = df.loc[:, ~df.columns.str.contains('value')]
# Filter rows where a column is not null (NaN/None/NaT).
# notna() and notnull() are aliases, so either line alone is sufficient.
# Note: empty strings are not treated as missing by these methods.
df = df[df['col_name'].notna()]
df = df[df['col_name'].notnull()]
# Keep rows that satisfy a condition on a column
df = df.loc[df['col_name'] > 0]
df = df.loc[df['col_name'].isin(['value1', 'value2'])]
# Keep rows of df_1 whose 'col_1' value appears in 'col_2' of df_2
df_1 = df_1.loc[df_1['col_1'].isin(df_2['col_2'])]
# Remove rows that repeat a value in 'col_name', keeping the first occurrence
df = df.drop_duplicates(subset=['col_name'], keep='first')
# Left-join df_2 onto df_1; fill in the key column names for each side
df = df_1.merge(df_2, left_on='', right_on='', how='left')
# Derive a new column from the other columns of each row.
# map_col_value is a user-supplied function that takes one row of the
# DataFrame and returns the value for that row.
df['new_column'] = df.apply(map_col_value, axis='columns')
# Convert every value in a column to upper case
df['col_name'] = df['col_name'].str.upper()
# Stack two DataFrames vertically (row-wise)
df_concat = pd.concat([df_1, df_2], axis='index')