04a 01 Data Merging Basics - HannaAA17/Data-Scientist-With-Python-datacamp GitHub Wiki
# Merge the taxi_owners and taxi_veh tables setting a suffix
taxi_own_veh = taxi_owners.merge(taxi_veh, on='vid', suffixes=('_own','_veh'))
# Print the most popular fuel_type
print(taxi_own_veh['fuel_type'].value_counts())
output
<script.py> output:
HYBRID 2792
GASOLINE 611
FLEX FUEL 89
COMPRESSED NATURAL GAS 27
Name: fuel_type, dtype: int64
# Merge the licenses and biz_owners table on account
licenses_owners = licenses.merge(biz_owners,on="account")
# Group the results by title then count the number of accounts
counted_df = licenses_owners.groupby('title').agg({'account':'count'})
# Sort the counted_df in desending order
sorted_df = counted_df.sort_values('account',ascending=False)
# Use .head() method to print the first few rows of sorted_df
print(sorted_df.head())
output
account
title
PRESIDENT 6259
SECRETARY 5205
SOLE PROPRIETOR 1658
OTHER 1200
VICE PRESIDENT 970
df1.merge(df2, on='col')\
.merge(df3, on='col)
# Merge licenses and zip_demo, on zip; and merge the wards on ward
licenses_zip_ward = licenses.merge(zip_demo,on='zip') \
.merge(wards,on='ward')
# Print the results by alderman and show median income
print(licenses_zip_ward.groupby('alderman').agg({'income':'median'}))