python useful code - jangsoohoon/python GitHub Wiki

Combine the rows that share the same movie id into a single record per movie:

# Bucket the genre rows by their 'movie' value.
groups = genres_df.groupby('movie')
# Iterating a GroupBy yields (key, sub-frame) pairs; the key is already the
# movie value shared by every row in the group, so use it directly instead of
# re-deriving it from the group's 'movie' column. The group's genres are
# merged into one '/'-separated string.
genres = [(movie_id, '/'.join(rows['genre'].values)) for movie_id, rows in groups]

Convert string labels to integer class numbers:

from sklearn.preprocessing import LabelEncoder
# The encoder must be instantiated before it can be fitted; without this line
# the snippet fails with NameError because `encoder` is undefined.
encoder = LabelEncoder()
# Learn the set of distinct labels in the column.
encoder.fit(serverity_df["serverity"])
# Map every label to its integer class index.
X_train_encoded = encoder.transform(serverity_df["serverity"])
# Show the learned labels; a label's position here is its encoded number.
print(encoder.classes_)

Build a column by lower-casing the text, splitting it on '-', taking the last piece, and stripping surrounding whitespace:

# Derive the "serverity" column from the text after the last '-' of each issue.
serverity_df["serverity"] = (
    serverity_df.issue
    .str.lower()        # normalise case first
    .str.split('-')     # break the text into '-'-separated pieces
    .str[-1]            # keep only the final piece
    .str.strip()        # drop leading/trailing whitespace
)

Resolve the error "'float' object has no attribute 'lower'" when reading data from a CSV file (empty cells are loaded as float NaN, which has no string methods):

# Cast the column to str up front so every entry supports string methods,
# then wrap the resulting series in a one-column dataframe.
component_df = record_df["component"].astype(str).to_frame()
print(component_df["component"])
# Distinct lower-cased component names; the list comes from a set, so its
# order carries no meaning.
total_component = list({name.lower() for name in component_df["component"]})