AutoCar - PY09052021/FE_ICE_Task1 GitHub Wiki

https://github.com/PY09052021/FE_ICE_Task1

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Investopedia article whose paragraphs are scraped below.
URL = "https://www.investopedia.com/articles/investing/052014/how-googles-selfdriving-car-will-change-everything.asp"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops early on an HTTP error so that an error page
# is not parsed and stored as if it were article content.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, "html.parser")

# Collect every <p> element found in the parsed page.
# Alternative selector tried earlier (kept for reference):
#   soup.find_all("div", class_="comp article-body-content mntl-sc-page mntl-block")
txt_elements = soup.find_all("p")

# Flatten each scraped element to one line of text: embedded newlines become
# spaces and surrounding whitespace is stripped.
Auto = [
    txt_element.text.replace('\n', ' ').strip()
    for txt_element in txt_elements
]

# One row per scraped element, labelled with a constant category and a
# constant source name.
df = pd.DataFrame(Auto, columns=['Content'])
df['Category'] = 'autonomous car'
df['Complete_Filename'] = 'Investopedia'

#df.head()

# Raw strings keep the Windows backslashes literal. In a normal string
# literal "\N" is a (malformed) named-unicode escape and "\a" is the BEL
# control character, so the un-prefixed paths were invalid or wrong.
df_bbc = pd.read_csv(r'C:\FE\News_dataset_BBC.csv', sep=';')

# Stack the scraped rows on top of the BBC rows and renumber the index.
frames = [df, df_bbc]
df_final_news = pd.concat(frames, ignore_index=True)

# to_csv() returns None when given a path, so its result is not assigned
# back: df_final_news stays usable as a DataFrame afterwards.
# NOTE(review): the file is written with pandas defaults (comma separator,
# index column included) although the input was read with sep=';' — confirm
# this is what the downstream loader expects.
df_final_news.to_csv(r'C:\FE\autocar_dataset.csv')

The rest of the code (data exploration and modelling) is reused from the BBC News example, so it is not copied here.