AutoCar - PY09052021/FE_ICE_Task1 GitHub Wiki
https://github.com/PY09052021/FE_ICE_Task1
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape an Investopedia article on self-driving cars, label every paragraph
# as "autonomous car" content, append the rows to an existing BBC news
# dataset, and write the combined dataset to a CSV file.

URL = "https://www.investopedia.com/articles/investing/052014/how-googles-selfdriving-car-will-change-everything.asp"

# Raw strings: in a normal literal "\N" is a malformed named-unicode escape
# (a SyntaxError) and "\a" is the BEL control character, so the Windows paths
# would not mean what they appear to say.
BBC_DATASET_PATH = r"C:\FE\News_dataset_BBC.csv"
OUTPUT_PATH = r"C:\FE\autocar_dataset.csv"


def clean_text(text: str) -> str:
    """Return *text* with newlines turned into spaces and outer whitespace removed."""
    return text.replace("\n", " ").strip()


def fetch_paragraphs(url: str = URL, timeout: float = 30) -> list:
    """Download *url* and return the cleaned text of every ``<p>`` element.

    Every ``<p>`` on the page is collected, not only those inside the
    article body, and paragraphs that are empty after cleaning are kept.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within *timeout* seconds.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently scraping an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    return [clean_text(element.text) for element in soup.find_all("p")]


def build_article_frame(paragraphs) -> pd.DataFrame:
    """Return a frame with one row per paragraph plus the fixed label columns.

    Columns: ``Content`` (the paragraph text), ``Category`` (always
    ``'autonomous car'``) and ``Complete_Filename`` (always ``'Investopedia'``).
    """
    frame = pd.DataFrame(paragraphs, columns=["Content"])
    frame["Category"] = "autonomous car"
    frame["Complete_Filename"] = "Investopedia"
    return frame


def combine_datasets(article_frame: pd.DataFrame, bbc_frame: pd.DataFrame) -> pd.DataFrame:
    """Stack the article rows on top of the BBC rows with a fresh 0..n-1 index."""
    return pd.concat([article_frame, bbc_frame], ignore_index=True)


def main(
    url: str = URL,
    bbc_path: str = BBC_DATASET_PATH,
    output_path: str = OUTPUT_PATH,
) -> pd.DataFrame:
    """Run the whole pipeline and return the combined dataset.

    The BBC dataset is read as a ``;``-separated CSV from *bbc_path*; the
    combined frame is written to *output_path* with pandas' default CSV
    settings (so the row index is included as the first column).
    """
    article_frame = build_article_frame(fetch_paragraphs(url))
    bbc_frame = pd.read_csv(bbc_path, sep=";")
    combined = combine_datasets(article_frame, bbc_frame)
    # to_csv returns None when given a path, so its result must not be
    # assigned back over the frame.
    combined.to_csv(output_path)
    return combined


if __name__ == "__main__":
    df_final_news = main()