Kaip nuskaityti norimą svetainės bloką (pirmą):
from bs4 import BeautifulSoup
import requests

# Fetch the Delfi front page and pretty-print the first <article> block.
r = requests.get("https://www.delfi.lt/")
soup = BeautifulSoup(r.text, 'html.parser')
block = soup.find("article")  # first matching tag, or None when absent
# Guard: find() returns None when no <article> exists; calling
# .prettify() on None would raise AttributeError.
if block is not None:
    print(block.prettify())
Kaip išrinkti norimą informaciją iš bloko:
from bs4 import BeautifulSoup
import requests

# Fetch the Delfi front page and extract category, title and link
# from the first <article> block.
r = requests.get("https://www.delfi.lt/")
soup = BeautifulSoup(r.text, 'html.parser')
block = soup.find("article")
if block is not None:  # find() yields None when no <article> exists
    category = block.find(class_='headline-labels__label').get_text().strip()
    title = block.find(class_='headline-title').a.get_text().strip()
    link = block.find(class_='headline-title').a['href']
    # Normalize site-relative links to absolute URLs, consistent with
    # the multi-block version of this script.
    if link.startswith("/"):
        link = "https://www.delfi.lt" + link
    print(category)
    print(title)
    print(link)
Kaip gauti visų blokų informaciją:
from bs4 import BeautifulSoup
import requests

# Fetch the Delfi front page and print category, title and link
# for every <article> block.
r = requests.get("https://www.delfi.lt/")
soup = BeautifulSoup(r.text, 'html.parser')
blocks = soup.find_all("article")
for block in blocks:
    try:
        try:
            category = block.find(class_='headline-labels__label').get_text().strip()
        except AttributeError:
            # This block has no category label element; use an empty string.
            category = ""
        title = block.find(class_='headline-title').a.get_text().strip()
        link = block.find(class_='headline-title').a['href']
        # Normalize site-relative links to absolute URLs.
        if link.startswith("/"):
            link = "https://www.delfi.lt" + link
        print(category)
        print(title)
        print(link)
    except (AttributeError, KeyError, TypeError):
        # Skip blocks that lack the expected title/link structure.
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt and SystemExit.
        pass
Kaip įrašyti gautą informaciją į csv failą:
from bs4 import BeautifulSoup
import requests
import csv

# Scrape every <article> block from the Delfi front page and write
# title, category and link rows into a CSV file.
with open("delfi_naujienos.csv", 'w', encoding="UTF-8", newline="") as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(["Title", "Category", "Link"])
    r = requests.get("https://www.delfi.lt/")
    soup = BeautifulSoup(r.text, 'html.parser')
    blocks = soup.find_all("article")
    for block in blocks:
        # print(block.prettify())
        try:
            print('-------------------------------------------')
            title = block.find(class_="headline-title").a.get_text().strip()
            try:
                category = block.find(class_="headline-labels__label").get_text().strip()
            except AttributeError:
                # Block has no category label; leave the field empty.
                category = ""
            link = block.find(class_="headline-title").a['href']
            # Normalize site-relative links to absolute URLs.
            if link.startswith("/"):
                link = "https://www.delfi.lt" + link
            print(title)
            print(category)
            print(link)
            csv_writer.writerow([title, category, link])
        except (AttributeError, KeyError, TypeError):
            # Skip blocks without the expected structure. Narrowed from a
            # bare `except: ...` that silently swallowed every exception.
            pass
Pavyzdys Nr. 2
Norimo gamintojo parduodamų telefonų išrinkimas iš svetainės
from bs4 import BeautifulSoup
import requests
import csv

# Scrape Apple phone cards from the Telia shop page and write
# name, monthly price and full price rows into a CSV file.
r = requests.get("https://www.telia.lt/prekes/telefonai-ir-priedai/mobilieji-telefonai/apple")
soup = BeautifulSoup(r.text, 'html.parser')
blocks = soup.find_all(class_='mobiles-product-card card card__product card--anim js-product-compare-product')
with open("telia_apple_telefonai.csv", 'w', encoding="UTF-8", newline="") as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(["Phone", "Price per Month", "Price"])
    for block in blocks:
        name = block.find(class_="mobiles-product-card__title js-open-product").get_text().strip()
        # Hoist the repeated search: one find_all() call per card
        # instead of two identical ones.
        markers = block.find_all(class_="mobiles-product-card__price-marker")
        price_per_month = markers[0].get_text().strip()
        price = markers[1].get_text().strip()
        print(name)
        print(price_per_month)
        print(price)
        csv_writer.writerow([name, price_per_month, price])
Arba iš kelių puslapių, paverčiant viską į skaičius:
from bs4 import BeautifulSoup
import requests
import csv

# Scrape Samsung phone cards across all result pages of the Telia shop,
# convert the price strings to numbers, and write them into a CSV file.
with open("telia_samsung_telefonai.csv", 'w', encoding="UTF-8", newline="") as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(["Phone", "Price per Month", "Price"])
    page_num = 1
    while True:
        r = requests.get(f"https://www.telia.lt/prekes/telefonai-ir-priedai/mobilieji-telefonai/samsung?page={page_num}")
        page_num += 1
        soup = BeautifulSoup(r.text, 'html.parser')
        blocks = soup.find_all(class_='mobiles-product-card card card__product card--anim js-product-compare-product')
        # An empty result page means pagination is exhausted.
        if not blocks:
            print("No more pages")
            break
        for block in blocks:
            name = block.find(class_="mobiles-product-card__title js-open-product").get_text().strip()
            # Hoist the repeated search: one find_all() call per card
            # instead of two identical ones.
            markers = block.find_all(class_="mobiles-product-card__price-marker")
            # "12,34 Eur/mėn." -> take the first token, comma decimal -> float.
            price_per_month = float(markers[0].get_text().strip().split()[0].replace(",", "."))
            # "1 234.00 Eur" -> drop the trailing currency token and the
            # non-breaking-space thousands separators -> float.
            price = float(markers[1].get_text().strip().rsplit(maxsplit=1)[0].replace("\xa0", ""))
            print(name)
            print(price_per_month)
            print(price)
            csv_writer.writerow([name, price_per_month, price])
Užduotys
- Išbandyti šiose skaidrėse aprašytus žingsnius.
- Nuskaityti aktualius duomenis iš norimos svetainės.