Beautiful Soup Answers - robotautas/kursas GitHub Wiki

1

from bs4 import BeautifulSoup
import requests
from random import shuffle

r = requests.get("https://www.delfi.lt/")
titles = []
titles1 = []
titles2 = []

bad_words = ["Karas", "karas", "mirt", "rus", "apšaud"]

soup = BeautifulSoup(r.text, 'html.parser')
blocks = soup.find_all("article")

for block in blocks:
    try:
        title = block.find(class_='C-headline-title').a.get_text().strip()
        titles.append(title)
    except AttributeError:
        # the block has no headline link, skip it
        pass

for title in titles:
    if ":" in title:
        if not any(word in title for word in bad_words):
            titles1.append(title.split(":", maxsplit=1)[0])
            titles2.append(title.split(":", maxsplit=1)[1])

shuffle(titles2)

for index in range(len(titles1)):
    print(f"{titles1[index]}:{titles2[index]}")

Alternative:

import requests
from bs4 import BeautifulSoup
from random import shuffle

html = requests.get('http://delfi.lt').text
soup = BeautifulSoup(html, "html.parser")

title_tags = soup.select('.CBarticleTitle')
titles = [i.get_text() for i in title_tags]
bad_words = ['COVID', 'mirt', 'NVSC', 'skiep']

first_parts = []
second_parts = []
for title in titles:
    if ':' in title:
        if not any(word in title for word in bad_words):
            splitted = title.split(":", maxsplit=1)
            first_parts.append(splitted[0])
            second_parts.append(splitted[1])

shuffle(second_parts)

for i in range(len(first_parts)):
    print(first_parts[i], ":", second_parts[i])
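Note that shuffle reorders second_parts in place. If you would rather keep the original list untouched, random.sample can produce a shuffled copy instead (a sketch):

from random import sample

# a shuffled copy; second_parts itself keeps its original order
shuffled_second_parts = sample(second_parts, k=len(second_parts))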

2

Scraping part:

import requests
from bs4 import BeautifulSoup
import pickle

quotes = []

url = "https://quotes.toscrape.com/"

page = 1

while True:
    r = requests.get(f"{url}page/{page}/")
    page += 1
    soup = BeautifulSoup(r.text, 'html.parser')

    blocks = soup.find_all(class_="quote")

    if len(blocks) < 1:
        break

    for block in blocks:
        quote = block.find(class_="text").get_text().strip()
        author = block.find(class_="author").get_text().strip()
        author_href = block.find("a")['href']

        # fetch the author's page to read the "born" line (avoid a double slash in the URL)
        author_r = requests.get(url + author_href.lstrip("/"))
        author_soup = BeautifulSoup(author_r.text, 'html.parser')
        author_born = author_soup.find(class_="author-details").p.get_text().strip()
        quote_dict = {
            "quote": quote,
            "author": author,
            "author_born": author_born,
        }
        quotes.append(quote_dict)
        print(quote)
        print(author)
        print(author_born)
        print("--------------------------------------------------------")

print(quotes)
with open("quotes.pkl", 'wb') as file:
    pickle.dump(quotes, file)
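Every quote triggers a separate request for its author's page, so the same author may be downloaded many times. A small cache keyed by the href avoids the repeated requests (a sketch built on the same url and parsing as above; get_author_born is a hypothetical helper, not part of the original solution):

author_cache = {}

def get_author_born(author_href):
    # download and parse an author page only the first time its href is seen
    if author_href not in author_cache:
        author_r = requests.get(url + author_href.lstrip("/"))
        author_soup = BeautifulSoup(author_r.text, 'html.parser')
        author_cache[author_href] = author_soup.find(class_="author-details").p.get_text().strip()
    return author_cache[author_href]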

Game part:

import pickle
from random import choice

# load the scraped quotes once, then pick a random one for every round
with open("quotes.pkl", 'rb') as file:
    quotes = pickle.load(file)

while True:
    quote = choice(quotes)

    print(quote['quote'])
    answer1 = input("Guess the author: ")
    if answer1 == quote["author"]:
        print(f"Correct! Answer is {quote['author']}")
    else:
        print(" ".join(word[0] + "." for word in quote['author'].split()))
        answer2 = input("Guess the author: ")
        if answer2 == quote["author"]:
            print(f"Correct! Answer is {quote['author']}")
        else:
            print(quote['author_born'])
            answer3 = input("Guess the author: ")
            if answer3 == quote["author"]:
                print(f"Correct! Answer is {quote['author']}")
            else:
                print(f"Wrong! Answer is {quote['author']}")

    if_continue = input("Continue? y/n: ")
    if if_continue == "n":
        break
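The comparison above is exact, so "albert einstein" or a trailing space counts as a wrong answer. If you want to be more forgiving, a small helper can normalise both sides before comparing (a sketch; is_correct is a hypothetical helper, not part of the original solution):

def is_correct(answer, author):
    # ignore case and surrounding whitespace when comparing names
    return answer.strip().lower() == author.strip().lower()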

Alternative:

import requests
from bs4 import BeautifulSoup
from random import randint


url = 'http://quotes.toscrape.com'
r = requests.get(url)

soup = BeautifulSoup(r.text, "html.parser")

# quotes
quotes_spans = soup.select('.text')
quotes_list = [i.get_text() for i in quotes_spans]

# links to the author pages
a_blocks = soup.find_all('a', attrs={'class': None})
hrefs = [i['href'] for i in a_blocks if i.get_text()=="(about)"]

# answers
author_blocks = soup.find_all('small', class_='author')
answers = [i.get_text() for i in author_blocks]

# hints 1 (author initials)
hints1 = []
for author in answers:
    hint = ''
    for part in author.split():
        # keep already-abbreviated parts as they are, otherwise use the initial
        if '.' not in part:
            hint += f'{part[0]}.'
        else:
            hint += part
    hints1.append(hint)

# hints 2 (scraped from the author page)
def get_second_hint(i):
    r = requests.get(url + hrefs[i])
    soup = BeautifulSoup(r.text, "html.parser")
    text = soup.select('p')[1].get_text()
    return text

# game loop
while True:
    i = randint(0, len(answers) - 1)  # pick a random quote from the scraped page
    print('\n', quotes_list[i])
    answer1 = input('Your answer: ')
    if answer1 == answers[i]:
        print(f"Correct! Answer is {answers[i]}")
    else:
        print(hints1[i])
        answer2 = input('Your answer: ')
        if answer2 == answers[i]:
            print(f"Correct! Answer is {answers[i]}")
        else:
            print(get_second_hint(i))
            answer3 = input('Your answer: ')
            if answer3 == answers[i]:
                print(f"Correct! Answer is {answers[i]}")
            else:
                print(f"Wrong! Correct answer is {answers[i]}")
    if_continue = input('Continue? y/n: ')
    if if_continue != 'y':
        break
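get_second_hint downloads the author page every time a second hint is needed, even if the same quote comes up again. One way to avoid repeated requests is to memoise the function, for example with functools.lru_cache (a sketch, reusing the same url, hrefs and parsing as above):

from functools import lru_cache

@lru_cache(maxsize=None)
def get_second_hint(i):
    # identical to the version above, but each author page is downloaded only once
    r = requests.get(url + hrefs[i])
    soup = BeautifulSoup(r.text, "html.parser")
    return soup.select('p')[1].get_text()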