HTML - mwicat/personal GitHub Wiki
Parse all hrefs
pip3 install beautifulsoup4
import re
import urllib.request

from bs4 import BeautifulSoup

# Path of the locally saved HTML page to scan for links.
site_fname = 'site.html'

# Parse inside a context manager so the file handle is closed even if
# parsing raises; BeautifulSoup reads the whole document up front, so the
# soup stays usable after the file is closed.
with open(site_fname) as f:
    soup = BeautifulSoup(f, "html.parser")

# Keep only absolute web links. The pattern accepts both http:// and
# https:// so secure links are not silently dropped; relative links,
# anchors, mailto: etc. are still excluded.
url_re = re.compile(r"^https?://")

# Passing a compiled regex as the attribute filter makes BeautifulSoup
# return only <a> tags whose href matches it (tags without href are skipped).
urls = [
    link.get('href')
    for link in soup.find_all('a', attrs={'href': url_re})
]