[승규] 크롤링 - boostcampwm2023/web04-ALGOCEAN GitHub Wiki
크롤링 전문가가 전하는 웹크롤링 차단 우회 5가지 방법 | Hashscraper-Blogㅣ해시스크래퍼 블로그
How do I get links in Cheerio? | ScrapingBee
const axios = require("axios");
const cheerio = require("cheerio");
/**
 * Downloads the acmicpc.net question-board list page at the hard-coded URL
 * and extracts one entry per table row whose first cell contains a link.
 *
 * Each entry has the shape `{ rank, title, artist }`:
 *   - `rank`   1-based index of the row among the selected `<tr>` elements
 *   - `title`  the link text with all whitespace removed
 *   - `artist` the link's `href` with all whitespace removed (the key name
 *              is kept unchanged so the logged output keeps its shape)
 *
 * The collected list is logged and returned. Any failure (request error,
 * unexpected markup, ...) is logged with `console.error` and an empty array
 * is returned instead.
 *
 * @returns {Promise<Array<{rank: number, title: string, artist: string}>>}
 */
const getHtml = async () => {
  try {
    // axios.get accepts (url, config): a third argument is ignored, so every
    // request option must live in this single config object.
    const html = await axios.get("https://www.acmicpc.net/board/list/question/110", {
      // Browser-like User-Agent (presumably to get past the site's bot
      // blocking - confirm if the request starts failing).
      headers: {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
      },
      withCredentials: true,
    });

    const $ = cheerio.load(html.data);
    // Every row of the board table; the post link sits in the first cell.
    const rows = $("body > div.wrapper > div.container.content > div.row > div:nth-child(3) > div > table > tbody > tr");
    const ulList = [];

    rows.each((i, element) => {
      console.log(i);
      const link = $(element).find("td:nth-child(1) > a");
      const href = link.attr("href");
      // A row without a link yields no href. Skip it rather than letting
      // `.replace` throw on undefined and discard every row collected so far.
      if (typeof href !== "string") {
        return;
      }
      ulList.push({
        rank: i + 1,
        title: link.text().replace(/\s/g, ""),
        artist: href.replace(/\s/g, ""),
      });
    });

    console.log("bodyList : ", ulList);
    return ulList;
  } catch (error) {
    console.error(error);
    return [];
  }
};
// Start the crawl when the script runs. getHtml catches and logs its own
// errors, so the returned promise is deliberately not awaited.
void getHtml();