[승규] 크롤링 - boostcampwm2023/web04-ALGOCEAN GitHub Wiki

크롤링 전문가가 전하는 웹크롤링 차단 우회 5가지 방법 | Hashscraper-Blogㅣ해시스크래퍼 블로그

How do I get links in Cheerio? | ScrapingBee

const axios = require("axios");
const cheerio = require("cheerio");

/**
 * Fetches an acmicpc.net board listing page and extracts the post links
 * found in the first cell of each table row.
 *
 * Each extracted entry has the shape `{ rank, title, artist }`:
 *   - `rank`   : 1-based position of the entry in the returned list.
 *   - `title`  : the link text with all whitespace removed.
 *   - `artist` : the link's `href` with all whitespace removed. The key name
 *                is kept as-is so the logged output shape does not change.
 *
 * Errors (network failures, unexpected markup) are logged with
 * `console.error` and are not rethrown.
 *
 * @param {string} [url] - Page to fetch; defaults to the question board list.
 * @returns {Promise<Array<{rank: number, title: string, artist: string}>>}
 *   The extracted entries, or an empty array when the request or parsing fails.
 */
const getHtml = async (url = "https://www.acmicpc.net/board/list/question/110") => {
    // CSS path to the rows of the board table on the listing page.
    const rowSelector = "body > div.wrapper > div.container.content > div.row > div:nth-child(3) > div > table > tbody > tr";
    const linkSelector = "td:nth-child(1) > a";

    try {
        // axios.get takes (url, config); a third argument is ignored, so every
        // request option has to live in this single config object.
        const response = await axios.get(url, {
            headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' },
            withCredentials: true,
        });

        const $ = cheerio.load(response.data);
        const posts = [];

        $(rowSelector).each((_, element) => {
            const link = $(element).find(linkSelector);
            const href = link.attr('href');
            // Rows without a link (e.g. header or notice rows) have no href;
            // skip them instead of throwing and losing every other row.
            if (href === undefined) {
                return;
            }
            posts.push({
                rank: posts.length + 1,
                title: link.text().replace(/\s/g, ""),
                artist: href.replace(/\s/g, ""),
            });
        });

        console.log("bodyList : ", posts);
        return posts;
    } catch (error) {
        console.error(error);
        return [];
    }
};

// Run the scraper when this script is executed. getHtml catches and logs its
// own errors, so the returned promise is intentionally not awaited here.
getHtml();