python 静态函数 - Shuang0420/Shuang0420.github.io GitHub Wiki

没有系统学过 Python 的类,所以遭殃了……Python 定义静态函数要加 @staticmethod 装饰器。另外,类中的方法调用同一个类的静态函数/类函数的方式是 self.method()(也可以通过类名调用,即 ClassName.method())。上段代码:

class ZhidaoSpider(BaseSpider):
    """Spider that strips markup noise from a page and follows its list links.

    Demonstrates calling a ``@staticmethod`` (``clean_data``) from an
    instance method through ``self``.
    """

    # Compiled once at class-creation time instead of on every clean_data
    # call. The order matters: each pattern runs on the output of the
    # previous one, so they are kept as separate passes rather than merged.
    _CLEANUP_PATTERNS = (
        re.compile(r'<img.*?>'),
        re.compile(r'<tr>|<div>|</div>|<p>|</p>|\r|\n'),
        re.compile(r'<br>|<br >|<br />'),
        re.compile(r'<em>|</em>|<strong>|</strong>'),
    )

    @staticmethod
    def clean_data(page):
        """Return *page* with selected tags and line breaks removed.

        Removes, in order: ``<img ...>`` tags; ``<tr>``, ``<div>``,
        ``</div>``, ``<p>``, ``</p>`` and CR/LF characters; ``<br>``
        variants; ``<em>``/``<strong>`` opening and closing tags.

        Args:
            page: The HTML text to clean.

        Returns:
            The cleaned text.
        """
        for pattern in ZhidaoSpider._CLEANUP_PATTERNS:
            page = pattern.sub("", page)
        return page

    def first_parse(self, response):
        """Yield one follow-up ``Request`` per link found in the list block.

        The response body is decoded as GBK (undecodable bytes are dropped),
        cleaned with ``clean_data``, and searched for
        ``//div[@class="list"]/dl/dt/a`` anchors.

        Args:
            response: Object whose ``body`` attribute holds the raw bytes.

        Yields:
            A ``Request`` for each anchor's ``href``, carrying a dict with
            the link ``title`` and ``url`` under ``meta['item_1']`` and
            using ``self.second_parse`` as the callback.
        """
        text = self.clean_data(response.body.decode('gbk', 'ignore'))
        anchors = Selector(text=text).xpath('//div[@class="list"]/dl/dt/a')
        for anchor in anchors:
            url = anchor.xpath('@href').extract_first()
            if not url:
                # An anchor without href gives None here; building a Request
                # from it would raise and abort the remaining links.
                continue
            item = {
                'title': anchor.xpath('text()').extract_first(),
                'url': url,
            }
            yield Request(url=url, meta={'item_1': item}, callback=self.second_parse)
⚠️ **GitHub.com Fallback** ⚠️