python 静态函数 - Shuang0420/Shuang0420.github.io GitHub Wiki
没有系统学过 Python 的类，所以遭殃了……在 Python 中定义静态函数要加 `@staticmethod` 装饰器，并且静态函数的参数列表里没有 `self`。另外，在类的方法中调用静态函数/类函数的方式是 `self.method()`。上段代码：
class ZhidaoSpider(BaseSpider):
    """Spider that cleans a fetched listing page and follows its links."""

    # Compiled once at class creation instead of on every clean_data() call.
    # The patterns are applied in this order; each match is replaced with "".
    # NOTE: `.` does not match a newline in the first pattern, so an <img>
    # tag that spans several lines is left in place.
    _CLEAN_PATTERNS = (
        re.compile(r'<img.*?>'),
        re.compile(r'<tr>|<div>|</div>|<p>|</p>|\r|\n'),
        re.compile(r'<br>|<br >|<br />'),
        re.compile(r'<em>|</em>|<strong>|</strong>'),
    )

    @staticmethod
    def clean_data(page):
        """Return ``page`` with selected markup and line breaks removed.

        Removes ``<img ...>`` tags, bare ``<tr>``/``<div>``/``<p>`` tags and
        CR/LF characters, ``<br>`` variants, and ``<em>``/``<strong>`` tags.
        Tags carrying attributes (e.g. ``<div class="list">``) do not match
        the literal patterns and are kept.

        Being a static method it takes no ``self``; it can still be called
        as ``self.clean_data(...)`` from instance methods.
        """
        for pattern in ZhidaoSpider._CLEAN_PATTERNS:
            page = pattern.sub("", page)
        return page

    def first_parse(self, response):
        """Yield one follow-up request per link in the listing page.

        The response body is decoded as GBK (undecodable bytes are ignored),
        cleaned with ``clean_data`` and searched for
        ``//div[@class="list"]/dl/dt/a`` anchors. For every anchor a dict
        with its ``title`` (text) and ``url`` (href) is attached to the
        request as ``meta['item_1']``; the request's callback is
        ``self.second_parse``.

        Anchors without a usable href are skipped: ``extract_first()``
        returns None when nothing matches, and ``Request`` cannot be built
        from a missing or empty URL.
        """
        # Keep the decoded text in its own name rather than rebinding the
        # `response` parameter.
        text = self.clean_data(response.body.decode('gbk', 'ignore'))
        links = Selector(text=text).xpath('//div[@class="list"]/dl/dt/a')
        for link in links:
            url = link.xpath('@href').extract_first()
            if not url:
                continue
            item = {
                'title': link.xpath('text()').extract_first(),
                'url': url,
            }
            yield Request(url=url, meta={'item_1': item},
                          callback=self.second_parse)