以绿色下载站的“最近更新”页面为例,spider 的核心代码如下:
# -*- coding: utf-8 -*-from scrapy.spider import Spiderfrom scrapy.http import Requestimport reclass MySpider(Spider): name = "downg" allowed_domains = ["downg.com"] start_urls = [ 'http://www.downg.com/new/0_%s.html' %x for x in xrange(1,7) ] def parse(self, response): urls_list=re.findall(r'class=app-name>