def parse_item(self, response):
    """Build an item from a detail page and request its related majors.

    Populates a ``GaokaopaiZhiyeItem`` with the page url, the code taken
    from the url, the title, the category links and the detail block, then
    returns a ``FormRequest`` to the related-majors ajax endpoint. The
    loaded item travels in ``meta['item']`` and the response is handled by
    ``self.parse_majors``.

    :param response: the downloaded detail page.
    :returns: a ``FormRequest`` whose form data carries the item's code.
    """
    # The code is the last dash-separated token before the ".html" suffix.
    # "[.]" matches a literal dot; a bare "." would match any character and
    # accept paths such as "-123xhtml". Written without a raw-string prefix
    # so the literal is valid on both Python 2 and Python 3.
    code_pattern = u'-([^-]+)[.]html'

    loader = ItemLoader(GaokaopaiZhiyeItem(), response)
    loader.add_value('url', response.url)
    loader.add_value('code', response.url, re=code_pattern)
    loader.add_css('name', u'.modTitle>h1::text')

    # One dict per category link; the href is selected once and reused for
    # both the url and the code extracted from it.
    categories = []
    for link in response.css(u'.catType>a'):
        href = link.css('::attr(href)')
        categories.append({
            'url': href.extract_first(),
            'code': href.re_first(code_pattern),
            'name': link.css('::text').extract_first(),
        })
    loader.add_value('category', categories)

    loader.add_css('detail', u'.zhiyeShow')
    item = loader.load_item()

    return FormRequest(
        url='http://www.gaokaopai.com/ajax-career-getRelateMajor.html',
        # NOTE(review): assumes the loader stores 'code' as a non-empty
        # list; a url that does not match code_pattern would raise here.
        formdata={'code': item['code'][0]},
        meta={'item': item},
        # The endpoint url is the same for every item, so the duplicate
        # filter must be bypassed.
        dont_filter=True,
        callback=self.parse_majors,
    )