# -*- coding: utf-8 -*-
import scrapy, re
from dzdp.items import DzdpItem

class DianpingSpider(scrapy.Spider):
    """Scrape shop listings from a dianping.com "around" search.

    Requests result pages 1 and 2 of one fixed search URL and yields a
    ``DzdpItem`` for every shop entry found on each page.
    """

    name = 'dianping'
    allowed_domains = ['dianping.com']

    # Raw-string patterns compiled once: a plain '\s' literal is an invalid
    # escape sequence that newer Python versions warn about.
    _WHITESPACE_RE = re.compile(r'\s')
    # The search URL carries its page number as a ``p<digits>`` segment;
    # allow a following '/', '?' or '#' so a query string does not break it.
    _PAGE_RE = re.compile(r'p(\d+)(?:[/?#]|$)')

    def start_requests(self):
        """Yield one request per search result page (pages 1 and 2)."""
        for page in range(1, 3):
            url = 'http://www.dianping.com/search/around/13/10_k1RyUY8bmP3rMzFV/d500p{}'.format(page)
            yield scrapy.Request(url, callback=self.parse)

    @classmethod
    def _joined_text(cls, node, xpath):
        """Return all text matched by *xpath* under *node*, whitespace removed."""
        return cls._WHITESPACE_RE.sub('', ''.join(node.xpath(xpath).getall()))

    @classmethod
    def _page_of(cls, url):
        """Return the page number string embedded in *url*."""
        match = cls._PAGE_RE.search(url)
        if match:
            return match.group(1)
        # Fall back to the historical behaviour for URLs of another shape.
        return url.split('p')[-1]

    def parse(self, response):
        """Extract one ``DzdpItem`` per shop entry on a result page.

        Each item gets ``_id`` of the form ``<page>_<position>`` (position is
        1-based within the page), plus ``title``, ``score``, ``comments`` and
        ``price``. ``title`` and ``score`` come from ``.get()`` and are
        ``None`` when the node is missing; ``comments`` and ``price`` are the
        concatenated text of their links with all whitespace stripped.
        """
        shops = response.xpath('//div[@id="shop-all-list"][1]//div[@class="txt"]')
        page = self._page_of(response.url)
        for position, shop in enumerate(shops, 1):
            yield DzdpItem(
                _id='{}_{}'.format(page, position),
                title=shop.xpath('.//div[@class="tit"]/a/@title').get(),
                score=shop.xpath('.//div[contains(@class,"score")]/text()').get(),
                comments=self._joined_text(shop, './/a[contains(@module,"review")]//text()'),
                price=self._joined_text(shop, './/a[contains(@class,"price")]//text()'),
            )