# -*- coding: utf-8 -*-

# Define here the models for your spider and downloader middlewares
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html

from scrapy import signals
from scrapy import FormRequest, Request
import random, requests


class FangDownloaderMiddleware(object):
    """Downloader middleware that answers captcha challenges by hand.

    When a response's URL contains ``captcha``, the captcha image is fetched
    with ``requests`` and saved to ``captcha.png``, the code is read from
    stdin, and a ``FormRequest`` posting that code is returned in place of
    the response. Every other response passes through untouched.

    NOTE(review): ``requests.get`` and ``input()`` are synchronous calls made
    from inside ``process_response``; presumably acceptable for a manually
    supervised crawl -- confirm.
    """

    # Browser-like headers sent with the out-of-band ``requests`` calls.
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15",
    }

    # Seconds to wait on each out-of-band ``requests`` call; without a
    # timeout a stalled server would hang the call forever.
    request_timeout = 10

    # Cookies of the most recent captcha session. Replaced (never mutated)
    # on every challenge; ``None`` until the first challenge is seen so that
    # ``get_captcha_data`` can also be called on a fresh instance.
    cookies = None

    def process_response(self, request, response, spider):
        """Return ``response``, or a captcha-answering request replacing it.

        Args:
            request: The request that produced ``response``. Its callback,
                errback and ``meta['info']`` pair are carried over to the
                captcha form request.
            response: The downloaded response.
            spider: The running spider; its ``logger`` reports failures.

        Returns:
            ``response`` unchanged when its URL does not contain ``captcha``
            or when the captcha could not be handled (for example when
            ``meta['info']`` is missing); otherwise a ``FormRequest`` that
            submits the captcha form.
        """
        # Responses that are not a captcha page pass straight through.
        if 'captcha' not in response.url:
            return response

        try:
            return self._build_captcha_request(request, response)
        except Exception:
            # Best effort: on any failure hand the captcha page on unchanged.
            # ``except Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate, so Ctrl+C at the
            # prompt still stops the crawl, and the failure is logged
            # instead of vanishing silently.
            spider.logger.exception(
                'Could not handle captcha at %s; passing the response through',
                response.url,
            )
            return response

    def _build_captcha_request(self, request, response):
        """Open a captcha session, obtain the code and build the form request."""
        # Only the (province, city) pair of the original meta is kept.
        # Unpacking first also rejects a missing or malformed ``info`` before
        # any network traffic or prompt happens.
        province, city = request.meta.get('info')

        # Hit the captcha page once with requests to obtain session cookies.
        session_response = requests.get(
            response.url,
            headers=self.headers,
            timeout=self.request_timeout,
        )
        self.cookies = requests.utils.dict_from_cookiejar(session_response.cookies)

        # The captcha image lives next to the page URL (query string dropped).
        captcha_img_url = response.url.split('?t')[0] + 'captcha-image'
        captcha_data = self.get_captcha_data(captcha_img_url)

        # Submit the form with the same cookies the image was fetched with,
        # and route the outcome to the original callback / errback.
        return FormRequest(
            response.url,
            formdata=captcha_data,
            meta={'info': (province, city)},
            callback=request.callback,
            errback=request.errback,
            cookies=self.cookies,
        )

    def get_captcha_data(self, url):
        """Download the captcha image and read its code from stdin.

        Args:
            url: URL of the captcha image.

        Returns:
            dict: Form fields (``code``, ``submit``, ``token``) to post to
            the captcha form.
        """
        r = requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.request_timeout,
        )
        # Save the image as captcha.png in the current working directory.
        with open('captcha.png', 'wb') as file:
            file.write(r.content)
        # Prompt on stdin for the captcha code.
        captcha_code = input('请输入验证码：')
        # The meaning of the form's ``token`` field is unknown (original
        # author's note), so a random number is sent as a stand-in.
        return {
            'code': captcha_code,
            'submit': '提交',
            'token': str(random.randint(30, 80)),
        }

