# -*- coding: utf-8 -*-

# Define here the models for your downloader middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html

import logging
import os
import tempfile
from urllib.parse import urljoin
from urllib.request import urlretrieve

import requests, numpy, pytesseract, re
from fontTools.ttLib import TTFont
from PIL import Image, ImageDraw, ImageFont
from scrapy import signals
from scrapy.http.response.html import HtmlResponse



class DzdpDownloaderMiddleware(object):
    """Downloader middleware that decodes font-obfuscated text in responses.

    When a page's text mentions ``svgtextcss``, the middleware fetches the
    linked stylesheet, downloads every ``.woff`` font it declares, renders the
    font's glyphs onto an image, reads them back with Tesseract OCR and
    replaces the matching ``&#x....;`` entities in the page text with the
    recognised characters.
    """

    _log = logging.getLogger(__name__)

    # Matches ``url("....woff`` declarations in the stylesheet.
    _WOFF_URL_RE = re.compile(r'url\("([A-Za-z0-9._/]*woff)"?')

    _REQUEST_HEADERS = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    # Seconds; without a timeout a stalled server would block the crawler.
    _REQUEST_TIMEOUT = 10

    # Layout of the image the glyphs are rendered on before OCR.
    _CANVAS_SIZE = (1800, 1000)
    _ROWS = 15
    _ROW_HEIGHT = 50
    _FONT_SIZE = 40

    def process_response(self, request, response, spider):
        """Return *response* with obfuscated entities decoded when possible.

        Responses that carry no text, or whose text does not mention
        ``svgtextcss``, are returned unchanged.  If decoding fails for any
        reason the failure is logged and the original response is returned.
        """
        # A response object whose ``text`` property raises AttributeError
        # (as non-text responses do) yields the default here instead of
        # propagating the error.
        text = getattr(response, "text", None)
        if not text or "svgtextcss" not in text:
            return response
        try:
            decoded = self.svg_to_text(response)
            return HtmlResponse(url=response.url, body=decoded, request=request, encoding='utf-8')
        except Exception:
            # Best effort: decoding depends on network access, a parsable font
            # and OCR, so fall back to the untouched response.
            self._log.exception(
                "Could not decode obfuscated text for %s; returning the original response",
                response.url,
            )
            return response

    def svg_to_text(self, response):
        """Return ``response.text`` with font-mapped entities replaced.

        Raises:
            ValueError: if the page has no ``<link>`` whose ``href`` contains
                ``svgtextcss``.
            Exception: anything raised while downloading or converting a font.
        """
        text = response.text
        css_href = response.xpath('//link[contains(@href, "svgtextcss")]/@href').get()
        if not css_href:
            raise ValueError("no svgtextcss stylesheet link found in %s" % response.url)
        css_url = urljoin('https://', css_href)
        css = requests.get(css_url, headers=self._REQUEST_HEADERS, timeout=self._REQUEST_TIMEOUT)

        replace_dict = {}
        # dict.fromkeys drops repeated URLs while keeping first-seen order, so
        # each font is downloaded and OCR'd only once.
        for woff in dict.fromkeys(self._WOFF_URL_RE.findall(css.text)):
            replace_dict.update(self._font_mapping(urljoin('https://', woff)))

        for entity, char in replace_dict.items():
            text = text.replace(entity, char)
        return text

    def _font_mapping(self, woff_url):
        """Download one font to a temporary file and return its entity map."""
        font_response = requests.get(
            woff_url, headers=self._REQUEST_HEADERS, timeout=self._REQUEST_TIMEOUT
        )
        font_response.raise_for_status()
        # A unique temporary file avoids clobbering a shared ``font.woff`` in
        # the working directory and is removed once the font is converted.
        fd, font_path = tempfile.mkstemp(suffix='.woff')
        try:
            with os.fdopen(fd, 'wb') as font_file:
                font_file.write(font_response.content)
            return self.fontConvert(font_path)
        finally:
            try:
                os.remove(font_path)
            except OSError:
                # Cleanup only; a leftover temp file must not mask the result.
                pass

    def fontConvert(self, fontPath):
        """Build an ``{"&#x....;": character}`` map for the font at *fontPath*.

        The glyph names (after the first two, which are skipped) are turned
        into characters, drawn onto a white image in rows using the font
        itself, and recognised with Tesseract (``chi_sim``).  Recognised
        characters are paired with the glyph names in order; if OCR returns a
        different number of characters the pairing is truncated to the shorter
        sequence and a warning is logged.
        """
        ttfont = TTFont(fontPath)
        try:
            # NOTE(review): the first two glyphs are presumably placeholders
            # such as ``.notdef`` -- confirm against the served fonts.
            codeList = ttfont.getGlyphOrder()[2:]
        finally:
            ttfont.close()

        im = Image.new("RGB", self._CANVAS_SIZE, (255, 255, 255))
        dr = ImageDraw.Draw(im)
        render_font = ImageFont.truetype(fontPath, self._FONT_SIZE)
        # Split the glyph list into rows so it can be drawn line by line.
        for row, names in enumerate(numpy.array_split(codeList, self._ROWS)):
            escaped = "".join(name.replace("uni", "\\u") for name in names)
            # Turn the ``\uXXXX`` escapes into the actual characters.
            line = escaped.encode('utf-8').decode('unicode_escape')
            dr.text((0, self._ROW_HEIGHT * row), line, font=render_font, fill="#000000")

        result = pytesseract.image_to_string(im, lang="chi_sim")
        result = result.replace(" ", "").replace("\n", "")
        if len(result) != len(codeList):
            self._log.warning(
                "OCR returned %d characters for %d glyphs in %s; the mapping may be misaligned",
                len(result), len(codeList), fontPath,
            )
        entities = [name.replace('uni', '&#x') + ';' for name in codeList]
        return dict(zip(entities, result))