Scrapy模拟登录赶集网的实现代码-张二牛笔记本

　　import scrapy

　　import re

class GanjiSpider(scrapy.Spider):
    """Log in to ganji.com with a manually entered captcha.

    Flow:
      1. ``parse`` loads the login page and extracts the ``__hash__`` token.
      2. ``do_formdata`` downloads the captcha image, asks the operator to
         type it in, and posts the login form.
      3. ``after_login`` prints the body of the login response.
    """

    name = 'ganji'
    allowed_domains = ['ganji.com']
    start_urls = ['https://passport.ganji.com/login.php']

    # The token is matched up to the first closing quote. The character class
    # (instead of a greedy ``.+``) keeps the capture from running on to a
    # later ``"}`` on the same line of the page.
    _HASH_RE = re.compile(r'"__hash__":"([^"]+)"}')

    def parse(self, response):
        """Extract the ``__hash__`` token and request the captcha image.

        Yields a request for the captcha whose ``meta`` carries the token
        under ``'hash_code'``. Yields nothing (and logs an error) when the
        token cannot be found in the login page.
        """
        match = self._HASH_RE.search(response.text)
        if match is None:
            # Without the token the login form cannot be built, so stop here
            # rather than crash with AttributeError on ``None.group``.
            self.logger.error('__hash__ token not found in %s', response.url)
            return
        hash_code = match.group(1)

        # Captcha image URL.
        img_url = 'https://passport.ganji.com/ajax.php?dir=captcha&module=login_captcha'

        # Fetch the captcha; the callback saves it locally.
        yield scrapy.Request(
            img_url,
            callback=self.do_formdata,
            meta={'hash_code': hash_code},
        )

    def do_formdata(self, response):
        """Save the captcha image, read its text from stdin, post the form.

        ``response.body`` is written to ``yzm.jpg`` in the current working
        directory. The ``input()`` call blocks until the operator answers.
        """
        with open('yzm.jpg', 'wb') as f:
            f.write(response.body)

        # Three ways to solve the captcha: 1) save it and type it in by hand,
        # 2) a captcha-solving service, 3) tesseract. Here it is typed by hand.
        code = input('请输入验证码：')

        # Build the login form.
        formdata = {
            'username': 'your_username',
            'password': 'your_password',
            'setcookie': '14',
            'checkCode': code,
            'next': '',
            'source': 'passport',
            # response.meta is a shortcut for response.request.meta.
            '__hash__': response.meta['hash_code'],
        }

        login_url = "https://passport.ganji.com/login.php"

        # Send the login request.
        yield scrapy.FormRequest(
            url=login_url,
            formdata=formdata,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print the body of the login response."""
        print(response.text)

您可能感兴趣的文章:

Scrapy模拟登录赶集网的实现代码