Python爬虫常用小技巧之设置代理IP

  from bs4 import BeautifulSoup

  import requests

  import random

  def get_ip_list(url, headers, timeout=10):
      """Scrape ``ip:port`` proxy addresses from an HTML table page.

      The page is downloaded with ``requests`` and parsed with BeautifulSoup
      (``lxml`` parser). Every ``<tr>`` after the first one is inspected and
      the text of its second and third ``<td>`` cells is joined as
      ``"ip:port"``.

      NOTE(review): the column positions (IP in cell 1, port in cell 2) are
      assumed to match the proxy-list site this script targets -- verify
      before pointing it at another site.

      Args:
          url: Address of the page that lists the proxies.
          headers: HTTP headers sent with the request (e.g. a User-Agent).
          timeout: Seconds to wait for the server before ``requests`` gives
              up. Without it a stalled server would block forever.

      Returns:
          A list of ``"ip:port"`` strings; empty when no row qualifies.
      """
      web_data = requests.get(url, headers=headers, timeout=timeout)
      soup = BeautifulSoup(web_data.text, 'lxml')

      ip_list = []
      # The first <tr> is always skipped (presumably the table header row).
      for row in soup.find_all('tr')[1:]:
          tds = row.find_all('td')
          # Rows with fewer than three <td> cells (extra header rows,
          # separators, malformed markup) used to raise IndexError.
          if len(tds) < 3:
              continue
          # Strip surrounding whitespace so it cannot end up in a proxy URL.
          ip_list.append(tds[1].text.strip() + ':' + tds[2].text.strip())
      return ip_list

  def get_random_ip(ip_list):
      """Pick one proxy at random and return it as a ``requests`` proxies dict.

      Args:
          ip_list: Iterable of ``"ip:port"`` strings.

      Returns:
          A dict of the form ``{'http': 'http://ip:port'}``. Only the
          ``'http'`` scheme is mapped.

      Raises:
          IndexError: If ``ip_list`` is empty (raised by ``random.choice``).
      """
      # Prefix every address with the scheme so each entry is a proxy URL.
      proxy_list = ['http://' + ip for ip in ip_list]
      proxy_ip = random.choice(proxy_list)
      return {'http': proxy_ip}

  if __name__ == '__main__':
      # Demo entry point: scrape the proxy list page, pick one proxy at
      # random and print the resulting proxies dict.
      url = 'http://www.xicidaili.com/nn/'
      # A browser-like User-Agent is sent with the request (presumably
      # because the site rejects the default client identifier -- confirm).
      headers = {
          'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17'
      }

      ip_list = get_ip_list(url, headers=headers)
      # random.choice raises IndexError on an empty list; exit with a clear
      # message instead of a traceback when nothing was scraped.
      if not ip_list:
          raise SystemExit('No proxies found at ' + url)

      proxies = get_random_ip(ip_list)
      print(proxies)