图文详解牛顿迭代算法原理及Python实现

  import pandas as pd

  import numpy as np

  import os

  import matplotlib.pyplot as plt

  import matplotlib as mpl

  from Logit import Logit

  def loadCsvData(file, colName, mode='df'):
      """Load the requested columns from a CSV file.

      Args:
          file: Path of the CSV file; forwarded to ``pandas.read_csv``.
          colName: List of column names to load (forwarded as ``usecols``).
          mode: Loading mode. ``'df'`` returns the DataFrame itself;
              ``'set'`` returns a dict mapping each name in ``colName``
              to the values array of that column.

      Returns:
          A ``pandas.DataFrame`` when ``mode == 'df'``, otherwise a dict
          of ``{column name: values array}``.

      Raises:
          AssertionError: If ``mode`` is neither ``'set'`` nor ``'df'``.

      Note:
          ``read_csv`` ignores the order of ``usecols``; in ``'df'`` mode
          the columns come back in the order they appear in the file.
      """
      assert mode in ('set', 'df'), "unsupported mode: %r" % (mode,)

      # 'utf-8-sig' transparently drops a leading BOM if the file has one.
      df = pd.read_csv(file, encoding='utf-8-sig', usecols=colName)
      if mode == 'df':
          return df
      # mode == 'set' (guaranteed by the assertion above)
      return {col: df[col].values for col in colName}

  if __name__ == '__main__':
      # ============================
      # Load the CSV data
      # ============================
      # Resolve the dataset relative to this script. The original joined
      # __file__ with an absolute "/data/..." component, which makes
      # os.path.join discard __file__ and point at the filesystem root.
      # NOTE(review): assumes the "data" folder sits next to this script;
      # adjust if the dataset lives elsewhere.
      csvPath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "data", "dataset3.0alpha.csv")

      # read_csv ignores the order of `usecols`, so fix the feature order
      # explicitly: column 0 = density (x axis), column 1 = sugar rate
      # (y axis). The boundary formula further down relies on this order.
      dataX = loadCsvData(csvPath, ["含糖率", "密度"], 'df')[["密度", "含糖率"]]
      dataY = loadCsvData(csvPath, ["好瓜"], 'df')

      # Encode the class column as 1 ("是") / 0 (anything else); strip()
      # tolerates stray whitespace around the CSV values.
      label = np.array([1 if s.strip() == "是" else 0 for s in dataY['好瓜']])

      # ============================
      # Plot the sample points
      # ============================
      # x range over which the decision boundary will be drawn
      line_x = np.array([np.min(dataX['密度']), np.max(dataX['密度'])])

      # SimHei provides the CJK glyphs used in the title and legend.
      mpl.rcParams['font.sans-serif'] = [u'SimHei']
      plt.title('对数几率回归模拟\nLogistic Regression Simulation')
      plt.xlabel('density')
      plt.ylabel('sugarRate')

      # Negative samples in black
      plt.scatter(dataX['密度'][label == 0],
                  dataX['含糖率'][label == 0],
                  marker='^',
                  color='k',
                  s=100,
                  label='坏瓜')
      # Positive samples in red
      plt.scatter(dataX['密度'][label == 1],
                  dataX['含糖率'][label == 1],
                  marker='^',
                  color='r',
                  s=100,
                  label='好瓜')

      # ============================
      # Instantiate the logistic regression model
      # ============================
      logit = Logit(dataX, label)

      # Fit with Newton's method ("newtomMethod" is the name this script
      # already uses for the Logit attribute; kept as-is).
      logit.logitRegression(logit.newtomMethod)

      # Decision boundary w0*x + w1*y + w2 = 0 solved for y.
      # NOTE(review): assumes logit.w is a column vector laid out as
      # [w_density, w_sugar, bias] -- confirm against Logit.
      line_y = -logit.w[0, 0] / logit.w[1, 0] * line_x - logit.w[2, 0] / logit.w[1, 0]
      plt.plot(line_x, line_y, 'g-', label="牛顿迭代法")

      # Render the figure
      plt.legend(loc='upper left')
      plt.show()