Python实现梯度下降

看了Andrew Ng的关于机器学习中梯度下降的学习，用最简单粗暴的解法实现下

注意的地方就是 $\theta_0,\theta_1$ 是同时更新的，所以用一个临时变量接了下

收敛条件的判断：可以让函数迭代指定的次数后退出，也可以认为n次迭代的结果和n-1次的结果非常接近时就代表下降到谷底，退出函数

步数alpha的设置和epsilon的选择，这个例子我尝试步数为0.0025时就会振荡无法收敛，epsilon等于于0.001时有时会产生局部最优解，建议是 $10^{-4}$

明天继续尝试最小二乘法，这个代码写得比较loser，先放这，学完再优化，记录下现在和以后的思考有什么区别

#!/usr/bin/python
# coding=utf-8
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import math   # This will import math module
# 构造训练集
# x 特征值
# y 实际结果
x = np.arange(0, 50, 1)
m = len(x)
y = x/2 + np.random.randn(m) -5

# 终止条件
loop_max = 100000  # 最大迭代次数(防止死循环)
epsilon = 1e-4   # 精确度

alpha = 0.002  # 步长(注意取值过大会导致振荡即不收敛,过小收敛速度变慢)
count = 0  # 循环次数
finish = 0  # 终止标志
theta = np.random.randn(2)# 初始化theta
#theta = [0.5,-0.5]
temp = np.zeros(2)
error = 0

while count < loop_max:
    count+=1
    sum = np.zeros(2)
    for i in range(m):
        sum[0] = sum[0] + (theta[0] + theta[1] * x[i] - y[i])
    temp0 = theta[0] - alpha * sum[0] / m

    for i in range(m):
        sum[1] = sum[1]+ (theta[0] + theta[1] * x[i] - y[i]) * x[i]
    temp1 = theta[1] - alpha * sum[1] / m

    theta[0] = temp0
    theta[1] = temp1

    # 判断是否已收敛
    if abs((sum[1]+ sum[0] - error)) < epsilon:
        finish = 1
        break
    else:
        error = sum[1]+ sum[0]
    print('intercept = %s slope = %s' % (theta[0], theta[1]))


#slope, intercept, r_value, p_value, slope_std_error = stats.linregress(x, y)
#print('intercept = %s slope = %s' % (intercept, slope))
print('loop count = %d\n' % count, theta)
plt.plot(x, y, 'r*')
plt.plot(x, theta[1] * x + theta[0], 'g')
#plt.plot(x, slope * x + intercept, 'b')
plt.show()

偷懒了两天后用normal equation方法实现了，结果和stats.linregress的结果完全一样，注意矩阵需要垂直排列，记录俩函数用来修改矩阵堆叠方式

vstack()函数 
函数原型：vstack(tup) ，参数tup可以是元组，列表，或者numpy数组，返回结果为numpy的数组 
作用：在垂直方向把元素堆叠起来
>>>import numpy as np
>>>a=[[1],[2],[3]]
>>>b=[[1],[2],[3]]
>>>c=[[1],[2],[3]]
>>>d=[[1],[2],[3]]
>>>print(np.vstack((a,b,c,d)))
[[1]
 [2]
 [3]
 [1]
 [2]
 [3]
 [1]
 [2]
 [3]
 [1]
 [2]
 [3]]
 
 stack函数原型为：stack(arrays, axis=0)
 import numpy as np
a=[[1,2,3],
   [4,5,6]]
print("列表a如下：")
print(a)

print("增加一维，新维度的下标为0")
c=np.stack(a,axis=0)
print(c)

print("增加一维，新维度的下标为1")
c=np.stack(a,axis=1)
print(c)

输出：
列表a如下：
[[1, 2, 3], [4, 5, 6]]
增加一维，新维度下标为0
[[1 2 3]
 [4 5 6]]
增加一维，新维度下标为1
[[1 4]
 [2 5]
 [3 6]]

Numpy中stack()，hstack()，vstack()函数详解

#!/usr/bin/python
# coding=utf-8
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# 构造训练集
# x 特征值
# y 实际结果
x1 = np.arange(0, 50, 1) + np.random.randn(50) -5
m = len(x1)
x0 = np.full(m, 1.0)
y = x1/2 + np.random.randn(m) -5
target_data = np.vstack(y)      # 将结果矩阵修改为垂直方向
x = np.stack((x0, x1), axis=1)  # 构建X矩阵

#print(x,y)
theta = np.dot(np.dot(np.linalg.inv(np.dot(x.T, x)), x.T), target_data)

print(theta)
slope, intercept, r_value, p_value, slope_std_error = stats.linregress(x1, y)
print('intercept = %s slope = %s' % (intercept, slope))
"""
得到的结果和stats.linregress函数完全一样，猜测这个函数也是如此实现的
"""
plt.plot(x1, y, '*')
plt.plot(x, slope * x + intercept, 'b')
plt.plot(x, theta[1] * x + theta[0], 'r')
plt.show()

通过学习别人的代码和修改完成了最终版本，要注意步长和终止条件，步长alpha通过多次尝试最后选取适合值

#!/usr/bin/python
# coding=utf-8
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# 构造训练集
# x 特征值
# y 实际结果
x1 = np.arange(0, 50, 1) + np.random.randn(50)
m = len(x1)
x0 = np.full(m, 1.0)
x = np.vstack([x0, x1]).T
y = x1/2 + np.random.randn(m) -5

# 两种终止条件
loop_max = 10000  # 最大迭代次数(防止死循环)
epsilon = 1e-4

# 初始化权值
np.random.seed(0)
theta = np.random.randn(2)

alpha = 0.002  # 步长(注意取值过大会导致振荡即不收敛,过小收敛速度变慢) 大于0.002会不收敛
error = np.zeros(2)
count = 0  # 循环次数

while count < loop_max:
    count += 1
    delta = np.zeros(2)
    for i in range(m):
        delta = delta + (np.dot(theta, x[i]) - y[i]) * x[i]/m
    theta = theta - alpha * delta

    # 判断是否已收敛
    if np.linalg.norm(theta - error) < epsilon: # np.linalg.norm 求范类：平方和，开方
        break
    else:
        error = theta
    print(theta)

print(theta,count)
slope, intercept, r_value, p_value, slope_std_error = stats.linregress(x1, y)
print('intercept = %s slope = %s' % (intercept, slope))
plt.plot(x1, y, 'g*')
plt.plot(x, theta[1] * x + theta[0], 'r')
plt.plot(x, slope * x + intercept, 'b')
plt.show()