"""
Linear regression is the most basic type of regression commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and we have
features associated with it. Features should be chosen very carefully,
as they determine how well our model will be able to make future predictions.
We adjust the weights of these features over many iterations so that they best
fit our dataset. In this particular code, a CSGO dataset (ADR vs Rating) is
used. We try to fit the best line through the dataset and estimate its parameters.
"""
import numpy as np
import requests
def collect_dataset():
    """Download the CSGO "ADR vs Rating" dataset and return it as a matrix.

    The CSV is fetched over HTTP, split on commas, and its first row (the
    column labels) is dropped. Values are left as strings.

    :return : dataset obtained from the link, as a ``np.matrix`` of strings
    :raises requests.HTTPError : if the server answers with an error status
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV data.
    response.raise_for_status()
    lines = response.text.splitlines()
    # lines[0] holds the column labels; blank lines would yield ragged rows.
    data = [line.split(",") for line in lines[1:] if line.strip()]
    return np.matrix(data)
def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
    """Perform one gradient-descent step and return the updated weights.

    :param data_x : feature matrix, one row per sample
    :param data_y : expected output for each sample (column vector)
    :param len_data : number of samples used to average the gradient
    :param alpha : learning rate of the model
    :param theta : current weights as a row vector
    :return : new weights, ``theta - (alpha / len_data) * gradient``
    """
    # Residual of the current predictions, laid out as a row vector.
    residual = np.dot(theta, data_x.T)
    residual -= data_y.T
    # Summed gradient of the squared error with respect to every weight.
    gradient = np.dot(residual, data_x)
    step = alpha / len_data
    return theta - step * gradient
def sum_of_square_error(data_x, data_y, len_data, theta):
    """Compute the halved mean of squared residuals for the given weights.

    :param data_x : feature matrix, one row per sample
    :param data_y : expected output for each sample (column vector)
    :param len_data : number of samples used as the divisor
    :param theta : weights as a row vector
    :return : sum of squared residuals divided by ``2 * len_data``
    """
    residual = np.dot(theta, data_x.T)
    residual -= data_y.T
    squared_total = np.sum(np.square(residual))
    return squared_total / (2 * len_data)
def run_linear_regression(data_x, data_y):
    """Fit a linear model to the dataset with batch gradient descent.

    Runs a fixed number of gradient-descent iterations starting from
    all-zero weights and prints the error after every iteration.

    :param data_x : feature matrix, one row per sample
    :param data_y : expected output for each sample (result vector)
    :return : weights of the line of best fit (row vector)
    :raises ValueError : if ``data_x`` contains no samples
    """
    iterations = 100000
    alpha = 0.0001550

    # Every row of data_x is a sample (the header row is not part of it), so
    # the averaging divisor is the full row count, not row count minus one.
    len_data = data_x.shape[0]
    if len_data == 0:
        raise ValueError("data_x must contain at least one sample")
    no_features = data_x.shape[1]

    theta = np.zeros((1, no_features))
    for i in range(iterations):
        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
        error = sum_of_square_error(data_x, data_y, len_data, theta)
        print(f"At Iteration {i + 1} - Error is {error:.5f}")
    return theta
def mean_absolute_error(predicted_y, original_y):
    """Return the mean absolute error between predictions and expected values.

    :param predicted_y : predicted outputs, indexable by position
    :param original_y : expected outputs
    :return : average of ``abs(expected - predicted)`` over ``original_y``
    """
    total = 0
    for index, expected in enumerate(original_y):
        total = total + abs(expected - predicted_y[index])
    return total / len(original_y)
def main():
    """Driver function: fetch the data, fit the model, print the weights."""
    dataset = collect_dataset()
    n_rows = dataset.shape[0]
    # Prepend a column of ones so the first weight multiplies a constant.
    features = np.c_[np.ones(n_rows), dataset[:, :-1]].astype(float)
    # The last column holds the value to predict.
    targets = dataset[:, -1].astype(float)

    theta = run_linear_regression(features, targets)

    print("Resultant Feature vector : ")
    for column in range(theta.shape[1]):
        print(f"{theta[0, column]:.5f}")


if __name__ == "__main__":
    main()
import tensorflow as tf
# Create a constant node in the graph.
hello = tf.constant("hello , tensor")
# Evaluate the node inside a session. Using the session as a context manager
# closes it when the block exits, instead of leaving it open.
with tf.Session() as sess:
    print(sess.run(hello))
b'hello , tensor'
# Basic arithmetic.
# Plain Python would bind the value directly (a = 2); here each value is
# wrapped in a constant node instead.
a = tf.constant(2)
b = tf.constant(3)

# The sum and product are only computed when the session runs them.
with tf.Session() as sess:
    print("addition of number ", sess.run(a + b))
    print("multiplication of number ", sess.run(a * b))
addition of number 5
multiplication of number 6
# To supply values at run time, declare placeholders; each one fixes the
# data type of the value that will be fed in.
x = tf.placeholder(tf.int32)
y = tf.placeholder(tf.int32)

add = tf.add(x, y)
multi = tf.multiply(x, y)

# Launch the default graph, feeding concrete values for the placeholders.
with tf.Session() as sess:
    print("Addition of number : ", sess.run(add, feed_dict={x: 2, y: 3}))
    print("Multiply of number : ", sess.run(multi, feed_dict={x: 2, y: 3}))
Addition of number : 5
Multiply of number : 6
# Matrix multiplication.
# A constant holding a 1x2 matrix.
matrix1 = tf.constant([[3.0, 3.0]])
# A constant holding a 2x1 matrix.
matrix2 = tf.constant([[2.0], [2.0]])

# 'product' is the matmul op taking 'matrix1' and 'matrix2' as inputs; it
# represents the result of the multiplication until a session evaluates it.
product = tf.matmul(matrix1, matrix2)

with tf.Session() as sess:
    result = sess.run(product)
    print(result)
[[12.]]
# import library
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# just use to make graph looks nicer
%config InlineBackend.figure_format = 'svg'
使用 TensorFlow 的线性回归
# y = f(x)
# We have a set of inputs and a set of outputs that depend on those inputs.
# What we need to find is f(x), the relation between x and y (i.e. input and output).
我们需要学习它们之间的关系,这被称为**假设**。
在线性回归的情况下,假设是一条直线,即**h(x) = wx + b**,其中 w 是称为**权重**的向量,b 是称为**偏差**的标量。权重和偏差称为模型的参数。
我们需要做的就是根据给定的数据集估算**w** 和**b** 的值,以便生成的假设产生**最低成本 J**,该成本由以下**成本函数**定义:
$$J(w, b) = \frac{1}{2m} \sum_{i=1}^{m} \left( h(x_i) - y_i \right)^2$$
其中 m 是给定数据集中数据点的数量。此成本函数也称为均方误差。
# Hyperparameters and size of the synthetic training set
learning_rate = 0.01
epochs = 200
n_sample= 30
# n_sample evenly spaced x values covering [0, 20]
train_x = np.linspace(0,20,n_sample)
# y = 3*x + noise, where the noise is standard-normal samples scaled by 4
# (there is no intercept term in the generated data)
train_y = 3*train_x + 4*np.random.randn(n_sample)
# First look at the data: by default the points are joined with a line
plt.plot(train_x, train_y)
plt.show()
# A joined line is hard to read for noisy data,
# so draw the samples as separate markers instead
plt.plot(train_x, train_y , 'o')
plt.show()
# Easier to read.
# Now overlay the noise-free line y = 3*x on the noisy samples
# to compare the data with and without noise
plt.plot(train_x, train_y , 'o')
plt.plot(train_x, 3*train_x)
plt.show()
# Placeholders through which the inputs (X) and targets (Y) are fed
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# Trainable parameters; np.random.randn() with no arguments yields a single
# float, so both start as one random scalar each
w = tf.Variable(np.random.randn() , name = 'weight')
b = tf.Variable(np.random.randn() , name = 'bias')
# Prints the tensor object for b, not its numeric value
print(b.value())
Tensor("bias_4/read:0", shape=(), dtype=float32)
# Hypothesis: prediction = X*w + b
prediction = tf.add(tf.multiply(X, w) ,b)
# Cost to minimize: sum of squared errors divided by 2*n_sample
cost = tf.reduce_sum((prediction-Y)**2 / (2*n_sample))
# Gradient-descent op that updates the variables to reduce the cost
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Op that initializes all variables; it must be run before training
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # The number of passes over the training points is set by `epochs`.
    for epoch in range(epochs):
        # One optimizer step per (x, y) training point.
        for x, y in zip(train_x, train_y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Every 20th epoch, report the cost over the whole training set
        # together with the current parameters; the cost should shrink.
        if epoch % 20 == 0:
            c, W, B = sess.run([cost, w, b], feed_dict={X: train_x, Y: train_y})
            print("cost:{} w:{} b:{}".format(c, W, B))
    # Read the fitted parameters once training has finished.
    weight, bias = sess.run([w, b])

# Plot the training points together with the fitted line.
plt.plot(train_x, train_y, "o")
plt.plot(train_x, weight * train_x + bias)
plt.show()
cost:54.30986404418945 w:2.182626247406006 b:0.5245328545570374
cost:8.714800834655762 w:2.99739408493042 b:0.586540699005127
cost:8.714640617370605 w:2.9978182315826416 b:0.5799477100372314
cost:8.714499473571777 w:2.9982216358184814 b:0.5736895799636841
cost:8.714378356933594 w:2.9986038208007812 b:0.5677502751350403
cost:8.71426773071289 w:2.998966693878174 b:0.5621128678321838
cost:8.714173316955566 w:2.9993114471435547 b:0.5567613840103149
cost:8.714091300964355 w:2.999638319015503 b:0.5516825914382935
cost:8.714018821716309 w:2.9999492168426514 b:0.546862006187439
cost:8.713956832885742 w:3.000243902206421 b:0.5422860980033875
这将是预测结果