y = mx + c

Simple linear regression fits a straight line of this form through the observations: the "best-fit line". To find it, we minimise the sum of squared errors, where each squared error is the squared vertical distance between an actual data point and the corresponding point on the line. The model predicts a continuous real-valued target, such as a salary or a temperature.
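In symbols, ordinary least squares picks the slope m and intercept c that minimise the sum of squared errors over the n training points (x_i, y_i); the standard closed-form solution, written out here for reference, is:

$$\min_{m,\,c}\ \sum_{i=1}^{n}\bigl(y_i - (m x_i + c)\bigr)^2$$

$$m = \frac{\sum_{i=1}^{n}(x_i - \bar{x})(y_i - \bar{y})}{\sum_{i=1}^{n}(x_i - \bar{x})^2}, \qquad c = \bar{y} - m\bar{x}$$

where $\bar{x}$ and $\bar{y}$ are the sample means. This is exactly what scikit-learn's LinearRegression computes for us below.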
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the dataset: one feature (YearsExperience) and one target (Salary)
dataset = pd.read_csv('Salary_Data.csv')
dataset.head()
|   | YearsExperience | Salary |
|---|---|---|
| 0 | 1.1 | 39343.0 |
| 1 | 1.3 | 46205.0 |
| 2 | 1.5 | 37731.0 |
| 3 | 2.0 | 43525.0 |
| 4 | 2.2 | 39891.0 |
# Feature matrix (every column but the last) and target vector (the last column)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
from sklearn.model_selection import train_test_split
# Hold out 20% of the observations for testing; random_state=0 makes the split reproducible
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
x_train
array([[ 9.6],
[ 4. ],
[ 5.3],
[ 7.9],
[ 2.9],
[ 5.1],
[ 3.2],
[ 4.5],
[ 8.2],
[ 6.8],
[ 1.3],
[10.5],
[ 3. ],
[ 2.2],
[ 5.9],
[ 6. ],
[ 3.7],
[ 3.2],
[ 9. ],
[ 2. ],
[ 1.1],
[ 7.1],
[ 4.9],
[ 4. ]])
x_test
array([[ 1.5],
[10.3],
[ 4.1],
[ 3.9],
[ 9.5],
[ 8.7]])
y_train
array([112635., 55794., 83088., 101302., 56642., 66029., 64445.,
61111., 113812., 91738., 46205., 121872., 60150., 39891.,
81363., 93940., 57189., 54445., 105582., 43525., 39343.,
98273., 67938., 56957.])
y_test
array([ 37731., 122391., 57081., 63218., 116969., 109431.])
from sklearn.linear_model import LinearRegression
# Fit an ordinary least squares model on the training set
regressor = LinearRegression()
regressor.fit(x_train, y_train)
LinearRegression()
# Predict salaries for the held-out test set
y_pred = regressor.predict(x_test)
y_pred
array([ 40748.96184072, 122699.62295594, 64961.65717022, 63099.14214487,
115249.56285456, 107799.50275317])
plt.figure(figsize=(14,8), dpi=120)
plt.scatter(x_train, y_train, color='red')
plt.plot(x_train, regressor.predict(x_train), color="blue")
plt.title("Salary v/s Experience (Training Set)", fontsize=14)
plt.xlabel("Years of Experience", fontsize=14)
plt.grid(linestyle='--', color='grey', alpha=0.7)
plt.ylabel("Salary", fontsize=14)
plt.show()
plt.figure(figsize=(14,8), dpi=120)
plt.scatter(x_test, y_test, color='red')
plt.plot(x_train, regressor.predict(x_train), color="blue")
# The regression line is unchanged: it was fitted on the training data, so we simply reuse it
plt.title("Salary vs Experience (Test Set)", fontsize=14)
plt.xlabel("Years of Experience", fontsize=14)
plt.grid(linestyle='--', color='grey', alpha=0.7)
plt.ylabel("Salary", fontsize=14)
plt.show()
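As an optional diagnostic (not in the original notebook), a residual plot shows actual minus predicted salaries against experience; a well-specified linear model leaves residuals scattered randomly around zero:

plt.figure(figsize=(14,8), dpi=120)
plt.scatter(x_test, y_test - y_pred, color='red')
plt.axhline(0, color='blue')  # zero-error reference line
plt.title("Residuals (Test Set)", fontsize=14)
plt.xlabel("Years of Experience", fontsize=14)
plt.ylabel("Residual (actual - predicted)", fontsize=14)
plt.grid(linestyle='--', color='grey', alpha=0.7)
plt.show()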
For example, let's predict the salary of an employee with 12 years of experience:
regressor.predict([[12]])
array([138531.00067138])
Therefore, our model predicts that the salary of an employee with 12 years of experience is about $138,531.
Important note: Notice that the feature value (12 years) was passed inside a double pair of square brackets. That's because the "predict" method always expects a 2D array as its input, and [[12]] is exactly that. Simply put: 12 is a scalar, [12] is a 1D array, and [[12]] is a 2D array with one row and one column.
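A quick illustration of the difference (pure demonstration, using the already-imported NumPy):

print(np.array(12).ndim)      # 0 -> a scalar
print(np.array([12]).ndim)    # 1 -> a 1D array
print(np.array([[12]]).ndim)  # 2 -> a 2D array of shape (1, 1), which predict expects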
print(regressor.coef_)
print(regressor.intercept_)
[9312.57512673]
26780.09915062818
Therefore, the equation of our simple linear regression model is:

Salary = 9312.58 × YearsExperience + 26780.10
Important Note: To get these coefficients we read the "coef_" and "intercept_" attributes of our regressor object (the trailing underscore is scikit-learn's convention for values learned during fitting).
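As a sanity check, plugging 12 years into this equation by hand reproduces the earlier prediction:

# y = m*x + c with the fitted coefficients; coef_ is a length-1 array here
print(regressor.coef_[0] * 12 + regressor.intercept_)  # ~138531.00, matches predict([[12]])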