import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the pecan data set from a CSV file in the current working directory.
df=pd.read_csv("Pecan.csv")
# Preview the first rows (DataFrame.head defaults to 5) to check the parse.
df.head()
| | Water per acre | Salinity level | Fertilizer per acre | Pecan Production |
---|---|---|---|---|
0 | 68.138954 | 45.916165 | 42.896806 | 406.064207 |
1 | 79.907342 | 42.998570 | 49.763432 | 442.476260 |
2 | 75.301620 | 43.715734 | 48.242166 | 433.672569 |
3 | 91.255560 | 35.568128 | 53.779698 | 467.941026 |
4 | 100.998049 | 24.689234 | 50.967590 | 510.038689 |
# Summarize the frame: row count, column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   Water per acre       56 non-null     float64
 1   Salinity level       56 non-null     float64
 2   Fertilizer per acre  56 non-null     float64
 3   Pecan Production     56 non-null     float64
dtypes: float64(4)
memory usage: 1.9 KB
# Scatter fertilizer (x-axis) against pecan production (y-axis) to eyeball
# how the two columns relate before fitting a model.
plt.scatter(x=df["Fertilizer per acre"], y=df["Pecan Production"])
<matplotlib.collections.PathCollection at 0x7f90e0139f70>
from sklearn.linear_model import LinearRegression
# Split the frame by column position: columns 0-2 are the predictors and
# column 3 is the target the regression is fitted against.
X = df.to_numpy()[:, [0, 1, 2]]
y = df.to_numpy()[:, 3]

# Fit a linear regression of y on X; fit() returns the estimator itself, so
# constructing first and fitting second leaves `model` bound to the same object.
model = LinearRegression()
model.fit(X, y)

# One coefficient per predictor, in the same order as the columns of X.
print(model.coef_)
[ 0.81272313 -3.30151539 0.39332994]
# Constant term of the fitted linear model.
print(model.intercept_)
488.88676981057154
# Client inputs to score, one row per case. Each row must list its three
# values in the same column order as the X the model was fitted on
# (the first three columns of df).
clientData = [
    [120, 5, 80],
    [50, 1, 150],
    [100, 80, 100],
    [99, 78, 105],
]

# Print one predicted target value per input row.
print(model.predict(X=clientData))
[601.37236317 585.22090186 345.37084508 353.12780244]