In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load the dataset; the relative path is resolved against the working directory.
df = pd.read_csv("Pecan.csv")
In [27]:
# Preview the first five rows (head's default) to confirm the columns parsed as numbers.
df.head()
Out[27]:
Water per acre Salinity level Fertilizer per acre Pecan Production
0 68.138954 45.916165 42.896806 406.064207
1 79.907342 42.998570 49.763432 442.476260
2 75.301620 43.715734 48.242166 433.672569
3 91.255560 35.568128 53.779698 467.941026
4 100.998049 24.689234 50.967590 510.038689
In [6]:
# Report row count, per-column non-null counts and dtypes to check for missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Water per acre       56 non-null     float64
 1   Salinity level       56 non-null     float64
 2   Fertilizer per acre  56 non-null     float64
 3   Pecan Production     56 non-null     float64
dtypes: float64(4)
memory usage: 1.9 KB
In [9]:
# Visual check of how fertilizer use relates to yield before fitting a model.
# An explicit Axes with labels and a title lets the figure stand on its own.
fig, ax = plt.subplots()
ax.set_xlabel("Fertilizer per acre")
ax.set_ylabel("Pecan Production")
ax.set_title("Pecan Production vs. Fertilizer per acre")
# Kept as the last expression so the cell still echoes the PathCollection.
ax.scatter(df["Fertilizer per acre"], df["Pecan Production"])
Out[9]:
<matplotlib.collections.PathCollection at 0x7f90e0139f70>
In [10]:
from sklearn.linear_model import LinearRegression
In [11]:
# Feature matrix: the columns of df at positions 0, 1 and 2
# ("Water per acre", "Salinity level", "Fertilizer per acre" per df.info()).
X = df.to_numpy()[:, [0, 1, 2]]
In [13]:
# Target vector: the column of df at position 3 ("Pecan Production" per df.info()).
y = df.to_numpy()[:, 3]
In [15]:
# Fit a linear regression of y on the three feature columns of X.
model = LinearRegression()
# fit() returns the estimator itself; reassigning keeps the cell free of echoed output.
model = model.fit(X, y)
In [19]:
# Fitted slopes, one per column of X (X holds the first three columns of df, in order).
print(model.coef_)
[ 0.81272313 -3.30151539  0.39332994]
In [20]:
# Constant term of the fitted linear model.
print(model.intercept_)
488.88676981057154
In [23]:
# New scenarios to score with the fitted model. Each row must follow the same
# column order as X: [water per acre, salinity level, fertilizer per acre].
clientData = [
    [120, 5, 80],
    [50, 1, 150],
    [100, 80, 100],
    [99, 78, 105],
]
In [25]:
# One prediction per row of clientData; the predicted quantity is the column
# the model was fitted against (df column 3, "Pecan Production").
print(model.predict(clientData))
[601.37236317 585.22090186 345.37084508 353.12780244]
In [ ]: