import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


# Load the pecan dataset. The path is relative, so it is resolved against
# the current working directory.
df = pd.read_csv("Pecan.csv")


# Preview the first five rows (five is the default for head()).
df.head(n=5)


# Summarise column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Water per acre       56 non-null     float64
 1   Salinity level       56 non-null     float64
 2   Fertilizer per acre  56 non-null     float64
 3   Pecan Production     56 non-null     float64
dtypes: float64(4)
memory usage: 1.9 KB


# Fertilizer per acre versus pecan production. The axis labels are set first
# so that scatter() stays the last expression in the cell and the cell still
# displays the PathCollection it returns.
plt.xlabel("Fertilizer per acre")
plt.ylabel("Pecan Production")
plt.scatter(df["Fertilizer per acre"], df["Pecan Production"])

<matplotlib.collections.PathCollection at 0x7f90e0139f70>


from sklearn.linear_model import LinearRegression


# Feature matrix: pick the three predictors by name rather than by position,
# so a reordered or extended CSV raises a KeyError instead of silently
# training on the wrong columns. to_numpy() is what pandas recommends over
# the older .values attribute; the result is still a plain 2-D ndarray.
X = df[["Water per acre", "Salinity level", "Fertilizer per acre"]].to_numpy()


# Target vector: the column the regression is fitted to predict.
y = df["Pecan Production"].to_numpy()


# Fit a linear regression of y on the columns of X. The estimator is built
# first and then trained; fit() returns the estimator itself, so rebinding
# `model` to its result leaves the same fitted object in place (and keeps
# the cell from displaying anything).
model = LinearRegression()
model = model.fit(X, y)


# Fitted coefficient for each column of X, in column order:
# water per acre, salinity level, fertilizer per acre.
print(model.coef_)

[ 0.81272313 -3.30151539  0.39332994]


# Fitted intercept (constant term) of the linear model.
print(model.intercept_)

488.88676981057154


# Input rows to score with the fitted model. Each row follows the column
# order of X: [water per acre, salinity level, fertilizer per acre].
clientData = [
    [120, 5, 80],
    [50, 1, 150],
    [100, 80, 100],
    [99, 78, 105],
]


# Predicted pecan production for each row of clientData, in row order.
print(model.predict(clientData))

[601.37236317 585.22090186 345.37084508 353.12780244]

	Water per acre	Salinity level	Fertilizer per acre	Pecan Production
0	68.138954	45.916165	42.896806	406.064207
1	79.907342	42.998570	49.763432	442.476260
2	75.301620	43.715734	48.242166	433.672569
3	91.255560	35.568128	53.779698	467.941026
4	100.998049	24.689234	50.967590	510.038689