In [1]:
import pandas as pd
In [2]:
# Load the pecan dataset; the file is tab-separated, so the separator
# must be given explicitly (read_csv assumes commas by default).
df = pd.read_csv(
    'Pecan.csv',
    sep='\t',
)
In [3]:
# Sanity-check the load: list each column with its non-null count and dtype
# so a mis-parsed file (wrong separator, missing values) is caught early.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Row ID               56 non-null     int64  
 1   Water per acre       56 non-null     float64
 2   Salinity level       56 non-null     float64
 3   Fertilizer per acre  56 non-null     float64
 4   Pecan Yield          56 non-null     float64
dtypes: float64(4), int64(1)
memory usage: 2.3 KB
In [4]:
# Remove the "Row ID" column before clustering.
# NOTE(review): presumably this is just a record identifier and not a
# meaningful feature -- confirm against the data dictionary.
#
# Rebinding instead of inplace=True, together with errors="ignore", keeps this
# cell idempotent: re-running it after the column is already gone is a no-op
# rather than a KeyError.
df = df.drop(columns=["Row ID"], errors="ignore")
In [5]:
from sklearn.cluster import KMeans
In [6]:
# Fit a 3-cluster k-means model on the feature columns.
#
# * random_state pins the centroid initialisation so the labels (and every
#   number derived from them) are reproducible across kernel restarts.
# * n_init is set explicitly because its default differs between
#   scikit-learn releases.
# * "Cluster IDs" is excluded defensively: a later cell adds that column to
#   `df`, and re-running this cell afterwards would otherwise cluster on the
#   previous run's labels. On a fresh top-to-bottom run the drop is a no-op.
#
# NOTE(review): features are used on their raw scales and every remaining
# column (including "Pecan Yield") is treated as a feature -- confirm that is
# intended, or standardise / select columns first.
km = KMeans(n_clusters=3, n_init=10, random_state=42).fit(
    df.drop(columns=["Cluster IDs"], errors="ignore")
)
In [7]:
# Display the cluster index the fitted model assigned to each row,
# in the same order as the rows of the frame it was fitted on.
km.labels_
Out[7]:
array([0, 1, 0, 0, 2, 1, 2, 2, 1, 2, 2, 0, 2, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       0, 0, 2, 0, 1, 2, 0, 1, 1, 2, 1, 2, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0,
       2, 0, 1, 0, 1, 0, 1, 1, 0, 1, 2, 1], dtype=int32)
In [8]:
# Number of labels produced; should equal the number of rows in the dataset.
km.labels_.shape[0]
Out[8]:
56
In [9]:
from sklearn.metrics import silhouette_score
In [10]:
# Mean silhouette coefficient of the clustering (ranges from -1 to 1; higher
# means tighter, better-separated clusters).
#
# Score against the feature columns only: a later cell writes the labels into
# `df` as "Cluster IDs", and re-running this cell afterwards would otherwise
# feed the labels back in as a feature and inflate the score. On a fresh
# top-to-bottom run the drop is a no-op and the result is unchanged.
silhouette_score(df.drop(columns=["Cluster IDs"], errors="ignore"), km.labels_)
Out[10]:
0.643891602317695
In [11]:
# Attach each row's cluster label as a new "Cluster IDs" column so the
# assignments travel with the data when it is exported.
df = df.assign(**{'Cluster IDs': km.labels_})
In [12]:
# Persist the frame to disk; index=False omits the pandas row index from
# the file.
df.to_csv(
    path_or_buf='PecanResults.csv',
    index=False,
)
In [ ]: