import pandas as pd
# Load the tab-separated pecan dataset and print a summary of its columns,
# non-null counts and dtypes.
df = pd.read_csv('Pecan.csv', sep='\t')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   Row ID               56 non-null     int64
 1   Water per acre       56 non-null     float64
 2   Salinity level       56 non-null     float64
 3   Fertilizer per acre  56 non-null     float64
 4   Pecan Yield          56 non-null     float64
dtypes: float64(4), int64(1)
memory usage: 2.3 KB
from sklearn.cluster import KMeans

# Remove "Row ID" so it is not fed to the clusterer as a feature
# (presumably it is only a record identifier -- confirm against the data).
df = df.drop(columns=["Row ID"])

# Fit a 3-cluster k-means model on every remaining column.
# - random_state pins the centroid initialisation; without it the labels
#   (and everything derived from them) change from run to run.
# - n_init is spelled out because scikit-learn's default differs between
#   releases (10 in older versions, 'auto' in newer ones).
# Cluster indices are arbitrary identifiers: a different seed may permute them.
# NOTE(review): the features are clustered on their raw scale; if their units
# differ a lot, standardising first may be appropriate -- confirm with the
# analysis owner.
km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(df)
km.labels_
array([0, 1, 0, 0, 2, 1, 2, 2, 1, 2, 2, 0, 2, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 2, 0, 1, 2, 0, 1, 1, 2, 1, 2, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 2, 0, 1, 0, 1, 0, 1, 1, 0, 1, 2, 1], dtype=int32)
# Number of cluster labels produced by the fitted model.
km.labels_.shape[0]
56
from sklearn.metrics import silhouette_score

# Mean silhouette coefficient of the k-means labelling, computed on the same
# frame the model was fitted on.
silhouette_score(
    X=df,
    labels=km.labels_,
)
0.643891602317695
# Attach each row's cluster label as a new column, then write the labelled
# table to disk without the index column.
df = df.assign(**{'Cluster IDs': km.labels_})
df.to_csv('PecanResults.csv', index=False)