I made an elbow method to validate the number of clusters i need to use as input on my K-Means algorithm, but i need a new confirmation, so i made the silhouette score, but the result is wrong, where is the error?
#READ EXCEL DB
fCenso<- read_excel('fCenso.xlsx')
#Z-SCORE
fCenso$'Z-Score Qtd Estabelecimentos' <- (fCenso$`Qtd estabelecimentos (Un)` - mean(fCenso$`Qtd estabelecimentos (Un)`))/sd(fCenso$`Qtd estabelecimentos (Un)`)
fCenso$'Z-Score Área colhida' <- (fCenso$`Área colhida (Há)` - mean(fCenso$`Área colhida (Há)`))/sd(fCenso$`Área colhida (Há)`)
#EXCLUDING DESCRIPTIVE VARIABLES (city, production culture, year)
fCensoPadronizado <- fCenso[, -c(1,2,3,4,5)]
#ELBOW METHOD (identify the number of clusters)
dev.off()
fviz_nbclust(fCensoPadronizado, kmeans, method = "wss", k.max = 10)
#ELABORATING THE K-MEANS ALGORITHM WITH 3 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE
cluster_kmeans3 <- kmeans(fCensoPadronizado,
centers = 3)
fCenso$cluster_K3 <- factor(cluster_kmeans3$cluster)
#ELABORATING THE K-MEANS ALGORITHM WITH 4 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE
cluster_kmeans4 <- kmeans(fCensoPadronizado,
centers = 4)
fCenso$cluster_K4 <- factor(cluster_kmeans4$cluster)
#ELABORATING THE K-MEANS ALGORITHM WITH 5 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE
cluster_kmeans5 <- kmeans(fCensoPadronizado,
centers = 5)
fCenso$cluster_K5 <- factor(cluster_kmeans5$cluster)
#ELABORATING THE K-MEANS ALGORITHM WITH 6 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE
cluster_kmeans6 <- kmeans(fCensoPadronizado,
centers = 6)
fCenso$cluster_K6 <- factor(cluster_kmeans6$cluster)
#RUNNING K-MEANS FOR DIFFERENT VALUES OF K (K = 3, 4, 5, 6)
k_means_result <- list()
for (k in 3:6) {
k_means_result[[as.character(k)]] <- kmeans(fCensoPadronizado, centers = k)
}
#CALCULATING THE SILHOUETTE SCORE FOR EACH VALUE OF K
silhouette_scores <- numeric(length(k_means_result))
for (i in seq_along(k_means_result)) {
silhouette_scores[i] <- mean(silhouette(k_means_result[[i]]$cluster, dist(fCensoPadronizado)))
}
#PRINTING THE SILHOUETTES SCORE'S RESULTS FOR EACH VALUE OF K
print(silhouette_scores)
The result:
> print(silhouette_scores)
[1] 1.612449 2.236173 3.186013 2.568394
Validated z-score calculations and library functionality.