So I am trying to estimate the actual number of clusters with the fviz_nbclust
function but it doesn't stop showing me this error:
Error in do_one(nmeth) : NA/NaN/Inf in foreign function call (arg 1)
In addition warning messages:
1: In stats::dist(x) : NAs introduced by coercion
2: In storage.mode(x) <- "double" : NAs introduced by coercion
I have used sum(is.na(stand_numeric_data$variable))
for all the columns of my dataset and it returns 0 for all the variables so I am assuming I don't have NA values. Any tips? I am new to programming so any suggestion would be appreciated.
movies_data <- read.csv("movies_metadata.csv", na.string = "True")
only_numeric <- movies_data %>% select(16, 17, 23, 24, 21) #subset of columns
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$revenue))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$runtime))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_average))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_count))
library(caret) #standardization
preproc1 <- preProcess(only_numeric[,c(1:4,5)], method=c("center", "scale"))
stand_numeric_data <- predict(preproc1, only_numeric[,c(1:4,5)])
sum(is.na(stand_numeric_data$revenue))
library(factoextra) #estimate the actual number of clusters
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")
Error in do_one(nmeth) : NA/NaN/Inf in foreign function call (arg 1)
In addition warning messages:
1: In stats::dist(x) : NAs introduced by coercion
2: In storage.mode(x) <- "double" : NAs introduced by coercion
dput(head(movies_data, 5))
structure(list(adult = c("False", "False", "False", "False",
"False"), belongs_to_collection = c("{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}",
"", "{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}",
"", "{'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}"
), budget = c("30000000", "65000000", "0", "16000000", "0"),
genres = c("[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]",
"[{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]",
"[{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]",
"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]",
"[{'id': 35, 'name': 'Comedy'}]"), homepage = c("http://toystory.disney.com/toy-story",
"", "", "", ""), id = c("862", "8844", "15602", "31357",
"11862"), imdb_id = c("tt0114709", "tt0113497", "tt0113228",
"tt0114885", "tt0113041"), original_language = c("en", "en",
"en", "en", "en"), original_title = c("Toy Story", "Jumanji",
"Grumpier Old Men", "Waiting to Exhale", "Father of the Bride Part II"
), overview = c("Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.",
"When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.",
"A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.",
"Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive \"good man\" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.",
"Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own."
), popularity = c("21.946943", "17.015539", "11.7129", "3.859495",
"8.387519"), poster_path = c("/rhIRbceoE9lR4veEXuwCC2wARtG.jpg",
"/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg", "/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg",
"/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg", "/e64sOI48hQXyru7naBFyssKFxVd.jpg"
), production_companies = c("[{'name': 'Pixar Animation Studios', 'id': 3}]",
"[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]",
"[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]",
"[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]",
"[{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]"
), production_countries = c("[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]"
), release_date = c("1995-10-30", "1995-12-15", "1995-12-22",
"1995-12-22", "1995-02-10"), revenue = c(373554033, 262797249,
0, 81452156, 76578911), runtime = c(81, 104, 101, 127, 106
), spoken_languages = c("[{'iso_639_1': 'en', 'name': 'English'}]",
"[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]",
"[{'iso_639_1': 'en', 'name': 'English'}]", "[{'iso_639_1': 'en', 'name': 'English'}]",
"[{'iso_639_1': 'en', 'name': 'English'}]"), status = c("Released",
"Released", "Released", "Released", "Released"), tagline = c("",
"Roll the dice and unleash the excitement!", "Still Yelling. Still Fighting. Still Ready for Love.",
"Friends are the people who let you be yourself... and never let you forget it.",
"Just When His World Is Back To Normal... He's In For The Surprise Of His Life!"
), title = c("Toy Story", "Jumanji", "Grumpier Old Men",
"Waiting to Exhale", "Father of the Bride Part II"), video = c("False",
"False", "False", "False", "False"), vote_average = c(7.7,
6.9, 6.5, 6.1, 5.7), vote_count = c(5415L, 2413L, 92L, 34L,
173L)), row.names = c(NA, 5L), class = "data.frame")
summary(stand_numeric_data)
revenue runtime vote_average vote_count
Min. :-0.1114 Min. :-2.10206 Min. :-1.5192 Min. :-0.1414
1st Qu.:-0.1114 1st Qu.:-0.20831 1st Qu.:-1.5192 1st Qu.:-0.1381
Median :-0.1114 Median : 0.08303 Median : 0.1963 Median :-0.1381
Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
3rd Qu.:-0.1114 3rd Qu.: 0.37438 3rd Qu.: 0.8825 3rd Qu.:-0.1248
Max. :28.9583 Max. :20.35581 Max. : 1.9118 Max. :29.3968
title
Length:11406
Class :character
Mode :character
I could able to reproduce your error using
iris
data set as