How to create 'clustered dotplots' for categorical data?

164 views Asked by At

I wish to create a graphic, like this one from the software called Fathom.

http://fathom.concord.org/help/HelpFiles/_img331.png

I have a two-way table of categorical frequency data that I wish to create something like a fluctuation plot, but the key difference is that you can see the individual data points. I've tried ggfluctuation(...), levelplots(...) and all manner of packages (like ggplot2), but with no success. I can find nothing on any forums to help either.

I'd be exceptionally grateful if someone could help direct me to, or create some code, that would achieve my objective.

1

There are 1 answers

4
d.b On BEST ANSWER

Here is improved version.

sample_data = structure(list(set = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), class = "factor", .Label = c("09t0101 TJ", 
"09t0102 MW", "09t0201 EH", "09t0202 NH")), grade = structure(c(1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("1", 
"2", "3", "4"), class = "factor"), freq = c(7L, 8L, 2L, 3L, 11L, 
4L, 11L, 3L, 3L, 8L, 3L, 8L, 3L, 9L, 3L, 2L)), .Names = c("set", 
"grade", "freq"), row.names = c(NA, -16L), class = "data.frame")

group = unique(sample_data$set) #Obtain the unique 'set' values for y-axis
max_x = length(unique(sample_data$grade)) #Obtain the maximum number of 'grades' to plot on x-axis
max_y = length(group) #Obtain the maximum number of 'set' to plot on y-axis
pdf(file="plot.pdf",width=8,height=6)
par(mar = c(5, 10, 4, 2)) #c(bottom, left, top, right)
plot(max_x,max_y,xlim=c(0.5,max_x+0.5),ylim=c(0.5,max_y +0.5),pch=NA,xlab="Grades",ylab=NA,xaxt="n",yaxt="n",asp=1) #asp = 1 IMPORTANT
axis(side = 2, at=c(1:length(group)), labels=c(as.vector(group)),las=2)
axis(side = 1, at=c(1:length(unique(sample_data$grade))), labels=c(as.vector(unique(sample_data$grade))))

r = 0.15 #The diameter of circles to be plotted

for (i in 1:length(group)){
a = subset(sample_data,sample_data$set==group[i]) #Subset new data.frame corresponding to first 'set'

for (j in 1:nrow(a)){
matrix_sz = ceiling(sqrt(a$freq[j])) #Determine the size of square matrix that can accomodate all the frequency
matrix_x = matrix(nrow = matrix_sz, ncol = matrix_sz) #Initiate matrix
matrix_y = matrix(nrow = matrix_sz, ncol = matrix_sz) #Initiate matrix
matrix_x[,1] = -1*((matrix_sz/2) - 0.5) #Find out relatve x co-ordinates for first column 
matrix_y[1,] = 1*((matrix_sz/2) - 0.5) #Find out relatve y co-ordinates for first row

# Find out other relative co-ordinates if the size of square matrix is more than 1x1
if (matrix_sz > 1){
for (column in 2:matrix_sz){
matrix_x[,column] = matrix_x[,column - 1] + 1
}
for (row in 2:matrix_sz){
matrix_y[row,] = matrix_y[row-1,] - 1
}
}

#Determine the co-ordinate of the center of the square matrix grid
xx = as.integer(a$grade[j])
yy = i
fq = 1 #To keep track of the corresponding 'freq'

# Plot circles around the center based on relative co-ordinates
for (row in 1:matrix_sz){
for (column in 1:matrix_sz){
if (fq > a$freq[j]){break} #Break if the necessary number of points have been plotted
xx1 = xx + r * matrix_x[row, column]
yy1 = yy + r * matrix_y[row, column]
# points (x = xx1, y = yy1, pch=1)
fq = fq + 1
symbols (x = xx1, y = yy1, circles=c(r/2.25),add =TRUE,inches=FALSE,bg = "gray")
}
}
}
}
dev.off()

enter image description here