Create Lollipop-like plot with R

1.6k views Asked by At

I have a .csv file that looks like this:

Pos,ReadsME_016,ReadsME_017,ReadsME_018,ReadsME_019,ReadsME_020,ReadsME_021
95952794,62.36,62.06,55.56,51,60.35,44.27
95952795,100,100,100,100,100,100
95952833,0,0,-,0,-,-
95952846,0,0,-,0,0,-
95952876,0,-,0,0,0,0
95952877,38.89,28.98,25.67,36.99,37.91,16.86
95952878,100,100,100,100,100,100
95952884,0,-,0,-,-,0
95952897,18.7,20.52,20.94,16.43,22.68,12.55
95952898,100,100,75,80,-,100
95952902,10.88,8.93,10.22,10.63,13.51,6.06
95952903,100,100,100,75,-,100
95952915,10.75,8.7,7.91,8.35,15.12,8.88

What I want is to create a plot that is similar to this one:

http://www.scfbm.org/content/9/1/11/figure/F2

However, all my attempts have failed. Unfortunately, the tool is not yet available and I cannot read the source code. I've thought of ggplot and melt, but I do not come close to this graph. How can I get all read samples (ReadsME_016,ReadsME_017,..) listed on the x-axis and the positions listed on the y-axis? I don’t know how to deal with both the x- and y-axes being categorical while the plotted values should show percentages.

# Attempt so far: reshape to long format, keeping Pos as the identifier, so
# the sample names land in `variable` and the readings in `value`.
dataset <- melt(dataset, id.vars = "Pos")

# Scatter plot with the readings on x, the positions on y, and one colour per
# sample.
ggplot(
  data = dataset,
  mapping = aes(x = value, y = Pos, colour = variable)
) +
  geom_point()

Here is the complete .csv file:

Pos,ReadsME_016,ReadsME_017,ReadsME_018,ReadsME_019,ReadsME_020,ReadsME_021,ReadsME_022,ReadsME_023,ReadsME_024,ReadsME_025,ReadsME_026,ReadsME_027,ReadsME_028,ReadsME_030,ReadsME_031,ReadsME_032
95952794,62.36,62.06,55.56,51.0,60.35,44.27,53.73,61.69,57.04,64.16,61.48,59.42,66.93,49.71,55.23,66.67
95952795,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,-,100.0,100.0,100.0,100.0,-
95952833,0.0,0.0,-,0.0,-,-,100.0,-,-,-,-,0.0,-,-,0.0,-
95952846,0.0,0.0,-,0.0,0.0,-,0.0,0.0,-,-,-,0.0,-,-,-,-
95952876,0.0,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-
95952877,38.89,28.98,25.67,36.99,37.91,16.86,29.65,35.38,35.43,36.87,34.04,33.91,35.04,19.09,38.35,0.0
95952878,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,-,100.0,100.0,100.0,100.0,-
95952884,0.0,-,0.0,-,-,0.0,-,-,100.0,-,-,0.0,-,-,-,-
95952897,18.7,20.52,20.94,16.43,22.68,12.55,18.3,22.28,21.05,22.55,24.81,20.63,22.05,13.06,22.8,0.0
95952898,100.0,100.0,75.0,80.0,-,100.0,80.0,100.0,100.0,-,-,-,100.0,-,100.0,-
95952902,10.88,8.93,10.22,10.63,13.51,6.06,9.62,15.73,14.08,18.65,13.28,16.44,15.02,8.92,11.11,100.0
95952903,100.0,100.0,100.0,75.0,-,100.0,100.0,100.0,100.0,-,-,100.0,100.0,100.0,100.0,-
95952915,10.75,8.7,7.91,8.35,15.12,8.88,7.32,9.76,11.45,8.99,10.57,14.07,10.36,6.35,10.04,0.0
95952916,100.0,100.0,100.0,100.0,-,100.0,100.0,100.0,100.0,-,-,100.0,100.0,-,100.0,-
95952925,10.39,8.33,8.59,10.51,14.19,10.99,6.98,11.56,13.93,15.0,14.29,16.26,9.76,5.86,12.96,0.0
95952926,100.0,100.0,100.0,100.0,-,100.0,100.0,100.0,100.0,-,-,-,100.0,-,100.0,-
95952937,19.53,14.97,11.97,14.43,19.26,17.18,19.48,12.31,21.17,21.57,23.08,26.24,16.38,13.47,21.82,0.0
95952938,100.0,100.0,100.0,100.0,-,100.0,100.0,-,-,-,-,-,-,-,100.0,-
95952825,-,0.0,-,-,-,-,-,-,-,-,0.0,-,-,0.0,0.0,-
95952975,-,0.0,-,-,-,-,-,-,0.0,-,-,-,-,-,-,-
95952669,-,-,0.0,-,-,0.0,0.0,-,-,-,-,-,-,-,0.0,-
95952718,-,-,0.0,0.0,0.0,-,0.0,-,-,-,0.0,-,-,0.0,0.0,-
95952868,-,-,0.0,-,0.0,-,-,0.0,-,-,0.0,-,-,-,-,-
95952957,-,-,0.0,-,-,-,-,0.0,0.0,0.0,-,0.0,-,-,-,-
95952976,-,-,0.0,-,0.0,0.0,0.0,100.0,-,0.0,-,-,-,-,0.0,-
95952681,-,-,-,0.0,-,0.0,-,0.0,-,-,-,-,-,0.0,-,-
95952779,-,-,-,0.0,-,-,-,-,-,-,-,-,-,-,-,-
95952811,-,-,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-,-,-,0.0,-
95952821,-,-,-,0.0,-,-,-,-,-,-,-,-,-,-,-,-
95952823,-,-,-,0.0,-,-,-,-,-,-,-,-,-,-,-,-
95952859,-,-,-,0.0,0.0,-,-,0.0,0.0,-,0.0,-,-,0.0,0.0,-
95952882,-,-,-,0.0,-,-,-,-,-,-,0.0,-,-,-,-,-
95953023,-,-,-,0.0,-,0.0,-,-,-,-,-,-,-,-,-,-
95953058,-,-,-,0.0,-,0.0,-,-,-,-,-,-,-,-,-,-
95952664,-,-,-,-,-,0.0,0.0,-,-,0.0,-,-,-,-,0.0,-
95952801,-,-,-,-,-,0.0,-,-,-,-,-,-,-,-,-,-
95952968,-,-,-,-,-,-,0.0,-,-,0.0,-,-,-,-,-,-
95952797,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-,-,-
95952851,-,-,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-
95952894,-,-,-,-,-,-,-,-,-,-,0.0,-,-,-,-,-
95952807,-,-,-,-,-,-,-,-,-,-,-,-,-,0.0,-,-
95952712,-,-,-,-,-,-,-,-,-,-,-,-,-,-,0.0,-
1

There is 1 answer

3
MrFlick On BEST ANSWER

First, you want to make sure you are reading in your data properly. You have non-numeric values (specifically "-") mixed in with numeric values. I'm assuming those are missing values. Make sure you let R know that with na.strings="-". Then, to get something more consistent with the example plot, I swapped your variables around: the positions go on the x-axis, the samples on the y-axis, and the colour shows the percentage cut into five bins.

library(reshape2) # for melt()
library(ggplot2) # for ggplot()

# Read the comma-separated file, treating "-" as a missing value (NA) rather
# than as text mixed in with the numeric readings.
dataset <- read.table(
  "file.txt",
  header = TRUE,
  sep = ",",
  na.strings = "-"
)

# Melt to long format (sample name in `variable`, reading in `value`), then
# draw one point per position/sample pair: positions on x, samples on y,
# coloured by the reading cut into 5 intervals.
ggplot(
  data = melt(dataset, id.vars = "Pos"),
  mapping = aes(x = Pos, y = variable, colour = cut(value, breaks = 5))
) +
  geom_point()

enter image description here