plot a single sample

Question

Plot beta values of many CpGs across conditions

0

Entering edit mode

6.0 years ago

Pin.Bioinf ▴ 340

Hello, I have a table of beta values for the CpGs of one gene across the different conditions (columns), and I want to plot those values as a line with an uncertainty range (x would be the 5 conditions, and y the beta values for each condition). The thing is, I don't know how to restructure this table for plotting, nor how to plot it.

 > table_BS
      X              X01PM0471C_BS              X01PM0471T_BS              X06PM1188C_BS              X06PM1188T_BS              X07PM1349C_BS
    1300                0.88647                 0.8786                0.89372                0.90263                0.90162
    1301                0.78965                0.87323                0.80041                0.83332                 0.7667
    1302                0.87391                0.87248                0.85595                0.83394                0.91689
    1303                0.91277                0.88628                0.94335                0.91428                0.93645
    1304                0.91465                  0.907                0.91991                0.89802                0.91308
    1305                0.69227                0.72461                0.81498                0.70621                0.79752
    1306                0.88589                0.89261                0.89747                0.88588                0.89203
    1307                0.77612                0.79795                0.80669                0.81899                0.83879
    1308                 0.7598                0.78125                0.78424                 0.7711                0.68918
    1309                0.86144                0.83854                0.85294                0.83879                0.88717

Can you help me? I'd like something like this: https://goo.gl/images/y8TYJU

betavalues plots • 1.3k views

ADD COMMENT • link updated 5.9 years ago by Kevin Blighe 87k • written 6.0 years ago by Pin.Bioinf ▴ 340

0

Entering edit mode

facet them with geom_smooth().

With the OP's data, without faceting:

# Load the reshaping and plotting packages up front.
library(tidyr)
library(ggplot2)

# Read the tab-separated beta-value table (one row per CpG, identified by the
# `X` column; every other column is one condition).
test <- read.csv("test.txt", sep = "\t", header = TRUE, strip.white = TRUE,
                 stringsAsFactors = FALSE)

# Reshape wide -> long: one row per (X, condition) pair, with the beta value
# stored in `expn`. pivot_longer() replaces the superseded gather().
gtest <- pivot_longer(test, cols = -X, names_to = "condition",
                      values_to = "expn")

# All conditions in a single panel: points joined by lines, plus a smoothed
# confidence band per condition.
ggplot(gtest, aes(X, expn, color = condition, group = condition)) +
  # linetype = 0 ("blank") hides the fitted curve so only the band is drawn.
  geom_smooth(linetype = 0, fill = "grey80", alpha = 0.2) +
  geom_point() +
  geom_line() +
  theme_bw() +
  # One colour per condition; the example table has exactly five conditions.
  scale_colour_manual(values = c("green", "blue", "red", "orange",
                                 "steelblue")) +
  # Place the legend inside the panel (coordinates relative to the plot area).
  theme(legend.position = c(0.8, 0.85))

# With facet:
library(tidyr)
library(ggplot2)

# Read the tab-separated beta-value table (one row per CpG, identified by the
# `X` column; every other column is one condition).
test <- read.csv("test.txt", sep = "\t", header = TRUE, strip.white = TRUE,
                 stringsAsFactors = FALSE)

# Reshape wide -> long: one row per (X, condition) pair, with the beta value
# stored in `expn`. pivot_longer() replaces the superseded gather().
gtest <- pivot_longer(test, cols = -X, names_to = "condition",
                      values_to = "expn")

# Show the column names of the long table.
names(gtest)

# One panel per condition, each with its points, connecting line and a
# smoothed curve with confidence band.
ggplot(gtest, aes(X, expn, color = condition, group = condition)) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  facet_grid(~condition) +
  theme_bw()

ADD REPLY • link 5.9 years ago by cpad0112 21k

score 0 · Answer 1 · 2018-06-03

You just need to plot the confidence intervals but, to do that, you need to model the data. You can just do this via a LOESS fit. It also depends on whether the example provided, i.e., fitting sine / cosine functions, is what you want. You may have just picked that figure because it looks nice.

If you do want to fit a sine function, look here: Fit a sinusoidal term to data.

To fit a simple LOESS, look at my code:

plot a single sample

df
      X X01PM0471C_BS X01PM0471T_BS X06PM1188C_BS X06PM1188T_BS X07PM1349C_BS
1  1300       0.88647       0.87860       0.89372       0.90263       0.90162
2  1301       0.78965       0.87323       0.80041       0.83332       0.76670
3  1302       0.87391       0.87248       0.85595       0.83394       0.91689
4  1303       0.91277       0.88628       0.94335       0.91428       0.93645
5  1304       0.91465       0.90700       0.91991       0.89802       0.91308
6  1305       0.69227       0.72461       0.81498       0.70621       0.79752
7  1306       0.88589       0.89261       0.89747       0.88588       0.89203
8  1307       0.77612       0.79795       0.80669       0.81899       0.83879
9  1308       0.75980       0.78125       0.78424       0.77110       0.68918
10 1309       0.86144       0.83854            NA            NA            NA

# Scatter plot of a single sample's beta values with one LOESS fit and its
# confidence band.
p1 <- ggplot(df, aes(x = X, y = X01PM0471C_BS)) +
  geom_point() +
  #xlim(0,5) + ylim(0,5) +
  # A single smoother layer: geom_smooth() and stat_smooth() are effectively
  # aliases, so stacking both draws the same LOESS curve and ribbon twice.
  geom_smooth(method = "loess", formula = y ~ x, fullrange = TRUE) +
  ggtitle("My LOESS model")
print(p1)

plot all samples

# Reshape wide -> long so every row is one (X, sample) pair; the sample name
# goes to `variable` and the beta value to `value`.
# NOTE(review): melt() is presumably reshape2::melt -- no library() call is
# visible here, confirm the package is loaded.
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
df.melt <- melt(df, id.vars = "X")
      X      variable   value
1  1300 X01PM0471C_BS 0.88647
2  1301 X01PM0471C_BS 0.78965
3  1302 X01PM0471C_BS 0.87391
4  1303 X01PM0471C_BS 0.91277
5  1304 X01PM0471C_BS 0.91465
6  1305 X01PM0471C_BS 0.69227
7  1306 X01PM0471C_BS 0.88589
8  1307 X01PM0471C_BS 0.77612
9  1308 X01PM0471C_BS 0.75980
10 1309 X01PM0471C_BS 0.86144
11 1300 X01PM0471T_BS 0.87860
12 1301 X01PM0471T_BS 0.87323
13 1302 X01PM0471T_BS 0.87248
14 1303 X01PM0471T_BS 0.88628
15 1304 X01PM0471T_BS 0.90700
16 1305 X01PM0471T_BS 0.72461


# Scatter plot of all samples with one LOESS fit (and confidence band) per
# sample, coloured by sample.
p2 <- ggplot(df.melt, aes(x = X, y = value, group = variable)) +
  geom_point() +
  #xlim(0,5) + ylim(0,5) +
  # A single coloured smoother layer: geom_smooth() and stat_smooth() are
  # effectively aliases, so stacking both fits and draws every curve twice.
  geom_smooth(aes(colour = variable), method = "loess", formula = y ~ x,
              fullrange = TRUE) +
  ggtitle("My LOESS model")
print(p2)

plot just 2 samples

# Keep only the rows belonging to the two samples of interest; %in% already
# yields a plain TRUE/FALSE mask, so it can index the rows directly.
df.melt.sub <- df.melt[
  df.melt$variable %in% c("X01PM0471C_BS", "X06PM1188T_BS"),
]

# Scatter plot of the two selected samples with one LOESS fit (and confidence
# band) per sample, coloured by sample.
p3 <- ggplot(df.melt.sub, aes(x = X, y = value, group = variable)) +
  geom_point() +
  #xlim(0,5) + ylim(0,5) +
  # A single coloured smoother layer: geom_smooth() and stat_smooth() are
  # effectively aliases, so stacking both fits and draws every curve twice.
  geom_smooth(aes(colour = variable), method = "loess", formula = y ~ x,
              fullrange = TRUE) +
  ggtitle("My LOESS model")
print(p3)