Boxplot with outliers label & sample size
Una herramienta útil para hacer un boxplot e identificar los outliers. La función del boxplot es la siguiente:
boxplot.with.outlier.label <- function(y, label_name, ..., spread_text = T, data, plot = T, range = 1.5, label.col = "blue", jitter_if_duplicate = T, jitter_only_positive_duplicates = F) { # jitter_if_duplicate - will jitter (Actually just add a bit of numbers) so to be able to decide on which location to plot the label when having identical variables... require(plyr) # for is.formula and ddply # a function to jitter data in case of ties in Y's jitter.duplicate <- function(x, only_positive = F) { if(only_positive) { ss <- x > 0 } else { ss <- T } ss_dup <- duplicated(x[ss]) # ss <- ss & ss_dup temp_length <- length(x[ss][ss_dup]) x[ss][ss_dup] <- x[ss][ss_dup] + seq(from = 0.00001, to = 0.00002, length.out = temp_length) x } # jitter.duplicate(c(1:5)) # jitter.duplicate(c(1:5,5,2)) # duplicated(jitter.duplicate(c(1:5,5,2))) # jitter.duplicate(c(0,0,1:5,5,2)) # duplicated(jitter.duplicate(c(0,0,1:5,5,2))) # handle cases where if(jitter_if_duplicate) { # warning("duplicate jutter of values in y is ON") if(!missing(data)) { #e.g: we DO have data # if(exists("y") && is.formula(y)) { # F && NULL # F & NULL y_name <- as.character(substitute(y)) # I could have also used as.list(match.call()) # credit to Uwe Ligges and Marc Schwartz for the help # https://mail.google.com/mail/?shva=1#inbox/12dd7ca2f9bfbc39 if(length(y_name) > 1) { # then it is a formula (for example: "~", "y", "x" model_frame_y <- model.frame(y, data = data) temp_y <- model_frame_y[,1] temp_y <- jitter.duplicate(temp_y, jitter_only_positive_duplicates) # notice that the default of the function is to work only with positive values... # the_txt <- paste(names(model_frame_y)[1], "temp_y", sep = "<<-") # wrong... the_txt <- paste("data['",names(model_frame_y)[1],"'] <- temp_y", sep = "") eval(parse(text = the_txt)) # jutter out y var so to be able to handle identical values. } else { # this isn't a formula data[,y_name] <- jitter.duplicate(data[,y_name], jitter_only_positive_duplicates) y <- data[,y_name] # this will make it possible for boxplot(y, data) to work later (since it is not supposed to work with data when it's not a formula, but now it does :)) } } else { # there is no "data" if(is.formula(y)) { # if(exists("y") && is.formula(y)) { # F && NULL # F & NULL temp_y <- model.frame(y)[,1] temp_y <- jitter.duplicate(temp_y, jitter_only_positive_duplicates) # notice that the default of the function is to work only with positive values... environment(y) <- new.env() assign(names(model.frame(y))[1], temp_y, environment(y)) # Credit and thanks for doing this goes to Niels Richard Hansen (2 Jan 30, 2011) # http://r.789695.n4.nabble.com/environment-question-changing-variables-from-a-formula-through-model-frame-td3246608.html # warning("Your original variable (in the global environemnt) was just jittered.") # maybe I should add a user input before doing this.... # the_txt <- paste(names(model_frame_y)[1], "temp_y", sep = "<<-") # eval(parse(text = the_txt)) # jutter out y var so to be able to handle identical values. } else { y <- jitter.duplicate(y, jitter_only_positive_duplicates) } } } # the_txt <- paste("print(",names(model_frame_y)[1], ")") # eval(parse(text = the_txt)) # jutter out y var so to be able to handle identical values. # print(ls()) # y should be a formula of the type: y~x, y~a*b # or it could be simply y if(missing(data)) { boxdata <- boxplot(y, plot = plot,range = range ,...) } else { boxdata <- boxplot(y, plot = plot,data = data, range = range ,...) } if(length(boxdata$names) == 1 && boxdata$names =="") boxdata$names <- 1 # this is for cases of type: boxplot(y) (when there is no dependent group) if(length(boxdata$out) == 0 ) stop("No outliers detected for this boxplot") if(!missing(data)) attach(data) # this might lead to problams I should check out for alternatives for using attach here... # creating a data.frame with information from the boxplot output about the outliers (location and group) boxdata_group_name <- factor(boxdata$group) levels(boxdata_group_name) <- boxdata$names[as.numeric(levels(boxdata_group_name))] # the subseting is for cases where we have some sub groups with no outliers boxdata_outlier_df <- data.frame(group = boxdata_group_name, y = boxdata$out, x = boxdata$group) # Let's extract the x,y variables from the formula: if(is.formula(y)) { model_frame_y <- model.frame(y) y <- model_frame_y[,1] x <- model_frame_y[,-1] if(!is.null(dim(x))) { # then x is a matrix/data.frame of the type x1*x2*..and so on - and we should merge all the variations... x <- apply(x,1, paste, collapse = ".") } } else { # if(missing(x)) x <- rep(1, length(y)) x <- rep(1, length(y)) # we do this in case y comes as a vector and without x } # and put all the variables (x, y, and outlier label name) into one data.frame DATA <- data.frame(label_name, x ,y) if(!missing(data)) detach(data) # we don't need to have "data" attached anymore. # let's only keep the rows with our outliers boxplot.outlier.data <- function(xx, y_name = "y") { y <- xx[,y_name] boxplot_range <- range(boxplot.stats(y, coef = range )$stats) ss <- (y < boxplot_range[1]) | (y > boxplot_range[2]) return(xx[ss,]) } outlier_df <-ddply(DATA, .(x), boxplot.outlier.data) # create propor x/y locations to handle over-laping dots... if(spread_text) { # credit: Greg Snow require(TeachingDemos) temp_x <- boxdata_outlier_df[,"x"] temp_y1 <- boxdata_outlier_df[,"y"] temp_y2 <- temp_y1 for(i in unique(temp_x)) { tmp <- temp_x == i temp_y2[ tmp ] <- spread.labs( temp_y2[ tmp ], 1.3*strheight('A'), maxiter=6000, stepsize = 0.05) #, min=0 ) } } # max(strwidth(c("asa", "a")) # move_text_right <- max(strwidth(outlier_df[,"label_name"])) # plotting the outlier labels :) (I wish there was a non-loop wise way for doing this) for(i in seq_len(dim(boxdata_outlier_df)[1])) { # ss <- (outlier_df[,"x"] %in% boxdata_outlier_df[i,]$group) & (outlier_df[,"y"] %in% boxdata_outlier_df[i,]$y) # if(jitter_if_duplicate) { # ss <- (outlier_df[,"x"] %in% boxdata_outlier_df[i,]$group) & closest.number(outlier_df[,"y"] boxdata_outlier_df[i,]$y) # } else { ss <- (outlier_df[,"x"] %in% boxdata_outlier_df[i,]$group) & (outlier_df[,"y"] %in% boxdata_outlier_df[i,]$y) # } current_label <- outlier_df[ss,"label_name"] temp_x <- boxdata_outlier_df[i,"x"] temp_y <- boxdata_outlier_df[i,"y"] # cbind(boxdata_outlier_df, temp_y2) # outlier_df if(spread_text) { temp_y_new <- temp_y2[i] # not ss move_text_right <- strwidth(current_label) text( temp_x+move_text_right, temp_y_new, current_label, col = label.col) # strwidth segments( temp_x+(move_text_right/6), temp_y, temp_x+(move_text_right*.47), temp_y_new ) } else { text(temp_x, temp_y, current_label, pos = 4, col = label.col) } } # outputing some of the information we collected list(boxdata = boxdata, boxdata_outlier_df = boxdata_outlier_df, outlier_df=outlier_df) }
Un ejemplo:
boxplot.with.outlier.label (Lip~Egon,Cod, varwidth=T, ylab="Lipidos(ug/mg)", xlab="Estado gonada", data=P1_98gon, ylim=c(50,350)) sample.size <- tapply(P1_98gon$Lip, P1_98gon$Egon, length) ####Codigo para determinar el tamaño muestral ss.ch <- paste("N=", sample.size, sep="") ####Codigo pegar N= a los datos de tamaño muestral mtext(ss.ch, at=1:length(unique(P1_98gon$Lip)), line=2, side=3) ####Codigo poner una linea de texto en el eje x en cada boxplot
Si queremos que no se superpongan los labels instalar el paquete {teachingDemos} y {plyr}
boxplot.with.outlier.label (Lip~Egon,Cod, varwidth=T, ylab="Lipidos(ug/mg)", xlab="Estado gonada", data=A99gon, ylim=c(50,350), spread_text = F) #añadimos spread_text=F
Fuente: R-Staistics blog
Tags: boxplot, label, sample size
February 17th, 2012 at 3:58 am
体育博彩规则…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
February 17th, 2012 at 3:06 pm
威廉希尔备用网址…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
March 17th, 2012 at 4:55 am
足彩14场胜负规则…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
April 23rd, 2012 at 9:02 pm
namitha sex video…
Grupo R : RMyP » Blog Archive » Boxplot with outliers label…
April 27th, 2012 at 12:20 pm
learn spanish lessons in london cheap…
Grupo R : RMyP » Blog Archive » Boxplot with outliers label…
April 27th, 2012 at 2:13 pm
mannequin homme photo…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
May 18th, 2012 at 3:37 am
how to draw people…
Grupo R : RMyP » Blog Archive » Boxplot with outliers label…
May 19th, 2012 at 6:29 am
sony nex f3…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
May 25th, 2012 at 5:36 am
WEB TASARIMI…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
May 28th, 2012 at 1:24 pm
Make Money…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 8th, 2012 at 7:48 am
paginas web creacion…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 16th, 2012 at 5:33 pm
free samples…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 18th, 2012 at 5:10 am
tutoriales gratis…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 18th, 2012 at 10:53 pm
shopping cart hero 3…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 21st, 2012 at 3:53 pm
Web Design Blog…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 26th, 2012 at 1:46 pm
web tasarım, webtasarım, website tasarımı, site tasarımı…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…
June 28th, 2012 at 8:18 am
Thiết kế web – Thiết kế web top 1 Google, dịch vụ chuyên nghiệp, bảo hành vĩnh viễn.thiet ke web,thiet ke website,thiet ke web chuyen nghiep,thiết kế web,thiết kế website,cong ty thiet ke web, website gia re…
[...]Grupo R : RMyP » Blog Archive » Boxplot with outliers label & sample size[...]…