library(rstan) library(ggplot2) library(ggstance) library(reshape2) library(plyr) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) d <- read.csv("countriesSporcle.csv") d$Percentage <- as.numeric(gsub("%","",d$Percentage.Answered)) d$GDP <- d$Nominal.GDP..millions.USD. d <- d[complete.cases(d$GDP),] #d$UN.statistical.region <- factor(d$UN.statistical.region,levels(d$UN.statistical.region)[c(13,1:12,14:22)]) d$UN.statistical.region <- factor(d$UN.statistical.region) #d$UN.statistical.region <- relevel(d$UN.statistical.region,"Western Europe") d$UN.continental.region <- factor(d$UN.continental.region) #d$UN.continental.region <- relevel(d$UN.continental.region,"Europe") d2 <- unique(d[,c("UN.continental.region","UN.statistical.region")]) sporcle_data <- list( N = nrow(d), C = nlevels(d$UN.statistical.region), X = model.matrix(~UN.statistical.region-1, data=d), p = log(d$GDP), y = d$Percentage ) fit <- stan(file = 'sporcle.stan', data = sporcle_data, iter = 1000, chains = 8) #names(fit)[grepl("beta_continent",names(fit))] <- paste0("beta_continent_",levels(d$UN.continental.region)) names(fit)[grepl("beta_continent",names(fit))] <- paste0("beta_continent_",levels(d$UN.statistical.region)) #print(fit) #plot(fit,pars=c("beta_population","beta_continent")) extractedFit <- extract(fit) res <- data.frame( iteration = seq(1, nrow(extractedFit$beta_population)), "log-Nominal-GDP" = extractedFit$beta_population ) #for (i in 1:length(levels(d$UN.continental.region))){ # res[[paste0(levels(d$UN.continental.region))[i]]] <- extractedFit$beta_continent[,i] #} for (i in 1:length(levels(d$UN.statistical.region))){ res[[paste0(levels(d$UN.statistical.region))[i]]] <- extractedFit$beta_continent[,i] } res <- melt(res,id.vars = c("iteration")) res$variable <- factor(res$variable, levels = rev(levels(res$variable))) #res$variable[res$variable=="log.Population"] <- "log(Population)" #res$type <- grepl("Population",res$variable) names(d2)[names(d2) == "UN.statistical.region"] <- "variable" names(d2)[names(d2) == "UN.continental.region"] <- "Continent" d2 <- d2[order(d2$Continent,d2$variable),] d2 <- rbind(data.frame("variable" = "log.Nominal.GDP", "Continent" = "AAA"),d2) res <- merge(res,d2,by=c("variable"), drop=F) res$variable <- factor(res$variable, levels = rev(levels(d2$variable)[d2$variable])) res$Continent <- factor(res$Continent, levels = rev(levels(d2$Continent)[d2$Continent])) ressummary <- ddply(res, .(variable), function(x){ data.frame( mean = mean(x$value), upper = quantile(x$value, 0.975), lower = quantile(x$value, 0.025) ) }) res <- ddply(res, .(variable), function(x){ lower <- quantile(x$value, 0.01) upper <- quantile(x$value, 0.99) x[x$value >= lower & x$value <= upper,] }) g <- ggplot(res) g <- g + geom_violinh(aes(x = value, y = variable, color=Continent, fill=Continent)) g <- g + theme(legend.position = "none") g <- g + geom_segment(data = ressummary, aes(x = mean, xend=mean, y=as.integer(variable)-0.45, yend=as.integer(variable)+0.45)) g <- g + geom_segment(data = ressummary, aes(x = upper, xend=upper, y=as.integer(variable)-0.25, yend=as.integer(variable)+0.25), linetype="dotted") g <- g + geom_segment(data = ressummary, aes(x = lower, xend=lower, y=as.integer(variable)-0.25, yend=as.integer(variable)+0.25), linetype="dotted") g <- g + geom_vline(xintercept=0, linetype="dashed", color="grey") g <- g + ggtitle("Estimated Effect Sizes for Sporcle Quiz Data") g <- g + xlab("Effect Size (Percentage points)") g <- g + ylab("Variable") g ggsave(g, file="GDP-effects.pdf", width = 7, height=7, units="in") ggsave(g, file="GDP-effects.png", width = 7, height=7, units="in")