# Load (and install if missing) every package the analysis needs.
# BUG FIX: MuMIn was listed twice in the original call; deduplicated.
pacman::p_load(
  tidyverse,  # tidy family and related packages
  kableExtra, # nice tables
  MuMIn,      # multi-model inference
  emmeans,    # post-hoc tests
  gridExtra,  # may not use this
  pander,     # nice tables
  metafor,    # package for meta-analysis
  ape,        # phylogenetic analysis
  patchwork,  # putting ggplots together
  here,       # making reading files easy
  orchaRd     # plotting orchard plots - you may need to install via devtools
  # more to add
)

# Register metafor's hook so MuMIn model selection works with rma objects.
eval(metafor:::.MuMIn)
1.2 Loading data: paper and tree data
Code
# Read the cleaned effect-size dataset and the species-level phylogeny.
# BUG FIX: in the extracted source the read.tree() call had been swallowed
# by the preceding comment, so the tree was never loaded.
dat <- read.csv(here("data", "clean_data.csv"))
# dim(dat)
# head(dat)
tree <- read.tree(here("data", "species_tree.tre"))
1.3 Custom functions
Code
# Compute an effect size (correlation r, Fisher's Zr, and its sampling
# variance) from whichever statistics a study reported.
#
# Arguments (12, supplied positionally via pmap_dfr):
#   m1, m2    - group means (or counts / percentages / proportions)
#   sd1, sd2  - group SDs (or SD-like quantities, depending on `method`)
#   n1, n2, n - group and total sample sizes (n is always available)
#   est       - reported statistic (t, F, slope, or r depending on `method`)
#   se        - standard error of `est`
#   p_val     - reported p value
#   direction - +1 / -1 sign of the effect (F and p methods lose the sign)
#   method    - which conversion route to use (see branches below)
#
# Returns a one-row data.frame: ri (r), yi (Zr), vi (sampling variance of Zr).
effect_size <- function(m1, m2, sd1, sd2, n1, n2, n, # 12 arguments
                        est, se, p_val, direction, method) {

  # point-biserial r -> biserial r. Reads n1/n2/n from the enclosing call
  # at *call* time, so branches that overwrite n1/n2 (the balanced-design
  # "method2" variants) are handled correctly.
  to_biserial <- function(r_pb) {
    p <- n1 / n # prop for n1
    q <- n2 / n # prop for n2
    r_pb * (sqrt(p * q) / dnorm(qnorm(p)))
  }

  # two group means + SDs -> bias-corrected SMD (Hedges' g) -> biserial r
  smd_to_r <- function(mu1, mu2, s1, s2) {
    h <- n / n1 + n / n2
    s_pool <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n - 2))
    j <- 1 - (3 / (4 * n - 9)) # small-sample correction
    d <- ((mu2 - mu1) / s_pool) * j
    to_biserial(d / sqrt(d^2 + h))
  }

  # t statistic -> point-biserial r -> biserial r
  t_to_r <- function(t) to_biserial(t / sqrt(t^2 + n - 2))

  if (method == "mean_method") {
    r <- smd_to_r(m1, m2, sd1, sd2)
  } else if (method == "count_method") {
    # Poisson-like counts: SD approximated by sqrt(mean)
    r <- smd_to_r(m1, m2, sqrt(m1), sqrt(m2))
  } else if (method == "percent_method1") {
    # percentages -> proportions -> arcsine-sqrt scale; variance fixed at 1/8
    # BUG FIX: original divided by 100 twice (m1 <- m1/100, then
    # asin(sqrt(m1/100))), shrinking the input by an extra factor of 100.
    r <- smd_to_r(asin(sqrt(m1 / 100)), asin(sqrt(m2 / 100)),
                  1 / sqrt(8), 1 / sqrt(8))
  } else if (method == "percent_method2") {
    p1 <- m1 / 100
    p2 <- m2 / 100
    e1 <- sd1 / 100
    e2 <- sd2 / 100
    s1 <- 1 / sqrt(e1^2 / (4 * p1 * (1 - p1)))
    s2 <- 1 / sqrt(e2^2 / (4 * p2 * (1 - p2)))
    # BUG FIX: original applied a second /100 inside asin()
    r <- smd_to_r(asin(sqrt(p1)), asin(sqrt(p2)), s1, s2)
  } else if (method == "proportion_method1") {
    r <- smd_to_r(asin(sqrt(m1)), asin(sqrt(m2)),
                  1 / sqrt(8), 1 / sqrt(8))
  } else if (method == "proportion_method2") {
    s1 <- 1 / sqrt(sd1^2 / (4 * m1 * (1 - m1)))
    s2 <- 1 / sqrt(sd2^2 / (4 * m2 * (1 - m2)))
    # BUG FIX: inputs are already proportions, yet the original divided
    # by 100 inside asin(); now consistent with proportion_method1.
    r <- smd_to_r(asin(sqrt(m1)), asin(sqrt(m2)), s1, s2)
  } else if (method == "t_method1") {
    r <- t_to_r(est)
  } else if (method == "t_method2") {
    n1 <- n / 2 # group sizes unknown: assume a balanced design
    n2 <- n / 2
    r <- t_to_r(est)
  } else if (method == "F_method1") {
    r <- t_to_r(sqrt(est)) * direction # F = t^2; sign restored by `direction`
  } else if (method == "F_method2") {
    n1 <- n / 2
    n2 <- n / 2
    r <- t_to_r(sqrt(est)) * direction
  } else if (method == "p_method1") {
    r <- t_to_r(qt(1 - p_val, n - 2)) * direction # back-compute t from p
  } else if (method == "p_method2") {
    n1 <- n / 2
    n2 <- n / 2
    r <- t_to_r(qt(1 - p_val, n - 2)) * direction
  } else if (method == "correlation_method1") {
    r <- est
  } else if (method == "correlation_method2") {
    r <- 2 * sin((pi / 6) * est) # Spearman's rho -> Pearson's r
  } else if (method == "estimate_method1") {
    r <- t_to_r(est / se) # slope / SE = t
  } else if (method == "estimate_method2") {
    n1 <- n / 2
    n2 <- n / 2
    r <- t_to_r(est / se)
  }

  # |r| can exceed 1 after the biserial conversion; clamp at 0.99 so
  # atanh() stays finite.
  if (r >= 1) {
    Zr <- atanh(0.99)
  } else if (r <= -1) {
    Zr <- atanh(-0.99)
  } else {
    Zr <- atanh(r)
  }
  VZr <- 1 / (n - 3)

  data.frame(ri = r, yi = Zr, vi = VZr)
}

########### functions for absolute values

# Mean of the folded normal distribution |X| where X ~ N(mean, variance).
folded_mu <- function(mean, variance) {
  mu <- mean
  sigma <- sqrt(variance)
  sigma * sqrt(2 / pi) * exp((-mu^2) / (2 * sigma^2)) +
    mu * (1 - 2 * pnorm(-mu / sigma))
}

# Variance of the folded normal distribution |X|.
folded_v <- function(mean, variance) {
  mu <- mean
  sigma <- sqrt(variance)
  fold_mu <- sigma * sqrt(2 / pi) * exp((-mu^2) / (2 * sigma^2)) +
    mu * (1 - 2 * pnorm(-mu / sigma))
  fold_se <- sqrt(mu^2 + sigma^2 - fold_mu^2)
  fold_se^2
}

#' Contrast name generator
#'
#' @param name a vector of character strings
#' @return all pairwise "a-b" contrast labels
cont_gen <- function(name) {
  pairs <- t(combn(name, 2))
  paste(pairs[, 1], pairs[, 2], sep = "-")
}

#' @title get_pred1: intercept-less model
#' @description Get CIs (confidence intervals) and PIs (prediction
#'   intervals) from rma objects (metafor).
#' @param model rma.mv object
#' @param mod the name of a moderator
get_pred1 <- function(model, mod = " ") {
  # NOTE(review): firstup() is not defined in this chunk; presumably a
  # capitalisation helper defined elsewhere in the project - confirm.
  name <- firstup(as.character(stringr::str_replace(row.names(model$beta), mod, "")))
  len <- length(name)
  if (len != 1) {
    newdata <- diag(len)
    pred <- metafor::predict.rma(model, newmods = newdata, tau2.levels = 1:len)
  } else {
    pred <- metafor::predict.rma(model)
  }
  tibble(
    name = factor(name, levels = name, labels = name),
    estimate = pred$pred,
    lowerCL = pred$ci.lb, upperCL = pred$ci.ub,
    pval = model$pval,
    lowerPR = pred$cr.lb, upperPR = pred$cr.ub
  )
}
#' @title get_pred2: normal model
#' @description Get CIs (confidence intervals) and PIs (prediction
#'   intervals) from rma objects (metafor).
#' @param model rma.mv object
#' @param mod the name of a moderator
get_pred2 <- function(model, mod = " ") {
  # strip the "relevel(<mod>, ref = name)" prefix metafor puts on row names
  name <- as.factor(str_replace(
    row.names(model$beta),
    paste0("relevel", "\\(", mod, ", ref = name", "\\)"), ""))
  len <- length(name)
  if (len != 1) {
    newdata <- diag(len)
    pred <- predict.rma(model, intercept = FALSE, newmods = newdata[, -1])
  } else {
    pred <- predict.rma(model)
  }
  tibble(
    name = factor(name, levels = name, labels = name),
    estimate = pred$pred,
    lowerCL = pred$ci.lb, upperCL = pred$ci.ub,
    pval = model$pval,
    lowerPR = pred$cr.lb, upperPR = pred$cr.ub
  )
}

#' @title mr_results
#' @description Combine meta-regression group means (res1) with their
#'   pairwise contrasts (res2) into one table.
#' @param res1 data frame of group estimates (from get_pred1)
#' @param res2 data frame of contrast estimates (from get_pred2)
# BUG FIX: the original assigned to a misspelled local (`restuls`) and had a
# trailing comma inside tibble(); the tibble is now returned directly.
mr_results <- function(res1, res2) {
  tibble(
    `Fixed effect` = c(as.character(res1$name), cont_gen(res1$name)),
    Estimate = c(res1$estimate, res2$estimate),
    `Lower CI [0.025]` = c(res1$lowerCL, res2$lowerCL),
    `Upper CI [0.975]` = c(res1$upperCL, res2$upperCL),
    `P value` = c(res1$pval, res2$pval),
    `Lower PI [0.025]` = c(res1$lowerPR, res2$lowerPR),
    `Upper PI [0.975]` = c(res1$upperPR, res2$upperPR)
  )
}

#' @title all_models
#' @description Run every releveled version of a moderator model and
#'   tabulate all group means plus all pairwise contrasts.
#' @param model intercept-less model (rma.mv)
#' @param mod the name of a moderator
#' @param type "normal", "absolute" (folded effect sizes) or "hetero"
#'   (per-level heterogeneity, struct = "DIAG")
all_models <- function(model, mod = " ", type = "normal") {
  # getting the level names out
  level_names <- levels(factor(model$data[[mod]]))
  dat2 <- model$data
  mod <- mod

  # BUG FIX(consistency): the vcalc() calls below originally mixed the
  # global `dat` with the local `dat2`, and run_rma2 referenced a
  # non-existent `vi_abs` column (the data prep creates `abs_vi`).
  # All now consistently use dat2.
  run_rma1 <- function(name) {
    # variance-covariance matrix for sampling errors (shared groups r = 0.5)
    VCV1 <- vcalc(vi = dat2$vi, cluster = dat2$shared_group, rho = 0.5)
    VCV1 <- nearPD(VCV1)$mat
    rma.mv(yi, V = VCV1,
           mods = ~ relevel(dat2[[mod]], ref = name),
           random = list(~1 | effectID,
                         ~1 | paperID,
                         ~1 | species_cleaned),
           data = dat2,
           test = "t",
           sparse = TRUE,
           control = list(optimizer = "Nelder-Mead"))
  }

  run_rma2 <- function(name) {
    # as run_rma1 but for the folded (absolute-value) effect sizes
    VCVa <- vcalc(vi = dat2$abs_vi, cluster = dat2$shared_group, rho = 0.5)
    VCVa <- nearPD(VCVa)$mat
    rma.mv(abs_yi2, V = VCVa,
           mods = ~ relevel(dat2[[mod]], ref = name),
           random = list(~1 | effectID,
                         ~1 | paperID,
                         ~1 | species_cleaned),
           data = dat2,
           test = "t",
           sparse = TRUE,
           control = list(optimizer = "Nelder-Mead"))
  }

  run_rma3 <- function(name) {
    # as run_rma1 but with level-specific residual heterogeneity
    VCV1 <- vcalc(vi = dat2$vi, cluster = dat2$shared_group, rho = 0.5)
    VCV1 <- nearPD(VCV1)$mat
    rma.mv(yi, V = VCV1,
           mods = ~ relevel(dat2[[mod]], ref = name),
           random = list(
             # heterogeneity in relation to the moderator
             formula(paste("~", mod, "| effectID")),
             ~1 | paperID,
             ~1 | species_cleaned),
           struct = "DIAG",
           data = dat2,
           test = "t",
           sparse = TRUE,
           control = list(optimizer = "Nelder-Mead"))
  }

  # run one releveled model per reference level; the last level is skipped
  # because its contrasts are already covered by the earlier models
  if (type == "normal") {
    model_all <- purrr::map(level_names[-length(level_names)], run_rma1)
  } else if (type == "absolute") {
    model_all <- purrr::map(level_names[-length(level_names)], run_rma2)
  } else if (type == "hetero") {
    model_all <- purrr::map(level_names[-length(level_names)], run_rma3)
  }

  # group means from the intercept-less model
  res1 <- get_pred1(model, mod = mod)

  # contrast estimates from all releveled models
  res2_pre <- purrr::map(model_all, ~ get_pred2(.x, mod = mod))

  # indices of redundant contrasts to drop from each releveled model
  contra_list <- Map(seq, from = 1, to = 1:(length(level_names) - 1))
  res2 <- purrr::map2_dfr(res2_pre, contra_list, ~ .x[-(.y), ])

  # render the combined table
  mr_results(res1, res2) %>%
    kable("html", digits = 3) %>%
    kable_styling("striped", position = "left") %>%
    scroll_box(width = "100%")
}
1.4 Preparing data: obtaining effect sizes etc.
Code
# Calculating effect sizes for every row of dat.
# NOTE(review): variance_group_1/2 are passed into the sd1/sd2 arguments of
# effect_size() - confirm these columns hold SDs, not variances.
effect2 <- pmap_dfr(
  list(dat$mean_group_1, dat$mean_group_2,
       dat$variance_group_1, dat$variance_group_2,
       dat$n_group_1, dat$n_group_2, dat$n,
       dat$effect_size_value, dat$effect_size_variance,
       dat$effect_size_p_value_numeric,
       dat$direction_change, dat$function_needed),
  effect_size
)

# merging two data frames
dat <- cbind(dat, effect2)

# adding absolute-value effect sizes too
dat <- dat %>%
  mutate(abs_yi = abs(yi),            # we use this one (conservative)
         abs_yi2 = folded_mu(yi, vi), # alternative way
         abs_vi = folded_v(yi, vi)) %>%
  mutate_if(is.character, as.factor)  # changing all character cols to factor

# dat$Zr <- unlist(effect1[1, ])
# dat$VZr <- unlist(effect1[2, ])

# renaming X to effectID
colnames(dat)[colnames(dat) == "X"] <- "effectID"

# creating the phylogeny column
dat$phylogeny <- gsub(" ", "_", dat$species_cleaned)

# checking species names match between tree and dataset
# match(unique(dat$phylogeny), tree$tip.label)
# match(tree$tip.label, unique(dat$phylogeny))
# intersect(unique(dat$phylogeny), tree$tip.label)
# setdiff(unique(dat$phylogeny), tree$tip.label)
# sum(is.na(match(unique(dat$phylogeny), tree$tip.label)))

# creating a correlation matrix from a tree
# BUG FIX: in the extracted source these two lines were swallowed by the
# preceding comment, so cor_tree was never built.
tree <- compute.brlen(tree)
cor_tree <- vcv(tree, corr = TRUE)

# creating a VCV for sampling error variances;
# we assume shared groups have r = 0.5
VCV <- vcalc(vi = dat$vi, cluster = dat$shared_group, rho = 0.5)
VCV <- nearPD(VCV)$mat

# creating VCV for absolute values
VCV_abs <- vcalc(vi = dat$abs_vi, cluster = dat$shared_group, rho = 0.5)
VCV_abs <- nearPD(VCV_abs)$mat
# Intercept-only (overall) meta-analytic model.
# Phylogeny accounted for ~0 variance, so it is left out of the random effects.
mod1 <- rma.mv(yi = yi, V = VCV,
               mod = ~1,
               data = dat,
               random = list(~1 | effectID,
                             ~1 | paperID,
                             ~1 | species_cleaned),
               test = "t",
               sparse = TRUE)
summary(mod1)
# r2_ml(mod_focusA1)

# homogeneous-variance model
# BUG FIX: the "# homo" comment had fused with orchard_plot(), turning the
# whole call into a comment.
orchard_plot(mod_focusA, mod = "fitness_main_focus",
             xlab = "Effect Size: Zr", group = "paperID",
             branch.size = 4, angle = 90)
Code
# heterogeneous-variance model
# BUG FIX: the "# hetero" comment had fused with orchard_plot(), turning the
# whole call into a comment.
orchard_plot(mod_focusA1, mod = "fitness_main_focus",
             xlab = "Effect Size: Zr", group = "paperID",
             branch.size = 4, angle = 90)
Code
# Contrast table for the heterogeneous-variance fitness-focus model.
tab_focusA1 <- all_models(mod_focusA1, mod = "fitness_main_focus", type = "hetero")

# saving the table as RDS
# BUG FIX: the original saved `tab_focus` (a different object) under the
# tab_focusA1.RDS filename.
saveRDS(tab_focusA1, here("Rdata", "tab_focusA1.RDS"))
# use "ML" for model selection######################## Mulit-variable models######################## we need - tomod_full <-rma.mv(yi = yi, V = VCV,mod =~1+ sex + age_class_clean + whose_fitness + fitness_higher_level + dispersal_type + dispersal_phase,data = dat, random =list(~1| effectID,~1| paperID,~1| species_cleaned),method ="ML",test ="t",sparse =TRUE)summary(mod_full)
# Relative importance (sum of Akaike weights) of each predictor across the
# candidate model set.
# NOTE(review): `candidates` (presumably the dredge() output from the model
# selection step) is defined outside this chunk - confirm.
importance <- sw(candidates)
importance
whose_fitness dispersal_phase age_class_clean
Sum of weights: 0.42 0.37 0.33
N containing models: 32 32 32
fitness_higher_level sex dispersal_type
Sum of weights: 0.24 0.23 0.15
N containing models: 32 32 32