library(ggplot2)
library(ape)
library(geiger)
library(boot)
library(nlme)
library(phytools)


# Load Data ---------------------------------------------------------------
# Per-language table from Piantadosi, et al. (2011).
piantadosi <- read.csv("Piantadosi_2011.csv")

# Tree file from Bouckaert, et al. (2012); only the entry stored under the
# name TREE1 is kept.
indo_euro_tree <- read.nexus("indo_euro.nex")$TREE1

# Index the table by language so rows can be looked up by name.
row.names(piantadosi) <- piantadosi$Language

# Stand-alone copies of the two analysed columns, each named by language
# (the same labels as the row names set above).
Population_Size_Ethnologue <- setNames(
  piantadosi$Population_Size_Ethnologue,
  row.names(piantadosi)
)
Corr4Gram <- setNames(piantadosi$Corr4Gram, row.names(piantadosi))


# Trim tree to languages in Piantadosi et al. (2011) ----------------------
# Tips that have no matching row in the data table. This is computed with
# setdiff() instead of name.check(...)$tree_not_data because name.check()
# returns the plain string "OK" when tree and data already agree, and `$` on
# that string is an error.
tips_to_delete <- setdiff(indo_euro_tree$tip.label, row.names(piantadosi))

# Only prune when there is something to remove, so an already-trimmed tree
# passes through untouched.
if (length(tips_to_delete) > 0) {
  indo_euro_tree <- drop.tip(indo_euro_tree, tips_to_delete)
}

plot(indo_euro_tree) # Show trimmed tree

# Print any remaining mismatch between tree tips and data rows.
name.check(indo_euro_tree, piantadosi)

# Analyze correlation -----------------------------------
# Ordinary regression of Corr4Gram on log population size. Both variables are
# resolved as columns of `piantadosi` through `data =`.
model <- lm(Corr4Gram ~ log(Population_Size_Ethnologue), data = piantadosi)

# sqrt(R^2) is the magnitude of the correlation; the sign is not recovered.
sqrt(summary(model)$r.squared) # Correlation between raw values

# Create contrasts --------------------------------------------------------
# Index the named trait vectors by the tree's tip labels first, so that each
# vector has exactly one value per tip, in tip order, even if the data table
# holds languages that are not in the tree.
tree_tips <- indo_euro_tree$tip.label
ContrastPopSize <- pic(
  log(Population_Size_Ethnologue[tree_tips]),
  indo_euro_tree
)
ContrastGram4 <- pic(Corr4Gram[tree_tips], indo_euro_tree)


# Analyze correlation between contrasts -----------------------------------
# `- 1` removes the intercept: the contrasts are regressed through the origin.
model <- lm(ContrastGram4 ~ ContrastPopSize - 1)

# As above, this is the magnitude only (sign discarded).
sqrt(summary(model)$r.squared) # Correlation between contrasts

# Bootstrap 95% confidence intervals --------------------------------------
# Bootstrap statistic: square root of R^2 from a linear model fitted to a
# resample of `data`.
#
# Arguments:
#   formula  model formula passed to lm().
#   data     data frame holding the variables named in `formula`.
#   indices  row indices of the resample (supplied by boot()).
#
# Returns a single non-negative number, sqrt(R^2). This is the magnitude of
# the correlation; the direction of the relationship is not encoded.
rsq <- function(formula, data, indices) {
  # drop = FALSE keeps a data frame even when `data` has a single column.
  resampled <- data[indices, , drop = FALSE]
  fit <- lm(formula, data = resampled)
  # Spell out `r.squared` in full rather than relying on partial matching.
  sqrt(summary(fit)$r.squared)
}

# Seed the random number generator so the bootstrap intervals below are the
# same on every run. The seed value itself is arbitrary.
set.seed(1)

# Number of bootstrap replicates, shared by both analyses.
n_boot <- 10000

# Resample rows of the raw data and recompute sqrt(R^2) each time; `formula`
# is forwarded by boot() to rsq().
results_raw <- boot(
  data = piantadosi,
  statistic = rsq,
  R = n_boot,
  formula = Corr4Gram ~ log(Population_Size_Ethnologue)
)
boot.ci(results_raw, type = "bca") # CI for raw values

# Put the two contrast vectors side by side so rows can be resampled jointly.
contrasts <- as.data.frame(ContrastGram4)
contrasts$ContrastPopSize <- ContrastPopSize

# Same procedure on the contrasts, with the regression through the origin.
results_contrasts <- boot(
  data = contrasts,
  statistic = rsq,
  R = n_boot,
  formula = ContrastGram4 ~ ContrastPopSize - 1
)

boot.ci(results_contrasts, type = "bca") # CI for contrasts
