library(foreign)
library(dplyr)
library(Hmisc)
library(stringr)

####FUNCTIONS####
source( file="CultureFst.r")

#################

args = commandArgs(trailingOnly=TRUE)

wvs_full <- read.csv("wvs_alleles_merged.csv")

dimensions <- read.csv("allele-dimensions-data.csv")
include_vars_full <- as.character(dimensions[dimensions$CAT.All==1,]$V)

samples <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

robust.df <- data.frame(arg = c(NA), sample = c(NA), data_pc = c(NA))

for (s in samples) {
  size <- round(s*length(include_vars_full))
  
  include_vars <- sample(include_vars_full, size, replace = FALSE)
  wvs <- wvs_full[,(names(wvs_full) %in% include_vars | names(wvs_full) %in% c("S002", "S003", "X048WVS"))]
  
  robust.df <- rbind(robust.df, c(args[1], s, sum(!is.na(wvs))/sum(!is.na(wvs_full))))
  
  #Create a new variable that splits the US into regions
  wvs$pop <- wvs$S003
  
  #Move pop to the front
  col_idx <- grep("pop", names(wvs))
  wvs <- wvs[, c(col_idx, (1:ncol(wvs))[-col_idx])]
  
  #Only look at countries of interest and years of interest (based on replication)
  wvs.rep_subset <- wvs[str_detect(wvs$S002,"^2005") | str_detect(wvs$S002,"^2010"),]
  
  wvs.rep_subset[wvs.rep_subset<0] <- NA
  
  # Look for traits recommended by Bell, et al. (2009)
  #loci <- c("v1") # all traits
  loci <- ls(wvs.rep_subset)
  loci <- loci[loci != "X048WVS"]
  loci <- loci[loci != "pop"]
  loci <- loci[loci != "Region"]
  loci <- loci[loci != "S002"]
  loci <- loci[loci != "S003"]
  # what is the type of data, discrete or continuous
  type_full <- c(0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	1, 	1, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0 )
  
  temp <- include_vars_full %in% include_vars
  type <- type_full[temp]
  names(type) <- loci # make sure to give names to elements of this vector
  
  
  # run fst function with bootstrap
  #ans.fst = CultureFst( wvs.rep_subset, loci, type, bootstrap=TRUE, no.samples=100, label="test_ex_boot")
  # run fst function without bootstrap
  ans.fst = CultureFst( wvs.rep_subset, loci, type, bootstrap=FALSE, no.samples=100, label="test_ex")
  
  write.csv(ans.fst$mean.fst, paste0("robust_questions/fst_values_s",as.character(s),"_a_",args[1],".csv"))
}

write.csv(robust.df, paste0("robust_questions/robust_df", args[1], ".csv"))
