library(foreign)
library(dplyr)
library(Hmisc)
library(stringr)

####FUNCTIONS####
source( file="CultureFst.r")

#################

# ---- Script setup ----
# The first trailing command-line argument is used only as a run label: it is
# pasted into the names of the CSV files written at the end of the script.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  # Without a label args[1] is NA, so every unlabelled run writes to the same
  # "..._a_NA.csv" / "robust_dfNA.csv" files and overwrites the previous one.
  warning(
    "No run label supplied on the command line; ",
    "output files will be labelled 'NA'.",
    call. = FALSE
  )
}

# Merged WVS data; filtered and subset further down in the script.
wvs <- read.csv("wvs_alleles_merged.csv")

# Variable metadata: keep the names (column V) of the variables flagged with
# CAT.All == 1. These are the columns retained from `wvs`.
dimensions <- read.csv("allele-dimensions-data.csv")
include_vars <- as.character(dimensions$V[dimensions$CAT.All == 1])

# Fractions of cells to blank out; each value is passed to createNAs() as pctNA.
samples <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

# Accumulator for the per-fraction summary. It starts with a single all-NA
# placeholder row, which therefore is also the first row of the written CSV.
robust.df <- data.frame(arg = NA, sample = NA, data_pc = NA)

#' Replace a random fraction of cells with NA
#'
#' Draws `floor(nrow(x) * ncol(x) * pctNA)` distinct cell positions uniformly
#' at random (without replacement) and sets those cells to `NA`. Cells that
#' are already `NA` can be drawn again, so the number of newly missing values
#' may be smaller than the number of cells drawn.
#'
#' @param x A two-dimensional object (data frame or matrix).
#' @param pctNA Fraction of all cells to blank out.
#' @return `x` with the selected cells set to `NA`.
createNAs <- function (x, pctNA = 0.1) {
  n_rows <- nrow(x)
  n_cols <- ncol(x)
  n_cells <- n_rows * n_cols

  # Linear (column-major) positions of the cells to blank out.
  drop_idx <- sample.int(n_cells, floor(n_cells * pctNA))

  # Logical mask with the same shape as x, TRUE where a cell is removed.
  na_mask <- matrix(FALSE, nrow = n_rows, ncol = n_cols)
  na_mask[drop_idx] <- TRUE

  x[na_mask] <- NA
  x
}

for (s in samples) {
  
  wvs <- wvs[,(names(wvs) %in% include_vars | names(wvs) %in% c("S002", "S003", "X048WVS"))]
  
  
  wvs$pop <- wvs$S003
  
  #Move pop to the front
  col_idx <- grep("pop", names(wvs))
  wvs <- wvs[, c(col_idx, (1:ncol(wvs))[-col_idx])]
  
  #Only look at countries of interest and years of interest (based on replication)
  wvs.rep_subset <- wvs[str_detect(wvs$S002,"^2005") | str_detect(wvs$S002,"^2010"),]
  
  wvs.rep_subset[wvs.rep_subset<0] <- NA
  
  # Look for traits recommended by Bell, et al. (2009)
  #loci <- c("v1") # all traits
  loci <- ls(wvs.rep_subset)
  loci <- loci[loci != "X048WVS"]
  loci <- loci[loci != "pop"]
  loci <- loci[loci != "Region"]
  loci <- loci[loci != "S002"]
  loci <- loci[loci != "S003"]
  # what is the type of data, discrete or continuous
  type <- c(0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	1, 	1, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0, 	0 )
  
  names(type) <- loci # make sure to give names to elements of this vector
  
  start_cols <- wvs.rep_subset[c(1:4)]
  end_cols <- wvs.rep_subset[-c(1:4)]
  before_remove <- sum(!is.na(end_cols))
  end_cols <- createNAs(end_cols, s)
  after_remove <- sum(!is.na(end_cols))
  wvs.rep_subset <- cbind(start_cols,end_cols)
  robust.df <- rbind(robust.df, c(args[1], s, after_remove/before_remove))
  
  # run fst function with bootstrap
  #ans.fst = CultureFst( wvs.rep_subset, loci, type, bootstrap=TRUE, no.samples=100, label="test_ex_boot")
  # run fst function without bootstrap
  ans.fst = CultureFst( wvs.rep_subset, loci, type, bootstrap=FALSE, no.samples=100, label="test_ex")
  
  write.csv(ans.fst$mean.fst, paste0("robust_values/fst_values_s",as.character(s),"_a_",args[1],".csv"))
}

write.csv(robust.df, paste0("robust_values/robust_df", args[1], ".csv"))
