##############################
###  FST Map Country Fixes ###
##############################

# This code includes fixes to include previously missing countries in the FST map (due to merge issues): Great Britain, Vietnam, South Korea.
# Also, the FST value for "Serbia & Montenegro" is applied to both Serbia and Montenegro (separate shapefile cases) in the map.
# Mapping of French Guiana (overseas department of France) is suppressed.
# Finally, the title on the map is centered.
# (Updates from March 29, 2018)

# After fixing these issues in the FST vs. USA map, this script also produces an FST vs. China map.
# (Added August 30, 2018)

### Load packages, shapefile, & FST data
########################################
# Here, we'll map country-level FST data against USA. 
  
  library("foreign")
  library("ggplot2")
  library("ggmap")
  library("dplyr")
  library("maptools")
  library("RColorBrewer")
  library("broom")
  library("scales")
  
  # Load the country-level boundary shapefile (1:50m scale) and convert it to a data frame.
  # All shapefiles were downloaded from http://www.naturalearthdata.com/downloads.
  
  # readShapeSpatial() {maptools} reads a shapefile into a Spatial*DataFrame object.
  # Pass the shapefile name with NO extension.
  world <- readShapeSpatial("ne_50m_admin_0_countries")
  
  # Identify each shape by country name (the shapefile's "name" attribute). When the
  # shapefile is converted into a data frame, the row names become the "id" variable.
  row.names(world) <- as.character(world@data$name)
  
  # tidy() {broom} converts the shapefile into a data frame that ggplot2 can work with.
  world.df <- tidy(world)
  
  # Rename "id" to "country" so it matches the key column of the FST data.
  names(world.df)[names(world.df) == "id"] <- "country"
  
  # Load country-level FST data (this section maps the "United.States" column).
  #setwd("/Volumes/Seagate External Backup/Lab Manager files/Maps/R Scripts/FST map updates_3_29_18")
  # stringsAsFactors = FALSE keeps "country" as plain text under every R version, so the
  # renames below need no factor-level bookkeeping and the join key has the same type
  # on both sides.
  country.fst <- read.csv("weird_sino.csv", stringsAsFactors = FALSE)
  
  # Several countries are listed differently in the shapefile vs. the FST data.
  # Rename them in the FST data so that they match across data sources.
  # (%in% never yields NA, so rows with a missing country name are simply left alone.)
  country.fst$country[country.fst$country %in% "Great Britain"] <- "United Kingdom"
  country.fst$country[country.fst$country %in% "South Korea"] <- "Korea"
  country.fst$country[country.fst$country %in% "Viet Nam"] <- "Vietnam"
  
  # Serbia and Montenegro are combined in the FST data but are separate shapes in the
  # shapefile. Relabel the combined row as "Serbia", then append a copy of it labelled
  # "Montenegro" so that the same FST value is mapped to both countries.
  country.fst$country[country.fst$country %in% "Serbia and Montenegro"] <- "Serbia"
  montenegro <- country.fst[country.fst$country %in% "Serbia", , drop = FALSE]
  montenegro$country <- rep("Montenegro", nrow(montenegro))
  country.fst <- rbind(country.fst, montenegro)
  rm(montenegro)
  
  # Merge shapefile and FST data with left_join() {dplyr}: every polygon vertex is kept,
  # and countries without FST data get NA. The key is named explicitly so the join cannot
  # silently pick up any other column the two tables might share.
  merge.country <- left_join(world.df, country.fst, by = "country")
  rm(country.fst, world, world.df)


### Prep data for mapping
#########################
  
  # Calculate scale breaks: four equal-width divisions between the minimum and maximum
  # of the FST values vs. USA (missing values ignored).
  fst.max <- as.numeric(max(merge.country$United.States, na.rm = TRUE))
  fst.min <- as.numeric(min(merge.country$United.States, na.rm = TRUE))
  x <- (fst.max - fst.min) / 4   # width of one division
  div <- fst.min
  div2 <- div + x
  div3 <- div + 2 * x
  div4 <- div + 3 * x
  div5 <- fst.max
  
  # Legend labels, one per division: "<lower> - <upper>", rounded to 3 decimals.
  # The printed upper bound is nudged down by 0.001 (up by 0.001 for the last division)
  # so that neighbouring labels do not show the same number twice.
  bin.lower <- round(c(div, div2, div3, div4), digits = 3)
  bin.upper <- round(c(div2, div3, div4, div5), digits = 3) + c(-0.001, -0.001, -0.001, 0.001)
  
  # Cut the FST data into a factor using the breaks defined above.
  # NOTE(review): the lowest break is 0 rather than fst.min, and cut() intervals are
  # right-closed by default, so a value of exactly 0 (or below) stays NA even though the
  # first label starts at fst.min. Confirm this is intended.
  merge.country$fst_vsUSA_scale <- cut(merge.country$United.States,
                                       breaks = c(0, div2, div3, div4, div5),
                                       labels = paste(bin.lower, "-", bin.upper))
  rm(bin.lower, bin.upper)
  
  # Set the FST value for the REFERENCE COUNTRY to 0.
  # In this section, USA is the reference country.
  
  # Add "0.00" as a level of the factor, assign it to the reference country, and move it
  # to the front so that it shows up first in the legend.
  levels(merge.country$fst_vsUSA_scale) <- c(levels(merge.country$fst_vsUSA_scale), "0.00")
  merge.country$fst_vsUSA_scale[merge.country$country == "United States"] <- "0.00"
  merge.country$fst_vsUSA_scale <- relevel(merge.country$fst_vsUSA_scale, "0.00")
  
  # Remove Antarctica from the data so that it will not appear in the map.
  merge.country <- subset(merge.country, country != "Antarctica")

  
### Prepare French Guiana data (to be "unmapped")
#################################################
# French Guiana is mapped as part of France. To un-map it, import regional shapefile, and subset French Guiana. French Guiana will
# be added as its own layer to the map in gray.

  # Load the regional-level (admin-1, 1:10m scale) boundary shapefile.
  #setwd("/Volumes/Seagate External Backup/Lab Manager files/Maps/ArcMap/Shapefiles")
  regional <- readShapeSpatial("ne_10m_admin_1_states_provinces")
  
  # Keep only the regions whose "admin" attribute is France, and identify each one by
  # its "name" attribute (the row names become the "id" variable after tidy()).
  france <- subset(regional, regional@data$admin == "France")
  row.names(france) <- as.character(france@data$name)
  
  # tidy() {broom} converts the shapefile into a data frame that ggplot2 can work with;
  # the Spatial object is not needed afterwards.
  france.df <- tidy(france)
  rm(france)
  
  # Rename the "id" variable in the new data frame as "region".
  colnames(france.df)[colnames(france.df) == "id"] <- "region"
  
  # Keep only the French Guiana polygons.
  # NOTE(review): this match relies on the non-ASCII region name being read from the
  # shapefile in the same encoding as this script - confirm on the target machine.
  guiana.df <- france.df[which(france.df$region == "Guyane française"), ]



### Plot the map
################
# This map plots country-level FST vs. USA data. USA (reference region) displays in lightest blue. Full range of blue tones
# display FST values from 0 to this sample's maximum Fst of 0.241. French Guiana is colored in grey in order to "unmap" it. Title is centered.
  
  # Build the map as a named object so that ggsave() below saves exactly this plot
  # instead of relying on whatever last_plot() happens to return.
  # Layers, bottom to top: filled countries, French Guiana painted over in gray,
  # thin black country outlines.
  # NOTE(review): recent ggplot2 releases deprecate `size` for line widths in favour of
  # `linewidth`; left unchanged here because the installed version is not known.
  fst.map.usa <- ggplot() + 
    geom_polygon(data = merge.country, aes(x = long, y = lat, group = group, fill = fst_vsUSA_scale)) +
    geom_polygon(data = guiana.df, aes(x = long, y = lat, group = group), fill = "gray80") +
    geom_polygon(data = merge.country, aes(x = long, y = lat, group = group), color = "black", size = 0.1, fill = NA) +
    coord_quickmap() +
    # Seven colours are supplied; the fill factor has at most five levels ("0.00" plus
    # four bins), so the trailing colours go unused. Countries without data are gray.
    scale_fill_manual(name = "FST value", values = c("#CCFFFF", "lightskyblue2", "steelblue2", "dodgerblue2", "royalblue3", "#003399", "midnightblue"), na.value = "gray80") +
    theme_nothing(legend = TRUE) +
    labs(title = "National FST values vs. United States") +
    theme(plot.title = element_text(face = "bold", size = 25, hjust = 0.5)) + # hjust = 0.5 centers the title
    theme(legend.title = element_text(face = "bold", size = 20),
          legend.position = c(0.2, 0.35), # 0,0 is bottom left, 1,1 is top right
          legend.text = element_text(size = 16), 
          legend.key = element_rect(size = 4)) + # adds white border around legend keys
    guides(fill = guide_legend(keywidth = 3.5, keyheight = 2)) + # adjust size of legend keys
    annotate("text", x = 140, y = -80, label = "Sources: World Value Survey, Natural Earth Data", size = 5)
  
  # Left as a visible top-level value so the map is still drawn wherever R auto-prints
  # top-level results, exactly as the unassigned ggplot() expression was.
  fst.map.usa
  
  # Save the map; `plot` and `units` are spelled out rather than left to the
  # last_plot() default and partial argument matching.
  ggsave(filename = "fst_vs_USA.png", plot = fst.map.usa, width = 22, height = 13, units = "in")

# Clean up only the objects created for the USA map. (A blanket rm(list = ls()) would
# also delete anything else the user has in their workspace.) intersect() skips names
# that do not exist, so no warning is raised for them.
rm(list = intersect(ls(), c("merge.country", "guiana.df", "france.df", "regional",
                            "fst.max", "fst.min", "x", "div", "div2", "div3", "div4", "div5",
                            "fst.map.usa")))

  
  
###################################################
### Create a country-level map of FST vs. China ###
###################################################

# This is basically the same script as above, but for cultural distance from China rather than from the United States. 
  
  
### Load shapefile & FST data (packages are already loaded at the top of the script)
########################################
# Load the country-level boundary shapefile (1:50m scale) and convert it to a data frame.
  # All shapefiles were downloaded from http://www.naturalearthdata.com/downloads.
#  setwd("/Volumes/Seagate External Backup/Lab Manager files/Maps/ArcMap/Shapefiles/Countries 1_50")
  
  # readShapeSpatial() {maptools} reads a shapefile into a Spatial*DataFrame object.
  # Pass the shapefile name with NO extension.
  world <- readShapeSpatial("ne_50m_admin_0_countries")
  
  # Identify each shape by country name (the shapefile's "name" attribute). When the
  # shapefile is converted into a data frame, the row names become the "id" variable.
  row.names(world) <- as.character(world@data$name)
  
  # tidy() {broom} converts the shapefile into a data frame that ggplot2 can work with.
  world.df <- tidy(world)
  
  # Rename "id" to "country" so it matches the key column of the FST data.
  names(world.df)[names(world.df) == "id"] <- "country"
  
  # Load country-level FST data (this section maps the "China" column).
  #setwd("/Volumes/Seagate External Backup/Lab Manager files/Maps/R Scripts/FST map updates_3_29_18")
  # stringsAsFactors = FALSE keeps "country" as plain text under every R version, so the
  # renames below need no factor-level bookkeeping and the join key has the same type
  # on both sides.
  country.fst <- read.csv("weird_sino.csv", stringsAsFactors = FALSE)
  
  # Several countries are listed differently in the shapefile vs. the FST data.
  # Rename them in the FST data so that they match across data sources.
  # (%in% never yields NA, so rows with a missing country name are simply left alone.)
  country.fst$country[country.fst$country %in% "Great Britain"] <- "United Kingdom"
  country.fst$country[country.fst$country %in% "South Korea"] <- "Korea"
  country.fst$country[country.fst$country %in% "Viet Nam"] <- "Vietnam"
  
  # Serbia and Montenegro are combined in the FST data but are separate shapes in the
  # shapefile. Relabel the combined row as "Serbia", then append a copy of it labelled
  # "Montenegro" so that the same FST value is mapped to both countries.
  country.fst$country[country.fst$country %in% "Serbia and Montenegro"] <- "Serbia"
  montenegro <- country.fst[country.fst$country %in% "Serbia", , drop = FALSE]
  montenegro$country <- rep("Montenegro", nrow(montenegro))
  country.fst <- rbind(country.fst, montenegro)
  rm(montenegro)
  
  # Merge shapefile and FST data with left_join() {dplyr}: every polygon vertex is kept,
  # and countries without FST data get NA. The key is named explicitly so the join cannot
  # silently pick up any other column the two tables might share.
  merge.country <- left_join(world.df, country.fst, by = "country")
  rm(country.fst, world, world.df)
  
  
### Prep data for mapping
#########################
  
  # Calculate scale breaks: four equal-width divisions between the minimum and maximum
  # of the FST values vs. China (missing values ignored).
  fst.max <- as.numeric(max(merge.country$China, na.rm = TRUE))
  fst.min <- as.numeric(min(merge.country$China, na.rm = TRUE))
  x <- (fst.max - fst.min) / 4   # width of one division
  div <- fst.min
  div2 <- div + x
  div3 <- div + 2 * x
  div4 <- div + 3 * x
  div5 <- fst.max
  
  # Legend labels, one per division: "<lower> - <upper>", rounded to 3 decimals.
  # The printed upper bound is nudged down by 0.001 (up by 0.001 for the last division)
  # so that neighbouring labels do not show the same number twice.
  bin.lower <- round(c(div, div2, div3, div4), digits = 3)
  bin.upper <- round(c(div2, div3, div4, div5), digits = 3) + c(-0.001, -0.001, -0.001, 0.001)
  
  # Cut the FST data into a factor using the breaks defined above.
  # NOTE(review): the lowest break is 0 rather than fst.min, and cut() intervals are
  # right-closed by default, so a value of exactly 0 (or below) stays NA even though the
  # first label starts at fst.min. Confirm this is intended.
  merge.country$fst_vsChina_scale <- cut(merge.country$China,
                                         breaks = c(0, div2, div3, div4, div5),
                                         labels = paste(bin.lower, "-", bin.upper))
  rm(bin.lower, bin.upper)
  
  # Set the FST value for the REFERENCE COUNTRY (China) to 0.
  
  # Add "0.00" as a level of the factor, assign it to the reference country, and move it
  # to the front so that it shows up first in the legend.
  levels(merge.country$fst_vsChina_scale) <- c(levels(merge.country$fst_vsChina_scale), "0.00")
  merge.country$fst_vsChina_scale[merge.country$country == "China"] <- "0.00"
  merge.country$fst_vsChina_scale <- relevel(merge.country$fst_vsChina_scale, "0.00")
  
  # Remove Antarctica from the data so that it will not appear in the map.
  merge.country <- subset(merge.country, country != "Antarctica")
  
  
### Prepare French Guiana data (to be "unmapped")
#################################################
  # French Guiana is mapped as part of France. To un-map it, import regional shapefile, and subset French Guiana. French Guiana will
  # be added as its own layer to the map in gray.
  
  # Load the regional-level (admin-1, 1:10m scale) boundary shapefile.
  #setwd("/Volumes/Seagate External Backup/Lab Manager files/Maps/ArcMap/Shapefiles")
  regional <- readShapeSpatial("ne_10m_admin_1_states_provinces")
  
  # Keep only the regions whose "admin" attribute is France, and identify each one by
  # its "name" attribute (the row names become the "id" variable after tidy()).
  france <- subset(regional, regional@data$admin == "France")
  row.names(france) <- as.character(france@data$name)
  
  # tidy() {broom} converts the shapefile into a data frame that ggplot2 can work with;
  # the Spatial object is not needed afterwards.
  france.df <- tidy(france)
  rm(france)
  
  # Rename the "id" variable in the new data frame as "region".
  colnames(france.df)[colnames(france.df) == "id"] <- "region"
  
  # Keep only the French Guiana polygons.
  # NOTE(review): this match relies on the non-ASCII region name being read from the
  # shapefile in the same encoding as this script - confirm on the target machine.
  guiana.df <- france.df[which(france.df$region == "Guyane française"), ]
  
  
  
### Plot the map
################
  # This map plots country-level FST vs. China data. China (reference country) displays in lightest blue. Full range of blue tones
  # display FST values from 0 to this sample's maximum Fst of 0.260. French Guiana is colored in grey in order to "unmap" it. Title is centered.
  
  # Build the map as a named object so that ggsave() below saves exactly this plot
  # instead of relying on whatever last_plot() happens to return.
  # Layers, bottom to top: filled countries, French Guiana painted over in gray,
  # thin black country outlines.
  # NOTE(review): recent ggplot2 releases deprecate `size` for line widths in favour of
  # `linewidth`; left unchanged here because the installed version is not known.
  fst.map.china <- ggplot() + 
    geom_polygon(data = merge.country, aes(x = long, y = lat, group = group, fill = fst_vsChina_scale)) +
    geom_polygon(data = guiana.df, aes(x = long, y = lat, group = group), fill = "gray80") +
    geom_polygon(data = merge.country, aes(x = long, y = lat, group = group), color = "black", size = 0.1, fill = NA) +
    coord_quickmap() +
    # Seven colours are supplied; the fill factor has at most five levels ("0.00" plus
    # four bins), so the trailing colours go unused. Countries without data are gray.
    scale_fill_manual(name = "FST value", values = c("#CCFFFF", "lightskyblue2", "steelblue2", "dodgerblue2", "royalblue3", "#003399", "midnightblue"), na.value = "gray80") +
    theme_nothing(legend = TRUE) +
    labs(title = "National FST values vs. China") +
    theme(plot.title = element_text(face = "bold", size = 25, hjust = 0.5)) + # hjust = 0.5 centers the title
    theme(legend.title = element_text(face = "bold", size = 20),
          legend.position = c(0.2, 0.35), # 0,0 is bottom left, 1,1 is top right
          legend.text = element_text(size = 16), 
          legend.key = element_rect(size = 4)) + # adds white border around legend keys
    guides(fill = guide_legend(keywidth = 3.5, keyheight = 2)) + # adjust size of legend keys
    annotate("text", x = 140, y = -80, label = "Sources: World Value Survey, Natural Earth Data", size = 5)
  
  # Left as a visible top-level value so the map is still drawn wherever R auto-prints
  # top-level results, exactly as the unassigned ggplot() expression was.
  fst.map.china
  
  # Save the map; `plot` and `units` are spelled out rather than left to the
  # last_plot() default and partial argument matching.
  ggsave(filename = "fst_vs_China.png", plot = fst.map.china, width = 22, height = 13, units = "in")
  