#############################################################
### Nonmetric Multidimensional Scaling Graphs -- Fst Data ###
#############################################################

library(ggplot2)
library(MASS)
library(stringr)
library(ggrepel) # contains geom_text_repel, which moves ggplot2 text labels away from points to resolve overlaps


### Function for filling in matrix
source(file = "../left_to_symmetric.R")

### Prepare data for plotting
# The "100+" file is read only to decide what to keep from the full table:
# rows whose X value also occurs there, and columns whose names also occur there.
d.temp <- read.csv("US_EU_India_China_Fst_100+.csv")
d <- read.csv("US_EU_India_China_Fst.csv")
keep.rows <- d$X %in% d.temp$X
keep.cols <- colnames(d) %in% colnames(d.temp)
d <- d[keep.rows, keep.cols]

# Move the first column into the row names, then drop it from the table
row.names(d) <- d[[1]]
d <- d[, -1]

# Fill in the matrix with left.to.sym() and convert it to a dist object
d.mat <- as.matrix(d)
d.mat2 <- left.to.sym(d.mat)
d.dist <- as.dist(d.mat2)

# Two-dimensional nonmetric MDS; the resulting data frame carries the
# coordinates in the columns points.1 and points.2 used further down
fit <- isoMDS(d.dist, k = 2)
fit.df <- data.frame(fit)

# Annotate every MDS point with a display label, a country and a plotting
# region, all derived from the row names of fit.df. Row names are matched
# against the codes "CN:", "IN:" and "US:"; names without any colon are used
# as the country name directly.
point.names <- rownames(fit.df)

# Add "label" variable, for identifying points
fit.df$label <- gsub(".*: ", "", point.names) # remove country code prefix from region names

# Add variable to indicate country, for grouping by color.
# The assignments run in this order so a later match overrides an earlier one.
fit.df$country <- NA_character_
fit.df$country[str_detect(point.names, "CN:")] <- "China"
fit.df$country[str_detect(point.names, "IN:")] <- "India"
fit.df$country[str_detect(point.names, "US:")] <- "United States"
no.prefix <- !str_detect(point.names, ":")
fit.df$country[no.prefix] <- point.names[no.prefix]

# A name that contains a colon but none of the known codes matches nothing
# above and keeps an NA country. Report it instead of failing later with an
# opaque subscript error.
if (anyNA(fit.df$country)) {
  warning("Unrecognised country prefix; country/region left as NA for: ",
          paste(point.names[is.na(fit.df$country)], collapse = ", "),
          call. = FALSE)
}

# Add a variable for larger regional grouping (includes EU).
# %in% never yields NA, so the masks below are safe to use in subscripted
# assignment even when some countries are NA; those rows keep an NA region.
big.countries <- c("China", "India", "United States")
is.big <- fit.df$country %in% big.countries
fit.df$region <- NA_character_
fit.df$region[is.big] <- fit.df$country[is.big]
fit.df$region[!is.big & !is.na(fit.df$country)] <- "European Union"




######################
### Generate plots ###
######################

### Coloring Country/EU by Color, with Polygons

# Create hulls for each country group

# Return the rows of points.df that belong to region.name and lie on the
# convex hull of that region's (points.1, points.2) coordinates, in the order
# reported by chull().
#   points.df   - data frame with columns region, points.1 and points.2
#   region.name - value of the region column to select
# which() drops rows whose region is NA; plain logical indexing would turn
# them into all-NA rows, which chull() rejects.
region.hull <- function(points.df, region.name) {
  members <- points.df[which(points.df$region == region.name), ]
  members[chull(members[, c("points.1", "points.2")]), ]
}

China <- region.hull(fit.df, "China")
India <- region.hull(fit.df, "India")
US <- region.hull(fit.df, "United States")
EU <- region.hull(fit.df, "European Union")

# One data frame holding the hull vertices of all four groups, for geom_polygon
hull.data.EU <- rbind(China, India, US, EU)

# NMDS scatter plot: one point per row of fit.df colored by region, a
# translucent hull polygon per region, and repelled text labels.
# Axis text and ticks are blanked; only the axis titles are kept.
nmds.plot <- ggplot(data = fit.df, aes(x = points.1, y = points.2)) +
  geom_polygon(data = hull.data.EU,
               aes(x = points.1, y = points.2, fill = region, group = region),
               alpha = 0.2) +
  # The point color legend is suppressed; the fill legend below is the only one shown
  geom_point(aes(color = region), size = 3, show.legend = FALSE) +
  geom_text_repel(aes(label = label), alpha = 0.7, size = 3) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  labs(x = "NMDS 1", y = "NMDS 2") +
  # Legend keys are drawn more opaque than the polygons themselves
  guides(fill = guide_legend(override.aes = list(alpha = 0.75), title = "Region"))

# Bare top-level reference so the plot is still displayed wherever R auto-prints
nmds.plot

# Save the plot explicitly rather than relying on last_plot(), and spell out
# `filename` instead of depending on partial argument matching of `file`
ggsave(filename = "NMDS_FST_Color by country or EU_Polygon.png", plot = nmds.plot,
       height = 8, width = 8, units = "in")

