library(XML)
library(plyr)
library(reshape2)
library(ggplot2)
library(grid)

get_scores_for_bowl <- function(num) {
  base_url <- "http://en.wikipedia.org/wiki/Super_Bowl_"
  page_html <- htmlParse(paste(base_url, num, sep = ""))
  scores_html <- getNodeSet(page_html, "//table[@align='center' and @style='background-color:transparent;']")
  scores <- readHTMLTable(scores_html[[1]], stringsAsFactors = F)
  scores <- cbind(scores, c("NFC", "AFC"))
  names(scores) <- c("team", "q1", "q2", "q3", "q4", "final_score", "conf")
  scores <- transform(scores,
                      bowl = num,
                      q1_score = as.numeric(q1),
                      q2_score = as.numeric(q1) + as.numeric(q2),
                      q3_score = as.numeric(q1) + as.numeric(q2) + as.numeric(q3),
                      final_score = as.numeric(final_score))
  return(scores)
}

integer_to_num <- function(int) {
  if (int < 0 || int > 3999) {
    stop("Value must be in the range 0 - 3,999.")
  } else if (int == 0) {
    return("N")
  } else {
    vals <- c(1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1)
    nums <- c("M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I")
    num <- ""
    for (i in 1:13) {
      while (int >= vals[i]) {
        int <- int - vals[i]
        num <- paste(num, nums[i], sep = "")
      }
    }
    return(num)
  }
}

get_all_scores <- function() {
  nums <- lapply(1:46, integer_to_num)
  scores_list <- lapply(nums, get_scores_for_bowl)
  scores <- data.frame(do.call(rbind, scores_list))
  return(scores)
}

last_digit <- function(n) {
  n_str <- as.character(n)
  last_char <- substring(n_str, nchar(n_str))
  return(last_char)
}

scores <- get_all_scores()

digits <- ddply(scores,
                .(bowl, conf),
                summarize,
                q1_digit = last_digit(q1_score),
                q2_digit = last_digit(q2_score),
                q3_digit = last_digit(q3_score),
                final_digit = last_digit(final_score))

digits_melted <- melt(digits, id = c("bowl", "conf"), measure.var = c("q1_digit", "q2_digit", "q3_digit", "final_digit"))

get_digits <- function(qtr) {
  digits_melted_qtr <- subset(digits_melted, variable == paste(qtr, "_digit", sep = ""))
  digits_qtr <- dcast(digits_melted_qtr, bowl ~ conf + variable)
  names(digits_qtr) <- c("bowl", "AFC_digit", "NFC_digit")
  return(digits_qtr)
}

q1_digits <- get_digits("q1")
q2_digits <- get_digits("q2")
q3_digits <- get_digits("q3")
final_digits <- get_digits("final")

get_probs_from_digits <- function(digits) {
  prob_mtx <- matrix(ncol = 10, nrow = 10)
  for (i in 1:10) {
    for (j in 1:10) {
      prob_mtx[i, j] <- round(100 * nrow(subset(digits, AFC_digit == i - 1 & NFC_digit == j - 1)) / 46, digits = 1)
    }
  }
  probs <- as.data.frame(prob_mtx)
  names(probs) = 0:9
  row.names(probs) = 0:9
  return(probs)
}

q1_probs <- get_probs_from_digits(q1_digits)
q2_probs <- get_probs_from_digits(q2_digits)
q3_probs <- get_probs_from_digits(q3_digits)
final_probs <- get_probs_from_digits(final_digits)

grid <- do.call(rbind, lapply(0:9, function(n) cbind(0:9, n)))
boxes <- data.frame(afc = grid[,1], nfc = grid[,2])
boxes$q1 <- melt(q1_probs)$value
boxes$q2 <- melt(q2_probs)$value
boxes$q3 <- melt(q3_probs)$value
boxes$final <- melt(final_probs)$value

capitalize <- function(str) {
  s <- strsplit(str, " ")[[1]]
  paste(toupper(substring(s, 1,1)), substring(s, 2), sep = "", collapse = " ")
}

heatmap <- function(qtr) {
  ggplot(boxes, aes_string(xmin = "afc", xmax = "afc + 1", ymin = "nfc", ymax = "nfc + 1", fill = qtr)) +
  geom_rect(color = "gray70") +
  geom_text(aes_string(label = qtr, x = "afc + 0.5", y = "nfc + 0.5"), family = "Gill Sans MT", fontface = "italic", size = 5, color = "gray50") +
  scale_fill_gradient(low = "#FFFFFF", high = "#08306B", name = "% win\n") +
  scale_x_continuous(breaks = .5:9.5,labels=0:9) +
  scale_y_continuous(breaks = .5:9.5,labels=0:9) +
  xlab("\nAFC champion") + ylab("NFC champion\n") +
  ggtitle(paste(capitalize(qtr), "win probabilities (%)\n")) +
  theme(plot.title = element_text(family = "Gill Sans MT", face = "bold", size = 24),
        axis.text.x = element_text(family = "Gill Sans MT", size = 20, color = "black"),
        axis.title.x = element_text(family = "Gill Sans MT", face = "bold", size = 20),
        axis.text.y = element_text(family = "Gill Sans MT", size = 20, color = "black"),
        axis.title.y = element_text(family = "Gill Sans MT", face = "bold", size = 20),
        legend.text = element_text(family = "Gill Sans MT",  size = 14),
        legend.title = element_text(family = "Gill Sans MT", face = "bold", size = 16),
        axis.ticks = element_line(color = "white"),
        panel.grid.minor = element_line(color = "white"),
        panel.background = element_rect(fill = "white", color = NA))
}

q1_heatmap <- heatmap("q1")
q2_heatmap <- heatmap("q2")
q3_heatmap <- heatmap("q3")
final_heatmap <- heatmap("final")
ggsave(plot = q1_heatmap, filename = "q1_heatmap.png", height = 8, width = 8)
ggsave(plot = q2_heatmap, filename = "q2_heatmap.png", height = 8, width = 8)
ggsave(plot = q3_heatmap, filename = "q3_heatmap.png", height = 8, width = 8)
ggsave(plot = final_heatmap, filename = "final_heatmap.png", height = 8, width = 8)