Inspired by FiveThirtyEight, I have been trying to calculate Elo ratings for college basketball teams from the 2016 season up to the present day. Partway through the games, however, the computed Elo ratings stop being numeric values and become NA.
I am trying to determine whether the data I pulled is bad and, if so, whether there is a way to work around the problem. What should I do? Here is the code so far.
library(elo)
library(data.table)
# Elo tuning constants ----
# K: base rate at which Elo ratings change after a game.
K <- 20.0
# REVERT: fraction by which ratings are regressed between seasons, so a
# team retains the remaining 2/3 of the previous season's rating.
REVERT <- 1 / 3

# Game data ----
# One row per game, read from the CSV in the working directory.
CBB <- read.csv("CBB3.csv", header = TRUE, stringsAsFactors = FALSE)

# Final scores of Team.A and Team.B.
Team.A.Score <- as.integer(CBB$Team.A.Score)
Team.B.Score <- as.integer(CBB$Team.B.Score)
# Home-court adjustments applied to Team.A and Team.B.
A.Adv <- as.integer(CBB$A.Adv)
B.Adv <- as.integer(CBB$B.Adv)
# Season of each game.
Season <- CBB$Season

# A missing score or adjustment propagates NA into every later rating of
# the teams involved, so report such rows up front instead of letting
# them surface only as NA ratings.
na.rows <- which(
  is.na(Team.A.Score) | is.na(Team.B.Score) | is.na(A.Adv) | is.na(B.Adv)
)
if (length(na.rows) > 0) {
  warning(
    length(na.rows), " game(s) have missing scores or adjustments; ",
    "first affected row(s): ", paste(head(na.rows, 10), collapse = ", "),
    call. = FALSE
  )
}
# First pass ----
# Run Elo over every game with K scaled by the log of the absolute
# margin of victory, applying the per-team adjustments and regressing
# ratings toward 1505 by REVERT whenever Season changes.
elo <- elo.run(
  formula = score(Team.A.Score, Team.B.Score) ~
    adjust(Team.A, A.Adv) +
    adjust(Team.B, B.Adv) +
    regress(Season, 1505, REVERT),
  data = CBB,
  k = K * log(abs(Team.A.Score - Team.B.Score) + 1)
)
# First-pass results ----
# Turn the first-pass Elo results into a data frame (one row per game)
# and pull the matching columns from the game data.
elodf <- as.data.frame(elo)
# Location and season of each game.
Location <- CBB$Game.Location
Season <- CBB$Season

# Ratings of Team.A and Team.B as reported after each game's update.
elo.A <- elodf$elo.A
elo.B <- elodf$elo.B

# Recover the pre-game ratings by undoing each game's update.
# Newer releases of the 'elo' package report the change per team
# (update.A / update.B); older ones report a single 'update' column for
# Team.A. `elodf$update` cannot be used for both: with update.A and
# update.B present it is an ambiguous partial match and returns NULL.
elo.cols <- names(elodf)
if (all(c("update.A", "update.B") %in% elo.cols)) {
  elo.delta <- elodf[["update.A"]]
  elo.A.before <- elo.A - elodf[["update.A"]]
  elo.B.before <- elo.B - elodf[["update.B"]]
} else if ("update" %in% elo.cols) {
  elo.delta <- elodf[["update"]]
  # Team.B's change is the negative of Team.A's.
  elo.A.before <- elo.A - elo.delta
  elo.B.before <- elo.B + elo.delta
} else {
  stop(
    "Elo results have no 'update' or 'update.A'/'update.B' column",
    call. = FALSE
  )
}
# Margin-of-victory multiplier ----
# Scales K by the size of the win, damped by the pre-game rating gap to
# account for autocorrelation.

# Point differential from Team.A's perspective (negative when Team.A
# loses).
PD <- Team.A.Score - Team.B.Score

# The log term must use the absolute margin: log(PD + 1) is -Inf when
# Team.A loses by one and NaN for any larger loss, and a non-finite K
# turns every subsequent rating of the teams involved into NA.
# NOTE(review): the damping term uses the absolute rating gap; the usual
# formulation uses winner-minus-loser rating instead -- confirm which is
# intended.
multiplier <- log(abs(PD) + 1) *
  (2.2 / (abs(elo.A.before - elo.B.before) * 0.001 + 2.2))
# Final pass ----
# Re-run Elo over the same games with K scaled by the margin-of-victory
# multiplier, keeping the same adjustments and between-season regression
# toward 1505 by REVERT.
elo.final <- elo.run(
  formula = score(Team.A.Score, Team.B.Score) ~
    adjust(Team.A, A.Adv) +
    adjust(Team.B, B.Adv) +
    regress(Season, 1505, REVERT),
  data = CBB,
  k = K * multiplier
)

# Per-game results of the final pass as a data frame.
elodf1 <- as.data.frame(elo.final)
# Point spread ----
# The expected point spread of a game is the difference between the two
# teams' pre-game Elo ratings divided by 25.
Points.Spread <- (elo.A.before - elo.B.before) / 25

# Game details ----
# Add the game date, location, both teams' scores, the season, and the
# point spread to elodf1.

# Game.Date is not defined anywhere earlier in the script, so take it
# from the game data the same way Location is.
# NOTE(review): assumes the CSV column is named Game.Date -- confirm.
if (!exists("Game.Date")) {
  Game.Date <- CBB$Game.Date
}

elodf1$Day.of.Game <- Game.Date
elodf1$Location <- Location
elodf1$Team.A.Score <- Team.A.Score
elodf1$Team.B.Score <- Team.B.Score
elodf1$Season <- Season
elodf1$Points.Spread <- Points.Spread
Comments
Post a Comment