top of page

Click here for my predicted MVP's best plays from last season.

Most Valuable Player (MVP)

Visualizations

Defensive Player of the Year (DPOY)

Click here for my predicted DPOY's best plays from last season.

Rookie of the Year (ROY)

Click here for my predicted ROY's most athletic plays.

Who am I?

Hello and welcome to my website! My name is Zhi Yang Lin and I am a senior majoring in Mathematical Economics at the University of Pennsylvania (Penn). This website was created as the final project for OIDD 215: Analytics and Digital Economy.

​

For questions and concerns, please email linzy@sas.upenn.edu.

Introductions

23561592_1767314346612110_151149012221136062_n.png

Data

For the entirety of this project, I used nbastatR, which was provided by Alex Bresler and draws data from sources that "include, but are not limited to: NBA Stats API, Basketball Insiders, Basketball-Reference, HoopsHype, and RealGM." Needless to say, all work was done in RStudio.

R Script

# Set up ----
# Install the helper packages only when they are missing, so that re-running
# the script does not reinstall them (and require network access) every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("nbastatR", quietly = TRUE)) {
  devtools::install_github("abresler/nbastatR")
}
library(nbastatR)
library(ggplot2)
library(reshape2)

​

# Cleaning up the data set ----
# Fetch the 2018 season table and coerce it to a plain data frame.
seasonstats <- as.data.frame(get_bref_players_seasons(seasons = 2018))

# Drop the columns that are not used, selected by position.
seasonstats <- seasonstats[, -c(1, 3, 4, 6:9, 11:13, 15, 19:22, 27, 28,
                                32:34, 41, 43:52, 58, 59, 61)]

# Rename the remaining columns positionally.
names(seasonstats) <- c(
  "Name", "Position", "First_Season",
  "Num_Games_Played", "Minutes_Played",
  "PER", "True_Shooting_PCT",
  "Rebound_PCT", "Assist_PCT", "Steal_PCT", "Block_PCT",
  "Offensive_Win_Shares", "Defensive_Win_Shares", "WS",
  "+/-", "Value_Over_Replacement_Player",
  "Num_Games_Started",
  "FG_PCT", "3-Point_FG_PCT", "2-Point_FG_PCT", "Free_Throw_PCT",
  "Tot_DRebounds", "Tot_Rebounds", "Tot_Assists",
  "Tot_Steals", "Tot_Blocks", "Tot_Points"
)

# Per-game averages: each season total divided by the number of games played.
REB <- with(seasonstats, Tot_Rebounds / Num_Games_Played)
AST <- with(seasonstats, Tot_Assists / Num_Games_Played)
PTS <- with(seasonstats, Tot_Points / Num_Games_Played)
STL <- with(seasonstats, Tot_Steals / Num_Games_Played)
BLK <- with(seasonstats, Tot_Blocks / Num_Games_Played)
DRB <- with(seasonstats, Tot_DRebounds / Num_Games_Played)

# Append the per-game columns (in this order) to the right of the table.
Pergamestats <- cbind(REB, AST, PTS, STL, BLK, DRB)
seasonstats <- cbind(seasonstats, Pergamestats)

​

#Visualizations

# MVP ----

# In the last 3 decades, the lowest win share of any NBA MVP is 9.6, so only
# players above that win-share threshold are kept as candidates.
# Columns are selected by name (same columns as positions 1, 6, 14, 15, 28:30).
mvpcand <- seasonstats[
  which(seasonstats$WS > 9.6),
  c("Name", "PER", "WS", "+/-", "REB", "AST", "PTS")
]

# Long format (one row per player/measure) for the grouped bar chart.
mvpcand.m <- melt(mvpcand, id.vars = "Name")
ggplot(mvpcand.m, aes(x = Name, y = value)) +
  geom_bar(aes(fill = reorder(variable, value)), position = "dodge",
           stat = "identity", color = "black") +
  labs(title = "MVP Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Score of candidate row i: the sum of its six measures (columns 2 to 7).
mvpsum <- function(i) {
  sum(mvpcand[i, 2:7])
}

# Score every candidate, however many the filter returned (the loop bound used
# to be hard-coded, which broke whenever the candidate count changed).
MVPtotal <- vapply(seq_len(nrow(mvpcand)), mvpsum, numeric(1))
mvpcand <- cbind(mvpcand, MVPtotal)

# Predicted MVP: the candidate(s) with the highest total score. NA totals are
# ignored so a single missing stat does not blank out the result.
MVP <- mvpcand[which(mvpcand$MVPtotal == max(mvpcand$MVPtotal, na.rm = TRUE)), 1]
MVP

​

# ROY ----

# In the last 3 decades, the lowest win share of any ROY is 1.3.
# In the last 3 decades, the lowest minutes per game of any ROY is 26.4.
# Create a minutes-played-per-game column for filtering.
MPPG <- seasonstats$Minutes_Played / seasonstats$Num_Games_Played
seasonstats <- cbind(seasonstats, MPPG)

# Change the entry of Ben Simmons's First_Season to 2017 so that he passes the
# First_Season == 2017 filter below.
seasonstats$First_Season[which(seasonstats$Name == "Ben Simmons")] <- 2017

# Candidates: first season 2017, above both thresholds.
# Columns are selected by name (same columns as positions 1, 6, 14, 15, 28:30).
roycand <- seasonstats[
  which(seasonstats$First_Season == 2017 &
          seasonstats$WS > 1.3 &
          seasonstats$MPPG > 26.4),
  c("Name", "PER", "WS", "+/-", "REB", "AST", "PTS")
]

# Long format (one row per player/measure) for the grouped bar chart.
roycand.m <- melt(roycand, id.vars = "Name")
ggplot(roycand.m, aes(x = Name, y = value)) +
  geom_bar(aes(fill = reorder(variable, value)), position = "dodge",
           stat = "identity", color = "black") +
  labs(title = "ROY Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Score of candidate row i: the sum of its six measures (columns 2 to 7).
roysum <- function(i) {
  sum(roycand[i, 2:7])
}

# Score every candidate, however many the filter returned (the loop bound used
# to be hard-coded, which broke whenever the candidate count changed).
ROYtotal <- vapply(seq_len(nrow(roycand)), roysum, numeric(1))
roycand <- cbind(roycand, ROYtotal)

# Predicted ROY: the candidate(s) with the highest total score. NA totals are
# ignored so a single missing stat does not blank out the result.
ROY <- roycand[which(roycand$ROYtotal == max(roycand$ROYtotal, na.rm = TRUE)), 1]
ROY

 

# DPOY ----
# In the last 3 decades, the lowest win share of any DPOY is 7.6.
# In the last 3 decades, the lowest blocks per game of any DPOY is 0.7.
# In the last 3 decades, the lowest steals per game of any DPOY is 0.4.
# In the last 3 decades, the lowest minutes per game of any DPOY is 29.0.

# Candidates: players above all four thresholds. Relies on the MPPG column
# that the ROY section appends to seasonstats.
# Columns are selected by name (same columns as positions 1, 6, 13, 31:33).
dpoycand <- seasonstats[
  which(seasonstats$WS > 7.6 &
          seasonstats$BLK > 0.7 &
          seasonstats$STL > 0.4 &
          seasonstats$MPPG > 29.0),
  c("Name", "PER", "Defensive_Win_Shares", "STL", "BLK", "DRB")
]

# Long format (one row per player/measure) for the grouped bar chart.
dpoycand.m <- melt(dpoycand, id.vars = "Name")
ggplot(dpoycand.m, aes(x = Name, y = value)) +
  geom_bar(aes(fill = reorder(variable, value)), position = "dodge",
           stat = "identity", color = "black") +
  labs(title = "DPOY Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Score of candidate row i: the sum of its five measures (columns 2 to 6).
dpoysum <- function(i) {
  sum(dpoycand[i, 2:6])
}

# Score every candidate, however many the filter returned (the loop bound used
# to be hard-coded, which broke whenever the candidate count changed).
DPOYtotal <- vapply(seq_len(nrow(dpoycand)), dpoysum, numeric(1))
dpoycand <- cbind(dpoycand, DPOYtotal)

# Predicted DPOY: the candidate(s) with the highest total score. NA totals are
# ignored so a single missing stat does not blank out the result.
# The result keeps its original variable name, DOY.
DOY <- dpoycand[which(dpoycand$DPOYtotal == max(dpoycand$DPOYtotal, na.rm = TRUE)), 1]
DOY

©2018 BY NBA '17-'18 AWARDS. PROUDLY CREATED WITH WIX.COM

bottom of page