
Who am I?
Hello and welcome to my website! My name is Zhi Yang Lin, and I am a senior majoring in Mathematical Economics at the University of Pennsylvania (Penn). This website was created as the final project for OIDD 215: Analytics and Digital Economy.

For questions or concerns, please email linzy@sas.upenn.edu.
Introductions


Data
For the entirety of this project, I used nbastatR, an R package provided by Alex Bresler that draws data from sources that "include, but are not limited to: NBA Stats API, Basketball Insiders, Basketball-Reference, HoopsHype, and RealGM." All work was done in RStudio.
R Script
# Set up ----
# Install the two installer-managed packages only when they are missing, so
# that re-running the script does not reinstall them (and hit the network)
# every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("nbastatR", quietly = TRUE)) {
  devtools::install_github("abresler/nbastatR")
}

# Attach the packages used below: nbastatR for the data download,
# reshape2 for melt(), ggplot2 for the charts.
library("nbastatR")
library(ggplot2)
library(reshape2)

# Cleaning up the data set ----
# Download the 2018 season player table and coerce it to a plain data frame.
seasonstats <- as.data.frame(get_bref_players_seasons(seasons = 2018))

# Drop the columns that are not used in the analysis. Columns are selected by
# position, so this step depends on the column order returned by nbastatR.
seasonstats <- seasonstats[, -c(
  1, 3, 4, 6:9, 11:13, 15, 19:22, 27, 28, 32:34, 41, 43:52, 58, 59, 61
)]

# Give the 27 remaining columns readable names. Later sections address these
# columns by position (e.g. 6 = PER, 13 = Defensive_Win_Shares, 14 = WS,
# 15 = +/-), so the order here matters.
names(seasonstats) <- c(
  "Name", "Position", "First_Season", "Num_Games_Played", "Minutes_Played",
  "PER", "True_Shooting_PCT", "Rebound_PCT", "Assist_PCT",
  "Steal_PCT", "Block_PCT", "Offensive_Win_Shares",
  "Defensive_Win_Shares", "WS",
  "+/-", "Value_Over_Replacement_Player",
  "Num_Games_Started", "FG_PCT", "3-Point_FG_PCT",
  "2-Point_FG_PCT", "Free_Throw_PCT", "Tot_DRebounds",
  "Tot_Rebounds", "Tot_Assists", "Tot_Steals", "Tot_Blocks",
  "Tot_Points"
)

# Per-game averages: each season total divided by games played.
REB <- seasonstats$Tot_Rebounds / seasonstats$Num_Games_Played
AST <- seasonstats$Tot_Assists / seasonstats$Num_Games_Played
PTS <- seasonstats$Tot_Points / seasonstats$Num_Games_Played
STL <- seasonstats$Tot_Steals / seasonstats$Num_Games_Played
BLK <- seasonstats$Tot_Blocks / seasonstats$Num_Games_Played
DRB <- seasonstats$Tot_DRebounds / seasonstats$Num_Games_Played

# Append the per-game columns in this order; they become columns 28 to 33
# (REB, AST, PTS, STL, BLK, DRB), which the award sections index by position.
Pergamestats <- cbind(REB, AST, PTS, STL, BLK, DRB)
seasonstats <- cbind(seasonstats, Pergamestats)

# Visualizations ----
# MVP ----
# In the last 3 decades, the lowest win share of any NBA MVP is 9.6, so only
# players above that threshold are kept as candidates.
# Columns kept by position: Name, PER, WS, +/-, REB, AST, PTS.
mvpcand <- seasonstats[which(seasonstats$WS > 9.6), c(1, 6, 14, 15, 28:30)]

# Reshape to long format (one row per player/measure pair) and draw one
# group of bars per candidate.
mvpcand.m <- melt(mvpcand, id.vars = "Name")
ggplot(mvpcand.m, aes(x = Name, y = value)) +
  geom_bar(
    aes(fill = reorder(variable, value)),
    position = "dodge", stat = "identity", color = "black"
  ) +
  labs(title = "MVP Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Combined score of candidate row i: the sum of its six measures
# (columns 2 to 7 of mvpcand).
mvpsum <- function(i) {
  sum(mvpcand[i, 2:7])
}

# Score every candidate that passed the filter. The row count is taken from
# the data rather than hard-coded, so the script keeps working when the
# number of candidates changes.
MVPtotal <- vapply(seq_len(nrow(mvpcand)), mvpsum, numeric(1))
print(MVPtotal)
mvpcand <- cbind(mvpcand, MVPtotal = MVPtotal)

# The pick is the candidate with the highest combined score
# (all tied candidates are returned).
MVP <- mvpcand[which(mvpcand$MVPtotal == max(mvpcand$MVPtotal)), 1]
MVP

# ROY ----
# In the last 3 decades, the lowest win share of any ROY is 1.3.
# In the last 3 decades, the lowest minutes per game of any ROY is 26.4.

# Create a minutes-played-per-game column for filtering. Assigning by name
# appends the column on the first run and overwrites it on a re-run, instead
# of adding a duplicate.
MPPG <- seasonstats$Minutes_Played / seasonstats$Num_Games_Played
seasonstats$MPPG <- MPPG

# Change the entry of Ben Simmons's First_Season (column 3) to 2017 so that
# he passes the first-season filter below.
seasonstats[which(seasonstats$Name == "Ben Simmons"), 3] <- 2017

# Candidates: first season 2017, above both historical minimums.
# Columns kept by position: Name, PER, WS, +/-, REB, AST, PTS.
roycand <- seasonstats[
  which(
    seasonstats$First_Season == 2017 &
      seasonstats$WS > 1.3 &
      seasonstats$MPPG > 26.4
  ),
  c(1, 6, 14, 15, 28:30)
]

# Reshape to long format and draw one group of bars per candidate.
roycand.m <- melt(roycand, id.vars = "Name")
ggplot(roycand.m, aes(x = Name, y = value)) +
  geom_bar(
    aes(fill = reorder(variable, value)),
    position = "dodge", stat = "identity", color = "black"
  ) +
  labs(title = "ROY Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Combined score of candidate row i: the sum of its six measures
# (columns 2 to 7 of roycand).
roysum <- function(i) {
  sum(roycand[i, 2:7])
}

# Score every candidate that passed the filter; the row count comes from the
# data rather than a hard-coded number.
ROYtotal <- vapply(seq_len(nrow(roycand)), roysum, numeric(1))
print(ROYtotal)
roycand <- cbind(roycand, ROYtotal = ROYtotal)

# The pick is the candidate with the highest combined score
# (all tied candidates are returned).
ROY <- roycand[which(roycand$ROYtotal == max(roycand$ROYtotal)), 1]
ROY
# DPOY ----
# In the last 3 decades, the lowest win share of any DPOY is 7.6.
# In the last 3 decades, the lowest blocks per game of any DPOY is 0.7.
# In the last 3 decades, the lowest steals per game of any DPOY is 0.4.
# In the last 3 decades, the lowest minutes per game of any DPOY is 29.0.
# Candidates must exceed all four minimums (MPPG is the minutes-per-game
# column added in the ROY section).
# Columns kept by position: Name, PER, Defensive_Win_Shares, STL, BLK, DRB.
dpoycand <- seasonstats[
  which(
    seasonstats$WS > 7.6 &
      seasonstats$BLK > 0.7 &
      seasonstats$STL > 0.4 &
      seasonstats$MPPG > 29.0
  ),
  c(1, 6, 13, 31:33)
]

# Reshape to long format and draw one group of bars per candidate.
dpoycand.m <- melt(dpoycand, id.vars = "Name")
ggplot(dpoycand.m, aes(x = Name, y = value)) +
  geom_bar(
    aes(fill = reorder(variable, value)),
    position = "dodge", stat = "identity", color = "black"
  ) +
  labs(title = "DPOY Candidates") +
  scale_fill_discrete(name = "Measures") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Combined score of candidate row i: the sum of its five measures
# (columns 2 to 6 of dpoycand).
dpoysum <- function(i) {
  sum(dpoycand[i, 2:6])
}

# Score every candidate that passed the filter; the row count comes from the
# data rather than a hard-coded number.
DPOYtotal <- vapply(seq_len(nrow(dpoycand)), dpoysum, numeric(1))
print(DPOYtotal)
dpoycand <- cbind(dpoycand, DPOYtotal = DPOYtotal)

# The pick is the candidate with the highest combined score
# (all tied candidates are returned).
DOY <- dpoycand[which(dpoycand$DPOYtotal == max(dpoycand$DPOYtotal)), 1]
DOY