Skip to content

Commit e9a4ee1

Browse files
20250227 - clean up
1 parent 66230d4 commit e9a4ee1

12 files changed

+62
-61
lines changed

basic-statistics.qmd

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ For instance, when evaluating whether Quarterbacks have <u>longer</u> careers th
456456
#| layout-ncol: 2
457457
#| fig-cap: "Interpretation of *p*-Values When Examining The Differences Between Groups. The vertical black lines reflect the group means."
458458
#| fig-alt: "Interpretation of *p*-Values When Examining The Differences Between Groups. The vertical black lines reflect the group means."
459-
#| fig-subcap:
459+
#| fig-subcap:
460460
#| - "What is the probability my data would look like this..."
461461
#| - "...if in the population, the groups were really this?"
462462
#| code-fold: true
@@ -555,7 +555,7 @@ For instance, when evaluating whether number of carries is <u>positively</u> ass
555555
#| layout-ncol: 2
556556
#| fig-cap: "Interpretation of *p*-Values When Examining The Association Between Variables."
557557
#| fig-alt: "Interpretation of *p*-Values When Examining The Association Between Variables."
558-
#| fig-subcap:
558+
#| fig-subcap:
559559
#| - "What is the probability my data would look like this..."
560560
#| - "...if in the population, the association was really this?"
561561
#| code-fold: true
@@ -711,7 +711,7 @@ z_crit <- qnorm(1-(0.05/2), m1, sd1)
711711
min1 <- m1-sd1*4
712712
max1 <- m1+sd1*4
713713
min2 <- m2-sd2*4
714-
max2 <- m2+sd2*4
714+
max2 <- m2+sd2*4
715715
# create x sequence
716716
x <- seq(min(min1,min2), max(max1, max2), .01)
717717
# generate normal dist #1
@@ -726,52 +726,53 @@ df2 <- data.frame("x" = x, "y" = y2)
726726
# Alpha polygon
727727
y.poly <- pmin(y1,y2)
728728
poly1 <- data.frame(x=x, y=y.poly)
729-
poly1 <- poly1[poly1$x >= z_crit, ]
729+
poly1 <- poly1[poly1$x >= z_crit, ]
730730
poly1<-rbind(poly1, c(z_crit, 0)) # add lower-left corner
731731
732732
# Beta polygon
733733
poly2 <- df2
734-
poly2 <- poly2[poly2$x <= z_crit,]
734+
poly2 <- poly2[poly2$x <= z_crit,]
735735
poly2<-rbind(poly2, c(z_crit, 0)) # add lower-right corner (poly2 only keeps x <= z_crit)
736736
737737
# power polygon; 1-beta
738738
poly3 <- df2
739-
poly3 <- poly3[poly3$x >= z_crit,]
739+
poly3 <- poly3[poly3$x >= z_crit,]
740740
poly3 <-rbind(poly3, c(z_crit, 0)) # add lower-left corner
741741
742-
# combine polygons.
742+
# combine polygons
743743
poly1$id <- 3 # alpha, give it the highest number to make it the top layer
744744
poly2$id <- 2 # beta
745745
poly3$id <- 1 # power; 1 - beta
746746
poly <- rbind(poly1, poly2, poly3)
747-
poly$id <- factor(poly$id, labels=c("power","beta","alpha"))
747+
poly$id <- factor(poly$id, labels = c("power", "beta", "alpha"))
748748
749749
# plot with ggplot2
750750
ggplot(poly, aes(x,y, fill=id, group=id)) +
751751
geom_polygon(show.legend=F, alpha=I(8/10)) +
752752
# add line for the null-hypothesis (H0) distribution
753-
geom_line(data=df1, aes(x,y, color="H0", group=NULL, fill=NULL), linewidth=1.5, show_guide=F) +
753+
geom_line(data=df1, aes(x, y, color = "H0", group = NULL, fill = NULL), linewidth = 1.5, show_guide = FALSE) +
754754
# add line for treatment group. These lines could be combined into one dataframe.
755-
geom_line(data=df2, aes(color="HA", group=NULL, fill=NULL),linewidth=1.5, show_guide=F) +
755+
geom_line(data=df2, aes(color = "HA", group = NULL, fill = NULL), linewidth = 1.5, show_guide = FALSE) +
756756
# add vlines for z_crit
757757
geom_vline(xintercept = z_crit, linewidth=1, linetype="dashed") +
758-
# change colors
759-
scale_color_manual("Group",
760-
values= c("HA" = "#981e0b","H0" = "black")) +
761-
scale_fill_manual("test", values= c("alpha" = "#0d6374","beta" = "#be805e","power"="#7cecee")) +
758+
# change colors
759+
scale_color_manual(
760+
"Group",
761+
values = c("HA" = "#981e0b", "H0" = "black")) +
762+
scale_fill_manual("test", values= c("alpha" = "#0d6374", "beta" = "#be805e", "power"="#7cecee")) +
762763
# beta arrow
763-
annotate("segment", x=0.1, y=0.045, xend=1.3, yend=0.01, arrow = arrow(length = unit(0.3, "cm")), linewidth=1) +
764+
annotate("segment", x = 0.1, y = 0.045, xend = 1.3, yend = 0.01, arrow = arrow(length = unit(0.3, "cm")), linewidth = 1) +
764765
annotate("text", label="beta", x=0, y=0.05, parse=T, size=8) +
765766
# alpha arrow
766-
annotate("segment", x=4, y=0.043, xend=3.4, yend=0.01, arrow = arrow(length = unit(0.3, "cm")), linewidth=1) +
767+
annotate("segment", x = 4, y = 0.043, xend = 3.4, yend = 0.01, arrow = arrow(length = unit(0.3, "cm")), linewidth = 1) +
767768
annotate("text", label="frac(alpha,2)", x=4.2, y=0.05, parse=T, size=8) +
768769
# power arrow
769-
annotate("segment", x=6, y=0.2, xend=4.5, yend=0.15, arrow = arrow(length = unit(0.3, "cm")), linewidth=1) +
770-
annotate("text", label=expression(paste(1-beta, " (\"power\")")), x=6.1, y=0.21, parse=T, size=8) +
770+
annotate("segment", x = 6, y = 0.2, xend = 4.5, yend = 0.15, arrow = arrow(length = unit(0.3, "cm")), linewidth = 1) +
771+
annotate("text", label = expression(paste(1-beta, " (\"power\")")), x = 6.1, y = 0.21, parse = TRUE, size = 8) +
771772
# H_0 title
772-
annotate("text", label="H[0]", x=m1, y=0.28, parse=T, size=8) +
773+
annotate("text", label = "H[0]", x = m1, y = 0.28, parse = TRUE, size = 8) +
773774
# H_a title
774-
annotate("text", label="H[1]", x=m2, y=0.28, parse=T, size=8) +
775+
annotate("text", label = "H[1]", x = m2, y = 0.28, parse = TRUE, size = 8) +
775776
ggtitle("Statistical Power") +
776777
# remove some elements
777778
theme(

causal-inference.qmd

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ Predicting within-person change provides stronger evidence consistent with causa
168168
However, predicting within-person change does not, by itself, control for time-varying [confounds](#sec-causalDiagramConfounding).
169169
So, it can also be useful to control for time-varying [confounds](#sec-causalDiagramConfounding), such as by use of [control variables](#sec-causalInferenceControlVariables).
170170

171-
#### Control Variables {#sec-causalInferenceControlVariables}
171+
#### Control Variables {#sec-causalInferenceControlVariables}
172172

173173
One of the plausible alternatives to the inference that `X` causes `Y` is that there are third variable [confounds](#sec-causalDiagramConfounding) that influence both `X` and `Y`, thus explaining why `X` and `Y` are associated, as depicted in Figures [-@fig-correlationAndCausation3] and [-@fig-ZCausesXandY].
174174
Thus, another approach that can help increase [internal validity](#sec-internalValidity) is to include plausible [confounds](#sec-causalDiagramConfounding) as control variables.
@@ -372,7 +372,7 @@ plot(dagitty::graphLayout(mediationDag))
372372
dagitty::impliedConditionalIndependencies(mediationDag)
373373
374374
dagitty::adjustmentSets(
375-
mediationDag,
375+
mediationDag,
376376
exposure = "M1",
377377
outcome = "Y",
378378
effect = "total")
@@ -419,7 +419,7 @@ An example of confounding is depicted in @fig-counfounding:
419419
confounding <- ggdag::confounder_triangle(
420420
x = "Player Endurance",
421421
y = "Field Goals Made",
422-
z = "Stadium Altitude")
422+
z = "Stadium Altitude")
423423
424424
confounding %>%
425425
ggdag(
@@ -437,7 +437,7 @@ The output indicates that player endurance (`X`) and field goals made (`Y`) are
437437

438438
```{r}
439439
dagitty::adjustmentSets(
440-
confounding,
440+
confounding,
441441
exposure = "x",
442442
outcome = "y",
443443
effect = "total")
@@ -483,7 +483,7 @@ In other words, in this example, player preparation is the mechanism that fully
483483

484484
```{r}
485485
dagitty::adjustmentSets(
486-
full_mediation,
486+
full_mediation,
487487
exposure = "x",
488488
outcome = "y",
489489
effect = "direct")
@@ -493,7 +493,7 @@ The output indicates that, to obtain an unbiased estimate of the *direct* causal
493493

494494
```{r}
495495
dagitty::adjustmentSets(
496-
full_mediation,
496+
full_mediation,
497497
exposure = "x",
498498
outcome = "y",
499499
effect = "total")
@@ -536,7 +536,7 @@ For instance, coaching quality could also influence player fantasy points throug
536536

537537
```{r}
538538
dagitty::adjustmentSets(
539-
partial_mediation,
539+
partial_mediation,
540540
exposure = "x",
541541
outcome = "y",
542542
effect = "direct")
@@ -546,7 +546,7 @@ As with [full mediation](#sec-causalDiagramMediationPartial), the output indicat
546546

547547
```{r}
548548
dagitty::adjustmentSets(
549-
partial_mediation,
549+
partial_mediation,
550550
exposure = "x",
551551
outcome = "y",
552552
effect = "total")
@@ -588,13 +588,13 @@ In this example, `X` and `M` are conditionally independent with `Z` when account
588588

589589
```{r}
590590
dagitty::adjustmentSets(
591-
descendentDag,
591+
descendentDag,
592592
exposure = "X",
593593
outcome = "Y",
594594
effect = "direct")
595595
596596
dagitty::adjustmentSets(
597-
descendentDag,
597+
descendentDag,
598598
exposure = "X",
599599
outcome = "Y",
600600
effect = "total")
@@ -633,7 +633,7 @@ In this example collision, diet (`X`) and coaching strategy (`Y`) are independen
633633

634634
```{r}
635635
dagitty::adjustmentSets(
636-
colliderBias1,
636+
colliderBias1,
637637
exposure = "x",
638638
outcome = "y",
639639
effect = "total")
@@ -672,7 +672,7 @@ In this example of collider bias, there are no conditional independencies.
672672

673673
```{r}
674674
dagitty::adjustmentSets(
675-
colliderBias2,
675+
colliderBias2,
676676
exposure = "x",
677677
outcome = "y",
678678
effect = "total")
@@ -718,7 +718,7 @@ As the output indicates, there are several conditional independencies.
718718

719719
```{r}
720720
dagitty::adjustmentSets(
721-
mBias,
721+
mBias,
722722
exposure = "x",
723723
outcome = "y",
724724
effect = "total")
@@ -782,7 +782,7 @@ As the output indicates, there are several conditional independencies.
782782

783783
```{r}
784784
dagitty::adjustmentSets(
785-
butterflyBias,
785+
butterflyBias,
786786
exposure = "x",
787787
outcome = "y",
788788
effect = "total")

cognitive-bias.qmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ Indeed, it is estimated that nearly half (~45%) of the variability in fantasy fo
224224
A manager who won their league in the prior season may believe they will perform better than they actually will (overestimation), will perform better than average (overplacement), and may hold excessive confidence regarding the accuracy of their predictions about which players will perform well or poorly (overprecision).
225225
These various types of overconfidence may lead them to draft high-risk players based on gut feeling, neglecting statistical analysis and expert consensus.
226226

227-
People tend to focus on the role of skill and to neglect the role of luck when explaining the past and predicting the future, giving people an illusion of control [@Kahneman2011].
227+
People tend to focus on the role of skill and to neglect the role of luck when explaining the past and predicting the future, giving people an illusion of control [@Kahneman2011].
228228
Players' performance in fantasy football, and human behavior more generally, is complex and multiply determined (i.e., is influenced by many factors).
229229
Despite the bluster of so-called experts who pretend to know more than they can know, no one can consistently and accurately predict how all players will perform.
230230
Remain humble in your predictions; do not be more confident than is warranted.
@@ -344,7 +344,7 @@ Loss aversion can also influence trade negotiations.
344344
Risk aversion leads people to select safer options but may lead them to miss out on higher-gain opportunities.
345345
For instance, risk aversion may lead a fantasy manager to start players who are more steady (i.e., show greater game-to-game [consistency](#sec-evalHistoricalConsistency)) over players who are more volatile (i.e., show greater game-to-game variability) but have higher upside potential.
346346

347-
In mixed gambles, in which it is possible for a person to experience either a gain or a loss, [loss aversion](#sec-cognitiveBiasesLossAversion) tends to lead to risk-averse choices [@Kahneman2011].
347+
In mixed gambles, in which it is possible for a person to experience either a gain or a loss, [loss aversion](#sec-cognitiveBiasesLossAversion) tends to lead to risk-averse choices [@Kahneman2011].
348348
By contrast, when all of a person's options are poor, people tend to engage in risk seeking, as has been observed in entrepreneurs and in generals [@Kahneman2011].
349349
In fantasy football, risk seeking may be more likely when a manager has a team full of underperforming players and a weak record.
350350

data-visualization.qmd

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ confidenceLevel <- .95 # for 95% confidence interval
114114
player_stats_seasonal_offense_summary <- player_stats_seasonal %>%
115115
filter(position_group %in% c("QB","RB","WR","TE")) %>%
116116
group_by(position_group) %>%
117-
summarise(
117+
summarise(
118118
n = sum(!is.na(fantasyPoints)),
119119
mean = mean(fantasyPoints, na.rm = TRUE),
120120
sd = sd(fantasyPoints, na.rm = TRUE)
@@ -630,7 +630,7 @@ ggplot2::ggplot(
630630
ylim = c(0,NA),
631631
expand = FALSE) +
632632
scale_x_continuous(
633-
breaks = seq(from = 20, to = 40, by = 5)
633+
breaks = seq(from = 20, to = 40, by = 5)
634634
) +
635635
scale_y_continuous(
636636
breaks = seq(from = 0, to = 2500, by = 250)
@@ -664,7 +664,7 @@ ggplot2::ggplot(
664664
ylim = c(0,NA),
665665
expand = FALSE) +
666666
scale_x_continuous(
667-
breaks = seq(from = 20, to = 40, by = 5)
667+
breaks = seq(from = 20, to = 40, by = 5)
668668
) +
669669
scale_y_continuous(
670670
breaks = seq(from = 0, to = 2500, by = 250)
@@ -698,7 +698,7 @@ ggplot2::ggplot(
698698
ylim = c(0,NA),
699699
expand = FALSE) +
700700
scale_x_continuous(
701-
breaks = seq(from = 20, to = 40, by = 5)
701+
breaks = seq(from = 20, to = 40, by = 5)
702702
) +
703703
scale_y_continuous(
704704
breaks = seq(from = 0, to = 2500, by = 250)
@@ -738,7 +738,7 @@ plot_ypcByPlayerAge <- ggplot2::ggplot(
738738
ylim = c(0,NA),
739739
expand = FALSE) +
740740
scale_x_continuous(
741-
breaks = seq(from = 20, to = 40, by = 5)
741+
breaks = seq(from = 20, to = 40, by = 5)
742742
) +
743743
scale_y_continuous(
744744
breaks = seq(from = 0, to = 2500, by = 250)
@@ -778,7 +778,7 @@ plot_ypcByPlayerAge <- ggplot2::ggplot(
778778
ylim = c(0,NA),
779779
expand = FALSE) +
780780
scale_x_continuous(
781-
breaks = seq(from = 20, to = 40, by = 5)
781+
breaks = seq(from = 20, to = 40, by = 5)
782782
) +
783783
scale_y_continuous(
784784
breaks = seq(from = 0, to = 2500, by = 250)
@@ -1231,7 +1231,7 @@ ggplot2::ggplot(
12311231
y = def_epa)) +
12321232
nflplotR::geom_mean_lines(
12331233
aes(
1234-
x0 = off_epa ,
1234+
x0 = off_epa,
12351235
y0 = def_epa)) +
12361236
nflplotR::geom_nfl_logos(
12371237
aes(

download-football-data.qmd

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2065,8 +2065,8 @@ nfl_advancedStatsPFR_seasonal <- nfl_advancedStatsPFR_seasonal %>%
20652065
by = c("pfr_id","season")
20662066
) %>%
20672067
select(
2068-
pfr_id, season, pfr_player_name, pos, age, team, g, gs,
2069-
contains(".pass"), contains(".rush"), contains(".rec"), contains(".def"),
2068+
pfr_id, season, pfr_player_name, pos, age, team, g, gs,
2069+
contains(".pass"), contains(".rush"), contains(".rec"), contains(".def"),
20702070
everything())
20712071
```
20722072

@@ -3601,7 +3601,7 @@ Note: the following code takes a while to run.
36013601
#nfl_actualFantasyPoints_weekly_raw <- ffanalytics:::actual_points_scoring(
36023602
# season = 2023,
36033603
# summary_level = c("week"),
3604-
# stat_type = c("player", "dst", "team"),
3604+
# stat_type = c("player", "dst", "team"),
36053605
# season_type = c("REG", "POST", "REG+POST"),
36063606
# scoring_rules = scoring_obj,
36073607
# vor_baseline = NULL,
@@ -3619,7 +3619,7 @@ for(i in 1:length(seasons)){
36193619
ffanalytics:::actual_points_scoring(
36203620
season = seasons[i],
36213621
summary_level = c("week"),
3622-
stat_type = c("player"),
3622+
stat_type = c("player"),
36233623
#season_type = c("REG"),
36243624
scoring_rules = scoring_obj,
36253625
vor_baseline = NULL,
@@ -3629,7 +3629,7 @@ for(i in 1:length(seasons)){
36293629
ffanalytics:::actual_points_scoring(
36303630
season = seasons[i],
36313631
summary_level = c("week"),
3632-
stat_type = c("dst"),
3632+
stat_type = c("dst"),
36333633
#season_type = c("REG"),
36343634
scoring_rules = scoring_obj,
36353635
vor_baseline = NULL,
@@ -3767,7 +3767,7 @@ Note: the following code takes a while to run.
37673767
#nfl_actualFantasyPoints_seasonal_raw <- ffanalytics:::actual_points_scoring(
37683768
# season = 2023,
37693769
# summary_level = c("season"),
3770-
# stat_type = c("player", "dst", "team"),
3770+
# stat_type = c("player", "dst", "team"),
37713771
# season_type = c("REG"),
37723772
# scoring_rules = scoring_obj,
37733773
# vor_baseline = NULL,
@@ -3785,7 +3785,7 @@ for(i in 1:length(seasons)){
37853785
ffanalytics:::actual_points_scoring(
37863786
season = seasons[i],
37873787
summary_level = c("season"),
3788-
stat_type = c("player"),
3788+
stat_type = c("player"),
37893789
season_type = c("REG"),
37903790
scoring_rules = scoring_obj,
37913791
vor_baseline = NULL,
@@ -3795,7 +3795,7 @@ for(i in 1:length(seasons)){
37953795
ffanalytics:::actual_points_scoring(
37963796
season = seasons[i],
37973797
summary_level = c("season"),
3798-
stat_type = c("dst"),
3798+
stat_type = c("dst"),
37993799
season_type = c("REG"),
38003800
scoring_rules = scoring_obj,
38013801
vor_baseline = NULL,

draft.qmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ In general, Kickers and Defenses tend to have the lowest dropoff (i.e., the lowe
8181
Defenses, in particular, appear to be among the least predictable of the positions [@Lee2022].
8282

8383
Another important concept is a player's [value over a typical replacement player](#sec-fantasyValueVORP) at that position (shortened to "value over replacement player"; VORP), which is described in @sec-fantasyValueVORP.
84-
A player's [value over a typical replacement player](#sec-fantasyValueVORP) provides a way to more fairly compare (and thus rank) players across different positions.
84+
A player's [value over a typical replacement player](#sec-fantasyValueVORP) provides a way to more fairly compare (and thus rank) players across different positions.
8585

8686
Another important concept is a player's [uncertainty](#sec-fantasyValueUncertainty), which is described in @sec-fantasyValueUncertainty.
8787

0 commit comments

Comments
 (0)