Example 5.2 from Experimental Design and Analysis for Tree Improvement
Source:R/Exam5.2.R
Exam5.2.Rd
Exam5.2 presents the height of 37 seedlots from 6 sites.
References
E.R. Williams, C.E. Harwood and A.C. Matheson (2023). Experimental Design and Analysis for Tree Improvement. CSIRO Publishing (https://www.publish.csiro.au/book/3145/).
Author
Muhammad Yaseen (myaseen208@gmail.com)
Sami Ullah (samiullahuos@gmail.com)
Examples
library(car)
library(dae)
library(dplyr)
library(emmeans)
library(ggplot2)
library(lmerTest)
library(magrittr)
library(predictmeans)
data(DataExam5.2)
# Pg. 75
fm5.7 <-
lm(
formula = ht ~ site*seedlot
, data = DataExam5.2
)
# Pg. 77
anova(fm5.7)
#> Warning: ANOVA F-tests on an essentially perfect fit are unreliable
#> Analysis of Variance Table
#>
#> Response: ht
#> Df Sum Sq Mean Sq F value Pr(>F)
#> site 5 4157543 831509 NaN NaN
#> seedlot 36 4425296 122925 NaN NaN
#> site:seedlot 150 1351054 9007 NaN NaN
#> Residuals 0 0 NaN
fm5.9 <-
lm(
formula = ht ~ site*seedlot
, data = DataExam5.2
)
# Pg. 77
anova(fm5.9)
#> Warning: ANOVA F-tests on an essentially perfect fit are unreliable
#> Analysis of Variance Table
#>
#> Response: ht
#> Df Sum Sq Mean Sq F value Pr(>F)
#> site 5 4157543 831509 NaN NaN
#> seedlot 36 4425296 122925 NaN NaN
#> site:seedlot 150 1351054 9007 NaN NaN
#> Residuals 0 0 NaN
ANOVAfm5.9 <- anova(fm5.9)
#> Warning: ANOVA F-tests on an essentially perfect fit are unreliable
ANOVAfm5.9[4, 1:3] <- c(384, 384*964, 964)
ANOVAfm5.9[3, 4] <- ANOVAfm5.9[3, 3]/ANOVAfm5.9[4, 3]
ANOVAfm5.9[3, 5] <-
pf(
q = ANOVAfm5.9[3, 4]
, df1 = ANOVAfm5.9[3, 1]
, df2 = ANOVAfm5.9[4, 1]
, lower.tail = FALSE
)
# Pg. 77
ANOVAfm5.9
#> Analysis of Variance Table
#>
#> Response: ht
#> Df Sum Sq Mean Sq F value Pr(>F)
#> site 5 4157543 831509 NaN NaN
#> seedlot 36 4425296 122925 NaN NaN
#> site:seedlot 150 1351054 9007 9.3434 < 2.2e-16 ***
#> Residuals 384 370176 964
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tab5.14 <-
DataExam5.2 %>%
summarise(
Mean = round(mean(ht, na.rm = TRUE), 0)
, .by = seedlot
) %>%
left_join(
DataExam5.2 %>%
nest_by(seedlot) %>%
mutate(fm2 = list(lm(ht ~ sitemean, data = data))) %>%
summarise(Slope = round(coef(fm2)[2], 2))
, by = "seedlot"
) %>%
as.data.frame()
#> `summarise()` has grouped output by 'seedlot'. You can override using the
#> `.groups` argument.
# Pg. 81
Tab5.14
#> seedlot Mean Slope
#> 1 13877 291 0.80
#> 2 13866 302 0.56
#> 3 13689 463 1.07
#> 4 13688 458 1.20
#> 5 13861 538 1.08
#> 6 13854 536 1.16
#> 7 13686 726 1.77
#> 8 13684 549 1.34
#> 9 13864 371 0.79
#> 10 13863 586 1.19
#> 11 13683 673 1.78
#> 12 13681 610 1.33
#> 13 13680 645 2.08
#> 14 14623 652 0.49
#> 15 14175 378 1.22
#> 16 14660 445 0.88
#> 17 13691 511 1.61
#> 18 13653 477 1.35
#> 19 13846 381 0.93
#> 20 13621 304 0.98
#> 21 14176 177 0.22
#> 22 13871 220 0.64
#> 23 14622 355 1.65
#> 24 13876 240 0.77
#> 25 13519 342 0.71
#> 26 13514 290 0.66
#> 27 13148 230 0.35
#> 28 13990 260 0.29
#> 29 14537 671 1.15
#> 30 14106 673 1.18
#> 31 12013 529 1.30
#> 32 14130 390 0.45
#> 33 14485 108 0.16
#> 34 14166 210 0.61
#> 35 11935 211 0.61
#> 36 14170 244 0.59
#> 37 14152 150 0.63
DevSS2 <-
DataExam5.2 %>%
nest_by(seedlot) %>%
mutate(fm2 = list(lm(ht ~ sitemean, data = data))) %>%
summarise(SSE = anova(fm2)[2, 2]) %>%
ungroup() %>%
summarise(Dev = sum(SSE)) %>%
as.numeric()
#> Warning: There were 2 warnings in `summarise()`.
#> The first warning was:
#> ℹ In argument: `SSE = anova(fm2)[2, 2]`.
#> ℹ In row 12.
#> Caused by warning in `anova.lm()`:
#> ! ANOVA F-tests on an essentially perfect fit are unreliable
#> ℹ Run dplyr::last_dplyr_warnings() to see the 1 remaining warning.
#> `summarise()` has grouped output by 'seedlot'. You can override using the
#> `.groups` argument.
ANOVAfm5.9.1 <-
rbind(
ANOVAfm5.9[1:3, ]
, c(
ANOVAfm5.9[2, 1]
, ANOVAfm5.9[3, 2] - DevSS2
, (ANOVAfm5.9[3, 2] - DevSS2)/ANOVAfm5.9[2, 1]
, NA
, NA
)
, c(
ANOVAfm5.9[3, 1]-ANOVAfm5.9[2, 1]
, DevSS2
, DevSS2/(ANOVAfm5.9[3, 1]-ANOVAfm5.9[2, 1])
, DevSS2/(ANOVAfm5.9[3, 1]-ANOVAfm5.9[2, 1])/ANOVAfm5.9[4, 3]
, pf(
q = DevSS2/(ANOVAfm5.9[3, 1]-ANOVAfm5.9[2, 1])/ANOVAfm5.9[4, 3]
, df1 = ANOVAfm5.9[3, 1]-ANOVAfm5.9[2, 1]
, df2 = ANOVAfm5.9[4, 1]
, lower.tail = FALSE
)
)
, ANOVAfm5.9[4, ]
)
rownames(ANOVAfm5.9.1) <-
c(
"site"
, "seedlot"
, "site:seedlot"
, " regressions"
, " deviations"
, "Residuals"
)
# Pg. 82
ANOVAfm5.9.1
#> Analysis of Variance Table
#>
#> Response: ht
#> Df Sum Sq Mean Sq F value Pr(>F)
#> site 5 4157543 831509 NaN NaN
#> seedlot 36 4425296 122925 NaN NaN
#> site:seedlot 150 1351054 9007 9.3434 < 2.2e-16 ***
#> regressions 36 703203 19533
#> deviations 114 647851 5683 5.8951 < 2.2e-16 ***
#> Residuals 384 370176 964
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Code <-
c(
"a","a","a","a","b","b","b","b"
, "c","d","d","d","d","e","f","g"
, "h","h","i","i","j","k","l","m"
,"n","n","n","o","p","p","q","r"
, "s","t","t","u","v"
)
Tab5.14$Code <- Code
ggplot(
data = Tab5.14
, mapping = aes(x = Mean, y = Slope)
) +
geom_point(size = 2) +
geom_text(
mapping = aes(label = Code)
, hjust = -0.5
, vjust = -0.5
) +
theme_bw() +
labs(
x = "SeedLot Mean"
, y = "Regression Coefficient"
)