R Code [zeigen / verbergen]
# Read the repeated tomato measurements, pass the column names through
# clean_names(), make "control" the reference level of `variety`, and add a
# factor version of `time` (used as a discrete axis in the boxplot).
tomato_tbl <- read_excel("data/repeated_tomato.xlsx") |>
  clean_names() |>
  mutate(
    variety = relevel(as_factor(variety), ref = "control"),
    time_fct = as_factor(time)
  )
Letzte Änderung am 08. May 2025 um 15:37:00
# Read the repeated tomato measurements, pass the column names through
# clean_names(), make "control" the reference level of `variety`, and add a
# factor version of `time` (used as a discrete axis in the boxplot).
tomato_tbl <- read_excel("data/repeated_tomato.xlsx") |>
  clean_names() |>
  mutate(
    variety = relevel(as_factor(variety), ref = "control"),
    time_fct = as_factor(time)
  )
# Boxplots of fruit diameter per measurement time point, filled by variety.
ggplot(tomato_tbl, aes(time_fct, diameter, fill = variety)) +
  theme_minimal() +
  geom_boxplot(outlier.size = 0.5) +
  theme(legend.position = "top") +
  scale_fill_metro() +
  # The legend is generated by the `fill` aesthetic, so its title has to be
  # set through `fill`; `color` is not mapped in this plot and a title given
  # for it would be silently ignored.
  labs(
    x = "Zeitpunkte der Messung",
    y = "Mittleres Fruchtdurchmesser [cm]",
    fill = "Sorte"
  )
# Mean diameter per variety at time point 11; used to place the variety
# labels at the right-hand end of the mean lines.
ggrepel_tbl <- tomato_tbl |>
  group_by(time, variety) |>
  # Drop the grouping explicitly so no residual `time` grouping (and no
  # summarise() message) leaks into later steps.
  summarise(mean = mean(diameter, na.rm = TRUE), .groups = "drop") |>
  filter(time %in% c(11))
# Mean fruit diameter over time with one line per variety. The variety names
# from `ggrepel_tbl` are additionally written next to x = 11.
ggplot(
  data = tomato_tbl,
  mapping = aes(x = time, y = diameter, color = variety, group = variety)
) +
  theme_minimal() +
  # Disabled layer: geom_point2(position = position_dodge(0.5))
  stat_summary(geom = "line", fun = "mean") +
  stat_summary(geom = "point", fun = "mean", size = 1) +
  # Disabled layer: stat_summary(fun = "median", geom = "line", linetype = 2)
  scale_color_metro() +
  theme(legend.position = "top") +
  scale_x_continuous(
    breaks = 2:11,
    expand = expansion(mult = c(0.1, 0.1))
  ) +
  labs(
    x = "Zeitpunkte der Messung",
    y = "Mittleres Fruchtdurchmesser [cm]",
    color = "Sorte"
  ) +
  geom_text_repel(
    data = ggrepel_tbl,
    mapping = aes(x = 11, y = mean, label = variety),
    show.legend = FALSE,
    nudge_x = 0.5,
    hjust = "left",
    size = 3,
    segment.color = "gray80"
  )
Das Ganze kommt dann in das Kapitel „Statistisches Testen in R“ plus die entsprechenden Kapitel.
Equivalent to Welch’s t-test in GLS framework
Common statistical tests are linear models
Are parametric tests on rank transformed data equivalent to non-parametric test on raw data?
Conover und Iman (1981) mit Rank Transformations as a Bridge Between Parametric and Nonparametric Statistics
# Simulated example data: three groups of seven observations each, with a
# log-normal and a normal response plus their ranks. The ranks are computed
# over all 21 observations, not within group.
# The order of the random draws below must not change, otherwise the values
# produced under this seed change as well.
set.seed(20250345)
ranked_tbl <- tibble(
  grp = gl(3, 7, labels = c("cat", "dog", "fox")),
  rsp_lognormal = c(
    round(rlnorm(7, 4, 1), 2),
    round(rlnorm(7, 4, 1), 2),
    round(rlnorm(7, 4, 1), 2)
  ),
  ranked_lognormal = rank(rsp_lognormal),
  rsp_normal = c(
    round(rnorm(7, 4, 1), 2),
    round(rnorm(7, 5, 1), 2),
    round(rnorm(7, 7, 1), 2)
  ),
  ranked_normal = rank(rsp_normal)
)
# Mean and standard deviation of the raw and the ranked normal response for
# the cat/dog subset, rounded to two digits and rendered as a table via tt().
ranked_tbl |>
  filter(grp != "fox") |>
  group_by(grp) |>
  summarise(
    mean(rsp_normal), sd(rsp_normal),
    mean(ranked_normal), sd(ranked_normal)
  ) |>
  # across() replaces the superseded mutate_if().
  mutate(across(where(is.numeric), \(x) round(x, 2))) |>
  set_names(c("Gruppe", "$\\bar{y}_{normal}$", "$s_{normal}$", "$\\bar{y}_{ranked}$", "$s_{ranked}$")) |>
  tt(width = 1, align = "c", theme = "striped")
Gruppe | $\bar{y}_{normal}$ | $s_{normal}$ | $\bar{y}_{ranked}$ | $s_{ranked}$ |
---|---|---|---|---|
cat | 3.44 | 1.02 | 4.86 | 3.44 |
dog | 5.26 | 1.31 | 10.86 | 4.45 |
# t-test on the rank-transformed response (cat vs. dog); only the p-value is
# kept for comparison with the Wilcoxon test.
# NOTE(review): `ranked_normal` appears to be ranked over all three groups
# before "fox" is removed here - confirm that re-ranking within the two-group
# subset is not intended.
t.test(
  ranked_normal ~ grp,
  data = ranked_tbl |> filter(grp != "fox")
) |>
  tidy() |>
  select(p.value)
# A tibble: 1 × 1
p.value
<dbl>
1 0.0162
# Wilcoxon rank-sum test on the raw response (cat vs. dog); only the p-value
# is kept for comparison with the t-test on ranks.
wilcox.test(
  rsp_normal ~ grp,
  data = ranked_tbl |> filter(grp != "fox")
) |>
  tidy() |>
  select(p.value)
# A tibble: 1 × 1
p.value
<dbl>
1 0.0175
# One-way ANOVA on the rank-transformed response across all three groups,
# returned as a tidy table.
tidy(aov(ranked_normal ~ grp, data = ranked_tbl))
# A tibble: 2 × 6
term df sumsq meansq statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 grp 2 541. 270. 21.3 0.0000181
2 Residuals 18 229. 12.7 NA NA
# Kruskal-Wallis test on the same ranked response, returned as a tidy table.
tidy(kruskal.test(ranked_normal ~ grp, data = ranked_tbl))
# A tibble: 1 × 4
statistic p.value parameter method
<dbl> <dbl> <int> <chr>
1 14.1 0.000886 2 Kruskal-Wallis rank sum test
#' Signed rank transformation
#'
#' Ranks the absolute values of `x` and re-attaches the original sign.
#'
#' @param x A numeric vector.
#' @return A numeric vector of the same length as `x`: `rank(abs(x))`
#'   multiplied by `sign(x)` (so zeros map to 0).
signed_rank <- function(x) sign(x) * rank(abs(x))
# Ordinary ranks: the negative value -5.0 is the smallest and gets rank 1.
rank(c(3.6, 3.4, -5.0, 8.2))
[1] 3 2 1 4
# Signed ranks: values are ranked by magnitude, then the sign is restored.
signed_rank(c(3.6, 3.4, -5.0, 8.2))
[1] 2 1 -3 4
# Welch's t-test on the raw response (cat vs. dog): var.equal = FALSE means
# the two group variances are not assumed to be equal.
t.test(
  rsp_normal ~ grp,
  data = ranked_tbl |> filter(grp != "fox"),
  var.equal = FALSE
)
Welch Two Sample t-test
data: rsp_normal by grp
t = -2.9197, df = 11.318, p-value = 0.01357
alternative hypothesis: true difference in means between group cat and group dog is not equal to 0
95 percent confidence interval:
-3.1998081 -0.4544776
sample estimates:
mean in group cat mean in group dog
3.435714 5.262857
library(nlme)
# GLS fit of the same cat/dog comparison with a separate residual standard
# deviation per group (varIdent with grp as stratum), for comparison with the
# Welch t-test above. The `data` expression is echoed verbatim in the printed
# summary, so it is kept inline.
summary(gls(rsp_normal ~ grp, data = filter(ranked_tbl, grp != "fox"),
weights = varIdent(form = ~ 1 | grp)))
Generalized least squares fit by REML
Model: rsp_normal ~ grp
Data: filter(ranked_tbl, grp != "fox")
AIC BIC logLik
49.35749 51.29711 -20.67874
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | grp
Parameter estimates:
cat dog
1.000000 1.284679
Coefficients:
Value Std.Error t-value p-value
(Intercept) 3.435714 0.3843972 8.937927 0.0000
grpdog 1.827143 0.6258007 2.919688 0.0128
Correlation:
(Intr)
grpdog -0.614
Standardized residuals:
Min Q1 Med Q3 Max
-1.3645596 -0.6355371 -0.1011953 0.4646938 1.7581826
Residual standard error: 1.017019
Degrees of freedom: 14 total; 12 residual
conflicts_prefer(stats::chisq.test)
[conflicted] Will prefer stats::chisq.test over any other package.
# Observed counts per mood category for the goodness-of-fit example.
D <- data.frame(
  mood = c("happy", "sad", "meh"),
  counts = c(60, 90, 70)
)
# Chi-squared test for given probabilities on the three mood counts. The
# argument expression is echoed in the printed "data:" line, so it is kept
# as `D$counts`.
chisq.test(D$counts)
Chi-squared test for given probabilities
data: D$counts
X-squared = 6.3636, df = 2, p-value = 0.04151
# Same hypothesis as a Poisson log-linear model: the Rao score test for
# `mood` in the analysis-of-deviance table.
anova(
  glm(counts ~ mood, data = D, family = poisson()),
  test = "Rao"
)
Analysis of Deviance Table
Model: poisson, link: log
Response: counts
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Rao Pr(>Chi)
NULL 2 6.2697
mood 2 6.2697 0 0.0000 6.3636 0.04151 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Jetzt brauchen wir eine 2x2 Tabelle also zwei Spalten…
# Two-way frequency table in long format: one row per mood x sex cell with
# its observed frequency.
D <- data.frame(
  mood = c("happy", "happy", "meh", "meh", "sad", "sad"),
  sex = c("male", "female", "male", "female", "male", "female"),
  Freq = c(100, 70, 30, 32, 110, 120)
)
# Log-linear model with main effects only (no mood:sex interaction). The call
# is echoed in the printed output, so its form is kept as is.
MASS::loglm(Freq ~ mood + sex, D)
Call:
MASS::loglm(formula = Freq ~ mood + sex, data = D)
Statistics:
X^2 df P(> X^2)
Likelihood Ratio 5.119915 2 0.07730804
Pearson 5.099859 2 0.07808717
Kann auch in technische Gleichheit mit rein
# Read the NIRS data in wide format and pass the column names through
# clean_names().
nirs_wide_tbl <- read_excel("data/nirs_qs_data.xlsx") |>
  clean_names()
# Reshape the NIRS data to long format: every column from `jd_ts` to the last
# one becomes a row, with the column name split at "_" into `method` and
# `type`; the identifying columns are converted to factors.
# NOTE(review): this step warns "Expected 2 pieces. Additional pieces
# discarded" for three names, so some column names presumably contain more
# than one "_" and lose their trailing part - confirm this is intended.
nirs_long_tbl <- nirs_wide_tbl |>
  pivot_longer(
    cols = jd_ts:last_col(),
    values_to = "values",
    names_to = c("method", "type"),
    names_sep = "_"
  ) |>
  mutate(
    gulleart = as_factor(gulleart),
    method = as_factor(method),
    type = as_factor(type)
  )
Warning: Expected 2 pieces. Additional pieces discarded in 3 rows [3, 8, 13].
Technical note: Validation and comparison of 2 commercially available activity loggers
Quick and easy ways to deal with long labels in ggplot2
Adding Custom Fonts to ggplot in R
Die Sache mit der Schriftart in {ggplot}
.
library(ggrepel)
Getting started with {ggrepel}
{ggtext}: Improved text rendering support for ggplot2
# Use "IBM Plex Sans Condensed" as the default font family for plain labels,
# ggtext rich-text labels and ggrepel labels.
invisible(lapply(
  list("label", ggtext::GeomRichText, "label_repel"),
  update_geom_defaults,
  new = list(family = "IBM Plex Sans Condensed")
))
{gganimate}
{dbplyr}
Das R Paket {dbplyr}
Database Queries With R