Skip to content

Commit b802417

Browse files
paldayararslan
andauthored
add CI computation for emmeans and empairs (#72)
Co-authored-by: Alex Arslan <[email protected]>
1 parent 07db436 commit b802417

File tree

3 files changed

+91
-8
lines changed

3 files changed

+91
-8
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "Effects"
22
uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
3-
version = "1.6.0"
3+
version = "1.7.0"
44
authors = ["Beacon Biosignals, Inc."]
55

66
[deps]

src/emmeans.jl

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,28 @@ of the underlying standard deviations.
1414
"""
1515
pooled_sem(sems...) = sqrt(sum(abs2, sems))
1616

17+
function _ci!(df::AbstractDataFrame, level;
18+
eff_col::AbstractString, err_col::AbstractString,
19+
lower_col::AbstractString,
20+
upper_col::AbstractString)
21+
transform!(df,
22+
[eff_col, err_col, "dof"] => ByRow() do eff, err, ν
23+
scale = quantile(TDist(ν), (1 + level) / 2)
24+
lower = eff - scale * err
25+
upper = eff + scale * err
26+
return (lower, upper)
27+
end => [lower_col, upper_col])
28+
return df
29+
end
30+
1731
# similar to effects (and the underlying math is the same) but
1832
# the establishment of the reference grid is different
1933
# we don't allow specifying the "typifier" here -- if you want that,
2034
# then choose a less full service function
2135
"""
2236
emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
23-
invlink=identity, levels=Dict(), dof=nothing)
37+
invlink=identity, levels=Dict(), dof=nothing,
38+
ci_level=nothing, lower_col=:lower, upper_col=:upper)
2439
2540
Compute estimated marginal means, a.k.a. least-square (LS) means for a model.
2641
@@ -39,6 +54,13 @@ a large number of observations, `dof=Inf` may be appropriate.
3954
4055
`invlink`, `eff_col` and `err_col` work exactly as in [`effects!`](@ref).
4156
57+
If `ci_level` is provided, then `ci_level` confidence intervals are computed using
58+
the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
59+
If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
60+
which is equivalent to using the normal distribution.
61+
The corresponding lower and upper edges of the interval are placed in `lower_col`
62+
and `upper_col`, respectively.
63+
4264
Estimated marginal means are closely related to effects and are also known as
4365
least-square means. The functionality here is a convenience
4466
wrapper for [`effects`](@ref) and maps onto the concept of least-square means
@@ -49,7 +71,8 @@ not currently supported. The documentation for the [R package emmeans](https://c
4971
explains [the background in more depth](https://cran.r-project.org/web/packages/emmeans/vignettes/basics.html).
5072
"""
5173
function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
52-
invlink=identity, levels=Dict(), dof=nothing)
74+
invlink=identity, levels=Dict(), dof=nothing, ci_level=nothing,
75+
lower_col=:lower, upper_col=:upper)
5376
form = formula(model)
5477
typical = mean
5578
defaults = Dict{Symbol,Vector}()
@@ -68,18 +91,30 @@ function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
6891
grid = expand_grid(levels)
6992
eff_col = string(something(eff_col, _responsename(model)))
7093
err_col = string(err_col)
94+
lower_col = string(lower_col)
95+
upper_col = string(upper_col)
7196

7297
result = effects!(grid, model; eff_col, err_col, typical, invlink)
7398
if !isnothing(dof)
7499
result[!, :dof] .= _dof(dof, model)
75100
end
101+
if !isnothing(ci_level)
102+
# we keep this separate so that we don't add a DoF column
103+
# if there is no CI
104+
if isnothing(dof)
105+
result[!, :dof] .= Inf
106+
end
107+
_ci!(result, ci_level; eff_col, err_col, lower_col, upper_col)
108+
end
76109
return result
77110
end
78111

79112
"""
80113
empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
81-
invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
82-
empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
114+
invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
115+
ci_level=nothing, lower_col=:lower, upper_col=:upper)
116+
empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
117+
ci_level=nothing, lower_col=:lower, upper_col=:upper)
83118
84119
Compute pairwise differences of estimated marginal means.
85120
@@ -104,9 +139,19 @@ If `padjust` is provided, then it is used to compute adjust the p-values for
104139
multiple comparisons. [`MultipleTesting.jl`](https://juliangehring.github.io/MultipleTesting.jl/stable/)
105140
provides a number of useful possibilities for this.
106141
142+
If `ci_level` is provided, then `ci_level` confidence intervals are computed using
143+
the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
144+
If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
145+
which is equivalent to using the normal distribution.
146+
The corresponding lower and upper edges of the interval are placed in `lower_col`
147+
and `upper_col`, respectively.
148+
107149
!!! note
108150
`padjust` is silently ignored if `dof` is not provided.
109151
152+
!!! note
153+
Confidence intervals are **not** adjusted for multiple comparisons.
154+
110155
!!! warning
111156
This feature is experimental and the precise column names and presentation of
112157
contrasts/differences may change without being considered breaking.
@@ -122,18 +167,22 @@ provides a number of useful possibilities for this.
122167
discussed in [the documentation for the R package `emmeans`](https://cran.r-project.org/web/packages/emmeans/vignettes/transformations.html).
123168
"""
124169
function empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
125-
invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
170+
invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
171+
ci_level=nothing, lower_col=:lower, upper_col=:upper)
126172
eff_col = something(eff_col, _responsename(model))
127173
em = emmeans(model; eff_col, err_col, invlink, levels, dof)
128-
return empairs(em; eff_col, err_col, padjust)
174+
return empairs(em; eff_col, err_col, padjust, ci_level, lower_col, upper_col)
129175
end
130176

131-
function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
177+
function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
178+
ci_level=nothing, lower_col=:lower, upper_col=:upper)
132179
# need to enforce that we're all the same type
133180
# (mixing string and symbol is an issue with Not
134181
# and a few other things below)
135182
eff_col = string(eff_col)
136183
err_col = string(err_col)
184+
lower_col = string(lower_col)
185+
upper_col = string(upper_col)
137186
stats_cols = [eff_col, err_col]
138187
"dof" in names(df) && push!(stats_cols, "dof")
139188

@@ -170,5 +219,11 @@ function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
170219
end => "Pr(>|t|)")
171220
transform!(result_df, "Pr(>|t|)" => padjust => "Pr(>|t|)")
172221
end
222+
if !isnothing(ci_level)
223+
if "dof" stats_cols
224+
result_df[!, :dof] .= Inf
225+
end
226+
_ci!(result_df, ci_level; eff_col, err_col, lower_col, upper_col)
227+
end
173228
return result_df
174229
end

test/emmeans.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,20 @@ model_scaled = lm(@formula(weight ~ 1 + sex * age), growthdata;
5353
end
5454
em = emmeans(m; levels=Dict(:age => 23))
5555
@test all(em.age .== 23)
56+
57+
@testset "confint" begin
58+
em = emmeans(m; ci_level=0.68)
59+
@test all(==(Inf), em[!, :dof])
60+
# 68% CI is approximately one standard error
61+
@test em[!, :weight] + em[!, :err] em[!, :upper] rtol = 1e-3
62+
@test em[!, :weight] - em[!, :err] em[!, :lower] rtol = 1e-3
63+
64+
em = emmeans(m; ci_level=0.68, dof=dof_residual)
65+
@test all(==(10), em[!, :dof])
66+
# 68% CI is approximately one standard error
67+
@test em[!, :weight] + em[!, :err] em[!, :upper] rtol = 1e-3
68+
@test em[!, :weight] - em[!, :err] em[!, :lower] rtol = 1e-3
69+
end
5670
end
5771

5872
# R> pairs(em)
@@ -101,6 +115,20 @@ bonferroni(pvals) = adjust(PValues(pvals), Bonferroni())
101115
rtol=0.001))
102116
end
103117

118+
@testset "confint" begin
119+
em = empairs(m; ci_level=0.68)
120+
@test all(==(Inf), em[!, :dof])
121+
# 68% CI is approximately one standard error
122+
@test em[!, :weight] + em[!, :err] em[!, :upper] rtol = 1e-3
123+
@test em[!, :weight] - em[!, :err] em[!, :lower] rtol = 1e-3
124+
125+
em = emmeans(m; ci_level=0.68, dof=dof_residual)
126+
@test all(==(10), em[!, :dof])
127+
# 68% CI is approximately one standard error
128+
@test em[!, :weight] + em[!, :err] em[!, :upper] rtol = 1e-3
129+
@test em[!, :weight] - em[!, :err] em[!, :lower] rtol = 1e-3
130+
end
131+
104132
@testset "AbstractString crossing" begin
105133
# this model is utter nonsense, but it creates a particular pattern
106134
# with InlineStrings that fails if our type restriction is too tight

0 commit comments

Comments
 (0)