Modérateur : Groupe des modérateurs
Eric Pagot a écrit :J'ai trouvé la solution pour calculer les anova de type I ou II. Le type II ne fait pas de hiérarchie de facteurs, c'est la fonction Anova (avec une majuscule) qui permet de tenir compte de données non orthogonales.
C'est d'ailleurs très souvent le cas quand il s'agit de relever le sexe des animaux.
Or, les moyennes correspondantes à ces calculs de sommes de carrés sont des moyennes ajustées (c'est à dire équivalentes à des populations avec des données initiales identiques).
Je ne sais pas s'il est possible de récupérer cette info. En effet, sous R, on ne peut calculer que les données brutes, mais quelquefois, avec les ajustements, il peut y avoir majoration importante dans un lot.
Eric Pagot a écrit :Je n'ai pas saisi votre réponse. Comment fait-on pour connaître les moyennes des sous-groupes que j'ai indiqués ? Est-ce que je dois prendre les valeurs indiquée par fitted.value ?
> lm(POIDS ~ LOT*SEXE, data=exemple)
Call:
lm(formula = POIDS ~ LOT * SEXE, data = exemple)
Coefficients:
(Intercept) LOT[T.B] SEXE[T.M] LOT[T.B]:SEXE[T.M]
1.150e+01 -5.000e-01 -3.230e-16 1.667e-01
> fitted(lm(POIDS ~ LOT*SEXE, data=exemple))
1 2 3 4 5 6 7 8 9 10 11 12 13
11.50000 11.50000 11.50000 11.50000 11.50000 11.50000 11.50000 11.50000 11.50000 11.50000 11.16667 11.16667 11.16667
14 15 16 17 18 19 20
11.16667 11.16667 11.16667 11.00000 11.00000 11.00000 11.00000
La sortie que j'ai indiquée correspond à une sortie Systat, mais ce doit être probablement la même chose avec SAS.
Le lien indiqué ne fonctionne pas...
Code : Tout sélectionner
# Data from Manual Splus6, p. 629
mydata <- data.frame(
fat = factor(c(rep(1, times = 12), rep(2, times = 12), rep(3, times = 12))),
surfact = factor(rep(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3), times = 3)),
flour = factor(rep(1:4, times = 9)),
y = c(6.7, 4.3, 5.7, NA, 7.1, NA, 5.9, 5.6,
NA, 5.5, 6.4, 5.8, NA, 5.9, 7.4, 7.1, NA, 5.6, NA, 6.8,
6.4, 5.1, 6.2, 6.3, 7.1, 5.9, NA, NA, 7.3, 6.6,
8.1, 6.8, NA, 7.5, 9.1, NA)
)
mydata
mydata <- mydata[!is.na(mydata$y), ]
mydata
# Model fitting
fm <- lm(formula = y ~ flour + fat * surfact, data = mydata)
drop1(fm, test = "F")
# Function marg_means (draft version)
marg_means <- function(formula, fm, digits = 4) {
# 'fm' must be of class lm or aov
f <- formula(deparse(formula))
tmp.data <- model.frame(fm)[, -1]
tmp.list <- list(0)
for(j in 1:ncol(tmp.data)) {
if(is.factor(tmp.data[, j])) tmp.list[[j]] <- levels(tmp.data[, j])
if(is.numeric(tmp.data[, j])) tmp.list[[j]] <- mean(tmp.data[, j])
}
names(tmp.list) <- names(tmp.data)
tmp.data <- expand.grid(tmp.list)
myformula <- formula(paste("y ~", formula(fm)[3]))
X <- model.matrix(lm(formula = myformula, data = cbind(tmp.data, y = rep(0, nrow(tmp.data)))))
datamu <- cbind(rows = 1:nrow(tmp.data), tmp.data, mu = X %*% coef(fm))
varmu <- X %*% vcov(fm) %*% t(X)
if(as.character(f[2]) != "1") {
dataselec <- ctabs(f, datamu)$counts
tab <- dataselec
tab$mu <- rep(0, nrow(tab))
tab$se <- rep(0, nrow(tab))
for(i in 1:nrow(dataselec)) {
selec <- dataselec[i, ]
tmp.datamu <- merge(datamu, selec, by = names(dataselec)[-ncol(dataselec)], all = FALSE)
tmp.varmu <- varmu[tmp.datamu$rows, tmp.datamu$rows]
x <- matrix(1 / nrow(tmp.datamu), ncol = nrow(tmp.datamu))
mu <- x %*% tmp.datamu$mu
se <- (x %*% tmp.varmu %*% t(x))^0.5
tab$mu[i] <- mu
tab$se[i] <- se
}
} else {
tmp.datamu <- datamu
tmp.varmu <- varmu
x <- matrix(1 / nrow(tmp.datamu), ncol = nrow(tmp.datamu))
mu <- x %*% tmp.datamu$mu
se <- (x %*% tmp.varmu %*% t(x))^0.5
tab <- data.frame(mu = mu, se = se, n = 0)
}
tab <- tab[, -match("n", names(tab))]
z <- (ncol(tab) - 1):ncol(tab)
tab[, z] <- round(tab[, z], digits = digits)
tab
}
# Marginal means
marg_means(formula = ~ 1, fm = fm)
marg_means(formula = ~ flour, fm = fm)
marg_means(formula = ~ fat, fm = fm)
marg_means(formula = ~ surfact, fm = fm)
marg_means(formula = ~ fat + surfact, fm = fm)
# Results from Manual Splus6, p. 632-633
#Tables of adjusted means
#Grand mean
#6.633281
#se 0.084599
#Flour
#1 2 3 4
#7.3020 5.7073 6.9815 6.5423
#se 0.1995 0.1467 0.1621 0.1785
#Fat
#1 2 3
#5.8502 6.5771 7.4725
#se 0.1365 0.1477 0.1565
#Surfactant
#1 2 3
#6.3960 6.5999 6.9039
#se 0.1502 0.1432 0.1473
#Fat:Surfactant
#Dim 1 : Fat
#Dim 2 : Surfactant
#1 2 3
#1 5.5364 5.8913 6.1229
#se 0.2404 0.2392 0.2414
#2 7.0229 6.7085 6.0000
#se 0.2414 0.3006 0.2034
#3 6.6286 7.2000 8.5889
#se 0.3007 0.2034 0.3001
Delphine Meziere a écrit : Erreur dans marg_means(formula = ~flour, fm = fm) : impossible de trouver la fonction "ctabs"
Code : Tout sélectionner
emm <- function(fm, formula, digits = 4, ...) {
CALL <- match.call()
# in case the formula was provided as an object
CALL[[2]] <- formula(deparse(formula))
f <- formula(deparse(formula))
tmp.data <- model.frame(fm)[, -1]
tmp.list <- list(0)
for(j in 1:ncol(tmp.data)) {
if(is.factor(tmp.data[, j])) tmp.list[[j]] <- levels(tmp.data[, j])
if(is.numeric(tmp.data[, j])) tmp.list[[j]] <- mean(tmp.data[, j])
}
names(tmp.list) <- names(tmp.data)
tmp.data <- expand.grid(tmp.list)
tmp.formula <- formula(paste("y ~", formula(fm)[3]))
X <- model.matrix(lm(formula = tmp.formula, data = cbind(tmp.data, y = rep(0, nrow(tmp.data)))))
datamu <- cbind(rows = 1:nrow(tmp.data), tmp.data, mu = X %*% coef(fm))
varmu <- X %*% vcov(fm) %*% t(X)
## Right-hand != "~ 1"
if(as.character(f[2]) != "1"){
z <- attr(terms(f), "term.labels")
if(length(z) >= 2)
f <- formula(paste(z[1], " ~ ", paste(z[2:length(z)], collapse = "+"), sep = ""))
dataselec <- ctab(formula = f, data = datamu)$counts
tab <- dataselec
tab$mu <- rep(0, nrow(tab))
tab$se <- rep(0, nrow(tab))
for(i in 1:nrow(dataselec)) {
selec <- dataselec[i, ]
tmp.datamu <- merge(datamu, selec, by = names(dataselec)[-ncol(dataselec)], all = FALSE)
tmp.varmu <- varmu[tmp.datamu$rows, tmp.datamu$rows]
x <- matrix(1 / nrow(tmp.datamu), ncol = nrow(tmp.datamu))
mu <- x %*% tmp.datamu$mu
se <- (x %*% tmp.varmu %*% t(x))^0.5
tab$mu[i] <- mu
tab$se[i] <- se
}
tab <- tab[, -match("n.ctab", names(tab))]
}
## Right-hand == "~ 1"
else{
tmp.datamu <- datamu
tmp.varmu <- varmu
x <- matrix(1 / nrow(tmp.datamu), ncol = nrow(tmp.datamu))
mu <- x %*% tmp.datamu$mu
se <- (x %*% tmp.varmu %*% t(x))^0.5
tab <- data.frame(mu = mu, se = se, n = 0)
tab <- tab[, -match("n", names(tab))]
}
z <- (ncol(tab) - 1):ncol(tab)
tab[, z] <- round(tab[, z], digits = digits)
rownames(tab) <- seq(nrow(tab))
# initial call
cat("\n")
print(CALL)
cat("\n")
# estimated marginal means
tab
}
Code : Tout sélectionner
ctab <- function (formula, data, weights = NULL, digits = 2)
{
dfname <- as.character(substitute(data))
if (!is.data.frame(data))
stop(paste("\n\nThe object ", dfname, " is not data frame.\n\n",
sep = ""))
weights <- as.character(substitute(weights))
if (length(weights) > 0)
data[, match(weights, names(data))] <- as.numeric(as.character(data[,
match(weights, names(data))]))
CALL <- match.call()
CALL[[2]] <- formula(deparse(formula))
data <- as.data.frame(lapply(data, function(x) if (mode(x) ==
"character")
factor(x)
else x))
tmp <- data
f <- formula
if (length(weights) > 0) {
listvar <- all.vars(formula)
newf <- formula(paste(weights, "~", paste(listvar, collapse = " + ")))
tmp <- splitbin(formula = newf, data = data)$tab
}
if (substring(deparse(formula)[1], 1, 5) == "cbind") {
listvar <- all.vars(formula)
rhs.var <- attr(terms(formula), "term.labels")
tmp <- splitbin(formula = formula, data = data)$tab
if (length(rhs.var) == 0)
f <- formula(paste(listvar[1], "~ 1"))
else f <- formula(paste(listvar[1], "~", paste(rhs.var,
collapse = " + ")))
}
listvar <- all.vars(f)
nbvar <- length(listvar)
right.var <- attr(terms(f), "term.labels")
nbfact <- length(right.var)
if (nbvar == 1)
tab <- with(tmp, table(eval(parse(text = listvar)), dnn = list(listvar)))
else {
newlistvar <- listvar[c(nbvar, 1, (nbvar - 1):2)]
newf <- formula(paste("~", paste(newlistvar, collapse = " + ")))
tab <- xtabs(formula = newf, data = tmp, drop.unused.levels = TRUE)
}
counts <- data.frame(tab)
names(counts)[ncol(counts)] <- "n.ctab"
counts <- counts[counts$n.ctab > 0, ]
if (length(dim(tab)) == 1)
tab.counts <- addmargins(tab, FUN = list(Total = sum))
else tab.counts <- ftable(formula = f, data = addmargins(tab,
margin = 2, FUN = list(Total = sum)))
if (nbvar == 1)
tab.p <- prop.table(tab)
else {
tab.p <- ftable(formula = f, data = tab)
tab.p <- prop.table(tab.p, margin = 1)
}
tab.p <- round(100 * tab.p, digits = digits)
structure(list(CALL = CALL, formula = CALL[[2]], f = formula(deparse(f)),
tab = tab, tab.counts = tab.counts, tab.p = tab.p, counts = counts,
digits = digits), class = "ctab")
}
Code : Tout sélectionner
> mydata <- data.frame(
+ fat = factor(c(rep(1, times = 12), rep(2, times = 12), rep(3, times = 12))),
+ surfact = factor(rep(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3), times = 3)),
+ flour = factor(rep(1:4, times = 9)),
+ y = c(6.7, 4.3, 5.7, NA, 7.1, NA, 5.9, 5.6,
+ NA, 5.5, 6.4, 5.8, NA, 5.9, 7.4, 7.1, NA, 5.6, NA, 6.8,
+ 6.4, 5.1, 6.2, 6.3, 7.1, 5.9, NA, NA, 7.3, 6.6,
+ 8.1, 6.8, NA, 7.5, 9.1, NA)
+ )
> mydata <- mydata[!is.na(mydata$y), ]
> fm <- lm(formula = y ~ flour + fat * surfact, data = mydata)
> emm(formula = ~ 1, fm = fm)
emm(fm = ~1, formula = ~1)
mu se
1 6.6333 0.0846
> emm(formula = ~ flour, fm = fm)
emm(fm = ~flour, formula = ~flour)
flour mu se
1 1 7.3020 0.1995
2 2 5.7073 0.1467
3 3 6.9815 0.1621
4 4 6.5423 0.1785
> emm(formula = ~ fat, fm = fm)
emm(fm = ~fat, formula = ~fat)
fat mu se
1 1 5.8502 0.1365
2 2 6.5771 0.1477
3 3 7.4725 0.1565
> emm(formula = ~ surfact, fm = fm)
emm(fm = ~surfact, formula = ~surfact)
surfact mu se
1 1 6.3960 0.1502
2 2 6.5999 0.1432
3 3 6.9039 0.1473
> emm(formula = ~ fat + surfact, fm = fm)
emm(fm = ~fat + surfact, formula = ~fat + surfact)
surfact fat mu se
1 1 1 5.5364 0.2404
2 2 1 5.8913 0.2392
3 3 1 6.1229 0.2414
4 1 2 7.0229 0.2414
5 2 2 6.7085 0.3006
6 3 2 6.0000 0.2034
7 1 3 6.6286 0.3007
8 2 3 7.2000 0.2034
9 3 3 8.5889 0.3001
> # Results from Manual Splus6, p. 632-633
>
> #Tables of adjusted means
> # Grand mean
> # 6.633281
> # se 0.084599
> #Flour
> # 1 2 3 4
> # 7.3020 5.7073 6.9815 6.5423
> #se 0.1995 0.1467 0.1621 0.1785
> #Fat
> # 1 2 3
> # 5.8502 6.5771 7.4725
> #se 0.1365 0.1477 0.1565
> #Surfactant
> # 1 2 3
> # 6.3960 6.5999 6.9039
> #se 0.1502 0.1432 0.1473
> #Fat:Surfactant
> #Dim 1 : Fat
> #Dim 2 : Surfactant
> # 1 2 3
> #1 5.5364 5.8913 6.1229
> #se 0.2404 0.2392 0.2414
> #2 7.0229 6.7085 6.0000
> #se 0.2414 0.3006 0.2034
> #3 6.6286 7.2000 8.5889
> #se 0.3007 0.2034 0.3001
Retourner vers « Questions en cours »
Utilisateurs parcourant ce forum : Aucun utilisateur enregistré et 1 invité