Boucle pour t.test sur plus de 2 échantillons

Postez ici vos questions, réponses, commentaires ou suggestions - Les sujets seront ultérieurement répartis dans les archives par les modérateurs

Modérateur : Groupe des modérateurs

Patricia OBEID
Messages : 68
Enregistré le : 10 Avr 2017, 19:03

Boucle pour t.test sur plus de 2 échantillons

Messagepar Patricia OBEID » 20 Mar 2018, 13:12

Bonjour,
je cherche comment écrire le calcul du t.test, la récupération de la pvalue et le tracé d'un barplot pour plus de deux échantillons.
Il me faut écrire une boucle pour appliquer sur plus de 15 échantillons ce que je faisais sur mon échantillon seul.
Voici le script que j'utilise pour un échantillon avec 3 traitements (NT, TA et TB) où je compare NT à TA et NT à TB :

Code : Tout sélectionner

# Pairwise t-tests of QR between the groups given by Name (single sample).
test <- pairwise.t.test(x = data1$QR, g = data1$Name)
# Display the matrix of p-values (rows and columns are group names).
test[["p.value"]]
# Printed result:
# NT         TA         TB
# TA 0.007265354         NA         NA
# TB 0.035774070 0.36596602         NA
# TC 0.460629603 0.01686606 0.08697365

# Bar plot of the mean QR of each sample with error bars of +/- Ecartype
# (presumably the standard deviation -- confirm against how Stat was built).
x11()  # open a new graphics device
# Perpendicular axis labels, smaller text and one extra line of bottom outer
# margin; the previous settings are saved in `op` and restored at the end.
op <- par(las = 2, cex = 0.7, oma = c(1, 0, 0, 0))
# barplot() invisibly returns the x positions (midpoints) of the bars. They
# must be stored in `bp`: arrows() below and the later text() annotation both
# use them, and `bp` was otherwise never defined.
bp <- barplot(
  Stat$QR_Moy,
  names.arg = Stat$Name,
  ylim = c(0, 2),
  main = "PCR Séverine du"
)
box()  # draw a frame around the plot
# Error bars: double-headed arrows with flat heads (angle = 90, code = 3)
# running from mean - Ecartype to mean + Ecartype at each bar midpoint.
arrows(
  bp, Stat$QR_Moy - Stat$Ecartype,
  bp, Stat$QR_Moy + Stat$Ecartype,
  lwd = 1.5, angle = 90, length = 0.1, code = 3
)
par(op)  # restore the graphical parameters saved above

#' Convert a p-value into a significance label.
#'
#' @param pvalue A single numeric p-value (may be NA).
#' @return A character string: "***" if pvalue <= 0.001, "**" if
#'   pvalue <= 0.01, "*" if pvalue <= 0.05, "NS" otherwise, and "" when
#'   pvalue is NA (e.g. the empty cells of a pairwise p-value matrix).
etoiles <- function(pvalue) {
  # NA would make the comparisons below fail inside if(); return the empty
  # label instead so the function can be mapped over a whole p-value matrix.
  if (is.na(pvalue)) return("")
  if (pvalue <= 0.001) return("***")
  # Two stars here: the original returned "***" for this level as well, which
  # made the 0.001 and 0.01 thresholds indistinguishable.
  if (pvalue <= 0.01) return("**")
  if (pvalue <= 0.05) return("*")
  "NS"
}

# Annotate every bar except the first one with its significance label, placed
# slightly (0.05) above the top of the error bar.
# seq_along(bp)[-1] is empty when there is a single bar, whereas
# 2:length(bp) would count down to c(2, 1).
limits <- seq_along(bp)[-1]
text(
  bp[limits],
  Stat$QR_Moy[limits] + Stat$Ecartype[limits] + 0.05,
  # First column of the p-value matrix: each group against the first group.
  # vapply() guarantees one character label per p-value.
  vapply(test$p.value[, 1], etoiles, character(1)),
  cex = 1.5
)


Aujourd'hui, je pars d'un dataframe de plus de 15 échantillons ...

Code : Tout sélectionner

data <- structure(list(TheGene = c("ACAT1", "ACAT1", "ACAT1", "ACAT1",
"ACAT1", "ACAT1", "ACAT1", "ACAT1", "ACAT1", "ADIPO", "ADIPO",
"ADIPO", "ADIPO", "ADIPO", "ADIPO", "ADIPO", "ADIPO", "ADIPO",
"APOC1", "APOC1", "APOC1", "APOC1", "APOC1", "APOC1", "APOC1",
"APOC1", "APOC1", "APPL1", "APPL1", "APPL1", "APPL1", "APPL1",
"APPL1", "APPL1", "APPL1", "APPL1", "CPEnn", "CPEnn", "CPEnn",
"CPEnn", "CPEnn", "CPEnn", "CPEnn", "CPEnn", "CPEnn", "DGAT1",
"DGAT1", "DGAT1", "DGAT1", "DGAT1", "DGAT1", "DGAT1", "DGAT1",
"DGAT1", "FADS2", "FADS2", "FADS2", "FADS2", "FADS2", "FADS2",
"FADS2", "FADS2", "FADS2", "FASNn", "FASNn", "FASNn", "FASNn",
"FASNn", "FASNn", "FASNn", "FASNn", "FASNn", "FOXOn", "FOXOn",
"FOXOn", "FOXOn", "FOXOn", "FOXOn", "FOXOn", "FOXOn", "FOXOn",
"HSD17", "HSD17", "HSD17", "HSD17", "HSD17", "HSD17", "HSD17",
"HSD17", "HSD17", "MGAT1", "MGAT1", "MGAT1", "MGAT1", "MGAT1",
"MGAT1", "MGAT1", "MGAT1", "MGAT1", "NR1H3", "NR1H3", "NR1H3",
"NR1H3", "NR1H3", "NR1H3", "NR1H3", "NR1H3", "NR1H3", "PTGS2",
"PTGS2", "PTGS2", "PTGS2", "PTGS2", "PTGS2", "PTGS2", "PTGS2",
"PTGS2", "RXRAn", "RXRAn", "RXRAn", "RXRAn", "RXRAn", "RXRAn",
"RXRAn", "RXRAn", "RXRAn", "SCDnn", "SCDnn", "SCDnn", "SCDnn",
"SCDnn", "SCDnn", "SCDnn", "SCDnn", "SCDnn", "SREBF", "SREBF",
"SREBF", "SREBF", "SREBF", "SREBF", "SREBF", "SREBF", "SREBF",
"TGFB1", "TGFB1", "TGFB1", "TGFB1", "TGFB1", "TGFB1", "TGFB1",
"TGFB1", "TGFB1"), Samples2 = c("NT", "NT", "NT", "TA", "TA",
"TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB",
"TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB",
"NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT",
"NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA",
"TA", "TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA",
"TB", "TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB",
"TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT",
"NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT", "NT",
"TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA", "TA",
"TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB",
"TB", "TB", "NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB",
"NT", "NT", "NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT",
"NT", "TA", "TA", "TA", "TB", "TB", "TB", "NT", "NT", "NT", "TA",
"TA", "TA", "TB", "TB", "TB"), Name = c("ACAT1_NT", "ACAT1_NT",
"ACAT1_NT", "ACAT1_TA", "ACAT1_TA", "ACAT1_TA", "ACAT1_TB", "ACAT1_TB",
"ACAT1_TB", "ADIPO_NT", "ADIPO_NT", "ADIPO_NT", "ADIPO_TA", "ADIPO_TA",
"ADIPO_TA", "ADIPO_TB", "ADIPO_TB", "ADIPO_TB", "APOC1_NT", "APOC1_NT",
"APOC1_NT", "APOC1_TA", "APOC1_TA", "APOC1_TA", "APOC1_TB", "APOC1_TB",
"APOC1_TB", "APPL1_NT", "APPL1_NT", "APPL1_NT", "APPL1_TA", "APPL1_TA",
"APPL1_TA", "APPL1_TB", "APPL1_TB", "APPL1_TB", "CPEnn_NT", "CPEnn_NT",
"CPEnn_NT", "CPEnn_TA", "CPEnn_TA", "CPEnn_TA", "CPEnn_TB", "CPEnn_TB",
"CPEnn_TB", "DGAT1_NT", "DGAT1_NT", "DGAT1_NT", "DGAT1_TA", "DGAT1_TA",
"DGAT1_TA", "DGAT1_TB", "DGAT1_TB", "DGAT1_TB", "FADS2_NT", "FADS2_NT",
"FADS2_NT", "FADS2_TA", "FADS2_TA", "FADS2_TA", "FADS2_TB", "FADS2_TB",
"FADS2_TB", "FASNn_NT", "FASNn_NT", "FASNn_NT", "FASNn_TA", "FASNn_TA",
"FASNn_TA", "FASNn_TB", "FASNn_TB", "FASNn_TB", "FOXOn_NT", "FOXOn_NT",
"FOXOn_NT", "FOXOn_TA", "FOXOn_TA", "FOXOn_TA", "FOXOn_TB", "FOXOn_TB",
"FOXOn_TB", "HSD17_NT", "HSD17_NT", "HSD17_NT", "HSD17_TA", "HSD17_TA",
"HSD17_TA", "HSD17_TB", "HSD17_TB", "HSD17_TB", "MGAT1_NT", "MGAT1_NT",
"MGAT1_NT", "MGAT1_TA", "MGAT1_TA", "MGAT1_TA", "MGAT1_TB", "MGAT1_TB",
"MGAT1_TB", "NR1H3_NT", "NR1H3_NT", "NR1H3_NT", "NR1H3_TA", "NR1H3_TA",
"NR1H3_TA", "NR1H3_TB", "NR1H3_TB", "NR1H3_TB", "PTGS2_NT", "PTGS2_NT",
"PTGS2_NT", "PTGS2_TA", "PTGS2_TA", "PTGS2_TA", "PTGS2_TB", "PTGS2_TB",
"PTGS2_TB", "RXRAn_NT", "RXRAn_NT", "RXRAn_NT", "RXRAn_TA", "RXRAn_TA",
"RXRAn_TA", "RXRAn_TB", "RXRAn_TB", "RXRAn_TB", "SCDnn_NT", "SCDnn_NT",
"SCDnn_NT", "SCDnn_TA", "SCDnn_TA", "SCDnn_TA", "SCDnn_TB", "SCDnn_TB",
"SCDnn_TB", "SREBF_NT", "SREBF_NT", "SREBF_NT", "SREBF_TA", "SREBF_TA",
"SREBF_TA", "SREBF_TB", "SREBF_TB", "SREBF_TB", "TGFB1_NT", "TGFB1_NT",
"TGFB1_NT", "TGFB1_TA", "TGFB1_TA", "TGFB1_TA", "TGFB1_TB", "TGFB1_TB",
"TGFB1_TB"), QR = c(0.931955732, 0.906471353, 1.183724489, 0.808507652,
0.778264973, 0.612734221, 0.754364313, 0.621287672, 0.604298528,
0.927659117, 0.950439478, 1.1341934, 0.676736762, 0.833160684,
0.655954052, 0.707924135, 0.750886452, 0.695762422, 0.879649076,
1.00695555, 1.128964405, 0.724471077, 0.888842681, 0.666649339,
0.776468875, 0.635075491, 0.763129604, 0.943874313, 0.977159968,
1.08422687, 0.735433432, 0.847724076, 0.597357568, 0.720298431,
0.607799194, 0.565134695, 0.889870106, 0.970410231, 1.158025215,
0.695762422, 0.807574167, 0.603600818, 1.08047573, 1.410949807,
1.025741121, 0.909618394, 0.938437997, 1.171480819, 0.645430354,
0.754364313, 0.663575871, 0.854607174, 0.731197647, 0.845767679,
0.857574155, 1.059463094, 1.10063288, 0.805710428, 0.897095409,
0.805710428, 1.112136086, 1.147372093, 1.131575882, 0.906471353,
1.074252648, 1.026926789, 0.951538103, 1.012788784, 0.851650458,
1.012788784, 0.866537046, 0.837019613, 0.923382311, 0.996540263,
1.086734863, 0.795536484, 0.765778999, 0.692554734, 0.91383145,
0.829319546, 0.891928519, 0.942784536, 1, 1.060687741, 1.558329159,
1.777685362, 1.510472586, 1.04608494, 1.042465761, 0.904379378,
1.022192327, 0.90229223, 1.08422687, 0.5182308, 0.581022793,
0.455861244, 0.740548776, 0.766664172, 0.771996743, 0.91806402,
0.905424761, 1.203025036, 0.605696368, 0.698177934, 0.622724811,
0.847724076, 0.660516573, 0.905424761, 0.887816443, 0.951538103,
1.183724489, 0.180282758, 0.215137859, 0.194566243, 0.479078287,
0.437796806, 0.398688344, 0.92873141, 1.009284801, 1.066832243,
0.925518243, 1.155352697, 0.906471353, 0.958156574, 1.055797631,
1.055797631, 0.835087919, 1.042465761, 1.148698355, 0.622005827,
0.576343173, 0.562529242, 1.010451446, 0.920187651, 0.812252396,
0.980552422, 0.953739165, 1.069299999, 0.595290867, 0.642454713,
0.53650617, 0.7456997, 0.865536561, 0.816014485, 0.906471353,
0.97828948, 1.127660927, 0.91594529, 0.958156574, 0.802922882,
0.944965349, 0.971531941, 1.108288387)), .Names = c("TheGene",
"Samples2", "Name", "QR"), class = "data.frame", row.names = c(NA,
-153L))


et voici le dataframe "Stat"

Code : Tout sélectionner

Stat <- structure(list(Name = structure(1:51, .Label = c("ACAT1_NT",
"ACAT1_TA", "ACAT1_TB", "ADIPO_NT", "ADIPO_TA", "ADIPO_TB", "APOC1_NT",
"APOC1_TA", "APOC1_TB", "APPL1_NT", "APPL1_TA", "APPL1_TB", "CPEnn_NT",
"CPEnn_TA", "CPEnn_TB", "DGAT1_NT", "DGAT1_TA", "DGAT1_TB", "FADS2_NT",
"FADS2_TA", "FADS2_TB", "FASNn_NT", "FASNn_TA", "FASNn_TB", "FOXOn_NT",
"FOXOn_TA", "FOXOn_TB", "HSD17_NT", "HSD17_TA", "HSD17_TB", "MGAT1_NT",
"MGAT1_TA", "MGAT1_TB", "NR1H3_NT", "NR1H3_TA", "NR1H3_TB", "PTGS2_NT",
"PTGS2_TA", "PTGS2_TB", "RXRAn_NT", "RXRAn_TA", "RXRAn_TB", "SCDnn_NT",
"SCDnn_TA", "SCDnn_TB", "SREBF_NT", "SREBF_TA", "SREBF_TB", "TGFB1_NT",
"TGFB1_TA", "TGFB1_TB"), class = "factor"), QR_Moy = c(1.00738,
0.733169, 0.659984, 1.0041, 0.72195, 0.718191, 1.00519, 0.759988,
0.724891, 1.00175, 0.726838, 0.631077, 1.0061, 0.702312, 1.17239,
1.00651, 0.68779, 0.810524, 1.00589, 0.836172, 1.13036, 1.00255,
0.938659, 0.905448, 1.00222, 0.75129, 0.87836, 1.00116, 1.6155,
0.997643, 1.0029, 0.518372, 0.759737, 1.00884, 0.6422, 0.804555,
1.00769, 0.196662, 0.438521, 1.00162, 0.995781, 1.02325, 1.00875,
0.586959, 0.914297, 1.0012, 0.591417, 0.809084, 1.00414, 0.892342,
1.00826), Ecartype = c(0.153246, 0.10539, 0.0821764, 0.113241,
0.0968698, 0.0289607, 0.124667, 0.115276, 0.0780682, 0.0733372,
0.125404, 0.0801583, 0.137594, 0.102144, 0.208405, 0.143592,
0.0583644, 0.0688408, 0.130084, 0.0527611, 0.0176494, 0.0865061,
0.0813375, 0.0941237, 0.0818242, 0.0529977, 0.0438594, 0.0589601,
0.142484, 0.0807892, 0.0924883, 0.0625809, 0.0168297, 0.16829,
0.0492206, 0.128034, 0.155742, 0.0175218, 0.0401999, 0.0693691,
0.138521, 0.0563731, 0.1595, 0.0311271, 0.0992307, 0.0604833,
0.0530804, 0.0602183, 0.112838, 0.0802635, 0.087638), ErreurType = c(0.0884766193522334,
0.0608469448698947, 0.0474445666610344, 0.0653797218333024, 0.0559278051063452,
0.01672046794092, 0.0719765260090631, 0.0665546296311033, 0.0450726962851495,
0.0423412521616134, 0.0724020331574558, 0.0462794160827828, 0.0794399329388774,
0.0589728658961051, 0.120322682850464, 0.0829028798534767, 0.033696702051091,
0.0397452544112292, 0.0751040324172633, 0.0304616352877408, 0.0101898858410354,
0.0499443201215447, 0.0469602275202112, 0.0543423435321236, 0.0472412238962258,
0.030598236361431, 0.0253222363964955, 0.0340406296064473, 0.0822631757552146,
0.0466436663676145, 0.0533981449018909, 0.0361310994611291, 0.00971663182538065,
0.0971622768019221, 0.0284175266596749, 0.0739204643654246, 0.0899176856241307,
0.0101162159466868, 0.023209423086396, 0.0400502685584421, 0.0799751366384162,
0.0325470244600537, 0.0920873679357453, 0.0179712395640924, 0.057290871356875,
0.0349200495364769, 0.030645983228693, 0.0347670517151416, 0.065147049674819,
0.0463401533311015, 0.0505978228912404)), .Names = c("Name",
"QR_Moy", "Ecartype", "ErreurType"), row.names = c(NA, -51L), class = "data.frame")


Mon souci est que je ne sais pas comment indiquer que le t.test doit être appliqué pour chaque gène (TheGene) sur les valeurs de "QR", en comparant NT à TA et NT à TB.
Merci de votre aide précieuse,
Patricia

Alexandre Dangléant
Messages : 270
Enregistré le : 30 Mar 2010, 14:38

Re: Boucle pour t.test sur plus de 2 échantillons

Messagepar Alexandre Dangléant » 21 Mar 2018, 13:31

Bonjour,

Si j'ai bien compris :

Code : Tout sélectionner

# For a single gene
pairwise.t.test(
  x = data[data$TheGene == "ACAT1", "QR"],
  g = data[data$TheGene == "ACAT1", "Name"]
)

# The loop: one matrix of p-values per gene, returned as a list named by gene
sapply(
  X = unique(data$TheGene),
  FUN = function(gene) {
    # Keep only the rows of the current gene, then compare its groups
    gene_rows <- data[data$TheGene == gene, ]
    pairwise.t.test(gene_rows[["QR"]], gene_rows[["Name"]])$p.value
  },
  simplify = FALSE
)
# Output:
#$ACAT1
#ACAT1_NT  ACAT1_TA
#ACAT1_TA 0.05754228        NA
#ACAT1_TB 0.03311922 0.4740936
#
#$ADIPO
#ADIPO_NT  ADIPO_TA
#ADIPO_TA 0.02147539        NA
#ADIPO_TB 0.02147539 0.9598088
# etc...

En passant l'argument simplify à TRUE, la sortie devient une matrice (une colonne par gène) :
#

Code : Tout sélectionner

#ACAT1      ADIPO      APOC1       APPL1      CPEnn      DGAT1      FADS2     FASNn       FOXOn        HSD17       MGAT1      NR1H3        PTGS2 RXRAn       SCDnn        SREBF     TGFB1
#[1,] 0.05754228 0.02147539 0.06371497 0.025195101 0.10827897 0.02168775 0.08760759 0.8110176 0.007509467 0.0008534362 0.000296462 0.03483197 0.0001222963     1 0.009987916 0.0003950558 0.5524222
#[2,] 0.03311922 0.02147539 0.05710530 0.009582922 0.23894278 0.09939453 0.11125285 0.6686229 0.090382434 0.9672334471 0.007639837 0.18588276 0.0005973978     1 0.333189584 0.0074305249 0.9591856
#[3,]         NA         NA         NA          NA         NA         NA         NA        NA          NA           NA          NA         NA           NA    NA          NA           NA        NA
#[4,] 0.47409363 0.95980878 0.70413804 0.266721788 0.03041431 0.17565328 0.01354428 0.8110176 0.090382434 0.0008534362 0.007639837 0.18588276 0.0192943377     1 0.021494512 0.0074305249 0.5524222
#

Les données des matrices de p.value sont mises « en ligne » (colonne par colonne), donc la 1re ligne = TA / NT, la 2e = TB / NT, la 3e = TA / TA (d'où le NA) et la 4e = TB / TA.

En espérant avoir aidé.
A+


Retourner vers « Questions en cours »

Qui est en ligne

Utilisateurs parcourant ce forum : Aucun utilisateur enregistré et 1 invité