Je travaille sur des données de généalogie, et je souhaite représenter certaines infos sous la forme d'une frise.
Voici mon script me permettant d'obtenir le graphique voulu:
Code : Tout sélectionner
# packages
library(dplyr)
library(lubridate)
library(tidyverse)
# data
datagen <- tibble::tribble(
~ID_INDIVIDU, ~ID_PERE, ~ID_MERE, ~PATRONYME, ~Prénom, ~Naissance_Date, ~Mariage_Date, ~Décès_Date, ~Sex,
"I706", NA, NA, "NOM1", "Pierre", NA, "06/01/1747", NA, "M",
"I670", NA, NA, "NOM2", "Pierre", NA, "06/06/1682", NA, "M",
"I194", "I195", "I199", "NOM3", "Pierre", "07/11/1760", "24/10/1785", "19/02/1816", "M",
"I60", "I101", NA, "NOM4", "Pierre", NA, "21/11/1752", NA, "M",
"I538", "I556", "I557", "NOM5", "Pierre", "04/11/1782", "15/03/1813", NA, "M",
"I58", "I92", "I93", "NOM6", "Pierre", "23/12/1736", "10/07/1762", NA, "M",
"I92", "I99", "I100", "NOM6", "Pierre", NA, "15/09/1733", NA, "M",
"I606", NA, NA, "NOM7", "Pierre", NA, NA, "29/09/1803", "M",
"I36", "I44", "I45", "NOM8", "Pierre", "20/11/1756", "31/01/1785", "26/04/1826", "M",
"I44", "I47", "I46", "NOM8", "Pierre", "30/10/1725", "07/07/1750", NA, "M",
"I678", NA, NA, "NOM9", "Pierre", NA, "13/05/1688", NA, "M",
"I190", NA, NA, "NOM10", "Pierre", NA, "17/07/1738", NA, "M",
"I288", "I312", "I313", "NOM11", "Marie", "08/10/1820", "17/02/1846", "31/10/1891", "F",
"I310", "I346", "I347", "NOM12", "Marie", "29/09/1780", "20/09/1798", "08/01/1825", "F",
"I346", "I398", "I399", "NOM12", "Marie", "10/07/1749", NA, NA, "F",
"I614", "I695", "I696", "NOM13", "Marie", "18/02/1759", "25/09/1788", "12/01/1817", "F",
"I695", NA, NA, "NOM13", "Marie", NA, "31/01/1758", NA, "F",
"I178", NA, NA, "NOM14", "Marie", NA, "18/02/1732", NA, "F",
"I278", "I386", "I387", "NOM15", "Marie", "21/07/1827", "25/11/1851", "22/05/1857", "F",
"I280", "I298", "I299", "NOM15", "Marie", "24/03/1808", "09/12/1830", "05/09/1876", "F",
"I298", "I300", "I301", "NOM15", "Marie", "27/07/1784", "20/08/1800", "13/04/1838", "F",
"I84", NA, NA, "NOM16", "Marie", "1742", NA, "09/06/1805", "F",
"I438", "I419", "I420", "NOM17", "Marie", NA, "26/01/1751", "10/04/1805", "F",
"I396", NA, NA, "NOM18", "Marie", "1751", NA, "12/01/1829", "F",
"I197", "I208", "I209", "NOM19", "Marie", "23/10/1765", "20/05/1814", "07/11/1843", "F",
"I208", NA, NA, "NOM19", "Marie", NA, NA, "05/11/1796", "F"
)
# format data
datagen$Naissance_Date <- dmy(datagen$Naissance_Date) # date de naissance
datagen$Mariage_Date <- dmy(datagen$Mariage_Date) # date de mariage
datagen$Décès_Date <- dmy(datagen$Décès_Date) # date de décès
datagenlong <- datagen %>%
mutate(nom_comp = paste(Prénom, PATRONYME, sep = " \n ")) %>%
pivot_longer(
cols = ends_with("_Date"),
names_to = "event", values_to = "date", values_drop_na = TRUE
) %>%
arrange(date)
# plot
plot <- ggplot(data = datagenlong, aes(date, fct_rev(fct_reorder(nom_comp, date, .fun = "min")))) +
geom_point(aes(shape = event, color = Sex), size = 5, show.legend = FALSE) +
geom_line(aes(color = Sex), lwd = 1.5, show.legend = FALSE) +
scale_x_date(limits = c(floor_date(as.Date(min(datagenlong$date)), "5 years"), ceiling_date(as.Date(max(datagenlong$date)), "5 years"))) +
scale_shape_manual(values = c("alive_Date" = 46, "Naissance_Date" = 19, "Mariage_Date" = 38, "Décès_Date" = 15)) +
theme(
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 12)
)
print(plot)
Seulement, voilà: certaines personnes possédant le même nom, je me retrouve avec plusieurs individus sur une même ligne (ex: Pierre NOM8, Marie NOM15...). Comment puis-je faire pour qu'à chaque ligne corresponde un individu et un seul (champ: ID_INDIVIDU), mais que les labels soient bien ceux du champ "nom_comp" ?
Je sèche...
Merci pour votre aide !