MM4DBER 2024Load packages:
NOTE:To install new packages (e.g.,DT,plotly,gg3D,gganimate,viridis) make a change to this script file and thenSAVEthe document. A yellow banner should appear at the top of the RSTUDIO window asking if you would like to install packages (if any of these packages does not install this will not interfere with completing the activity)
library(tidyverse)
library(glue)
library(MplusAutomation)
library(here)
library(janitor)
library(DT)
library(gt)
library(plotly)
library(gg3D)
library(gganimate)
library(viridis)
lsay_data <- read_csv("https://garberadamc.github.io/project-site/data/lca_lsay_sci.csv",
na = c("9999", "9999.00")) %>%
clean_names() %>%
dplyr::select(1:5, Enjoy = ab39m, Useful = ab39t,
Logical = ab39u, Job = ab39w, Adult = ab39x)
Use {DT::datatable()} to take a look at the data
datatable(lsay_data, rownames = FALSE, filter="top",
options = list(pageLength = 5, scrollX=T) )
Figure. Path diagram of science attitude indicators.
missing*)different (e.g., item labels)Save response frequencies for the 4 class model with
response is _____.dat.
patterns <- mplusObject(
TITLE = "C4 LCA - Save response patterns",
VARIABLE =
"categorical = ********;
usevar = ********;
classes = c(*);",
ANALYSIS =
"estimator = mlr;
type = mixture;
starts = ******;",
SAVEDATA =
"File=3step_savedata.dat;
Save=cprob;
Missflag= 999;
response is resp_patterns.dat; ",
OUTPUT = "sampstat residual patterns tech10 tech11 tech14",
PLOT =
"type = plot3;
series = Enjoy-Adult(*);",
usevariables = colnames(*******),
rdata = *******)
patterns_fit <- mplusModeler(patterns,
dataout=here("mplus_files", "LSAY.dat"),
modelout=here("mplus_files", "patterns.inp") ,
check=TRUE, run = TRUE, hashfilename = FALSE)
Read in observed respnse pattern data
patterns <- read_table2(here("mplus_files", "resp_patterns.dat"),
col_names=FALSE, na = "*")
colnames(patterns) <- c("Frequency", "ENJOY", "USEFUL", "LOGICAL", "JOB", "ADULT",
"CPROB1", "CPROB2", "CPROB3", "CPROB4", "C_MODAL")
Order responses by highest frequency
order_highest <- patterns %>%
arrange(desc(Frequency))
Order responses by C_MODAL
loop_cond <- lapply(1:4, function(k) {
order_cond <- patterns %>%
filter(C_MODAL == k) %>%
arrange(desc(Frequency)) %>%
head(5)
})
table_data1 <- bind_rows(loop_cond) %>%
as.data.frame()
table_data2 <- rbind(order_highest[1:5,], table_data1)
Use {gt} to make a nicely formatted table
table_data2 %>%
gt() %>%
tab_header(
title = md("**Observed Response Patterns**"),
subtitle = md(" ")) %>%
tab_source_note(
source_note = md("Data Source: **Longitudinal Study of American Youth.**")) %>%
cols_label(
ENJOY = "Enjoy",
USEFUL = "Useful",
LOGICAL = "Logical",
JOB = "Job",
ADULT = "Adult",
CPROB1 = html("Pk=1"),
CPROB2 = html("Pk=2"),
CPROB3 = html("Pk=3"),
CPROB4 = html("Pk=4"),
C_MODAL = md("*k*")) %>%
tab_row_group(
group = "Unconditional response patterns",
rows = 1:5) %>%
tab_row_group(
group = "k=1 conditional response patterns",
rows = 6:10) %>%
tab_row_group(
group = "k=2 conditional response patterns",
rows = 11:15) %>%
tab_row_group(
group = "k=3 conditional response patterns",
rows = 16:20) %>%
tab_row_group(
group = "k=4 conditional response patterns",
rows = 21:25) %>%
row_group_order(
groups = c("Unconditional response patterns",
"k=1 conditional response patterns",
"k=2 conditional response patterns",
"k=3 conditional response patterns",
"k=4 conditional response patterns")) %>%
tab_options(column_labels.font.weight = "bold")
| Observed Response Patterns | ||||||||||
| Frequency | Enjoy | Useful | Logical | Job | Adult | Pk=1 | Pk=2 | Pk=3 | Pk=4 | k |
|---|---|---|---|---|---|---|---|---|---|---|
| Unconditional response patterns | ||||||||||
| 558 | 0 | 0 | 0 | 0 | 0 | 0.000 | 0.117 | 0.000 | 0.883 | 4 |
| 529 | 1 | 1 | 1 | 1 | 1 | 0.957 | 0.000 | 0.043 | 0.000 | 1 |
| 313 | 1 | 0 | 0 | 0 | 0 | 0.000 | 0.307 | 0.000 | 0.693 | 4 |
| 135 | 1 | 0 | 1 | 0 | 0 | 0.002 | 0.977 | 0.000 | 0.021 | 2 |
| 94 | 1 | 1 | 1 | 0 | 1 | 0.687 | 0.000 | 0.313 | 0.000 | 1 |
| k=1 conditional response patterns | ||||||||||
| 529 | 1 | 1 | 1 | 1 | 1 | 0.957 | 0.000 | 0.043 | 0.000 | 1 |
| 94 | 1 | 1 | 1 | 0 | 1 | 0.687 | 0.000 | 0.313 | 0.000 | 1 |
| 78 | 0 | 1 | 1 | 1 | 1 | 0.859 | 0.000 | 0.141 | 0.000 | 1 |
| 62 | 1 | 1 | 0 | 1 | 1 | 0.580 | 0.000 | 0.420 | 0.000 | 1 |
| 55 | 1 | 1 | 1 | 1 | 0 | 0.650 | 0.350 | 0.000 | 0.000 | 1 |
| k=2 conditional response patterns | ||||||||||
| 135 | 1 | 0 | 1 | 0 | 0 | 0.002 | 0.977 | 0.000 | 0.021 | 2 |
| 88 | 0 | 0 | 1 | 0 | 0 | 0.000 | 0.934 | 0.000 | 0.066 | 2 |
| 74 | 1 | 1 | 1 | 0 | 0 | 0.063 | 0.937 | 0.000 | 0.000 | 2 |
| 47 | 1 | 1 | 0 | 0 | 0 | 0.006 | 0.994 | 0.000 | 0.000 | 2 |
| 44 | 1 | 0 | 0 | 1 | 0 | 0.004 | 0.643 | 0.000 | 0.353 | 2 |
| k=3 conditional response patterns | ||||||||||
| 91 | 1 | 0 | 0 | 0 | 1 | 0.003 | 0.000 | 0.937 | 0.060 | 3 |
| 88 | 1 | 0 | 1 | 1 | 1 | 0.337 | 0.000 | 0.663 | 0.000 | 3 |
| 76 | 1 | 0 | 1 | 0 | 1 | 0.048 | 0.000 | 0.951 | 0.001 | 3 |
| 70 | 1 | 0 | 0 | 1 | 1 | 0.031 | 0.000 | 0.964 | 0.006 | 3 |
| 53 | 0 | 0 | 0 | 0 | 1 | 0.001 | 0.000 | 0.763 | 0.236 | 3 |
| k=4 conditional response patterns | ||||||||||
| 558 | 0 | 0 | 0 | 0 | 0 | 0.000 | 0.117 | 0.000 | 0.883 | 4 |
| 313 | 1 | 0 | 0 | 0 | 0 | 0.000 | 0.307 | 0.000 | 0.693 | 4 |
| 53 | 0 | 0 | 0 | 1 | 0 | 0.000 | 0.353 | 0.000 | 0.647 | 4 |
| 11 | 0 | 0 | NA | 0 | 0 | 0.000 | 0.231 | 0.000 | 0.769 | 4 |
| 9 | 0 | NA | 0 | 0 | 0 | 0.000 | 0.170 | 0.000 | 0.829 | 4 |
| Data Source: Longitudinal Study of American Youth. | ||||||||||
The goal of this activity is to provide an opportunity for you to “get your hands into” the posterior probabilities and get practice calculating the Average Posterior Probabilities (AvePP) for each class. Specifically, we are going to recreate this table from the Mplus output. One way to think of the AvePP values are as the class-specific classification errors in the model.
Instructions:
AvePP table in the Mplus output file under the
header “Average Latent Class Probabilities for Most Likely Latent Class
Membership (Row) by Latent Class (Column)”. We are going to replicate
the values from this table.PATTERNS_BY_C.xlsx) ordered by class in each respective
tab.Steps for estimating the Average Posterior Probabilities (AvePP):
C = 1N2) multiply the frequency for that specific response
pattern by the posterior probability for class 1 (CPROB1).CPROB2, CPROB3,
CPROB4)Total Frequency Count by summing the
frequency columnFollow up questions:
Order response patterns (rows) by modal assignment (K)
order_modal <- patterns %>%
arrange(desc(C_MODAL)) %>%
rownames_to_column() %>%
rename('pat_num' = "rowname") %>%
drop_na(ENJOY:ADULT)
Prepare plot data
p1_long <- order_modal %>%
dplyr::select(pat_num:ADULT, C_MODAL) %>%
pivot_longer(`ENJOY`:`ADULT`,
names_to = "var",
values_to = "value") %>%
mutate(obs = rep(1:32, each=5)) %>%
mutate(Class = factor(C_MODAL)) %>%
mutate(var = ordered(var,
levels = c("ENJOY","USEFUL","LOGICAL","JOB","ADULT"))) %>%
select(-pat_num, -C_MODAL)
out_c4 <- readModels(here("mplus_files"),
filefilter = "patterns", quiet = TRUE)
# extract posterior probabilities
probs_c4 <- as.data.frame(
out_c4[["gh5"]][["means_and_variances_data"]]
[["estimated_probs"]][["values"]]
[seq(2, 10, 2),])
rownames(probs_c4) <- c("ENJOY","USEFUL","LOGICAL","JOB","ADULT")
long_c4 <- probs_c4 %>% rownames_to_column() %>%
rename('var' = "rowname") %>%
pivot_longer(`V1`:`V4`, # The columns I'm gathering together
names_to = "c", # new column name for existing names
values_to = "value") %>% # new column name to store values
mutate(Class = rep(1:4,5)) %>%
arrange(Class) %>%
mutate(obs = rep(33:36,each=5)) %>%
mutate(Frequency = rep(c(829,782,619,833),each=5)) %>%
mutate(var = ordered(var,
levels = c("ENJOY","USEFUL","LOGICAL","JOB","ADULT"))) %>%
select(6,1,3,5,4)
p2_long <- rbind(p1_long, long_c4) %>%
mutate(Class = as.numeric(Class))
Visualize observed response patterns with {plotly}. This
plot is interactive!
gg <- ggplot(p2_long, aes(x=var, y=value, color = Class, size=Frequency)) +
geom_line(aes(as.numeric(var), frame = obs)) +
scale_color_viridis() + labs(x="Indicator", y= "Probability")
ggplotly(gg) %>% animation_opts(frame = 1000, transition = 0) %>%
animation_slider(currentvalue =
list(prefix = "Pattern ", font = list(color="red")))
Make a 3D plot with packages {ggplot2},
{gg3D}, and {gganimate}.
theta= 170 # change perspective (tilt)
phi=40 # change perspective (rotation)
resp3d <- ggplot(p1_long, aes(x=as.numeric(var),
y=as.numeric(value),
z = as.numeric(obs)),
alpha = .8) +
axes_3D(theta=theta, phi=phi) +
stat_3D(theta=theta, phi=phi, geom="path",
aes(colour = Class, size = Frequency), alpha = .8) +
scale_color_manual(values=c("#FDE725FF", "#DE7065FF", "#238A8DFF", "#482677FF")) +
theme_void() +
annotate("text", x = -.3, y = 0.05, label = "Indicators ") +
annotate("text", x = .35, y = -.4, label = "Probability") +
annotate("text", x = .25, y = .42, label = "Pattern") +
annotate("text", x = .2, y = 0, label = "0.0") +
annotate("text", x = .34, y = -.33, label = "1.0") +
annotate("text", x = -.05, y = 0, angle = 6,
label = "Enjoy - Useful - Logical - Job - Adult") +
transition_states(obs, transition_length=1, state_length=5) +
shadow_mark(alpha = .1,) +
labs(title = "Observed response pattern = {closest_state}")
animate(resp3d, fps = 2)
anim_save(here("13-response-patterns", "figures", "responses_3d_anim.gif"),
height = 6, width = 8, dpi = "retina")
Drew A. Linzer, Jeffrey B. Lewis (2011). poLCA: An R Package for Polytomous Variable Latent Class Analysis. Journal of Statistical Software, 42(10), 1-29. URL http://www.jstatsoft.org/v42/i10/.
Hallquist, M. N., & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural equation modeling: a multidisciplinary journal, 25(4), 621-638.
Miller, J. D., Hoffer, T., Suchner, R., Brown, K., & Nelson, C. (1992). LSAY codebook. Northern Illinois University.
Muthén, B. O., Muthén, L. K., & Asparouhov, T. (2017). Regression and mediation analysis using Mplus. Los Angeles, CA: Muthén & Muthén.
Muthén, L.K. and Muthén, B.O. (1998-2017). Mplus User’s Guide. Eighth Edition. Los Angeles, CA: Muthén & Muthén
R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/
Wickham et al., (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686