library(tidyverse)Physiology data preparation
Data for the physiology activity comes from: https://www.physionet.org/content/wearable-exam-stress/1.0.0/
compile_params <- function(str_, stu_) {
eda <- read_csv(paste0("../GEMs_local/phys_data/", stu_, "/", str_, "/EDA.csv"),
col_names = F)
hr <- read_csv(paste0("../GEMs_local/phys_data/", stu_, "/", str_, "/HR.csv"),
col_names = F)
temp <- read_csv(paste0("../GEMs_local/phys_data/", stu_, "/", str_, "/TEMP.csv"),
col_names = F)
eda_time <- as.POSIXct(pull(eda[1, 1]), tz = "America/Chicago")
hr_time <- as.POSIXct(pull(hr[1, 1]), tz = "America/Chicago")
temp_time <- as.POSIXct(pull(temp[1, 1]), tz = "America/Chicago")
eda <- eda[-c(1:2), ]
hr <- hr[-c(1:2), ]
temp <- temp[-c(1:2), ]
# measure taken at 4Hz, or 4x per second
eda_adj <- eda %>% mutate(date_time = eda_time + milliseconds(row_number() -
1) * 250) %>% mutate(timestamp = floor_date(date_time, unit = "second"))
eda_adj <- eda_adj %>% group_by(timestamp) %>% summarize(eda = mean(X1))
# measure taken once per second
hr_adj <- hr %>% mutate(timestamp = hr_time + seconds(row_number() - 1), hr = X1) %>% select(-X1)
# measure taken at 4Hz, or 4x per second
temp_adj <- temp %>% mutate(date_time = temp_time + milliseconds(row_number() -
1) * 250) %>% mutate(timestamp = floor_date(date_time, unit = "second"))
temp_adj <- temp_adj %>% group_by(timestamp) %>% summarize(temp = mean(X1))
# Merge everything
dat <- full_join(eda_adj, hr_adj) %>% full_join(temp_adj) %>% mutate(test = str_) %>% mutate(studentID = stu_)
return(dat)
}
dat <- data.frame()
for (student in c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10")) {
for (exam in c("Midterm 1", "Midterm 2", "Final")) {
sub_dat <- compile_params(str_ = exam, stu_ = student)
dat <- bind_rows(dat, sub_dat)
}
}
dat <- na.omit(dat)write_csv(dat, "data/phys_data_messy.csv")corrplot::corrplot(cor(dat[, 2:4], use = "complete.obs"))
ggplot(data = dat,
aes(x = timestamp, y = temp, color = studentID)) +
geom_line() + scale_x_datetime(
date_minor_breaks = "60 min",
date_breaks = "60 min",
date_labels = "%H:%M",
timezone = "America/Chicago"
) + facet_wrap(studentID ~ test, scales = "free_x")# Mostly filter to remove any measurements outside of testing window.
dat_m1 <- dat %>% filter(
test == "Midterm 1" &
timestamp >= as.POSIXct("2018-10-13 09:00:00", tz = "America/Chicago") &
timestamp <= as.POSIXct("2018-10-13 10:30:00", tz = "America/Chicago") &
eda > 0.01 &
temp >= 25
)
dat_m2 <- dat %>% filter(
test == "Midterm 2" &
timestamp >= as.POSIXct("2018-11-10 09:00:00", tz = "America/Chicago") &
timestamp <= as.POSIXct("2018-11-10 10:30:00", tz = "America/Chicago") &
eda > 0.01 &
temp >= 25
)
dat_f <- dat %>% filter(
test == "Final" &
timestamp >= as.POSIXct("2018-12-05 11:00:00", tz = "America/Chicago") &
timestamp <= as.POSIXct("2018-12-05 14:00:00", tz = "America/Chicago") &
eda > 0.01 &
temp >= 25
) %>%
# Remove some drop offs at the end
mutate(temp =
case_when(
test == "Final" &
timestamp > as.POSIXct("2018-12-05 13:50:00", tz = "America/Chicago") ~ NA,
TRUE ~ temp
))
dat_clean <- bind_rows(dat_m1, dat_m2, dat_f) %>%
group_by(test, studentID) %>%
# Smooth curves by student and test
mutate(
eda = stats::filter(eda, sides = 2, filter = rep(1 / 120, 120)),
hr = stats::filter(hr, sides = 2, filter = rep(1 / 120, 120)),
temp = stats::filter(temp, sides = 2, filter = rep(1 / 120, 120))
)
dat_clean <- na.omit(dat_clean)
write_csv(dat_clean, "data/phys_data.csv")