- Calculate Total Polyphenol Intakes
- SCRIPTS
- Total daily Polyphenol Intake Numbers BY ENTRY (Record/Recall)
- Total daily Polyphenol Intake Numbers AVERAGE FOR SUBJECT
Calculate Total Polyphenol Intakes
This script calculates total polyphenol intake (mg, mg/1000kcal) for provided dietary data.
INPUTS
- Diet_FooDB_polyphenol_content.csv.bz2: Disaggregated dietary data, mapped to FooDB polyphenol content, at the compound-level
- Diet_total_nutrients.csv: Total daily nutrient data corresponding to the dietary data
OUTPUTS
- summary_total_intake_by_subject.csv
- summary_total_intake_by_entry.csv
SCRIPTS
suppressMessages(library(dplyr))
suppressMessages(library(vroom))
suppressMessages(library(tidyr))
suppressMessages(library(stringr))
# Load provided file paths.
# NOTE(review): the code visible here reads hard-coded 'outputs/...' paths and
# does not reference anything defined by this file - confirm it is still needed.
source("provided_files.R")

# Dietary data mapped to FooDB polyphenol content (compound-level rows).
input_polyphenol_content <- vroom::vroom(
  "outputs/Diet_FooDB_polyphenol_content.csv.bz2",
  show_col_types = FALSE
)

# Total daily nutrient data that accompanies the dietary data.
input_kcal <- vroom::vroom(
  "outputs/Diet_total_nutrients.csv",
  show_col_types = FALSE
)

# Standardise the energy column name: ASA24 output already uses Total_KCAL,
# NHANES output uses Total_DRXIKCAL. Whichever is present becomes Total_KCAL.
input_kcal <- dplyr::rename_with(
  input_kcal,
  function(column_names) "Total_KCAL",
  .cols = any_of(c("Total_KCAL", "Total_DRXIKCAL"))
)

# Keep only the subject, the entry identifiers that exist (recall data carries
# RecallNo; record data carries RecordNo / RecordDayNo) and the energy column.
input_kcal <- dplyr::select(
  input_kcal,
  subject,
  any_of(c("RecallNo", "RecordNo", "RecordDayNo")),
  Total_KCAL
)

# Attach daily energy to every polyphenol row. No `by` is supplied, so dplyr
# joins on all columns the two tables have in common and reports which it used.
input_polyphenol_kcal <- dplyr::left_join(input_polyphenol_content, input_kcal)
## Joining with `by = join_by(subject, RecallNo)`
Specify grouping variables
Column grouping depends on whether output is from a record or recall.
# Fail early when the merged data carries neither kind of entry identifier.
if (!any(c("RecallNo", "RecordNo") %in% names(input_polyphenol_kcal))) {
  stop("Data must contain RecallNo or RecordNo.")
}

# Recall data is grouped per subject and recall; record data per subject,
# record and day within the record. RecallNo takes precedence if both exist.
group_vars <- if ("RecallNo" %in% names(input_polyphenol_kcal)) {
  c("subject", "RecallNo")
} else {
  c("subject", "RecordNo", "RecordDayNo")
}
Total daily Polyphenol Intake Numbers BY ENTRY (Record/Recall)
# Total polyphenol intake (mg) per dietary entry.
#   Recall - summed by subject and recall
#   Record - summed by subject, record number and day within the record
# `.by` (dplyr >= 1.1.0) groups for this call only, so no ungroup() is needed.
content_by_entry <- dplyr::mutate(
  input_polyphenol_kcal,
  pp_recallsum_mg = sum(pp_consumed, na.rm = TRUE),
  .by = all_of(group_vars)
)

# The entry total is repeated on every row of that entry; keep the first row
# of each entry so there is exactly one row per group.
content_by_entry <- dplyr::distinct(
  content_by_entry,
  across(all_of(group_vars)),
  .keep_all = TRUE
)

# Energy-adjusted intake (mg per 1000 kcal), using the Total_KCAL value carried
# on the retained row.
content_by_entry <- dplyr::mutate(
  content_by_entry,
  pp_recallsum_mg1000kcal = pp_recallsum_mg / (Total_KCAL / 1000)
)

# Reduce to the identifier columns that exist plus the three summary columns.
content_by_entry <- dplyr::select(
  content_by_entry,
  subject,
  any_of(c("RecallNo", "RecordNo", "RecordDayNo")),
  pp_recallsum_mg,
  Total_KCAL,
  pp_recallsum_mg1000kcal
)

# Write Output
vroom::vroom_write(
  content_by_entry,
  "outputs/summary_total_intake_by_entry.csv",
  delim = ","
)
Total daily Polyphenol Intake Numbers AVERAGE FOR SUBJECT
# Average intake per participant across all of their entries.
# The energy-adjusted value is the ratio of the two averages (mean mg divided
# by mean kcal / 1000), not the mean of the per-entry ratios.
# `.by` (dplyr >= 1.1.0) returns one ungrouped row per subject, with subjects
# in the order they first appear in content_by_entry.
content_by_subject <- dplyr::summarise(
  content_by_entry,
  pp_average_mg = mean(pp_recallsum_mg, na.rm = TRUE),
  kcal_average = mean(Total_KCAL, na.rm = TRUE),
  pp_average_mg_1000kcal = pp_average_mg / (kcal_average / 1000),
  .by = subject
)

# Write Output
vroom::vroom_write(
  content_by_subject,
  "outputs/summary_total_intake_by_subject.csv",
  delim = ","
)