Disaggregation of ASA24 Foods
This script takes in ASA24 ITEMS files, disaggregates WWEIA food codes to ingredients, and calculates the new ingredient weight. This script also calculates total caloric intake & other nutrients for each participant recall so polyphenol intakes can be standardized to caloric intake later on.
INPUTS
- Your Dietary Data - This script does not filter portion or nutrient outliers; any such filtering should be performed in advance. The ASA24 website has cleaning recommendations here: “Reviewing and Cleaning ASA24 Data”.
- FDA_FDD_All_Records_v_3.1.xlsx - FDD FoodCodes to Ingredients and Ingredient Percentages
OUTPUTS
- Recall_Disaggregated.csv.bz2: Dietary data that has been disaggregated using FDD.
- Recall_total_nutrients.csv: Total kcal and other nutrient intakes for each unique subject and recall combination.
SCRIPT
# Load packages
suppressMessages(library(dplyr))
suppressMessages(library(vroom))
suppressMessages(library(tidyr))
suppressMessages(library(stringr))
suppressMessages(library(readxl))
Load Example Dietary Data and FDA-FDD V3.1
# Run provided_files.R, which is expected to define the input paths used
# below (diet_input_file, FDD_file).
source("provided_files.R")
# Read the user's dietary data and expose the UserName column as `subject`.
input_data <- diet_input_file %>%
  vroom::vroom(show_col_types = FALSE) %>%
  rename(subject = UserName)
# FDD disaggregation table
# Read the FDD workbook, give its columns short snake_case names, and keep
# only the columns needed to map a WWEIA food code to its ingredients.
FDD_V3 <- FDD_file %>%
  read_xlsx() %>%
  rename(
    latest_survey = `Latest Survey`,
    wweia_food_code = `WWEIA Food Code`,
    wweia_food_description = `WWEIA Food Description`,
    fdd_ingredient = `Basic Ingredient Description`,
    ingredient_percent = `Ingredient Percent`
  ) %>%
  select(
    wweia_food_code,
    wweia_food_description,
    fdd_ingredient,
    ingredient_percent
  ) %>%
  # Store the food code as an integer.
  mutate(wweia_food_code = as.integer(wweia_food_code))
Dietary Data Filtering
Filter IN individuals with more than one recall and filter OUT incomplete recalls.
- RecallStatus: 2=Complete; 5=Breakoff/Quit
# Keep subjects that have more than one distinct recall number, then drop
# rows belonging to break-off recalls (RecallStatus 5). Rows whose
# RecallStatus is NA are dropped as well, because filter() only keeps rows
# where the condition is TRUE.
# NOTE(review): the recall count is evaluated before incomplete recalls are
# removed, so a subject with two recalls of which one is a break-off is kept
# with a single recall. Confirm this ordering is intended.
input_data_clean <- input_data %>%
  filter(n_distinct(RecallNo) > 1, .by = subject) %>%
  filter(RecallStatus != 5)
Sum Recall to get total kcal and Additional Nutrient Summaries
# One row per subject and recall: sum every column from KCAL through B12_ADD
# (missing values ignored) and prefix each summed column with "Total_".
input_total_nutrients <- input_data_clean %>%
  group_by(subject, RecallNo) %>%
  summarise(
    across(
      KCAL:B12_ADD,
      function(values) sum(values, na.rm = TRUE),
      .names = "Total_{.col}"
    )
  ) %>%
  # summarise() leaves the result grouped by subject; remove that grouping.
  ungroup()
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
Minimize the number of columns to the essential data
# Reduce the cleaned recalls to the five columns used for disaggregation,
# renaming FoodCode and Food_Description on the way so the food code column
# matches the key name used in the FDD table.
input_data_clean_minimal <- input_data_clean %>%
  select(
    subject,
    RecallNo,
    wweia_food_code = FoodCode,
    food_description = Food_Description,
    FoodAmt
  )
Apply Ingredient Percentage Adjustment for Coffee and Tea Brewing
# Brewing adjustment for coffee and tea.
#
# Within each WWEIA food code that lists a tea (or coffee) ingredient together
# with a water ingredient, the percentages of the tea (or coffee) and water
# rows are summed, and that combined percentage is stored in
# `brewing_adjustment_percentage` on the tea/coffee rows only. All other rows
# get NA there.
#
# Ingredient names are matched case-insensitively as whole words (\b...\b).
# A bare substring match on "Tea" or "Water" also hits unrelated names such as
# "Steak", "Stearate" or "Watermelon", which would wrongly flag a food code as
# a brewed beverage and inflate the percentage of the matching ingredient.
brew_tea_pattern <- regex("\\bTea\\b", ignore_case = TRUE)
brew_coffee_pattern <- regex("\\bCoffee\\b", ignore_case = TRUE)
brew_water_pattern <- regex("\\bWater\\b", ignore_case = TRUE)
brew_tea_water_pattern <- regex("\\b(Tea|Water)\\b", ignore_case = TRUE)
brew_coffee_water_pattern <- regex("\\b(Coffee|Water)\\b", ignore_case = TRUE)
brew_coffee_tea_pattern <- regex("\\b(Coffee|Tea)\\b", ignore_case = TRUE)

FDD_V3_adjusted <- FDD_V3 %>%
  group_by(wweia_food_code) %>%
  mutate(
    # Per-food-code flags: does any ingredient row mention tea/coffee/water?
    has_tea = any(str_detect(fdd_ingredient, brew_tea_pattern)),
    has_coffee = any(str_detect(fdd_ingredient, brew_coffee_pattern)),
    has_water = any(str_detect(fdd_ingredient, brew_water_pattern)),
    # Combined (tea or coffee) + water percentage for the food code.
    # The tea branch is checked first, so it takes priority when a food code
    # contains both tea and coffee.
    brewing_adjustment_total = case_when(
      # Tea + Water
      has_tea & has_water ~ sum(
        ingredient_percent[str_detect(fdd_ingredient, brew_tea_water_pattern)],
        na.rm = TRUE
      ),
      # Coffee + Water
      has_coffee & has_water ~ sum(
        ingredient_percent[
          str_detect(fdd_ingredient, brew_coffee_water_pattern)
        ],
        na.rm = TRUE
      ),
      TRUE ~ NA_real_
    ),
    # Apply the combined percentage to the coffee/tea rows only; the water
    # row and every other ingredient keep NA.
    brewing_adjustment_percentage = if_else(
      str_detect(fdd_ingredient, brew_coffee_tea_pattern),
      brewing_adjustment_total,
      NA_real_
    )
  ) %>%
  # Drop the helper columns used to build the adjustment.
  select(-c(has_tea, has_coffee, has_water, brewing_adjustment_total)) %>%
  ungroup()
Disaggregate Food Codes and compute final Ingredient Weights
# Expand every reported food into its FDD ingredient rows. A food code
# appears on many recall rows and maps to many ingredient rows, hence the
# explicit many-to-many relationship.
merge <- input_data_clean_minimal %>%
  left_join(
    FDD_V3_adjusted,
    by = "wweia_food_code",
    relationship = "many-to-many"
  ) %>%
  # Ingredient weight in grams = reported food amount times the ingredient
  # percentage. The brewing-adjusted percentage takes precedence whenever it
  # is not NA; otherwise the plain FDD ingredient percentage is used.
  mutate(
    FoodAmt_Ing_g = FoodAmt *
      (coalesce(brewing_adjustment_percentage, ingredient_percent) / 100)
  )
Write output files
Ensure outputs directory is created
# Create the outputs directory only when it does not exist yet.
if (!dir.exists("outputs")) {
  dir.create("outputs", recursive = TRUE)
}
Write Files
# Write the disaggregated recalls and the per-recall nutrient totals.
# NOTE(review): no `delim` is passed, so vroom_write() uses its default
# delimiter (tab, per the vroom documentation - confirm) even though both
# files carry a .csv name. Check that downstream readers expect this.
merge %>%
  vroom::vroom_write("outputs/Recall_Disaggregated.csv.bz2")
input_total_nutrients %>%
  vroom::vroom_write("outputs/Recall_total_nutrients.csv")