Calculate DII Foods and Food Components
This script takes your disaggregated, FooDB-mapped dietary data and calculates intake of 7 specific food categories (onion, ginger, garlic, tea, pepper, turmeric, thyme/oregano).
INPUTS
- Recall_Disaggregated_mapped.csv.bz2 - Disaggregated dietary data, mapped to FooDB foods, from Step 2 of the Polyphenol Estimator
- FDA-FDD V3.1 - All FDA-FDD basic ingredient descriptions
OUTPUTS
- Recall_DII_foods_by_recall.csv - Intake of 7 DII food categories by participant recall
SCRIPT
Load packages
suppressMessages(library(dplyr))
suppressMessages(library(vroom))
suppressMessages(library(tidyr))
suppressMessages(library(stringr))
suppressMessages(library(readxl))
Load data
# Pull in the user-supplied file locations. FDD_file (used below) is expected
# to be defined by this script -- NOTE(review): confirm in provided_files.R
source("provided_files.R")

# Dietary data that has already been disaggregated to ingredients and
# connected to FooDB
input_mapped <- vroom::vroom(
  "outputs/Recall_Disaggregated_mapped.csv.bz2",
  show_col_types = FALSE
)

# FDA-FDD 3.1: reduce the workbook to its unique basic ingredient
# descriptions, held in a single column named fdd_ingredient
fdd <- readxl::read_xlsx(FDD_file) %>%
  select(fdd_ingredient = `Basic Ingredient Description`) %>%
  distinct()
Isolate FDD descriptions for each food group
Ingredient description must contain only one ingredient
# Build the lookup of FDD basic ingredient descriptions for one food group.
#
# descriptions: data frame with a character column `fdd_ingredient`
# pattern:      regular expression, matched case-insensitively
# label:        value written to the `component` column of every match
#
# Returns the matching rows of `descriptions` with `component` added.
match_fdd_component <- function(descriptions, pattern, label) {
  descriptions %>%
    filter(grepl(pattern, fdd_ingredient, ignore.case = TRUE)) %>%
    mutate(component = label)
}

garlic <- match_fdd_component(fdd, "garlic", "GARLIC")
ginger <- match_fdd_component(fdd, "ginger", "GINGER")
onions <- match_fdd_component(fdd, "onion", "ONION")
turmeric <- match_fdd_component(fdd, "turmeric", "TURMERIC")

# Tea: match "tea" as a whole word. A bare substring match would also hit
# descriptions such as "steak" or "steamed", and the colour filter below
# does not rule those out (e.g. a "green ... steamed" vegetable).
tea <- match_fdd_component(fdd, "\\bteas?\\b", "TEA") %>%
  # Ensure no herbal teas are included
  filter(grepl("black|oolong|green", fdd_ingredient, ignore.case = TRUE))

pepper <- match_fdd_component(fdd, "pepper", "PEPPER") %>%
  # Ensure we are getting just spices and not fresh peppers
  filter(grepl("spices", fdd_ingredient, ignore.case = TRUE))

# Thyme or oregano (both are reported under the single THYME component)
thymeoregano <- match_fdd_component(fdd, "thyme|oregano", "THYME")

# SAFFRON AND ROSEMARY do not exist in FDD V3.1
# rosemary = fdd %>% filter(grepl("rosemary", fdd_ingredient, ignore.case = TRUE)) %>% mutate(component = "ROSEMARY")
# saffron = fdd %>% filter(grepl("saffron", fdd_ingredient, ignore.case = TRUE)) %>% mutate(component = 'SAFFRON')
Merge the foods together into a single data frame
# Stack the per-group lookups into one table with one row per
# (fdd_ingredient, component) pair. Every lookup carries a different
# component label, so no rows can match across tables; this is a plain
# row-stack, done with bind_rows() rather than a chain of key-less
# full_join() calls (which also print a "Joining with by = ..." message
# for every join).
DII_foods <- bind_rows(
  garlic,
  ginger,
  onions,
  turmeric,
  tea,
  pepper,
  thymeoregano
)
# These can be added with future updates
# (append `saffron` and `rosemary` to the bind_rows() call above)
Derive food component intakes
# Intake of each DII food component per recall: one row per
# (subject, RecallNo) and one column per component. Recalls that contain no
# DII ingredient at all do not appear in the result.
component_sums <- input_mapped %>%
  # Trim to the identifiers, the ingredient description and its amount
  # (FoodAmt_Ing_g is presumably grams -- confirm against the mapping step)
  select(subject, RecallNo, fdd_ingredient, FoodAmt_Ing_g) %>%
  # Keep only DII ingredients and tag each row with its component name
  inner_join(DII_foods, by = "fdd_ingredient") %>%
  # Total the ingredient amounts within each recall and component
  group_by(subject, RecallNo, component) %>%
  mutate(component_sum = sum(FoodAmt_Ing_g)) %>%
  ungroup() %>%
  # Collapse to one row per recall and component; the ingredient-level
  # columns are no longer needed once the total is known
  distinct(subject, RecallNo, component, component_sum) %>%
  # Make wide: a component with no matching ingredient in a recall is NA
  pivot_wider(names_from = component, values_from = component_sum)
Export Food Intake Amounts for DII Calculation
# Write the wide per-recall component intakes as a comma-delimited file
vroom::vroom_write(
  component_sums,
  "outputs/Recall_DII_foods_by_recall.csv",
  delim = ","
)