Calculate DII Foods and Food Components

This script takes in your disaggregated and FooDB-linked descriptions to calculate intake of 7 specific food categories (onion, ginger, garlic, tea, pepper, turmeric, thyme/oregano).

INPUTS

  • Diet_Disaggregated_mapped.csv.bz2 - Disaggregated dietary data, mapped to FooDB foods, From Step 2 of the polyphenol estimation pipeline
  • FDA-FDD V3.1 - All of FDA FDD descriptions

OUTPUTS

  • Diet_DII_foods_by_entry.csv - Intake of 7 DII food categories by participant recall or record

SCRIPT

Load packages

suppressMessages(library(dplyr))
suppressMessages(library(vroom))
suppressMessages(library(tidyr))
suppressMessages(library(stringr))
suppressMessages(library(readxl))

Load data

# Load provided file paths
source("provided_files.R")

# Load Dietary data that has been disaggregated and connected to FooDB
input_mapped = vroom::vroom('outputs/Diet_Disaggregated_mapped.csv.bz2', 
                            show_col_types = FALSE)

# Load FDA-FDD 3.1
fdd = read_xlsx(FDD_file) %>%
  dplyr::distinct(`Basic Ingredient Description`) %>%
  dplyr::rename(fdd_ingredient = 1) 

Specify grouping variables

Column grouping depends on whether output is from a record or recall.

if ("RecallNo" %in% names(input_mapped)) {
  group_vars = c("subject", "RecallNo", "component")
  
} else if ("RecordNo" %in% names(input_mapped)) {
  group_vars = c("subject", "RecordNo", "RecordDayNo", "component")
  
} else {
  stop("Data must contain RecallNo or RecordNo.")
}

Isolate FDD descriptions for each food group

Ingredient description must contain only one ingredient

garlic = fdd %>%
  dplyr::filter(grepl("garlic", fdd_ingredient, ignore.case = TRUE)) %>%
  mutate(component = 'GARLIC')

ginger = fdd  %>%
  dplyr::filter(grepl("ginger", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'GINGER')

onions = fdd %>%
  dplyr::filter(grepl("onion", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'ONION')

turmeric = fdd %>%
  dplyr::filter(grepl("turmeric", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'TURMERIC')

tea = fdd %>%
  dplyr::filter(grepl("tea", fdd_ingredient, ignore.case = TRUE)) %>%
  # Ensure no herbal teas are included
  dplyr::filter(grepl("black|oolong|green", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'TEA')

pepper = fdd %>%
  dplyr::filter(grepl("pepper", fdd_ingredient, ignore.case = TRUE)) %>%
  # Ensure we are getting just spices and not fresh peppers
  dplyr::filter(grepl("spices", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'PEPPER')

# Thyme or oregano
thymeoregano = fdd %>%
  dplyr::filter(grepl("thyme|oregano", fdd_ingredient, ignore.case = TRUE)) %>%
  dplyr::mutate(component = 'THYME')

# SAFFRON AND ROSEMARY do not exist in FDD V3.1
# rosemary = fdd %>% dplyr::filter(grepl("rosemary", fdd_ingredient, ignore.case = TRUE)) %>% dplyr::mutate(component = "ROSEMARY")
# saffron = fdd %>% dplyr::filter(grepl("saffron", fdd_ingredient, ignore.case = TRUE)) %>% dplyr::mutate(component = 'SAFFRON')

Merge the foods together into a singular dataframe

DII_foods = garlic %>%
  dplyr::full_join(ginger) %>%
  dplyr::full_join(onions) %>%
  dplyr::full_join(turmeric) %>%
  dplyr::full_join(tea) %>%
  dplyr::full_join(pepper) %>%
  dplyr::full_join(thymeoregano)
  # These can be added with future updates
  # dplyr::full_join(saffron) %>%
  # dplyr::full_join(rosemary)

Derive food component intakes

component_sums = input_mapped %>%
  # Extract relevant DII foods
  dplyr::filter(fdd_ingredient %in% DII_foods$fdd_ingredient) %>%
  # let's keep the columns we will need to simplify our df
  dplyr::select(c(subject,
                  any_of(c("RecallNo", "RecordNo", "RecordDayNo")),
                  fdd_ingredient, FoodAmt_Ing_g)) %>%
  # Merge the component name
  dplyr::left_join(DII_foods, by = 'fdd_ingredient') %>%
  # Add component ingredient intakes together
  dplyr::group_by(across(all_of(group_vars))) %>%
  dplyr::mutate(component_sum = sum(FoodAmt_Ing_g, na.rm = TRUE)) %>%
  dplyr::ungroup() %>%
  # Keep distinct entries
  dplyr::distinct(across(all_of(group_vars)), .keep_all = TRUE)%>%
  # Remove food name and intakes now that we have the total component intake
  dplyr::select(-c(fdd_ingredient, FoodAmt_Ing_g)) %>%
  # Make Wide
  tidyr::pivot_wider(names_from = component, values_from = component_sum) 

# In smaller groups, some foods may be missing.
food_list = c("GARLIC", "GINGER", "ONION", "TURMERIC", "TEA", "PEPPER", "THYME")
missing_cols = setdiff(food_list, names(component_sums))
# Add any missing colums as 0
component_sums[missing_cols] =0

Export Food Intake Amounts for DII Calculation

vroom::vroom_write(component_sums, 'outputs/Diet_DII_foods_by_entry.csv', delim = ",")