16  Additional Feature Engineering

16.1 Demographic Features

Code
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
source(here::here('Functions/connect_help.R'))

NHANES_Tables
 [1] "ACQ"      "ALB_CR"   "ALQ"      "ALQY"     "AUQ"      "BIOPRO"  
 [7] "BMX"      "BPQ"      "BPX"      "CBC"      "CDQ"      "CMV"     
[13] "COT"      "CRCO"     "DBQ"      "DEMO"     "DEQ"      "DIQ"     
[19] "DLQ"      "DPQ"      "DR1IFF"   "DR1TOT"   "DR2IFF"   "DR2TOT"  
[25] "DRXFCD"   "DS1IDS"   "DS1TOT"   "DS2TOT"   "DSBI"     "DSII"    
[31] "DSQIDS"   "DSQTOT"   "DUQ"      "DXX"      "DXXFEM"   "DXXSPN"  
[37] "Ds2IDS"   "ECQ"      "FASTQX"   "FERTIN"   "FETIB"    "FOLATE"  
[43] "FOLFMS"   "GHB"      "GLU"      "HDL"      "HEPA"     "HEPBD"   
[49] "HEPC"     "HEPE"     "HEQ"      "HIQ"      "HIV"      "HOQ"     
[55] "HSCRP"    "HSQ"      "HSV"      "HUQ"      "IHGEM"    "IMQ"     
[61] "INS"      "KIQ"      "LUX"      "MCQ"      "OCQ"      "OHQ"     
[67] "OHXDEN"   "OHXREF"   "OSQ"      "PAQ"      "PAQY"     "PBCD"    
[73] "PFAS"     "PFQ"      "PUQMEC"   "RHQ"      "RXQASA"   "RXQ_DRUG"
[79] "RXQ_RX"   "SLQ"      "SMQ"      "SMQFAM"   "SMQRTU"   "SMQSHS"  
[85] "SSPFAS"   "SXQ"      "TCHOL"    "TFR"      "TRIGLY"   "UCFLOW"  
[91] "UCM"      "UCPREG"   "UHG"      "UIO"      "UNI"      "VIC"     
[97] "VOCWB"    "VTQ"     

\(~\)


\(~\)

Code
# Open_NHANES_table_help('DEMO')

16.1.1 Age & Gender

Code
AGE_GENDER_TBL <- DEMO %>%
  mutate(Gender = if_else(RIAGENDR == 2 , "Female", "Male")) %>%
  mutate(Age = if_else(is.na(RIDAGEYR), RIDAGEMN/12 , RIDAGEYR)) %>%
  select(SEQN, Age, Gender) 

AGE_GENDER_TBL %>%
  glimpse()
Rows: ??
Columns: 3
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ SEQN   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …
$ Age    <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70, 81, 38, 85, 2…
$ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Female", "Female",…

16.1.2 Race/Hispanic origin

We see that only RIDRETH1 is available in both 2017-2018 and 1999-2000:

Code
RACE_TBL <- DEMO %>%
  select(SEQN, RIDRETH1) %>%
  mutate(Race = 
           case_when(RIDRETH1 == 1 ~ "Mexican American",
                     RIDRETH1 == 2 ~ "Other Hispanic",
                     RIDRETH1 == 3 ~ "White",
                     RIDRETH1 == 4 ~ "Black",
                     RIDRETH1 == 5 ~ "Other",
                     !(RIDRETH1 %in% c(1,2,3,4,5)) ~ NA )) %>%
  select(-RIDRETH1)

RACE_TBL %>% 
  head()
# Source:   SQL [6 x 2]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Race 
  <dbl> <chr>
1     1 Black
2     2 White
3     3 White
4     4 Black
5     5 White
6     6 Other

16.1.3 US Armed Forces

We see that only DMQMILIZ is available since 2011-2012, prior to 2011, the veteran status information (released in the variable DMQMILIT in the demographics file in 1999-2010)

Code
USFA_TBL <- DEMO %>%
  select(SEQN, DMQMILIZ, DMQMILIT) %>%
  mutate(USAF = 
           case_when((DMQMILIZ == 1 | DMQMILIT == 1) ~ "Yes",
                     (DMQMILIZ == 2 | DMQMILIT == 2) ~ "No",
                     !(DMQMILIZ %in% c(1,2) | DMQMILIT %in% c(1,2) ) |(is.na(DMQMILIZ) & is.na(DMQMILIT))   ~ NA )) %>%
  select(-DMQMILIZ, -DMQMILIT)

USFA_TBL %>% 
  head()
# Source:   SQL [6 x 2]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN USAF 
  <dbl> <chr>
1     1 <NA> 
2     2 Yes  
3     3 <NA> 
4     4 <NA> 
5     5 Yes  
6     6 No   

16.1.4 Country of Birth

Due to the concerns of disclosure risk, starting in 2011, country of birth was recoded into two categories: 1) Born in 50 U.S. states or Washington, DC; and 2) Born in other countries, including U.S. territories.

Code
BIRTH_COUNTRY_TBL <- DEMO %>%
  select(SEQN, DMDBORN, DMDBORN2, DMDBORN4 ) %>%
  mutate(Birth_Country = 
           case_when(DMDBORN == 1 | DMDBORN2 == 1 | DMDBORN4 == 1 ~ "USA",
                     DMDBORN == 2 | DMDBORN2 == 2 | DMDBORN4 == 2 ~ "Other",
                     (DMDBORN > 2 | DMDBORN2 > 2 | DMDBORN4 > 2 ) | 
                       (is.na(DMDBORN) & is.na(DMDBORN2) & is.na(DMDBORN4)) ~ NA  )) %>%
  select(SEQN, Birth_Country)

BIRTH_COUNTRY_TBL %>% 
  head()
# Source:   SQL [6 x 2]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Birth_Country
  <dbl> <chr>        
1     1 USA          
2     2 USA          
3     3 <NA>         
4     4 USA          
5     5 USA          
6     6 USA          

16.1.5 Education

Code
EDUCATION_TBL <- DEMO %>%
  select(SEQN, DMDEDUC3, DMDEDUC2) %>%
  mutate(Grade_Level = 
           case_when(DMDEDUC3 ==0~"Never attended / kindergarten only",
DMDEDUC3 ==1~"1st grade",
DMDEDUC3 ==2~"2nd grade",
DMDEDUC3 ==3~"3rd grade",
DMDEDUC3 ==4~"4th grade",
DMDEDUC3 ==5~"5th grade",
DMDEDUC3 ==6~"6th grade",
DMDEDUC3 ==7~"7th grade",
DMDEDUC3 ==8~"8th grade",
DMDEDUC3 ==9~"9th grade",
DMDEDUC3 ==10~"10th grade",
DMDEDUC3 ==11~"11th grade",
DMDEDUC3 ==12~"12th grade, no diploma",
DMDEDUC3 ==13~"High school graduate",
DMDEDUC3 ==14~"GED or equivalent",
DMDEDUC3 ==15~"More than high school",
DMDEDUC3 ==55~"Less than 5th grade",
DMDEDUC3 ==66~"Less than 9th grade",
DMDEDUC3 ==77~"Refused",
DMDEDUC3 ==99~"Don't Know",
is.na(DMDEDUC3) |  DMDEDUC3 > 66 ~ NA)) %>%
  select(-DMDEDUC3) %>%
  mutate(Grade_Range = 
           case_when(DMDEDUC2 ==1~"Less than 9th grade",
DMDEDUC2 ==2~"9-11th grade (Includes 12th grade with no diploma)",
DMDEDUC2 ==3~"High school graduate/GED or equivalent",
DMDEDUC2 ==4~"Some college or AA degree",
DMDEDUC2 ==5~"College graduate or above",
DMDEDUC2 > 5 | is.na(DMDEDUC2)  ~ NA )) %>%
  select(-DMDEDUC2)

EDUCATION_TBL %>% 
  head()
# Source:   SQL [6 x 3]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Grade_Level           Grade_Range              
  <dbl> <chr>                 <chr>                    
1     1 <NA>                  <NA>                     
2     2 <NA>                  College graduate or above
3     3 3rd grade             <NA>                     
4     4 <NA>                  <NA>                     
5     5 <NA>                  College graduate or above
6     6 More than high school <NA>                     

16.1.6 Marital Status

Code
MARITAL_STATUS_TBL <- DEMO %>%
  select(SEQN, DMDMARTL) %>%
  mutate(Marital_Status = 
           case_when(DMDMARTL ==1~"Married",
DMDMARTL ==2~"Widowed",
DMDMARTL ==3~"Divorced",
DMDMARTL ==4~"Separated",
DMDMARTL ==5~"Never married",
DMDMARTL ==6~"Living with partner",
DMDMARTL >6 | is.na(DMDMARTL) ~ NA )) %>%
  select(-DMDMARTL)

MARITAL_STATUS_TBL %>% 
  head()
# Source:   SQL [6 x 2]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Marital_Status
  <dbl> <chr>         
1     1 <NA>          
2     2 <NA>          
3     3 <NA>          
4     4 <NA>          
5     5 Married       
6     6 Never married 

16.1.7 Pregnancy Status

Code
PREG_STATUS_TBL <- DEMO %>%
  select(SEQN, RIDEXPRG) %>%
  mutate(Pregnant = 
           case_when(RIDEXPRG == 1 ~ "Pregnant",
                     RIDEXPRG == 2 ~ "Not Pregnant",
                     RIDEXPRG > 2 | is.na(RIDEXPRG) ~ NA )) %>%
  select(-RIDEXPRG)

PREG_STATUS_TBL %>% 
  head()
# Source:   SQL [6 x 2]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Pregnant    
  <dbl> <chr>       
1     1 <NA>        
2     2 <NA>        
3     3 <NA>        
4     4 <NA>        
5     5 <NA>        
6     6 Not Pregnant

16.1.8 Income

Code
INCOME_TBL <- DEMO %>%
  select(SEQN, INDHHIN2, INDFMIN2, INDFMPIR) %>%
  mutate(Household_Icome = case_when(INDHHIN2 ==1~"$ 0 to $ 4,999",
INDHHIN2 ==2~"$ 5,000 to $ 9,999",
INDHHIN2 ==3~"$10,000 to $14,999",
INDHHIN2 ==4~"$15,000 to $19,999",
INDHHIN2 ==5~"$20,000 to $24,999",
INDHHIN2 ==6~"$25,000 to $34,999",
INDHHIN2 ==7~"$35,000 to $44,999",
INDHHIN2 ==8~"$45,000 to $54,999",
INDHHIN2 ==9~"$55,000 to $64,999",
INDHHIN2 ==10~"$65,000 to $74,999",
INDHHIN2 ==12~"$20,000 and Over",
INDHHIN2 ==13~"Under $20,000",
INDHHIN2 ==14~"$75,000 to $99,999",
INDHHIN2 ==15~"$100,000 and Over",
INDHHIN2 > 15 | is.na(INDHHIN2) ~ NA)) %>%
  select(-INDHHIN2) %>%
  mutate(Family_Income = case_when(INDFMIN2 ==1~"$ 0 to $ 4,999",
INDFMIN2 ==2~"$ 5,000 to $ 9,999",
INDFMIN2 ==3~"$10,000 to $14,999",
INDFMIN2 ==4~"$15,000 to $19,999",
INDFMIN2 ==5~"$20,000 to $24,999",
INDFMIN2 ==6~"$25,000 to $34,999",
INDFMIN2 ==7~"$35,000 to $44,999",
INDFMIN2 ==8~"$45,000 to $54,999",
INDFMIN2 ==9~"$55,000 to $64,999",
INDFMIN2 ==10~"$65,000 to $74,999",
INDFMIN2 ==12~"$20,000 and Over",
INDFMIN2 ==13~"Under $20,000",
INDFMIN2 ==14~"$75,000 to $99,999",
INDFMIN2 ==15~"$100,000 and Over",
INDFMIN2 > 15 | is.na(INDFMIN2) ~ NA )) %>%
  select(-INDFMIN2) %>%
  mutate(Poverty_Income_Ratio = 
           case_when(INDFMPIR <= 5 ~ INDFMPIR,
                     is.na(INDFMPIR) ~ NA)) %>%
  select(-INDFMPIR)


INCOME_TBL %>% 
  head()
# Source:   SQL [6 x 4]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   SEQN Household_Icome Family_Income Poverty_Income_Ratio
  <dbl> <lgl>           <lgl>                        <dbl>
1     1 NA              NA                            0.86
2     2 NA              NA                            5   
3     3 NA              NA                            1.47
4     4 NA              NA                            0.57
5     5 NA              NA                            5   
6     6 NA              NA                            1.21

16.1.9 Demographic Features Final

Code
DEMO_FEATURES <- DEMO %>%
  select(SEQN) %>%
  left_join(AGE_GENDER_TBL) %>%
  left_join(RACE_TBL) %>%
  left_join(USFA_TBL) %>%
  left_join(BIRTH_COUNTRY_TBL) %>%
  left_join(EDUCATION_TBL) %>%
  left_join(MARITAL_STATUS_TBL) %>%
  left_join(PREG_STATUS_TBL) %>%
  left_join(INCOME_TBL)
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Joining with `by = join_by(SEQN)`
Code
DEMO_FEATURES %>%
  glimpse()
Rows: ??
Columns: 13
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ SEQN                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15…
$ Age                  <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70,…
$ Gender               <chr> "Female", "Male", "Female", "Male", "Male", "Fema…
$ Race                 <chr> "Black", "White", "White", "Black", "White", "Oth…
$ USAF                 <chr> NA, "Yes", NA, NA, "Yes", "No", "No", NA, NA, "Ye…
$ Birth_Country        <chr> "USA", "USA", NA, "USA", "USA", "USA", "USA", "US…
$ Grade_Level          <chr> NA, NA, "3rd grade", NA, NA, "More than high scho…
$ Grade_Range          <chr> NA, "College graduate or above", NA, NA, "College…
$ Marital_Status       <chr> NA, NA, NA, NA, "Married", "Never married", "Marr…
$ Pregnant             <chr> NA, NA, NA, NA, NA, "Not Pregnant", "Not Pregnant…
$ Household_Icome      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Family_Income        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Poverty_Income_Ratio <dbl> 0.86, 5.00, 1.47, 0.57, 5.00, 1.21, NA, 0.53, NA,…
Code
DEMO_FEATURES %>%
  show_query()
<SQL>
SELECT
  `DEMO`.`SEQN` AS `SEQN`,
  `Age`,
  `Gender`,
  `Race`,
  `USAF`,
  `Birth_Country`,
  `Grade_Level`,
  `Grade_Range`,
  `Marital_Status`,
  `Pregnant`,
  `Household_Icome`,
  `Family_Income`,
  `Poverty_Income_Ratio`
FROM `DEMO`
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE WHEN ((`RIDAGEYR` IS NULL)) THEN (`RIDAGEMN` / 12.0) WHEN NOT ((`RIDAGEYR` IS NULL)) THEN `RIDAGEYR` END AS `Age`,
    `Gender`
  FROM (
    SELECT
      `DEMO`.*,
      CASE WHEN (`RIAGENDR` = 2.0) THEN 'Female' WHEN NOT (`RIAGENDR` = 2.0) THEN 'Male' END AS `Gender`
    FROM `DEMO`
  ) AS `q01`
) AS `...2`
  ON (`DEMO`.`SEQN` = `...2`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE
WHEN (`RIDRETH1` = 1.0) THEN 'Mexican American'
WHEN (`RIDRETH1` = 2.0) THEN 'Other Hispanic'
WHEN (`RIDRETH1` = 3.0) THEN 'White'
WHEN (`RIDRETH1` = 4.0) THEN 'Black'
WHEN (`RIDRETH1` = 5.0) THEN 'Other'
WHEN (NOT((`RIDRETH1` IN (1.0, 2.0, 3.0, 4.0, 5.0)))) THEN NULL
END AS `Race`
  FROM `DEMO`
) AS `...3`
  ON (`DEMO`.`SEQN` = `...3`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE
WHEN ((`DMQMILIZ` = 1.0 OR `DMQMILIT` = 1.0)) THEN 'Yes'
WHEN ((`DMQMILIZ` = 2.0 OR `DMQMILIT` = 2.0)) THEN 'No'
WHEN (NOT((`DMQMILIZ` IN (1.0, 2.0) OR `DMQMILIT` IN (1.0, 2.0))) OR ((`DMQMILIZ` IS NULL) AND (`DMQMILIT` IS NULL))) THEN NULL
END AS `USAF`
  FROM `DEMO`
) AS `...4`
  ON (`DEMO`.`SEQN` = `...4`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE
WHEN (`DMDBORN` = 1.0 OR `DMDBORN2` = 1.0 OR `DMDBORN4` = 1.0) THEN 'USA'
WHEN (`DMDBORN` = 2.0 OR `DMDBORN2` = 2.0 OR `DMDBORN4` = 2.0) THEN 'Other'
WHEN ((`DMDBORN` > 2.0 OR `DMDBORN2` > 2.0 OR `DMDBORN4` > 2.0) OR ((`DMDBORN` IS NULL) AND (`DMDBORN2` IS NULL) AND (`DMDBORN4` IS NULL))) THEN NULL
END AS `Birth_Country`
  FROM `DEMO`
) AS `...5`
  ON (`DEMO`.`SEQN` = `...5`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    `Grade_Level`,
    CASE
WHEN (`DMDEDUC2` = 1.0) THEN 'Less than 9th grade'
WHEN (`DMDEDUC2` = 2.0) THEN '9-11th grade (Includes 12th grade with no diploma)'
WHEN (`DMDEDUC2` = 3.0) THEN 'High school graduate/GED or equivalent'
WHEN (`DMDEDUC2` = 4.0) THEN 'Some college or AA degree'
WHEN (`DMDEDUC2` = 5.0) THEN 'College graduate or above'
WHEN (`DMDEDUC2` > 5.0 OR (`DMDEDUC2` IS NULL)) THEN NULL
END AS `Grade_Range`
  FROM (
    SELECT
      `SEQN`,
      `DMDEDUC2`,
      CASE
WHEN (`DMDEDUC3` = 0.0) THEN 'Never attended / kindergarten only'
WHEN (`DMDEDUC3` = 1.0) THEN '1st grade'
WHEN (`DMDEDUC3` = 2.0) THEN '2nd grade'
WHEN (`DMDEDUC3` = 3.0) THEN '3rd grade'
WHEN (`DMDEDUC3` = 4.0) THEN '4th grade'
WHEN (`DMDEDUC3` = 5.0) THEN '5th grade'
WHEN (`DMDEDUC3` = 6.0) THEN '6th grade'
WHEN (`DMDEDUC3` = 7.0) THEN '7th grade'
WHEN (`DMDEDUC3` = 8.0) THEN '8th grade'
WHEN (`DMDEDUC3` = 9.0) THEN '9th grade'
WHEN (`DMDEDUC3` = 10.0) THEN '10th grade'
WHEN (`DMDEDUC3` = 11.0) THEN '11th grade'
WHEN (`DMDEDUC3` = 12.0) THEN '12th grade, no diploma'
WHEN (`DMDEDUC3` = 13.0) THEN 'High school graduate'
WHEN (`DMDEDUC3` = 14.0) THEN 'GED or equivalent'
WHEN (`DMDEDUC3` = 15.0) THEN 'More than high school'
WHEN (`DMDEDUC3` = 55.0) THEN 'Less than 5th grade'
WHEN (`DMDEDUC3` = 66.0) THEN 'Less than 9th grade'
WHEN (`DMDEDUC3` = 77.0) THEN 'Refused'
WHEN (`DMDEDUC3` = 99.0) THEN 'Don''t Know'
WHEN ((`DMDEDUC3` IS NULL) OR `DMDEDUC3` > 66.0) THEN NULL
END AS `Grade_Level`
    FROM `DEMO`
  ) AS `q01`
) AS `...6`
  ON (`DEMO`.`SEQN` = `...6`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE
WHEN (`DMDMARTL` = 1.0) THEN 'Married'
WHEN (`DMDMARTL` = 2.0) THEN 'Widowed'
WHEN (`DMDMARTL` = 3.0) THEN 'Divorced'
WHEN (`DMDMARTL` = 4.0) THEN 'Separated'
WHEN (`DMDMARTL` = 5.0) THEN 'Never married'
WHEN (`DMDMARTL` = 6.0) THEN 'Living with partner'
WHEN (`DMDMARTL` > 6.0 OR (`DMDMARTL` IS NULL)) THEN NULL
END AS `Marital_Status`
  FROM `DEMO`
) AS `...7`
  ON (`DEMO`.`SEQN` = `...7`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    CASE
WHEN (`RIDEXPRG` = 1.0) THEN 'Pregnant'
WHEN (`RIDEXPRG` = 2.0) THEN 'Not Pregnant'
WHEN (`RIDEXPRG` > 2.0 OR (`RIDEXPRG` IS NULL)) THEN NULL
END AS `Pregnant`
  FROM `DEMO`
) AS `...8`
  ON (`DEMO`.`SEQN` = `...8`.`SEQN`)
LEFT JOIN (
  SELECT
    `SEQN`,
    `Household_Icome`,
    `Family_Income`,
    CASE
WHEN (`INDFMPIR` <= 5.0) THEN `INDFMPIR`
WHEN ((`INDFMPIR` IS NULL)) THEN NULL
END AS `Poverty_Income_Ratio`
  FROM (
    SELECT
      `SEQN`,
      `INDFMPIR`,
      `Household_Icome`,
      CASE
WHEN (`INDFMIN2` = 1.0) THEN '$ 0 to $ 4,999'
WHEN (`INDFMIN2` = 2.0) THEN '$ 5,000 to $ 9,999'
WHEN (`INDFMIN2` = 3.0) THEN '$10,000 to $14,999'
WHEN (`INDFMIN2` = 4.0) THEN '$15,000 to $19,999'
WHEN (`INDFMIN2` = 5.0) THEN '$20,000 to $24,999'
WHEN (`INDFMIN2` = 6.0) THEN '$25,000 to $34,999'
WHEN (`INDFMIN2` = 7.0) THEN '$35,000 to $44,999'
WHEN (`INDFMIN2` = 8.0) THEN '$45,000 to $54,999'
WHEN (`INDFMIN2` = 9.0) THEN '$55,000 to $64,999'
WHEN (`INDFMIN2` = 10.0) THEN '$65,000 to $74,999'
WHEN (`INDFMIN2` = 12.0) THEN '$20,000 and Over'
WHEN (`INDFMIN2` = 13.0) THEN 'Under $20,000'
WHEN (`INDFMIN2` = 14.0) THEN '$75,000 to $99,999'
WHEN (`INDFMIN2` = 15.0) THEN '$100,000 and Over'
WHEN (`INDFMIN2` > 15.0 OR (`INDFMIN2` IS NULL)) THEN NULL
END AS `Family_Income`
    FROM (
      SELECT
        `SEQN`,
        `INDFMIN2`,
        `INDFMPIR`,
        CASE
WHEN (`INDHHIN2` = 1.0) THEN '$ 0 to $ 4,999'
WHEN (`INDHHIN2` = 2.0) THEN '$ 5,000 to $ 9,999'
WHEN (`INDHHIN2` = 3.0) THEN '$10,000 to $14,999'
WHEN (`INDHHIN2` = 4.0) THEN '$15,000 to $19,999'
WHEN (`INDHHIN2` = 5.0) THEN '$20,000 to $24,999'
WHEN (`INDHHIN2` = 6.0) THEN '$25,000 to $34,999'
WHEN (`INDHHIN2` = 7.0) THEN '$35,000 to $44,999'
WHEN (`INDHHIN2` = 8.0) THEN '$45,000 to $54,999'
WHEN (`INDHHIN2` = 9.0) THEN '$55,000 to $64,999'
WHEN (`INDHHIN2` = 10.0) THEN '$65,000 to $74,999'
WHEN (`INDHHIN2` = 12.0) THEN '$20,000 and Over'
WHEN (`INDHHIN2` = 13.0) THEN 'Under $20,000'
WHEN (`INDHHIN2` = 14.0) THEN '$75,000 to $99,999'
WHEN (`INDHHIN2` = 15.0) THEN '$100,000 and Over'
WHEN (`INDHHIN2` > 15.0 OR (`INDHHIN2` IS NULL)) THEN NULL
END AS `Household_Icome`
      FROM `DEMO`
    ) AS `q01`
  ) AS `q01`
) AS `...9`
  ON (`DEMO`.`SEQN` = `...9`.`SEQN`)

\(~\)


\(~\)

16.2 Lab Features

Code
# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Laboratory&CycleBeginYear=2017')
Code
ALB_CR <- tbl(NHANES_DB, "ALB_CR")
HDL <- tbl(NHANES_DB, 'HDL')
TRIGLY <- tbl(NHANES_DB, 'TRIGLY')
TCHOL <- tbl(NHANES_DB, "TCHOL")
INS <- tbl(NHANES_DB,"INS") 
GLU <- tbl(NHANES_DB, 'GLU')
UCPREG <- tbl(NHANES_DB,"UCPREG")

16.2.1 Mapping Column Issues

Code
prep_LABS_TBL <- DEMO %>%
  select(SEQN, yr_range) %>%
  left_join(ALB_CR) %>%
  left_join(HDL) %>%
  left_join(TRIGLY) %>%
  left_join(TCHOL) %>%
  left_join(INS, 
            by=c("SEQN", "yr_range"),
            suffix=c('.TCHOL',''))  %>%
  left_join(GLU, 
            by=c("SEQN", "yr_range"),
            suffix=c('.INS','.GLU')) %>%
  left_join(UCPREG)
Joining with `by = join_by(SEQN, yr_range)`
Joining with `by = join_by(SEQN, yr_range)`
Joining with `by = join_by(SEQN, yr_range)`
Joining with `by = join_by(SEQN, yr_range)`
Joining with `by = join_by(SEQN, yr_range)`
Code
prep_LABS_TBL %>%
  glimpse()
Rows: ??
Columns: 43
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ SEQN           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …
$ yr_range       <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "19…
$ URXUMA         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXUMS         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXUCR         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXCRS         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDACT         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXUMA2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDUMA2S       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXUCR2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDUCR2S       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDACT2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDUMALC       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URDUCRLC       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDHDD         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDHDDSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ WTSAF2YR.TCHOL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXTR          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDTRSI        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDL         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDLSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXAPB         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDAPBSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDLM        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDMSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDLN        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDLDNSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXTC          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDTCSI        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ WTSAF2YR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXIN.INS      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDINSI.INS    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTHR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTMN.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDINLC        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ WTSAF2YR.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXGLU         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDGLUSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXIN.GLU      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDINSI.GLU    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTHR.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTMN.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ URXPREG        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
Code
lab_features <- colnames(prep_LABS_TBL)[!colnames(prep_LABS_TBL) %in% c('SEQN','yr_range')] 
lab_features
 [1] "URXUMA"         "URXUMS"         "URXUCR"         "URXCRS"        
 [5] "URDACT"         "URXUMA2"        "URDUMA2S"       "URXUCR2"       
 [9] "URDUCR2S"       "URDACT2"        "URDUMALC"       "URDUCRLC"      
[13] "LBDHDD"         "LBDHDDSI"       "WTSAF2YR.TCHOL" "LBXTR"         
[17] "LBDTRSI"        "LBDLDL"         "LBDLDLSI"       "LBXAPB"        
[21] "LBDAPBSI"       "LBDLDLM"        "LBDLDMSI"       "LBDLDLN"       
[25] "LBDLDNSI"       "LBXTC"          "LBDTCSI"        "WTSAF2YR.INS"  
[29] "LBXIN.INS"      "LBDINSI.INS"    "PHAFSTHR.INS"   "PHAFSTMN.INS"  
[33] "LBDINLC"        "WTSAF2YR.GLU"   "LBXGLU"         "LBDGLUSI"      
[37] "LBXIN.GLU"      "LBDINSI.GLU"    "PHAFSTHR.GLU"   "PHAFSTMN.GLU"  
[41] "URXPREG"       
Code
lab_features_tibble <- tibble(lab_features)

lab_features_tibble %>%
  head()
# A tibble: 6 × 1
  lab_features
  <chr>       
1 URXUMA      
2 URXUMS      
3 URXUCR      
4 URXCRS      
5 URDACT      
6 URXUMA2     
Code
lab_features_tibble <- lab_features_tibble %>%
  mutate(appears_in_other_tables = str_detect(lab_features , "[.]"))

lab_features_tibble  %>%
  head()
# A tibble: 6 × 2
  lab_features appears_in_other_tables
  <chr>        <lgl>                  
1 URXUMA       FALSE                  
2 URXUMS       FALSE                  
3 URXUCR       FALSE                  
4 URXCRS       FALSE                  
5 URDACT       FALSE                  
6 URXUMA2      FALSE                  
Code
lab_features_tibble %>%
  filter(appears_in_other_tables == TRUE) %>%
  separate(lab_features, into=c('feature','SRC_TBL'), sep='[.]')
# A tibble: 11 × 3
   feature  SRC_TBL appears_in_other_tables
   <chr>    <chr>   <lgl>                  
 1 WTSAF2YR TCHOL   TRUE                   
 2 WTSAF2YR INS     TRUE                   
 3 LBXIN    INS     TRUE                   
 4 LBDINSI  INS     TRUE                   
 5 PHAFSTHR INS     TRUE                   
 6 PHAFSTMN INS     TRUE                   
 7 WTSAF2YR GLU     TRUE                   
 8 LBXIN    GLU     TRUE                   
 9 LBDINSI  GLU     TRUE                   
10 PHAFSTHR GLU     TRUE                   
11 PHAFSTMN GLU     TRUE                   
Code
lab_features_tibble <- tibble(lab_features) %>%
  mutate(tmp = lab_features) %>%
  separate(tmp, 
           into=c('feature','SRC_TBL'), 
           sep='[.]')
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 30 rows [1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, ...].
Code
lab_features_tibble
# A tibble: 41 × 3
   lab_features feature  SRC_TBL
   <chr>        <chr>    <chr>  
 1 URXUMA       URXUMA   <NA>   
 2 URXUMS       URXUMS   <NA>   
 3 URXUCR       URXUCR   <NA>   
 4 URXCRS       URXCRS   <NA>   
 5 URDACT       URDACT   <NA>   
 6 URXUMA2      URXUMA2  <NA>   
 7 URDUMA2S     URDUMA2S <NA>   
 8 URXUCR2      URXUCR2  <NA>   
 9 URDUCR2S     URDUCR2S <NA>   
10 URDACT2      URDACT2  <NA>   
# ℹ 31 more rows
Code
lab_features_tibble %>%
  filter(feature == 'WTSAF2YR')
# A tibble: 3 × 3
  lab_features   feature  SRC_TBL
  <chr>          <chr>    <chr>  
1 WTSAF2YR.TCHOL WTSAF2YR TCHOL  
2 WTSAF2YR.INS   WTSAF2YR INS    
3 WTSAF2YR.GLU   WTSAF2YR GLU    
Code
lab_features_to_map <- lab_features_tibble %>%
  filter(!is.na(SRC_TBL)) %>%
  arrange(feature)

lab_features_to_map
# A tibble: 11 × 3
   lab_features   feature  SRC_TBL
   <chr>          <chr>    <chr>  
 1 LBDINSI.INS    LBDINSI  INS    
 2 LBDINSI.GLU    LBDINSI  GLU    
 3 LBXIN.INS      LBXIN    INS    
 4 LBXIN.GLU      LBXIN    GLU    
 5 PHAFSTHR.INS   PHAFSTHR INS    
 6 PHAFSTHR.GLU   PHAFSTHR GLU    
 7 PHAFSTMN.INS   PHAFSTMN INS    
 8 PHAFSTMN.GLU   PHAFSTMN GLU    
 9 WTSAF2YR.TCHOL WTSAF2YR TCHOL  
10 WTSAF2YR.INS   WTSAF2YR INS    
11 WTSAF2YR.GLU   WTSAF2YR GLU    
Code
lab_features_to_map %>%
  filter(feature == 'WTSAF2YR')
# A tibble: 3 × 3
  lab_features   feature  SRC_TBL
  <chr>          <chr>    <chr>  
1 WTSAF2YR.TCHOL WTSAF2YR TCHOL  
2 WTSAF2YR.INS   WTSAF2YR INS    
3 WTSAF2YR.GLU   WTSAF2YR GLU    
Code
prep_LABS_TBL %>%
  filter(!is.na(WTSAF2YR.TCHOL) | !is.na(WTSAF2YR.INS) | !is.na(WTSAF2YR.GLU)) %>%
  select(contains('WTSAF2YR'))
# Source:   SQL [?? x 3]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
            <dbl>        <dbl>        <dbl>
 1             0            NA           0 
 2         67557.           NA       67557.
 3         80194.           NA       80194.
 4         15668.           NA       15668.
 5         93400.           NA       93400.
 6             0            NA           0 
 7             0            NA           0 
 8         67603.           NA       67603.
 9             0            NA           0 
10         71020.           NA       71020.
# ℹ more rows
Code
LABS_TBL <- prep_LABS_TBL %>%
  mutate(WTSAF2YR = coalesce(WTSAF2YR.INS, WTSAF2YR.TCHOL , WTSAF2YR.GLU))

LABS_TBL %>%
  select(yr_range, WTSAF2YR, WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU) %>%
  filter(!is.na(WTSAF2YR))
# Source:   SQL [?? x 5]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   yr_range  WTSAF2YR WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
   <chr>        <dbl>          <dbl>        <dbl>        <dbl>
 1 2005-2006       0              0            NA           0 
 2 2005-2006   67557.         67557.           NA       67557.
 3 2005-2006   80194.         80194.           NA       80194.
 4 2005-2006   15668.         15668.           NA       15668.
 5 2005-2006   93400.         93400.           NA       93400.
 6 2005-2006       0              0            NA           0 
 7 2005-2006       0              0            NA           0 
 8 2005-2006   67603.         67603.           NA       67603.
 9 2005-2006       0              0            NA           0 
10 2005-2006   71020.         71020.           NA       71020.
# ℹ more rows
Code
lab_features_to_map %>%
  filter(feature == 'LBDINSI')
# A tibble: 2 × 3
  lab_features feature SRC_TBL
  <chr>        <chr>   <chr>  
1 LBDINSI.INS  LBDINSI INS    
2 LBDINSI.GLU  LBDINSI GLU    
Code
prep_LABS_TBL %>%
  mutate(LBDINSI = coalesce(LBDINSI.INS, LBDINSI.GLU)) %>%
  filter(!is.na(LBDINSI.INS) | !is.na(LBDINSI.GLU) ) %>%
  select(contains('LBDINSI'))
# Source:   SQL [?? x 3]
# Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
   LBDINSI.INS LBDINSI.GLU LBDINSI
         <dbl>       <dbl>   <dbl>
 1          NA        60.2    60.2
 2          NA        53.9    53.9
 3          NA        67.6    67.6
 4          NA        87.1    87.1
 5          NA        62.8    62.8
 6          NA        23.5    23.5
 7          NA        36.2    36.2
 8          NA       126.    126. 
 9          NA        39.2    39.2
10          NA        18.8    18.8
# ℹ more rows
Code
check_LABS_TBL <- prep_LABS_TBL %>%
  mutate(WTSAF2YR = coalesce(WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU)) %>%
  mutate(PHAFSTMN = coalesce(PHAFSTMN.INS, PHAFSTMN.GLU)) %>%
  mutate(PHAFSTHR = coalesce(PHAFSTHR.INS, PHAFSTHR.GLU)) %>%
  mutate(LBXIN = coalesce(LBXIN.INS,LBXIN.GLU)) %>%
  mutate(LBDINSI = coalesce(LBDINSI.INS,LBDINSI.GLU))

check_maped_cols <- check_LABS_TBL %>%
  select(contains(c('WTSAF2YR','PHAFSTMN','PHAFSTHR','LBXIN','LBDINSI'))) %>%
  colnames()

check_maped_cols
 [1] "WTSAF2YR.TCHOL" "WTSAF2YR.INS"   "WTSAF2YR.GLU"   "WTSAF2YR"      
 [5] "PHAFSTMN.INS"   "PHAFSTMN.GLU"   "PHAFSTMN"       "PHAFSTHR.INS"  
 [9] "PHAFSTHR.GLU"   "PHAFSTHR"       "LBXIN.INS"      "LBXIN.GLU"     
[13] "LBXIN"          "LBDINSI.INS"    "LBDINSI.GLU"    "LBDINSI"       
Code
check_LABS_TBL %>%
  select(all_of(check_maped_cols)) %>%
  filter_at(.vars=check_maped_cols, any_vars(!is.na(.))) %>%
  glimpse()
Rows: ??
Columns: 16
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ WTSAF2YR.TCHOL <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00…
$ WTSAF2YR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ WTSAF2YR.GLU   <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00…
$ WTSAF2YR       <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00…
$ PHAFSTMN.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTMN.GLU   <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22…
$ PHAFSTMN       <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22…
$ PHAFSTHR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTHR.GLU   <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14…
$ PHAFSTHR       <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14…
$ LBXIN.INS      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBXIN.GLU      <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0…
$ LBXIN          <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0…
$ LBDINSI.INS    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDINSI.GLU    <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3…
$ LBDINSI        <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3…

16.2.2 Lab Features Final

Code
LABS_TBL <- check_LABS_TBL %>% 
  select(-WTSAF2YR.TCHOL, -WTSAF2YR.INS, -WTSAF2YR.GLU) %>%
  select(-PHAFSTMN.INS, -PHAFSTMN.GLU) %>%
  select(-PHAFSTHR.INS, -PHAFSTHR.GLU) %>%
  select(-LBXIN.INS , -LBXIN.GLU) %>%
  select(-LBDINSI.INS , -LBDINSI.GLU)


LABS_TBL  %>%
  filter_at(vars(lab_features_tibble$feature), any_vars(!is.na(.))) %>%
  glimpse()
Rows: ??
Columns: 37
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ SEQN     <dbl> 31128, 31129, 31130, 31131, 31132, 31133, 31134, 31137, 31139…
$ yr_range <chr> "2005-2006", "2005-2006", "2005-2006", "2005-2006", "2005-200…
$ URXUMA   <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,…
$ URXUMS   <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,…
$ URXUCR   <dbl> 291, 288, NA, 202, 162, 275, 140, 329, 125, 176, 91, 74, 143,…
$ URXCRS   <dbl> 25724, 25459, NA, 17857, 14321, 24310, 12376, 29084, 11050, 1…
$ URDACT   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URXUMA2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URDUMA2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URXUCR2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URDUCR2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URDACT2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URDUMALC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ URDUCRLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBDHDD   <dbl> 55, 46, NA, 39, 59, 54, 49, 45, NA, 57, NA, NA, 44, 45, 48, 4…
$ LBDHDDSI <dbl> 1.42, 1.19, NA, 1.01, 1.53, 1.40, 1.27, 1.16, NA, 1.47, NA, N…
$ LBXTR    <dbl> NA, NA, NA, 86, 65, 61, 195, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LBDTRSI  <dbl> NA, NA, NA, 0.971, 0.734, 0.689, 2.202, NA, NA, NA, NA, NA, N…
$ LBDLDL   <dbl> NA, NA, NA, 49, 75, 81, 98, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBDLDLSI <dbl> NA, NA, NA, 1.267, 1.940, 2.095, 2.534, NA, NA, NA, NA, NA, N…
$ LBXAPB   <dbl> NA, NA, NA, 50, 75, 75, 111, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LBDAPBSI <dbl> NA, NA, NA, 0.50, 0.75, 0.75, 1.11, NA, NA, NA, NA, NA, NA, N…
$ LBDLDLM  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBDLDMSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBDLDLN  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBDLDNSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBXTC    <dbl> 129, 170, NA, 105, 147, 147, 186, 129, NA, 141, NA, NA, 108, …
$ LBDTCSI  <dbl> 3.34, 4.40, NA, 2.72, 3.80, 3.80, 4.81, 3.34, NA, 3.65, NA, N…
$ LBDINLC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBXGLU   <dbl> NA, NA, NA, 90, 157, 84, 100, NA, NA, NA, NA, NA, NA, NA, NA,…
$ LBDGLUSI <dbl> NA, NA, NA, 4.996, 8.715, 4.663, 5.551, NA, NA, NA, NA, NA, N…
$ URXPREG  <dbl> NA, NA, NA, 2, NA, 2, NA, 2, 2, NA, NA, NA, NA, NA, NA, NA, N…
$ WTSAF2YR <dbl> NA, NA, 0.00, 67556.81, 80193.96, 15668.02, 93399.54, NA, 0.0…
$ PHAFSTMN <dbl> NA, NA, 3, 9, 29, 32, 35, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PHAFSTHR <dbl> NA, NA, 10, 14, 11, 12, 14, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LBXIN    <dbl> NA, NA, NA, 10.03, 8.99, 11.27, 14.51, NA, NA, NA, NA, NA, NA…
$ LBDINSI  <dbl> NA, NA, NA, 60.18, 53.94, 67.62, 87.06, NA, NA, NA, NA, NA, N…

\(~\)


\(~\)

16.3 Examination Features

Code
# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Examination&CycleBeginYear=2017')
Code
BPX <- tbl(NHANES_DB, 'BPX')
BMX <- tbl(NHANES_DB,'BMX')

16.3.1 Examination Features

Code
MEASURES_TBL <- DEMO %>%
  select(SEQN, yr_range) %>%
  left_join(BPX) %>%
  left_join(BMX)
Joining with `by = join_by(SEQN, yr_range)`
Joining with `by = join_by(SEQN, yr_range)`
Code
MEASURES_TBL %>%
  glimpse()
Rows: ??
Columns: 84
Database: sqlite 3.46.0 [C:\Users\jkyle\Documents\GitHub\Jeff_R_Data_Wrangling\DATA\sql_db\NHANES.sqlite]
$ SEQN     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
$ yr_range <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "1999-200…
$ PEASCST1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ PEASCTM1 <dbl> 151, 764, 571, 47, 694, 581, 852, 807, 579, 584, 751, 1169, 7…
$ PEASCCT1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ BPXCHR   <dbl> 110, NA, NA, 108, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ BPQ150A  <dbl> NA, 2, 2, NA, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, NA, NA, NA,…
$ BPQ150B  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPQ150C  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPQ150D  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPAARM   <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,…
$ BPACSZ   <dbl> NA, 3, 2, NA, 4, 2, 4, 2, 3, 4, 3, 4, 2, 3, 3, 3, NA, NA, NA,…
$ BPXPLS   <dbl> NA, 68, 104, NA, 66, 70, 58, 96, 84, 58, 62, 64, 102, 72, 68,…
$ BPXDB    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BPXPULS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ BPXPTY   <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,…
$ BPXML1   <dbl> NA, 130, 140, NA, 140, 140, 160, 130, 140, 190, 130, 200, 170…
$ BPXSY1   <dbl> NA, 106, 110, NA, 122, 116, 130, NA, 104, 152, 110, 182, 140,…
$ BPXDI1   <dbl> NA, 58, 60, NA, 82, 64, 78, NA, 60, 98, 52, 108, 78, 56, 68, …
$ BPAEN1   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPXSY2   <dbl> NA, 98, 104, NA, 122, 116, 122, 106, 114, 142, 110, 172, 130,…
$ BPXDI2   <dbl> NA, 56, 64, NA, 84, 60, 80, 44, 52, 94, 52, 98, 62, NA, 68, 5…
$ BPAEN2   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPXSY3   <dbl> NA, 98, 112, NA, 122, 112, 124, 100, 110, 142, 104, 176, 130,…
$ BPXDI3   <dbl> NA, 56, 62, NA, 82, 80, 82, 48, 48, 96, 50, 100, 70, 58, 70, …
$ BPAEN3   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,…
$ BPXSY4   <dbl> NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA, NA, NA, 138, NA, …
$ BPXDI4   <dbl> NA, NA, NA, NA, NA, NA, NA, 56, NA, NA, NA, NA, NA, 64, NA, N…
$ BPAEN4   <dbl> NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 2, NA, NA,…
$ BPXSAR   <dbl> NA, 98, 108, NA, 122, 114, 123, 98, 112, 142, 107, 174, 130, …
$ BPXDAR   <dbl> NA, 56, 63, NA, 83, 70, 81, 52, 50, 95, 51, 99, 66, 61, 69, 6…
$ BMAEXLEN <dbl> 289, 376, 199, 170, 277, 252, 303, 257, 178, 218, 311, 274, 2…
$ BMAEXSTS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ BMAEXCMT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXWT    <dbl> 12.5, 75.4, 32.9, 13.3, 92.5, 59.2, 78.0, 40.7, 45.5, 111.8, …
$ BMIWT    <dbl> 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3,…
$ BMXRECUM <dbl> 93.2, NA, NA, 87.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMIRECUM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXHEAD  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMIHEAD  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXHT    <dbl> 91.6, 174.0, 136.6, NA, 178.3, 162.0, 162.9, 162.0, 156.9, 19…
$ BMIHT    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXBMI   <dbl> 14.90, 24.90, 17.63, NA, 29.10, 22.56, 29.39, 15.51, 18.48, 3…
$ BMXLEG   <dbl> NA, NA, 34.1, NA, 45.2, 39.7, 43.0, 41.6, 42.2, 46.6, 40.5, 4…
$ BMILEG   <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ BMXCALF  <dbl> NA, 37.5, 29.0, NA, 42.6, 34.0, 37.2, 30.0, 32.7, 43.7, 37.8,…
$ BMICALF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXARML  <dbl> 18.6, 38.2, 25.5, 20.4, 39.7, 34.5, 38.1, 36.7, 32.0, 43.0, 3…
$ BMIARML  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXARMC  <dbl> 15.2, 29.8, 19.7, 16.4, 35.8, 26.0, 31.7, 20.1, 22.1, 37.6, 2…
$ BMIARMC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXWAIST <dbl> 45.7, 98.0, 64.7, NA, 99.9, 81.6, 90.7, 64.1, 64.6, 108.0, 76…
$ BMIWAIST <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA…
$ BMXTHICR <dbl> NA, NA, 38.2, NA, 56.2, 47.0, 55.7, 39.1, 45.3, 64.0, 39.9, 5…
$ BMITHICR <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,…
$ BMXTRI   <dbl> 8.3, 12.8, 8.0, 10.8, 17.4, 20.3, 26.4, 6.2, 11.1, 15.5, 8.0,…
$ BMITRI   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXSUB   <dbl> 5.0, 20.4, 6.5, 10.2, 38.6, 16.8, 34.2, 5.2, 7.6, 26.6, 7.8, …
$ BMISUB   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, 1, NA, …
$ BMAAMP   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ BMAUREXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMAUPREL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMAULEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMAUPLEL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMALOREX <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMALORKN <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMALLEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMALLKNE <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDSTATS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDRECUF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDSUBF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDTHICF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDLEGF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDARMLF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDCALFF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDBMIC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXSAD1  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXSAD2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXSAD3  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXSAD4  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDAVSAD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMDSADCM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMXHIP   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ BMIHIP   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…

This concludes the section on engineering features.