CLiREN-LMS
Data Cleaning and Preparation in R

Writing Readable Cleaning Scripts

Code Example 1

30-45 minutes Applied Step 7 of 8
Code

Code Example 1

7 / 8
Code

Code Example 1

r

# Project: Clinical Research Data Management Practice Study
# Script: 02_clean_enrollment_data.R
# Purpose: Import enrollment export, prepare variables, and generate query outputs
# Input: data_raw/redcap_enrollment_export_2026-06-01.csv
# Outputs:
#   outputs/enrollment_query_listing_2026-06-01.csv
#   data_clean/enrollment_prepared_2026-06-01.csv

library(tidyverse)
library(janitor)

# Export date ----
# One date stamp drives the input file name and both output file names.
export_date <- "2026-06-01"

# File paths ----
# Every path is built from export_date so the three files stay in step.
raw_file <- str_glue("data_raw/redcap_enrollment_export_{export_date}.csv")
prepared_file <- str_glue("data_clean/enrollment_prepared_{export_date}.csv")
query_file <- str_glue("outputs/enrollment_query_listing_{export_date}.csv")

# Import ----
# Read the raw export and pass it through janitor's clean_names() so the
# later steps can refer to columns by their cleaned names.
raw_enrollment <- clean_names(read_csv(raw_file))

# Prepare variables ----
# Step 1: coerce the three date fields to Date.
# Step 2: derive age at enrollment in years as elapsed days / 365.25.
enrollment_prepared <- raw_enrollment |>
  mutate(
    across(c(consent_date, enrollment_date, date_of_birth), as.Date)
  ) |>
  mutate(
    age_years_derived =
      as.numeric(difftime(enrollment_date, date_of_birth, units = "days")) /
        365.25
  )

# Build query listing ----
# A record is queried when its consent date is missing or when its enrollment
# date falls before its consent date. Each condition is evaluated once as a
# named flag and reused for the row filter and for both text columns; the
# flags are dropped by the final select().
query_listing <- enrollment_prepared |>
  mutate(
    consent_missing = is.na(consent_date),
    enrolled_before_consent = enrollment_date < consent_date
  ) |>
  filter(consent_missing | enrolled_before_consent) |>
  mutate(
    query_variable = case_when(
      consent_missing ~ "consent_date",
      enrolled_before_consent ~ "enrollment_date",
      TRUE ~ "unknown"
    ),
    query_text = case_when(
      consent_missing ~
        "Please enter or verify the informed consent date.",
      enrolled_before_consent ~
        "Enrollment date appears to occur before consent date. Please verify.",
      TRUE ~ "Please review this record."
    )
  ) |>
  select(participant_id, site, query_variable, query_text)

# Write outputs ----
# write_csv() does not create missing folders. Make sure each target
# directory exists first so the script cannot fail at its last step when the
# output folders are absent (for example on a fresh copy of the project).
# showWarnings = FALSE keeps the call quiet when the folder already exists.
dir.create(dirname(query_file), recursive = TRUE, showWarnings = FALSE)
dir.create(dirname(prepared_file), recursive = TRUE, showWarnings = FALSE)

# Query listing for review, then the full prepared dataset.
write_csv(query_listing, query_file)
write_csv(enrollment_prepared, prepared_file)