Writing Readable Cleaning Scripts
Code Example 1
Code
7 / 8
Code Example 1
Code
Code Example 1
r
# Project: Clinical Research Data Management Practice Study
# Script: 02_clean_enrollment_data.R
# Purpose: Import enrollment export, prepare variables, and generate query outputs
# Input: data_raw/redcap_enrollment_export_2026-06-01.csv
# Outputs:
# outputs/enrollment_query_listing_2026-06-01.csv
# data_clean/enrollment_prepared_2026-06-01.csv
library(tidyverse)
library(janitor)
# File paths ----
# All paths are derived from a single export date so the input file and both
# outputs always refer to the same export.
export_date <- "2026-06-01"
raw_file <- str_glue("data_raw/redcap_enrollment_export_{export_date}.csv")
query_file <- str_glue("outputs/enrollment_query_listing_{export_date}.csv")
prepared_file <- str_glue("data_clean/enrollment_prepared_{export_date}.csv")

# Import ----
# clean_names() standardises the column names; the steps below refer to the
# cleaned names (consent_date, enrollment_date, date_of_birth, ...).
raw_enrollment <- read_csv(raw_file) |>
  clean_names()

# Prepare variables ----
# Coerce the three date columns to Date and derive an approximate age in
# years at enrollment (elapsed days divided by 365.25).
enrollment_prepared <- raw_enrollment |>
  mutate(
    consent_date = as.Date(consent_date),
    enrollment_date = as.Date(enrollment_date),
    date_of_birth = as.Date(date_of_birth),
    age_years_derived = as.numeric(
      difftime(enrollment_date, date_of_birth, units = "days")
    ) / 365.25
  )

# Build query listing ----
# One row per record that has a missing consent date or an enrollment date
# earlier than the consent date. Rows where the date comparison is NA (for
# example, enrollment_date missing while consent_date is present) are dropped
# by filter() and therefore do not appear in the listing.
query_listing <- enrollment_prepared |>
  filter(is.na(consent_date) | enrollment_date < consent_date) |>
  transmute(
    participant_id,
    site,
    # The TRUE fallbacks below are defensive defaults: every row kept by the
    # filter above already matches one of the first two conditions.
    query_variable = case_when(
      is.na(consent_date) ~ "consent_date",
      enrollment_date < consent_date ~ "enrollment_date",
      TRUE ~ "unknown"
    ),
    query_text = case_when(
      is.na(consent_date) ~ "Please enter or verify the informed consent date.",
      enrollment_date < consent_date ~ "Enrollment date appears to occur before consent date. Please verify.",
      TRUE ~ "Please review this record."
    )
  )

# Write outputs ----
# Create the output directories first so the script does not fail at the very
# last step on a fresh checkout; this is a no-op when they already exist.
for (output_dir in unique(dirname(c(query_file, prepared_file)))) {
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
}

write_csv(query_listing, query_file)
write_csv(enrollment_prepared, prepared_file)