Title: | Scientific Calculations for Quantitative Clinical Pharmacology and Pharmacometrics Analysis |
---|---|
Description: | Utility functions helpful for reproducible scientific calculations. |
Authors: | Matthew Smith [aut, cre], Jenna Johnson [aut], Devin Pastoor [aut], Wesley Cummings [ctb], Emily Schapiro [ctb], Ryan Crass [ctb], Jonah Lyon [ctb], Elizabeth LeBeau [ctb] |
Maintainer: | Matthew Smith <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.2 |
Built: | 2025-03-12 06:33:53 UTC |
Source: | https://github.com/a2-ai/scicalc |
Calculates Baseline Body Mass Index based on Weight and Height
bbmi(weight, height)
bbmi(weight, height)
weight |
weight of subject (kg) |
height |
height of subject (cm) |
the bBMI value (kg m^(-2))
b <- bbmi(80.56, 167) df <- data.frame( "WT" = c(80.56, 71.53, 81.04, 70.17), "HT" = c(167, 161, 163, 164) ) df <- dplyr::mutate(df, bbmi = bbmi(WT, HT))
b <- bbmi(80.56, 167) df <- data.frame( "WT" = c(80.56, 71.53, 81.04, 70.17), "HT" = c(167, 161, 163, 164) ) df <- dplyr::mutate(df, bbmi = bbmi(WT, HT))
Calculates hepatic function criteria
bhfc(ast, ulnast, bili, ulnbili)
bhfc(ast, ulnast, bili, ulnbili)
ast |
Aspartate aminotransferase concentration (IU/L) |
ulnast |
Upper limit of normal AST (IU/L), typically 33 |
bili |
bilirubin concentration (mg/dL) |
ulnbili |
Upper limit of normal BILI (mg/dL), typically 1.2 |
category of hepatic function
bhfc(15, 33, 0.6, 1.2) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65), "AST" = c(15, 15, 15, 15, 23, 23, 23, 23), "ULNAST" = c(33, 33, 33, 33, 33, 33, 33, 33), "BILI" = c(1, 1, 1, 1, 0.4, 0.4, 0.4, 0.4), "ULNBILI" = c(1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate(BHFC = bhfc(AST, ULNAST, BILI, ULNBILI))
bhfc(15, 33, 0.6, 1.2) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65), "AST" = c(15, 15, 15, 15, 23, 23, 23, 23), "ULNAST" = c(33, 33, 33, 33, 33, 33, 33, 33), "BILI" = c(1, 1, 1, 1, 0.4, 0.4, 0.4, 0.4), "ULNBILI" = c(1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate(BHFC = bhfc(AST, ULNAST, BILI, ULNBILI))
Calculates renal impairment categories based on CrCL
brfc(crcl)
brfc(crcl)
crcl |
creatinine clearance rate (mL/min) |
integer renal impairment category
brfc(crcl(FALSE, 20, 10, 70)) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate( CRCL = crcl(is_female(SEX), AGE, CREAT, WEIGHT), BRFC = brfc(CRCL) )
brfc(crcl(FALSE, 20, 10, 70)) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate( CRCL = crcl(is_female(SEX), AGE, CREAT, WEIGHT), BRFC = brfc(CRCL) )
Calculates Body Surface Area based on Weight and Height using the method specified. Default is Dubois.
bsa(weight, height, method = "Dubois")
bsa(weight, height, method = "Dubois")
weight |
weight of a subject (kg) |
height |
height of a subject (cm) |
method |
String to dictate which equation to use. Dubois or Mosteller. |
bsa (m^2)
bsa(70, 170) bsa(70, 170, method = "Mosteller") bsa(70, 170, method = "Dubois")
bsa(70, 170) bsa(70, 170, method = "Mosteller") bsa(70, 170, method = "Dubois")
Converts continuous variable into factor categories.
categorize(continuous_var, nbins = 4, units = "", type = 7, digits = 1)
categorize(continuous_var, nbins = 4, units = "", type = 7, digits = 1)
continuous_var |
continuous variable data |
nbins |
number of bins to break data into, default is 4 |
units |
string, optional units string to add to labels of categorized data |
type |
type argument for stats::quantile, default is 7 |
digits |
number of digits to round quantile breaks to for labels, default is 1 |
a vector of categorized data as factor
x <- rnorm(1000, mean = 10, sd = 5) xc <- categorize(x, nbins = 5)
x <- rnorm(1000, mean = 10, sd = 5) xc <- categorize(x, nbins = 5)
Returns TRUE/FALSE indicating whether each parameter has only 1 associated unit
check_for_unique_units(params, units)
check_for_unique_units(params, units)
params |
a column from a dataset with lab parameters |
units |
a column from a dataset with units associated with those parameters |
a boolean
df <- data.frame( PARAM = c( "ALB","ALT","AST","CR","TBIL", "ALB","CR","TBIL","ALT","AST"), UNIT = c( "g/L","U/L","U/L","umol/L","umol/L", "U/L","μmol/L","μmol/L","IU/L","IU/L") ) check_for_unique_units(df$PARAM, df$UNIT)
df <- data.frame( PARAM = c( "ALB","ALT","AST","CR","TBIL", "ALB","CR","TBIL","ALT","AST"), UNIT = c( "g/L","U/L","U/L","umol/L","umol/L", "U/L","μmol/L","μmol/L","IU/L","IU/L") ) check_for_unique_units(df$PARAM, df$UNIT)
Calculates Estimated Glomerular Filtration Rate based on Sex, Race, Age, and Creatinine levels based on the CKDEPI 2009 equation
ckdepi_2009_egfr(sexf, raceb, age, creat)
ckdepi_2009_egfr(sexf, raceb, age, creat)
sexf |
boolean value of sex Female: TRUE, Male: FALSE |
raceb |
boolean value of Race == Black: Black: TRUE, Other: FALSE |
age |
age of subject (years) |
creat |
creatinine levels of subject (mg/dL) |
the eGFR value (mL/min/1.73m2)
e <- ckdepi_2009_egfr(TRUE, TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = ckdepi_2009_egfr(SEXF, RACEB, AGE, CREAT))
e <- ckdepi_2009_egfr(TRUE, TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = ckdepi_2009_egfr(SEXF, RACEB, AGE, CREAT))
Calculates eGFR using the CKDEPI 2021 creatinine equation
ckdepi_2021_egfr(sexf, age, creat)
ckdepi_2021_egfr(sexf, age, creat)
sexf |
boolean value of sex Female: TRUE, Male: FALSE |
age |
age of subject (years) |
creat |
creatinine levels of subject (mg/dL) |
the eGFR value (mL/min/1.73m2)
e <- ckdepi_2021_egfr(TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = ckdepi_2021_egfr(SEXF, AGE, CREAT))
e <- ckdepi_2021_egfr(TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = ckdepi_2021_egfr(SEXF, AGE, CREAT))
Calculates eGFR with CKDEPI 2021 cystatin equation
ckdepi_2021_egfr_cystatin(sexf, age, creat, cystc)
ckdepi_2021_egfr_cystatin(sexf, age, creat, cystc)
sexf |
a boolean representing if the patient is female. |
age |
age of patient in years |
creat |
serum creatinine levels in mg/dL. |
cystc |
serum cystatin C levels in mg/L. |
eGFR in mL/min/1.73 m^2
e <- ckdepi_2021_egfr_cystatin(TRUE, 24, 1, 2) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1), "CYSTC" = c(0.4, 0.8, 1, 2) ) df <- dplyr::mutate(df, egfr = ckdepi_2021_egfr_cystatin(SEXF, AGE, CREAT, CYSTC))
e <- ckdepi_2021_egfr_cystatin(TRUE, 24, 1, 2) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1), "CYSTC" = c(0.4, 0.8, 1, 2) ) df <- dplyr::mutate(df, egfr = ckdepi_2021_egfr_cystatin(SEXF, AGE, CREAT, CYSTC))
Calculates Creatinine clearance with Cockcroft-Gault equation
crcl(sexf, age, creat, weight)
crcl(sexf, age, creat, weight)
sexf |
bool of sex of subject. Female: True, Male: False |
age |
age of subject (years) |
creat |
serum creatinine levels (mg/dL) |
weight |
weight of subject (kg) |
CrCl (mL/min)
crcl(FALSE, 20, 10, 70) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate(CRCL = crcl(is_female(SEX), AGE, CREAT, WEIGHT))
crcl(FALSE, 20, 10, 70) df <- data.frame( "ID" = c(1, 1, 1, 1, 2, 2, 2, 2), "SEX" = c("F", "F", "F", "F", "M", "M", "M", "M"), "RACE" = c("WHITE", "WHITE", "WHITE", "WHITE", "BLACK", "BLACK", "BLACK", "BLACK"), "AGE" = c(24, 24, 24, 24, 22, 22, 22, 22), "CREAT" = c(1, 1, 1, 1, 4, 4, 4, 4), "WEIGHT" = c(70, 70, 70, 70, 65, 65, 65, 65) ) df <- df %>% dplyr::group_by(ID) %>% dplyr::mutate(CRCL = crcl(is_female(SEX), AGE, CREAT, WEIGHT))
Creates the directory if it doesn't exist
create_dir(path)
create_dir(path)
path |
path of directory to be created |
Nothing
## Not run: create_dir("derived/data/test") ## End(Not run)
## Not run: create_dir("derived/data/test") ## End(Not run)
Computes the coefficient of variation of input vector.
cv(x, na.rm = FALSE)
cv(x, na.rm = FALSE)
x |
Input vector to compute CV for. |
na.rm |
boolean to remove NA. default is FALSE |
CV of x. Standard deviation divided by mean. If you want % you'll need to multiply by 100
cv(c(1, 2, 1, 1, 2, 1, 2, 3))
cv(c(1, 2, 1, 1, 2, 1, 2, 3))
Calculates Body Surface Area based on Weight and Height using the Dubois and Dubois equation
dubois_bsa(weight, height)
dubois_bsa(weight, height)
weight |
weight of subject (kg) |
height |
height of subject (cm) |
the body surface area (m^2)
b <- dubois_bsa(80.56, 167) df <- data.frame( "WT" = c(80.56, 71.53, 81.04, 70.17), "HT" = c(167, 161, 163, 164) ) df <- dplyr::mutate(df, bsa = dubois_bsa(WT, HT))
b <- dubois_bsa(80.56, 167) df <- data.frame( "WT" = c(80.56, 71.53, 81.04, 70.17), "HT" = c(167, 161, 163, 164) ) df <- dplyr::mutate(df, bsa = dubois_bsa(WT, HT))
Calculates eGFR based on the method specified
egfr(sexf, raceb, age, creat, cystc, height, method = "CKDEPI 2009")
egfr(sexf, raceb, age, creat, cystc, height, method = "CKDEPI 2009")
sexf |
a boolean representing if the patient is female. |
raceb |
a boolean representing if the patient is black. |
age |
the age of a patient in years. |
creat |
the serum creatinine levels in mg/dL. |
cystc |
the cystatin C levels in mg/L - only used in CKDEPI 2021 cystatin method |
height |
the height of a patient in cm. |
method |
a string specifying the method to use. Available options are "CKDEPI 2009", "MDRD", "CKDEPI 2021", "Schwartz". |
the eGFR calculated based on method.
e <- egfr(TRUE, TRUE, 24, 1, method = "CKDEPI 2009") df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = egfr(SEXF, RACEB, AGE, CREAT, method = "CKDEPI 2009"))
e <- egfr(TRUE, TRUE, 24, 1, method = "CKDEPI 2009") df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = egfr(SEXF, RACEB, AGE, CREAT, method = "CKDEPI 2009"))
Takes character input and returns standard yspec numeric value for Ethnic
ethnicn(ethnicc)
ethnicn(ethnicc)
ethnicc |
Ethnic character |
the standard yspec numeric value for the inputted Ethnic character
ethnicn("HISPANIC OR LATINO") # 1 ethnicn("NOT HISPANIC OR LATINO") # 0 ethnicn("UNKNOWN") # -999
ethnicn("HISPANIC OR LATINO") # 1 ethnicn("NOT HISPANIC OR LATINO") # 0 ethnicn("UNKNOWN") # -999
Computes the geometric CV of a vector x
geom_cv(x, na.rm = FALSE)
geom_cv(x, na.rm = FALSE)
x |
vector of data you want the geometric CV of. |
na.rm |
boolean to remove NA from vector. Default is FALSE |
the geometric CV of the input vector x
geom_cv(c(1, 2, 3, 2, 1))
geom_cv(c(1, 2, 3, 2, 1))
Computes the geometric mean of a vector.
geom_mean(x, na.rm = FALSE)
geom_mean(x, na.rm = FALSE)
x |
vector to compute geometric mean of |
na.rm |
boolean to remove NA from vector in calculation. Default is FALSE |
geometric mean of input vector x
geom_mean(c(1, 2, 3, 2, 1))
geom_mean(c(1, 2, 3, 2, 1))
Computes the geometric standard deviation of a vector x.
geom_sd(x, na.rm = FALSE)
geom_sd(x, na.rm = FALSE)
x |
The vector of data you want the geometric sd of. |
na.rm |
a boolean to remove NA values. Default is False |
the geometric standard deviation of x
geom_sd(c(1, 2, 3, 2, 1))
geom_sd(c(1, 2, 3, 2, 1))
Creates a dataframe with distinct parameters and units combinations
get_unique_units_df(params, units)
get_unique_units_df(params, units)
params |
a column from a dataset with lab parameters |
units |
a column from a dataset with units associated with those parameters |
a dataframe with distinct units and parameters, with IU replaced by U and μ replaced by u
df <- data.frame( PARAM = c( "ALB","ALT","AST","CR","TBIL", "ALB","CR","TBIL","ALT","AST"), UNIT = c( "g/L","U/L","U/L","umol/L","umol/L", "U/L","μmol/L","μmol/L","IU/L","IU/L") ) unique_df <- get_unique_units_df(df$PARAM, df$UNIT)
df <- data.frame( PARAM = c( "ALB","ALT","AST","CR","TBIL", "ALB","CR","TBIL","ALT","AST"), UNIT = c( "g/L","U/L","U/L","umol/L","umol/L", "U/L","μmol/L","μmol/L","IU/L","IU/L") ) unique_df <- get_unique_units_df(df$PARAM, df$UNIT)
Takes character input and returns TRUE/FALSE if asian/other
is_asian(x)
is_asian(x)
x |
input character representing race |
boolean representing Race == Asian
is_asian("ASIAN") is_asian("BLACK")
is_asian("ASIAN") is_asian("BLACK")
Takes character input and returns TRUE/FALSE if black/other. Also checks for "African American" and "Black or African American"
is_black(x)
is_black(x)
x |
input character representing race |
boolean representing Race == Black
is_black("WHITE") is_black(c("AFRICAN AMERICAN", "BLACK"))
is_black("WHITE") is_black(c("AFRICAN AMERICAN", "BLACK"))
Takes character input and returns TRUE/FALSE if female/male
is_female(x)
is_female(x)
x |
input character representing female or male |
boolean representing female
is_female("F") is_female(c("MALE", "FEMALE"))
is_female("F") is_female(c("MALE", "FEMALE"))
Takes character input and returns TRUE/FALSE if "Hispanic or Latino" or other
is_hispanic_or_latino(x)
is_hispanic_or_latino(x)
x |
input character representing ethnicity |
boolean representing Ethnic == "Hispanic or Latino"
is_hispanic_or_latino("HISPANIC OR LATINO") is_hispanic_or_latino("NOT HISPANIC OR LATINO") is_hispanic_or_latino("UNKNOWN")
is_hispanic_or_latino("HISPANIC OR LATINO") is_hispanic_or_latino("NOT HISPANIC OR LATINO") is_hispanic_or_latino("UNKNOWN")
Takes character input and returns TRUE/FALSE if "Not Hispanic or Latino" or other
is_not_hispanic_or_latino(x)
is_not_hispanic_or_latino(x)
x |
input character representing ethnicity |
boolean representing Ethnic == "Not Hispanic or Latino"
is_not_hispanic_or_latino("HISPANIC OR LATINO") is_not_hispanic_or_latino("NOT HISPANIC OR LATINO") is_not_hispanic_or_latino("UNKNOWN")
is_not_hispanic_or_latino("HISPANIC OR LATINO") is_not_hispanic_or_latino("NOT HISPANIC OR LATINO") is_not_hispanic_or_latino("UNKNOWN")
Takes character input and returns TRUE/FALSE if other/explicit race
is_other(x)
is_other(x)
x |
input character representing race |
boolean representing Race == Other
is_other("OTHER") is_other("BLACK")
is_other("OTHER") is_other("BLACK")
Takes character input and returns TRUE/FALSE if white/other
is_white(x)
is_white(x)
x |
input character representing race |
boolean representing Race == White
is_white("WHITE") is_white("BLACK")
is_white("WHITE") is_white("BLACK")
Modification of Diet in Renal Disease eGFR calculation
mdrd_egfr(sexf, raceb, age, creat)
mdrd_egfr(sexf, raceb, age, creat)
sexf |
a boolean representing if the patient is female. |
raceb |
a boolean representing if the patient is black. |
age |
the age of the patient in years |
creat |
the serum creatinine levels in mg/dL |
the eGFR in mL/min/1.73 m^2
e <- mdrd_egfr(TRUE, TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = mdrd_egfr(SEXF, RACEB, AGE, CREAT))
e <- mdrd_egfr(TRUE, TRUE, 24, 1) df <- data.frame( "SEXF" = c(TRUE, FALSE, TRUE, FALSE), "RACEB" = c(FALSE, FALSE, TRUE, FALSE), "AGE" = c(24, 24, 23, 24), "CREAT" = c(1, 1, 2, 1) ) df <- dplyr::mutate(df, egfr = mdrd_egfr(SEXF, RACEB, AGE, CREAT))
Calculates Body Surface Area based on Weight and Height using Mosteller equation
mosteller_bsa(weight, height)
mosteller_bsa(weight, height)
weight |
weight of subject (kg) |
height |
height of subject (cm) |
the body surface area (m^2)
mosteller_bsa(70, 170)
mosteller_bsa(70, 170)
Takes character input and returns standard yspec numeric value for Race
racen(racec)
racen(racec)
racec |
Race character |
the standard yspec numeric value for the inputted Race character
racen("WHITE") # 1 racen("BLACK") # 2 racen("ASIAN") # 3 racen("OTHER") # 4 racen("UNKNOWN") # -999
racen("WHITE") # 1 racen("BLACK") # 2 racen("ASIAN") # 3 racen("OTHER") # 4 racen("UNKNOWN") # -999
Reads data from csv file and prints hash of contents.
read_csv_with_hash(csv_file_path, ...)
read_csv_with_hash(csv_file_path, ...)
csv_file_path |
path to csv file to ingest |
... |
additional arguments for digest or read_csv |
dataframe of data within file
## Not run: read_csv_with_hash("data/derived/example_data.csv") ## End(Not run)
## Not run: read_csv_with_hash("data/derived/example_data.csv") ## End(Not run)
Reads data from xlsx/xls file and prints hash of contents.
read_excel_with_hash(xlsx_file_path, ...)
read_excel_with_hash(xlsx_file_path, ...)
xlsx_file_path |
an xlsx/xls file to ingest |
... |
additional arguments to digest or read_excel |
a data frame of the data within the file
## Not run: read_excel_with_hash("data/source/example.xlsx") ## End(Not run)
## Not run: read_excel_with_hash("data/source/example.xlsx") ## End(Not run)
Reads the data from a file (csv or parquet) and prints the hash
read_file_with_hash(file_path, ...)
read_file_with_hash(file_path, ...)
file_path |
path to data file |
... |
additional arguments to digest, read_csv, read_parquet, read_sas, read_pzfx, read_xpt |
data within the supplied file
## Not run: dat <- read_file_with_hash("data/derived/PK_data.parquet") dat2 <- read_file_with_hash("data/source/data.csv") ## End(Not run)
## Not run: dat <- read_file_with_hash("data/derived/PK_data.parquet") dat2 <- read_file_with_hash("data/source/data.csv") ## End(Not run)
Reads a file if the supplied hash matches the file's hash
read_hashed_file(file_path, hash, ...)
read_hashed_file(file_path, hash, ...)
file_path |
path to file with data you want to read |
hash |
hash you expect the file to have |
... |
additional arguments for digest or read_csv, parquet, sas |
data object of contents of file_path
## Not run: file_path <- "data/derived/example_pk.parquet" hash <- "0cfd6da55e6c1e198effe1e584c26d79" read_hashed_file(file_path, hash) ## End(Not run)
## Not run: file_path <- "data/derived/example_pk.parquet" hash <- "0cfd6da55e6c1e198effe1e584c26d79" read_hashed_file(file_path, hash) ## End(Not run)
Reads data from parquet file and prints hash of contents.
read_parquet_with_hash(parquet_file_path, ...)
read_parquet_with_hash(parquet_file_path, ...)
parquet_file_path |
path to parquet file to ingest |
... |
additional arguments to digest or read_parquet |
a tibble of data within file
## Not run: read_parquet_with_hash("data/derived/example_data.parquet") ## End(Not run)
## Not run: read_parquet_with_hash("data/derived/example_data.parquet") ## End(Not run)
Reads in table from a prism pzfx file.
read_pzfx_with_hash(pzfx_file_path, ...)
read_pzfx_with_hash(pzfx_file_path, ...)
pzfx_file_path |
path to pzfx file |
... |
additional arguments to digest or read_pzfx |
data within the table of the pzfx file
## Not run: read_pzfx_with_hash("mydata.pzfx", table = "experiment1") ## End(Not run)
## Not run: read_pzfx_with_hash("mydata.pzfx", table = "experiment1") ## End(Not run)
Reads data from sas file and prints hash of contents.
read_sas_with_hash(sas_file_path, ...)
read_sas_with_hash(sas_file_path, ...)
sas_file_path |
path to sas file to ingest |
... |
additional arguments to digest or read_sas |
a data frame of the data within the file
## Not run: read_sas_with_hash("data/source/example.sas7bdat") ## End(Not run)
## Not run: read_sas_with_hash("data/source/example.sas7bdat") ## End(Not run)
Reads data from xpt file and prints hash of contents.
read_xpt_with_hash(xpt_file_path, ...)
read_xpt_with_hash(xpt_file_path, ...)
xpt_file_path |
an xpt file to ingest |
... |
additional arguments to digest or read_xpt |
a data frame of the data within the file
## Not run: read_xpt_with_hash("data/source/example.xpt") ## End(Not run)
## Not run: read_xpt_with_hash("data/source/example.xpt") ## End(Not run)
Calculates eGFR based on Schwartz' equation
schwartz_egfr(height, creat)
schwartz_egfr(height, creat)
height |
height of patients in cm. |
creat |
Serum creatinine levels in mg/dL |
eGFR in mL/min/1.73m^2
schwartz_egfr(100, 1)
schwartz_egfr(100, 1)
Takes character input and returns the standard yspec numeric value for Sex. Also returns the numeric value for the single-character Sex values "F" and "M"
sexf(sex)
sexf(sex)
sex |
Sex character |
the standard yspec numeric value for the inputted Sex character
sexf("FEMALE") # 1 sexf("female") # 1 sexf("f") # 1 sexf("MALE") # 0 sexf("NOT SPECIFIED") # 0
sexf("FEMALE") # 1 sexf("female") # 1 sexf("f") # 1 sexf("MALE") # 0 sexf("NOT SPECIFIED") # 0
Writes data to csv_path with na_value replacing NA values.
write_csv_with_hash(data, csv_path, ...)
write_csv_with_hash(data, csv_path, ...)
data |
a data object to write to file |
csv_path |
the file path to save the csv |
... |
additional arguments to digest or write_csv |
Nothing, creates csv_path file and prints hash of the file
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_csv_with_hash(df, "test/test.csv") ## End(Not run)
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_csv_with_hash(df, "test/test.csv") ## End(Not run)
Writes data to path, if directory doesn't exist it is created before file is written
write_file_with_hash(data, path, overwrite = FALSE, ...)
write_file_with_hash(data, path, overwrite = FALSE, ...)
data |
the data object to write to file |
path |
the destination of the file (csv or parquet) |
overwrite |
boolean of whether to overwrite or not. |
... |
additional arguments for digest or write_file. |
Nothing, File is created and hash of created file is printed
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_file_with_hash(df, "data.csv") ## End(Not run)
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_file_with_hash(df, "data.csv") ## End(Not run)
Writes data to parquet_path and prints hash
write_parquet_with_hash(data, parquet_path, ...)
write_parquet_with_hash(data, parquet_path, ...)
data |
the data object to save to parquet_path |
parquet_path |
the path to the desired parquet destination |
... |
additional arguments to digest and write_parquet |
Nothing. creates parquet_path file and prints hash
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_parquet_with_hash(df, "test/test.parquet") ## End(Not run)
## Not run: df <- data.frame( "a" = c(1, 2, 3, 4), "b" = c("A", "B", "C", "D") ) write_parquet_with_hash(df, "test/test.parquet") ## End(Not run)