Add loss functions and model architecture for time-to-event prediction
- Implemented ExponentialNLLLoss and WeibullNLLLoss in losses.py for negative log-likelihood calculations (see the hedged sketch after this list).
- Developed the TabularEncoder class in model.py for encoding tabular features.
- Created the DelphiFork and SapDelphi classes in model.py for time-to-event prediction using a transformer architecture.
- Added data preparation scripts prepare_data.R and prepare_data.py for processing UK Biobank data, including handling field mappings and event data extraction (a hedged sketch of the Python extraction step follows the diff below).
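The implementations in losses.py are not shown in this diff; the following is a minimal sketch of the standard right-censored negative log-likelihoods the class names suggest, assuming a PyTorch nn.Module interface in which the network predicts the distribution parameters per sample and `event` is 1 for an observed event and 0 for censored follow-up. Signatures and argument names are illustrative, not the committed API.

import torch
import torch.nn as nn

class WeibullNLLLoss(nn.Module):
    # Weibull(shape k, scale lam): log f(t) = log k - log lam + (k-1)(log t - log lam) - (t/lam)^k,
    # log S(t) = -(t/lam)^k. Observed events contribute log f, censored samples contribute log S.
    def __init__(self, eps: float = 1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, shape, scale, time, event):
        t = time.clamp(min=self.eps)
        k = shape.clamp(min=self.eps)
        lam = scale.clamp(min=self.eps)
        z = (t / lam) ** k
        log_f = torch.log(k) - torch.log(lam) + (k - 1.0) * (torch.log(t) - torch.log(lam)) - z
        log_s = -z
        return -(event * log_f + (1.0 - event) * log_s).mean()

class ExponentialNLLLoss(nn.Module):
    # Exponential is the shape = 1 special case with constant hazard `rate`:
    # log f(t) = log rate - rate * t, log S(t) = -rate * t.
    def __init__(self, eps: float = 1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, rate, time, event):
        lam = rate.clamp(min=self.eps)
        log_f = torch.log(lam) - lam * time
        log_s = -lam * time
        return -(event * log_f + (1.0 - event) * log_s).mean()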
prepare_data.R (new file, +26 lines)
@@ -0,0 +1,26 @@
library(data.table)
setDTthreads(40)
library(readr)
field_id <- read.csv("field_id.txt", header = FALSE)
uid <- field_id$V1
big_path <- "/mnt/storage/shared_data/UKBB/20230518-from-zhourong/HHdata_221103_0512.csv"
header_dt <- fread(big_path, nrows = 0) # Read 0 rows => only column names
all_names <- names(header_dt)
keep_names <- intersect(all_names, uid)
ukb_disease <- fread(big_path,
                     select = keep_names,
                     showProgress = TRUE)

field_id <- read.csv("field_id.txt", header = FALSE)
uid <- field_id$V1
big_path <- "/mnt/storage/shared_data/UKBB/20230518-from-zhourong/HH_data_220812_0512.csv"
header_dt <- fread(big_path, nrows = 0) # Read 0 rows => only column names
all_names <- names(header_dt)
keep_names <- intersect(all_names, uid)
ukb_others <- fread(big_path,
                    select = keep_names,
                    showProgress = TRUE)

# merge disease and other data by "eid"
ukb_data <- merge(ukb_disease, ukb_others, by = "eid", all = TRUE)
fwrite(ukb_data, "ukb_data.csv")
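prepare_data.py is referenced in the commit message but not included in this diff. As a purely hypothetical illustration of the "event data extraction" step, the sketch below reads the merged ukb_data.csv produced by the R script above and reshapes the wide per-field columns into long (eid, field, date) event records; the column semantics, helper names, and output file name are assumptions, not the committed code.

import pandas as pd

# Hypothetical sketch only -- the real prepare_data.py is not part of this diff.
# Assumes ukb_data.csv (written by prepare_data.R above) has one row per
# participant ("eid") and one column per selected UK Biobank field, with
# disease fields holding a date of first occurrence.
ukb = pd.read_csv("ukb_data.csv", low_memory=False)

# Treat every non-"eid" column as a candidate event column and reshape the
# wide table into long (eid, field, date) records, dropping missing dates.
event_cols = [c for c in ukb.columns if c != "eid"]
events = ukb.melt(id_vars="eid", value_vars=event_cols,
                  var_name="field", value_name="date")
events["date"] = pd.to_datetime(events["date"], errors="coerce")
events = events.dropna(subset=["date"]).sort_values(["eid", "date"])
events.to_csv("ukb_events.csv", index=False)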