feat: Add model checkpoints and configurations
This commit is contained in:
BIN
best_model_n_embd_120_n_layer_12_n_head_12.pt
Normal file
BIN
best_model_n_embd_120_n_layer_12_n_head_12.pt
Normal file
Binary file not shown.
BIN
best_model_n_embd_256_n_layer_16_n_head_16.pt
Normal file
BIN
best_model_n_embd_256_n_layer_16_n_head_16.pt
Normal file
Binary file not shown.
18
config_n_embd_120_n_layer_12_n_head_12.json
Normal file
18
config_n_embd_120_n_layer_12_n_head_12.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_embd": 120,
|
||||||
|
"n_head": 12,
|
||||||
|
"max_epoch": 200,
|
||||||
|
"batch_size": 128,
|
||||||
|
"lr_initial": 0.0006,
|
||||||
|
"lr_final": 6e-05,
|
||||||
|
"weight_decay": 0.2,
|
||||||
|
"warmup_epochs": 10,
|
||||||
|
"early_stopping_patience": 10,
|
||||||
|
"pdrop": 0.0,
|
||||||
|
"token_pdrop": 0.0,
|
||||||
|
"betas": [
|
||||||
|
0.9,
|
||||||
|
0.99
|
||||||
|
]
|
||||||
|
}
|
18
config_n_embd_256_n_layer_16_n_head_16.json
Normal file
18
config_n_embd_256_n_layer_16_n_head_16.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"n_layer": 16,
|
||||||
|
"n_embd": 256,
|
||||||
|
"n_head": 16,
|
||||||
|
"max_epoch": 200,
|
||||||
|
"batch_size": 128,
|
||||||
|
"lr_initial": 0.0006,
|
||||||
|
"lr_final": 6e-05,
|
||||||
|
"weight_decay": 0.2,
|
||||||
|
"warmup_epochs": 10,
|
||||||
|
"early_stopping_patience": 10,
|
||||||
|
"pdrop": 0.0,
|
||||||
|
"token_pdrop": 0.0,
|
||||||
|
"betas": [
|
||||||
|
0.9,
|
||||||
|
0.99
|
||||||
|
]
|
||||||
|
}
|
Reference in New Issue
Block a user