# Build the trainer configuration: most settings are forwarded unchanged from
# the parsed `script_args`; a few are pinned here as literals.
training_arguments = TrainingArguments(
    # Each name below is read from `script_args` and passed through under the
    # same keyword.
    **{
        field: getattr(script_args, field)
        for field in (
            "output_dir",
            "per_device_train_batch_size",
            "gradient_accumulation_steps",
            "optim",
            "save_steps",
            "logging_steps",
            "learning_rate",
            "max_grad_norm",
            "max_steps",
            "warmup_ratio",
            "lr_scheduler_type",
            "gradient_checkpointing",
            "fp16",
        )
    },
    # `bf16` and `evaluation_strategy` are deliberately not passed here
    # (previously commented out), so the library defaults apply to both.
    # NOTE(review): per the transformers docs a positive `max_steps` takes
    # precedence over `num_train_epochs` - confirm which one is meant to
    # bound the run.
    num_train_epochs=1,
    report_to="wandb",
    # Run name carries a minute-resolution timestamp taken at a fixed
    # UTC+02:00 offset.
    run_name=f"llama-3-napoleon-{datetime.now(timezone(timedelta(hours=2))).strftime('%Y-%m-%d-%H-%M')}",
)