# Supervised fine-tuning trainer for the vision model. Trains for 2 epochs,
# evaluates and checkpoints every 10% of the run, and reloads the checkpoint
# with the best eval_loss once training finishes.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    # Original author's note: the collator exposes only the answer tokens to the
    # loss so that the image and instruction tokens do not affect it.
    # NOTE(review): confirm this masking behaviour for the installed Unsloth version.
    data_collator = UnslothVisionDataCollator(model, tokenizer),  # Must use!
    train_dataset = training_dataset,
    eval_dataset = validation_dataset,
    args = SFTConfig(
        # To resume an interrupted run, call
        # trainer.train(resume_from_checkpoint = True). Per the Transformers docs,
        # the config field of the same name is not used by Trainer directly.

        # --- Schedule and optimisation ---
        # Effective train batch per device = 1 * 4 accumulation steps = 4.
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        # max_steps = 30,  # Short smoke-test run; overrides num_train_epochs when set.
        num_train_epochs = 2,  # Set this instead of max_steps for full training runs
        learning_rate = lr,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,

        # --- Precision ---
        # Train in bf16 when the hardware supports it, otherwise fall back to fp16.
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        # Half-precision evaluation saves memory. Keep it in the same dtype as
        # training: the previous hard-coded fp16_full_eval = True requested fp16
        # evaluation even when training ran in bf16.
        fp16_full_eval = not is_bf16_supported(),
        bf16_full_eval = is_bf16_supported(),

        # --- Logging and output ---
        # A float below 1 is interpreted as a fraction of the total training
        # steps, so this logs every 10% of the run.
        logging_steps = 0.1,
        output_dir = output_dir,
        report_to = "none",  # Disables external loggers; use "wandb" for Weights & Biases.

        # --- Evaluation and checkpointing ---
        per_device_eval_batch_size = 1,
        eval_accumulation_steps = 4,
        metric_for_best_model = "eval_loss",
        # load_best_model_at_end needs the save schedule to line up with the eval
        # schedule, hence the identical strategy and step settings below.
        load_best_model_at_end = True,
        save_strategy = "steps",
        eval_strategy = "steps",
        eval_steps = 0.1,  # Evaluate every 10% of the total training steps.
        save_steps = 0.1,  # Checkpoint on the same cadence as evaluation.
        save_total_limit = 2,

        # --- You MUST keep the items below for vision finetuning ---
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 512,
    ),
)