My system has an 8 GB dedicated GPU and 24 GB of shared GPU memory. I am trying to fine-tune a model and I keep hitting CUDA out-of-memory errors. The shared memory never seems to be used. Is there any way to use it once the dedicated memory is full?
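This is roughly how I am checking memory on my side (just a minimal sketch; as far as I can tell, torch.cuda only reports the dedicated VRAM, not the shared pool):

    import torch

    # Total dedicated VRAM reported by CUDA (bytes); shared GPU memory does not show up here
    props = torch.cuda.get_device_properties(0)
    print(f"Dedicated VRAM: {props.total_memory / 1024**3:.1f} GiB")

    # Memory currently allocated / reserved by PyTorch's caching allocator
    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GiB")
    print(f"Reserved:  {torch.cuda.memory_reserved(0) / 1024**3:.1f} GiB")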
Code snippet for reference:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir=local_output_dir,
    per_device_train_batch_size=1,   # already at the smallest possible batch size
    per_device_eval_batch_size=1,
    fp16=False,
    bf16=False,
    learning_rate=1e-1,
    num_train_epochs=2,
    deepspeed=None,
    gradient_checkpointing=True,     # reduces activation memory at the cost of extra compute
    logging_dir=f"{local_output_dir}/runs",
    logging_strategy="steps",
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=10,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=10,
    load_best_model_at_end=False,
    report_to="tensorboard",
    disable_tqdm=True,
    remove_unused_columns=False,
    local_rank=2,
    warmup_steps=0,
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
)