I am currently trying to fine-tune the Korean Llama 13B model (beomi/llama-2-koen-13b) on a private dataset with DeepSpeed, Flash Attention 2, and the TRL SFTTrainer. I am running on 2 x A100 80GB GPUs, but the fine-tuning never actually gets going, and I have not been able to find the cause or a solution by searching. Please let me know what the problem is and how to solve it.
As mentioned above, I am stuck in this situation. I use accelerate launch for multi-GPU training together with the DeepSpeed configs provided with the TRL example code. I tried both deepspeed_zero2.yaml and deepspeed_zero3.yaml, but neither of them worked.
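For reference, the deepspeed_zero3.yaml that I pass to accelerate launch is essentially the one shipped with the TRL example code. I am reproducing it from memory, so a field or two may differ from the exact file, but it looks roughly like this (the zero2 config is the same apart from the ZeRO stage and the zero3-specific flags):
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 2  # overridden by --num_processes on the command line anyway
rdzv_backend: static
same_network: true
use_cpu: false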
The launch command is as follows:
accelerate launch --config_file=accelerate_configs/deepspeed_zero3.yaml --num_processes 2 finetuning/finetune_SFT.py \
    --model_path beomi/llama-2-koen-13b \
    --data_path Cartinoe5930/KoRAE_filtered_12k \
    --output_dir finetuning/result/llama2/ \
    --wandb_project KoRAE_llama2 \
    --wandb_run_name KoRAE_llama2 \
    --hf_hub_path HUB_PATH_TO_UPLOAD_MODEL \
    --hf_token MY_HF_ACCESS_TOKEN
The code of finetune_SFT.py is as follows:
import os
import argparse

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, DataCollatorForLanguageModeling
from accelerate import Accelerator
from datasets import load_dataset
from trl import SFTTrainer

from utils.prompter import Prompter
def args_parse():
    parser = argparse.ArgumentParser()

    parser.add_argument("--hf_token", type=str, help="Required to upload models to hub.")
    parser.add_argument("--model_path", type=str, default="beomi/llama-2-koen-13b")
    parser.add_argument("--data_path", type=str, default="Cartinoe5930/KoRAE_filtered_12k")
    parser.add_argument("--num_proc", type=int)
    parser.add_argument("--seq_length", type=int, default=4096)
    parser.add_argument("--num_epochs", type=int, default=3)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--micro_batch_size", type=int, default=2)
    parser.add_argument("--val_set_size", type=float, default=0)
    parser.add_argument("--logging_steps", type=int, default=10)
    parser.add_argument("--save_strategy", type=str, default="epoch", help="The strategy used to save the model ('epoch' or 'steps').")
    parser.add_argument("--save_steps", type=int, default=500, help="Only used when --save_strategy is 'steps'.")
    parser.add_argument("--gradient_checkpointing", type=bool, default=True)
    parser.add_argument("--group_by_length", type=bool, default=False)
    parser.add_argument("--packing", type=bool, default=False)
    parser.add_argument("--learning_rate", type=float, default=3e-4)
    parser.add_argument("--lr_scheduler_type", type=str, default="cosine")
    parser.add_argument("--warmup_ratio", type=float, default=0.03)
    parser.add_argument("--weight_decay", type=float, default=0)
    parser.add_argument("--wandb_project", type=str, default="")
    parser.add_argument("--wandb_run_name", type=str, default="")
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument("--hf_hub_path", type=str, required=True, help="The hub path to upload the model")

    return parser.parse_args()
def process_dataset(example):
    # Formatting function for SFTTrainer: turns a batch of rows into a list of full prompts.
    prompter = Prompter("KoRAE_template")

    result_data = []
    for i in range(len(example["instruction"])):
        full_prompt = prompter.generate_prompt(
            example["instruction"][i],
            example["prompt"][i],
            example["input"][i],
            example["output"][i],
        )
        result_data.append(full_prompt)

    return result_data
def create_datasets(args):
    dataset = load_dataset(
        args.data_path,
        split="train",
        num_proc=args.num_proc if args.num_proc else None,
    )

    if args.val_set_size > 0:
        train_val = dataset.train_test_split(test_size=args.val_set_size, seed=42)
        train_data = train_val["train"]
        val_data = train_val["test"]
    else:
        train_data = dataset
        val_data = None

    return train_data, val_data
if __name__ == "__main__":
args = args_parse()
gradient_accumulation_steps = args.batch_size // args.micro_batch_size // args.num_proc
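    # With the defaults used here (batch_size=32, micro_batch_size=2) and --num_proc 2,
    # this works out to 32 // 2 // 2 = 8 accumulation steps per GPU, i.e. an effective
    # global batch of 2 GPUs * 2 per-device * 8 accumulation = 32 examples per optimizer step.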
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        # device_map={"": Accelerator().process_index},
        torch_dtype=torch.bfloat16,
        use_auth_token=args.hf_token,
        use_flash_attention_2=True,
    )
    model.config.use_cache = False
    model.enable_input_require_grads()

    tokenizer = AutoTokenizer.from_pretrained(
        args.model_path,
        use_auth_token=args.hf_token,
    )
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    # Check if parameter passed or if set within environ
    use_wandb = len(args.wandb_project) > 0 or (
        "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0
    )
    # Only overwrite environ if wandb param passed
    if len(args.wandb_project) > 0:
        os.environ["WANDB_PROJECT"] = args.wandb_project

    train_dataset, eval_dataset = create_datasets(args)
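    # Note: no deepspeed=... argument is passed to TrainingArguments below; the ZeRO
    # config comes from the accelerate launcher (deepspeed_zero3.yaml above), and the
    # Trainer picks it up through accelerate automatically.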
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        num_train_epochs=args.num_epochs,
        per_device_train_batch_size=args.micro_batch_size,
        per_device_eval_batch_size=args.micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        gradient_checkpointing=args.gradient_checkpointing,
        learning_rate=args.learning_rate,
        logging_steps=args.logging_steps,
        save_strategy=args.save_strategy,
        save_steps=args.save_steps,
        evaluation_strategy="epoch" if eval_dataset else "no",
        group_by_length=args.group_by_length,
        lr_scheduler_type=args.lr_scheduler_type,
        warmup_ratio=args.warmup_ratio,
        weight_decay=args.weight_decay,
        bf16=True,
        save_total_limit=2,
        remove_unused_columns=False,
        report_to="wandb" if use_wandb else "none",
        run_name=args.wandb_run_name if use_wandb else None,
    )
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        formatting_func=process_dataset,
        data_collator=data_collator,
        packing=args.packing,
        max_seq_length=args.seq_length,
        tokenizer=tokenizer,
        args=training_args,
    )

    trainer.train()

    trainer.save_model(args.output_dir)
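    # Note: with ZeRO-3, trainer.save_model() only writes out the full 16-bit weights when
    # stage3_gather_16bit_weights_on_model_save (zero3_save_16bit_model in the accelerate
    # config) is enabled; otherwise only the sharded ZeRO checkpoint is saved.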
    model.push_to_hub(
        args.hf_hub_path,
        use_temp_dir=True,
        use_auth_token=args.hf_token,
    )
    tokenizer.push_to_hub(
        args.hf_hub_path,
        use_temp_dir=True,
        use_auth_token=args.hf_token,
    )
The log of the above run is shown below. There are no explicit errors or anything obviously strange, but the process just stays in this state: the training progress bar and any further logs never appear, so I cannot tell whether training is actually running.
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING]
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] *****************************************
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] *****************************************
[2023-11-08 05:03:00,158] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2023-11-08 05:03:00,422] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
You are attempting to use Flash Attention 2.0 with a model initialized on CPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
You are attempting to use Flash Attention 2.0 with a model initialized on CPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|████████████████| 10/10 [00:07<00:00, 1.30it/s]
/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:374: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:671: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
[2023-11-08 05:04:45,333] [INFO] [comm.py:637:init_distributed] cdb=None
[2023-11-08 05:04:45,333] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Loading checkpoint shards: 100%|████████████████| 10/10 [00:10<00:00, 1.09s/it]
/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:374: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:671: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
[2023-11-08 05:04:49,096] [INFO] [comm.py:637:init_distributed] cdb=None
Parameter Offload: Total persistent parameters: 414720 in 81 params
Any help or pointers on how to resolve this problem would be greatly appreciated!