'MistralForCausalLM' object has no attribute 'merge_and_unload'


I finetuned (or rather further pretrained) the model OpenChat (a Mistral 7B finetune) on my own data. This worked well and inference produces nice results. Now I want to merge the adapter weights into the original model so that I can quantize it in a further step. The issue is that calling model.merge_and_unload() produces the error:

 "AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload".

Is there a way to fix this, or is there another method to merge my adapter weights with the original model?

Here is my code:

Training

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})


from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)




from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)


data_train = ...



import transformers


tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,   
        num_train_epochs=1,  
        #evaluation_strategy="steps",   
        logging_strategy="steps",           
        learning_rate=2e-4,        
        fp16=True,
        logging_steps=5 ,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False

trainer.train()

trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")

Merging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


model_name = "pretrained_model"

config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


model.merge_and_unload()

1 Answer

Answer by disconnectionist:

If you run print(model), you should get a description of the model, something like this:

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
...

If lora layers show up in this output, the LoRA adapter has already been injected into the base model's modules. However, the MistralForCausalLM class itself knows nothing about LoRA, so you can't call merge_and_unload on it.
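
As a quick sanity check (a minimal sketch, assuming the model object from your merging script is in scope), you can also test the object directly instead of reading the printed tree:

# Minimal sketch: is `model` actually a PEFT wrapper that supports merging?
from peft import PeftModel

print(type(model).__name__)                        # e.g. MistralForCausalLM vs. PeftModelForCausalLM
print(isinstance(model, PeftModel))                # True only if the PEFT wrapper is present
print(hasattr(model, "merge_and_unload"))          # the method lives on the wrapper, not the base class
print(any("lora" in n for n, _ in model.named_modules()))  # True if LoRA layers were injected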

PeftModel does have the merge_and_unload method, so you need to use that:

# merge the base model with the LoRA adapter and save the result

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

device_map = {"": 0}
lora_dir = "mistralai-my-lora-finetuning"
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
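# (for the setup in the question, lora_dir would be "pretrained_model"
#  and base_model_name would be "openchat/openchat-3.5-1210")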
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir, device_map=device_map, torch_dtype=torch.bfloat16)


model = model.merge_and_unload()

output_dir = "output/my_merged_model"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)  # keep the tokenizer next to the merged weights
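
Since the stated goal is to quantize the model afterwards, here is a minimal sketch of that follow-up step, assuming the output_dir written above and the same 4-bit BitsAndBytesConfig settings used in the question. The merged checkpoint loads like any ordinary Transformers model:

# Minimal sketch: reload the merged checkpoint in 4-bit (bitsandbytes) for the quantization step
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

merged_dir = "output/my_merged_model"  # directory written by save_pretrained above

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(merged_dir)
model = AutoModelForCausalLM.from_pretrained(
    merged_dir,
    quantization_config=bnb_config,
    device_map="auto",
)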