How to run the Llama-2 model on the GPU of a MacBook Pro M2 Max using Python


I have done the following steps:

  1. Installed tensorflow-macos and tensorflow-metal, and moved the "meta-llama/Llama-2-7b-hf" model to the device with model.to(device) after validating my token from Hugging Face.
  2. Set device = torch.device("mps"). The machine shows that the GPU is activated on the Mac, but while running the model on my dataset it is very slow and uses the CPU instead of the GPU. Please provide a solution; I have been struggling with this for the last 15 days. (A device sanity check is sketched after this list.)
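For reference, here is a minimal sanity check (my sketch, not part of the original setup) to confirm that PyTorch can actually see the MPS backend before moving the model:

```python
import torch

# Confirm the MPS backend is built and available before moving the model;
# otherwise fall back to the CPU so the script still runs.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
```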

I am pasting my training code below.

```python
# Load the pre-trained model and tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "meta-llama/Llama-2-7b-hf"

# Load the tokenizer with the user authentication token
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

# Set the padding token to the EOS token (Llama has no pad token by default)
tokenizer.pad_token = tokenizer.eos_token
#model = AutoModelForCausalLM.from_pretrained(model_name, num_labels=len(label_encoder.classes_))

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# The classification head also needs to know the pad token id
model.config.pad_token_id = tokenizer.pad_token_id

device = torch.device("mps")
model.to(device)

# Tokenize and encode the text data
# Note: Llama-2's context window is 4096 tokens, so max_length=5000 is
# longer than the model can attend to and inflates memory use
train_encodings1 = tokenizer(list(X_train), truncation=True, padding=True, max_length=5000)
test_encodings1 = tokenizer(list(X_test), truncation=True, padding=True, max_length=5000)

# Create DataLoaders for the training and testing data
from torch.utils.data import DataLoader, TensorDataset

train_dataset1 = TensorDataset(torch.tensor(train_encodings1['input_ids']),
                               torch.tensor(train_encodings1['attention_mask']),
                               torch.tensor(y_train))
test_dataset1 = TensorDataset(torch.tensor(test_encodings1['input_ids']),
                              torch.tensor(test_encodings1['attention_mask']),
                              torch.tensor(y_test))

train_loader1 = DataLoader(train_dataset1, batch_size=1, shuffle=True)
test_loader1 = DataLoader(test_dataset1, batch_size=1, shuffle=False)

# Set up the optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()
torch.mps.synchronize()

# Training
import time

num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    print(time.ctime())
    start = time.time()
    print("Train")
    if device.type == 'mps':
        total_loss = 0.0
        # Iterate over each batch exactly once (the original nested
        # range(len(train_loader1)) loop re-ran the same batch repeatedly)
        for i, (input_ids, attention_mask, labels) in enumerate(train_loader1):
            print(f"Starting of loop: {i + 1}")
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            print(f"Middle of loop: {i + 1}")
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            print(f"End of loop: {i + 1}")
        average_loss = total_loss / len(train_loader1)
        print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
        stop = time.time()
        print(f"Training time: {stop - start}s")
    else:
        print("exit")
        exit()
```

Please help me speed up the training process so that it runs on the GPU of the MacBook Pro M2 Max.
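One quick diagnostic (a sketch I added, not in the original code) is to print which device the model parameters and a batch actually live on; if anything still reports cpu, that tensor was never moved:

```python
# Diagnostic: check which device the model weights and a batch live on.
print(next(model.parameters()).device)   # expect mps:0 after model.to(device)

input_ids, attention_mask, labels = next(iter(train_loader1))
print(input_ids.device)                  # cpu -- DataLoader batches start on the CPU
print(input_ids.to(device).device)       # mps:0 after the explicit move
```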

I am also getting this error after a few loops:

RuntimeError: MPS backend out of memory (MPS allocated: 35.79 GB, other allocations: 508.66 MB, max allowed: 36.27 GB). Tried to allocate 291.25 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).
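The traceback itself suggests one workaround; here is a minimal sketch (my addition, carrying the caveat the message gives) of setting that environment variable before PyTorch initializes the MPS backend. Note also that max_length=5000 exceeds Llama-2's 4096-token context window, so shortening the sequences is likely the more direct way to reduce memory pressure.

```python
import os

# Per the RuntimeError's own suggestion: disable the MPS allocator's upper
# memory limit. The message warns this may cause system failure, so treat
# it as a workaround, not a fix. It must be set before the MPS backend is
# initialized, i.e. before importing torch.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

import torch
```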
