I have a mix of PyTorch and Transformers code that loads my custom dataset, processes it, downloads a TinyLlama model, and then fine-tunes that model on the processed dataset.
It works fine on the CPU, but when I set the device to "cuda" it fails with the following:
Training started...
0%| | 0/10 [00:00<?, ?it/s]Traceback (most recent call last):
File "/storage/Code/DataAssistant/main.py", line 368, in <module>
main()
File "/storage/Code/DataAssistant/main.py", line 364, in main
train()
File "/storage/Code/DataAssistant/main.py", line 252, in train
trainer.train()
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/transformers/trainer.py", line 1838, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/accelerate/data_loader.py", line 451, in __iter__
current_batch = next(dataloader_iter)
^^^^^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 630, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 673, in _next_data
index = self._next_index() # may raise StopIteration
^^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 620, in _next_index
return next(self._sampler_iter) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/data/sampler.py", line 282, in __iter__
for idx in self.sampler:
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/data/sampler.py", line 164, in __iter__
yield from torch.randperm(n, generator=generator).tolist()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/storage/Programs/PythonVenvs/transformers-rocm/lib/python3.11/site-packages/torch/utils/_device.py", line 77, in __torch_function__
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'
0%|
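Reading the traceback, the failing randperm call goes through torch/utils/_device.py, i.e. the global default-device override, so my guess is that the torch.set_default_device("cuda:0") call in my script is what breaks the DataLoader's RandomSampler: the sampler builds a plain CPU generator, but randperm now gets dispatched to CUDA. If I read it right, this minimal sketch (no Trainer involved, untested on its own) should hit the same error:

import torch

torch.set_default_device("cuda")   # same global override my script uses
gen = torch.Generator()            # plain CPU generator, like the one RandomSampler creates
torch.randperm(10, generator=gen)  # RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'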
Here is the short version of my code:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, Seq2SeqTrainer, Seq2SeqTrainingArguments, PreTrainedTokenizer
from dataclasses import dataclass, field
from typing import Optional, Dict, Sequence
from torch.nn.utils.rnn import pad_sequence
from datasets import Dataset
import torch
device_name = "cpu"
if torch.cuda.is_available():
    device_name = "cuda:0"
    torch.cuda.set_device(0)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
torch.set_default_dtype(torch.float32)
torch.set_default_device(device_name)  # all new tensors default to this device
model = AutoModelForCausalLM.from_pretrained(
path,
torch_dtype=torch.float32,
device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(
model_name,
padding_side="right"
)
tokenizer.pad_token = "[PAD]"
def preprocess_prompt(data):
    inputs = [process_input(c, i) for c, i in zip(data['context'], data['input'])]
    outputs = [process_output(a) for a in data["output"]]
    return dict(
        input=inputs,
        output=outputs
    )
dataset = Dataset.from_json("data/training_data.json")
dataset = dataset.map(preprocess_prompt, batched=True)
dataset = dataset.with_format("torch", device=torch.device(device_name))
IGNORE_INDEX = -100  # label id ignored by the loss

@dataclass
class DataCollatorForCausalLM(object):
    tokenizer: PreTrainedTokenizer
    source_max_len: int
    target_max_len: int

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        # Extract elements
        sources = [f"{self.tokenizer.bos_token}{example['input']}" for example in instances]
        targets = [f"{example['output']}{self.tokenizer.eos_token}" for example in instances]
        # Tokenize
        tokenized_sources_with_prompt = self.tokenizer(
            sources,
            max_length=self.source_max_len,
            truncation=True,
            add_special_tokens=False,
        )
        tokenized_targets = self.tokenizer(
            targets,
            max_length=self.target_max_len,
            truncation=True,
            add_special_tokens=False,
        )
        # Build the input and labels for causal LM
        input_ids = []
        labels = []
        for tokenized_source, tokenized_target in zip(
            tokenized_sources_with_prompt['input_ids'],
            tokenized_targets['input_ids']
        ):
            input_ids.append(torch.tensor(tokenized_source + tokenized_target))
            # Mask out the prompt tokens in the labels
            labels.append(torch.tensor([IGNORE_INDEX] * len(tokenized_source) + tokenized_target))
        # Apply padding
        input_ids = pad_sequence(input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
        labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)
        data_dict = {
            'input_ids': input_ids,
            'attention_mask': input_ids.ne(self.tokenizer.pad_token_id),
        }
        if labels is not None:
            data_dict['labels'] = labels
        return data_dict
data_collator = DataCollatorForCausalLM(
tokenizer=tokenizer,
source_max_len=1024,
target_max_len=256
)
training_args = Seq2SeqTrainingArguments(
output_dir=model_dir,
optim="adamw_torch",
per_device_train_batch_size=4,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
save_strategy='steps',
save_steps=250,
save_total_limit=40,
lr_scheduler_type='constant',
remove_unused_columns=False,
max_grad_norm=0.3,
max_steps=10,
num_train_epochs=3,
learning_rate=2e-5,
do_train=True
)
trainer = Seq2SeqTrainer(
model=model,
args=training_args,
tokenizer=tokenizer,
train_dataset=dataset,
eval_dataset=dataset,
data_collator=data_collator
)
# Exception raised here:
trainer.train()
The DataCollator is a pre-existing class that I took straight from TinyLlama's finetune.py.
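The only workaround I can think of is dropping the global torch.set_default_device() call and letting the Trainer move the batches to the GPU itself, roughly like the sketch below (untested), but I'd prefer to keep the default-device setup if there is a supported way:

# Tentative sketch: keep the model on the GPU via device_map, but leave the
# default device alone so the DataLoader's sampler can keep its CPU generator.
# torch.set_default_device(device_name)   # <- removed
model = AutoModelForCausalLM.from_pretrained(
    path,
    torch_dtype=torch.float32,
    device_map="auto",
)
dataset = dataset.with_format("torch")     # no device= here either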
Any ideas?