Deepspeed tensor parallel gets problem in tensor alignment when using tokenizer

268 views Asked by At

I tried to use deepspeed to conduct tensor parallel on starcoder as I had multiple small GPUs and each of which cannot singly hold the whole model.

from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import torch
import deepspeed

local_rank = int(os.getenv('LOCAL_RANK', '0'))
world_size = int(os.getenv('WORLD_SIZE', '1'))

cache_dir = '/llm-benchmark/starcoder-cache'

os.environ['TRANSFORMERS_CACHE'] = cache_dir

checkpoint = "bigcode/starcoder"
device = "cuda" # for GPU usage or "cpu" for CPU usage

tokenizer = AutoTokenizer.from_pretrained(checkpoint, cache_dir=cache_dir)

# Load model without moving it to device
model = AutoModelForCausalLM.from_pretrained(checkpoint, cache_dir=cache_dir)

ds_engine = deepspeed.init_inference(model, tensor_parallel={'enabled': True, 'tp_size': world_size})
model = ds_engine.module

print('before tokenizing')
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(f"{device}")
print('before generation')
outputs = model.generate(inputs)
print('after generation')
print(tokenizer.decode(outputs[0]))
print('full result')

When I ran the above code, it seemed that the model had been splitter successfully. However, I got the following error:

Traceback (most recent call last):
  File "/root/code/starcoder/generate.py", line 29, in <module>
    outputs = model.generate(inputs)
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/generation/utils.py", line 1437, in generate
    return self.greedy_search(
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/generation/utils.py", line 2248, in greedy_search
    outputs = self(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 808, in forward
    transformer_outputs = self.transformer(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 673, in forward
    outputs = block(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 316, in forward
    attn_outputs = self.attn(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 230, in forward
    query, key_value = self.c_attn(hidden_states).split((self.embed_dim, 2 * self.kv_dim), dim=2)
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/_tensor.py", line 803, in split
    return torch._VF.split_with_sizes(self, split_size, dim)
RuntimeError: split_with_sizes expects split_sizes to sum exactly to 1600 (input tensor's size at dimension 2), but got split_sizes=[1536, 256]
Traceback (most recent call last):
  File "/root/code/starcoder/generate.py", line 29, in <module>
    outputs = model.generate(inputs)
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/generation/utils.py", line 1437, in generate
    return self.greedy_search(
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/generation/utils.py", line 2248, in greedy_search
    outputs = self(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 808, in forward
    transformer_outputs = self.transformer(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 673, in forward
    outputs = block(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 316, in forward
    attn_outputs = self.attn(
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/code/starcoder/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 230, in forward
    query, key_value = self.c_attn(hidden_states).split((self.embed_dim, 2 * self.kv_dim), dim=2)
  File "/root/code/starcoder/lib/python3.10/site-packages/torch/_tensor.py", line 803, in split
    return torch._VF.split_with_sizes(self, split_size, dim)
RuntimeError: split_with_sizes expects split_sizes to sum exactly to 1600 (input tensor's size at dimension 2), but got split_sizes=[1536, 256]

It seems that the tokenizer is not aligned with the model. Why is this happening?

0

There are 0 answers