I am trying to build a model for NLP. The below code is giving error "typeerror 'int' object is not callable" in the last line.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import nltk
nltk.download('punkt')
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from gensim.models import word2vec
from gensim.models import KeyedVectors
from torchtext.vocab import Vectors
from sklearn.metrics import f1_score, precision_score, accuracy_score, mean_squared_error
data = pd.read_table('devel.tsv')
data.head()
data.dtypes
data.shape
data.size
## PREPROCESSING
data.columns=['Words', 'Tags']
data.columns
word = data['Words']
sen=1
Sentence=[]
for letter in word:
Sentence.append(sen)
if letter =='.':
sen+=1
data['Sentence#'] = Sentence
data.head()
data.tail()
word_counts = data.groupby(["Sentence#"])['Words'].agg(['count'])
# word_counts.hist(bins=50)
max_length = word_counts.max()
max_length
## WORD2VEC for embeddings
# !wget https://ftp.ncbi.nlm.nih.gov/pub/lu/Suppl/BioSentVec/BioWordVec_PubMed_MIMICIII_d200.vec.bin
word_vectors = KeyedVectors.load_word2vec_format('BioWordVec_PubMed_MIMICIII_d200.vec.bin', binary=True, limit=100000)
pad_ukw = np.ones((2, word_vectors.vectors.shape[-1]), dtype=np.int32)
pad_ukw.shape, word_vectors.vectors.shape
data.head()
word_vecs = torch.from_numpy(word_vectors.vectors)
word_vecs = word_vecs.int()
values = np.concatenate((pad_ukw, word_vecs), axis=0)
new_val = torch.from_numpy(values)
new_val = new_val.int()
word_vecs.dtype, new_val.dtype
## DATASET AND DATALOADER
no_samples, no_features = data.shape
input_size = no_features
output_size = no_features
print (input_size, output_size)
x = data['Words'].values
y = data['Tags'].values
# Convert x and y to numpy arrays
x = np.array(x)
y = np.array(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size =0.3, random_state=1 )
print (f"{x_train}, \n {y_train}, \n {x_test} ,\n {y_test}")
train_dataset = TensorDataset(x_train, y_train)
I have tried various solutions such as changing train_dataset to some other variable name, seeing if I can convert to numpy etc. However, it is not getting solved. I am using torch version 2.0.1+cu118 in google collab.