TypeError: ('Keyword argument not understood:', 'inputs')

The following code implements fake news detection with a dEFEND model using TensorFlow and Keras. When I try to build the model, a TypeError is raised complaining about a keyword argument 'inputs', and I don't understand why. Here is my code:

import keras
import pickle
import tensorflow as tf
from keras.models import *
from keras.layers import *
from keras import optimizers
from keras.optimizers import *
from keras.callbacks import *
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K
from keras.utils.generic_utils import CustomObjectScope
from keras.layers import Layer
from keras import initializers
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
from keras.utils.vis_utils import plot_model

from tqdm import tqdm

import os
import re
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from nltk.tokenize import sent_tokenize
import jsonlines
# clean_str() is a project-specific text-cleaning helper, defined elsewhere

def load_glove_embedding(path, dim, word_index):
    embeddings_index = {}
    f = open(path)

    print('Generating GloVe embedding...')
    for line in tqdm(f):
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    embedding_matrix = np.zeros((len(word_index) + 1, dim))

    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
    print('Loaded GloVe embedding')

    return embedding_matrix

class AttLayer(Layer):
    """
    Attention layer used for calculating attention at the word and sentence levels.
    """

    def __init__(self, **kwargs):
        super(AttLayer, self).__init__(**kwargs)
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = 100

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim,)))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self._trainable_weights = [self.W, self.b, self.u]
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        print(ait)  # debugging: prints the Tensor(...) lines shown in the output below
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

TOKENIZER_STATE_PATH = './saved_models/tokenizer.p'
GLOVE_EMBEDDING_PATH = './saved_models/zhs_wiki_glove.vectors.100d.txt'

class Metrics(Callback):
    def __init__(self):
        self.log_file = open('./data/Log_Defend_Weibo_content.txt', 'a', encoding='utf-8')

    def on_train_begin(self, logs={}):
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []
        self.val_auc = []
        self.val_acc = []

    def on_epoch_end(self, epoch, logs={}):
        val_predict_onehot = (
            np.asarray(self.model.predict([self.validation_data[0], self.validation_data[1]]))).round()
        val_targ_onehot = self.validation_data[2]
        val_predict = np.argmax(val_predict_onehot, axis=1)
        val_targ = np.argmax(val_targ_onehot, axis=1)
        _val_f1 = f1_score(val_targ, val_predict)
        _val_recall = recall_score(val_targ, val_predict)
        _val_precision = precision_score(val_targ, val_predict)
        _val_auc = roc_auc_score(val_targ, val_predict)
        _val_acc = accuracy_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        self.val_auc.append(_val_auc)
        self.val_acc.append(_val_acc)
        print("Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f" % (
            epoch, _val_acc, _val_precision, _val_recall, _val_f1, _val_auc))
        self.log_file.write(
            "Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f\n" % (epoch,
                                                                    _val_acc,
                                                                    _val_precision,
                                                                    _val_recall,
                                                                    _val_f1,
                                                                    _val_auc))
        return

class Defend():
    def __init__(self):
        self.model = None
        self.MAX_SENTENCE_LENGTH = 120
        self.MAX_SENTENCE_COUNT = 50
        self.MAX_COMS_COUNT = 150
        self.MAX_COMS_LENGTH = 120
        self.VOCABULARY_SIZE = 0
        self.word_embedding = None
        self.word_attention_model = None
        self.sentence_comment_co_model = None
        self.tokenizer = None
        self.class_count = 2
        self.metrics = Metrics()

        # Variables for calculating attention weights
        self.news_content_word_level_encoder = None
        self.comment_word_level_encoder = None
        self.news_content_sentence_level_encoder = None
        self.comment_sequence_encoder = None
        self.co_attention_model = None

    def _generate_embedding(self, path, dim):
        return load_glove_embedding(path, dim, self.tokenizer.word_index)

    def _build_model(self, n_classes=2, embedding_dim=100, embeddings_path=False, aff_dim=80):
        GLOVE_DIR = "."
        embeddings_index = {}
        f = open('./saved_models/zhs_wiki_glove.vectors.100d.txt', encoding='utf-8')
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
        f.close()
        word_index = self.tokenizer.word_index
        embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
        for word, i in word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        embedding_layer = Embedding(len(word_index) + 1,
                       embedding_dim,
                       weights=[embedding_matrix],
                       input_length=self.MAX_SENTENCE_LENGTH,
                       trainable=True,
                       mask_zero=True)

        com_embedding_layer = Embedding(len(word_index) + 1,
                         embedding_dim,
                         weights=[embedding_matrix],
                         input_length=self.MAX_COMS_LENGTH,
                         trainable=True,
                         mask_zero=True)

        sentence_input = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int32')
        embedded_sequences = embedding_layer(sentence_input)
        embedded_sequences = Lambda(lambda x: x, output_shape=lambda s: s)(embedded_sequences)
        l_lstm = Bidirectional(GRU(100, return_sequences=True), name='word_lstm')(Reshape((120, 100))(embedded_sequences))
        l_att = AttLayer(name='word_attention')(l_lstm)
        sentEncoder = Model(sentence_input, l_att)
        plot_model(sentEncoder, to_file='SentenceEncoder.png', show_shapes=True)

        self.news_content_word_level_encoder = sentEncoder

        content_input = Input(shape=(self.MAX_SENTENCE_COUNT, self.MAX_SENTENCE_LENGTH), dtype='int32')
        content_encoder = TimeDistributed(sentEncoder)(content_input)
        l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)

        self.news_content_sentence_level_encoder = Model(content_input, l_lstm_sent)

        content_input_ = Lambda(lambda x: tf.expand_dims(x, axis=3))(l_lstm_sent)  # for channels
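        # NOTE: the line below is the one that raises the TypeError in the traceback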
        pooling_layer = keras.layers.AveragePooling2D(inputs=content_input_, pool_size=2, strides=2, padding='valid', data_format='channels_last',name='average_pooling_content')
        preds = Dense(2, activation='softmax')(pooling_layer)
        model = Model(inputs=content_input_, outputs=preds)
        model.summary()
        plot_model(model, to_file='CHATT_content.png', show_shapes=True)

        optimize = optimizers.RMSprop(lr=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimize,
                      metrics=['accuracy'])

        return model
    def train(self, train_x, train_y, val_x, val_y,
              batch_size=20, epochs=10,
              embeddings_path=False,
              saved_model_dir='saved_models', saved_model_filename=None, ):
        # Fit the vocabulary set on the content and comments
        self._fit_on_texts_and_comments(train_x, val_x)
        self.model = self._build_model(
            n_classes=train_y.shape[-1],
            embedding_dim=100,
            embeddings_path=embeddings_path)

        # Create encoded input for content and comments
        encoded_train_x = self._encode_texts(train_x)
        encoded_val_x = self._encode_texts(val_x)
        """encoded_train_c = self._encode_comments(train_c)
        encoded_val_c = self._encode_comments(val_c)"""
        callbacks = []
        callbacks.append(LambdaCallback(
                on_epoch_end=lambda epoch, logs: self._save_tokenizer_on_epoch_end(
                    os.path.join(saved_model_dir, self._get_tokenizer_filename(saved_model_filename)), epoch))
        )

        if saved_model_filename:
            callbacks.append(
                ModelCheckpoint(
                    filepath=os.path.join(saved_model_dir, saved_model_filename),
                    monitor='val_loss',
                    save_best_only=True,
                    save_weights_only=False,
                )
            )
        callbacks.append(self.metrics)
        early_stopping = EarlyStopping(
                      monitor='val_acc',
                      min_delta=0.0005,
                      patience=5,
                      verbose=1,
                      restore_best_weights=True,
                  )
        lr_scheduler = ReduceLROnPlateau(
                      monitor='val_acc',
                      factor=0.5,
                      patience=3,
                      min_lr=0.00002,
                      verbose=1,
                  )
        callbacks.append(early_stopping)
        callbacks.append(lr_scheduler)
        print(type(callbacks))
        self.model.fit(encoded_train_x, y=train_y,
                       validation_data=(encoded_val_x, val_y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=1,
                       callbacks=callbacks)

if __name__ == '__main__':
    # dataset used for training
    VALIDATION_SPLIT = 0.25
    contents = []
    #comments = []
    labels = []
    #texts = []
    ids = []

    with open('./data/fake_release_all.json', 'r+', encoding='utf-8') as json_file:
        for item in jsonlines.Reader(json_file):
            sentences = sent_tokenize(item['content'])
            j = 0
            k = 0
            for sentence in sentences:
                #print(sentence)
                sentence = clean_str(sentence)
                k+=len(sentence)
                #print(sentence)
                sentences[j] = sentence
                j += 1
            #print(sentences)
            if k>=100:
                contents.append(sentences)
                ids.append(item['id'])
                labels.append(item['label'])
            else:
              continue
    with open('./data/real_release_all.json', 'r+', encoding='utf-8') as json_file:
        for item in jsonlines.Reader(json_file):
            sentences = sent_tokenize(item['content'])
            j = 0
            k = 0
            for sentence in sentences:
                sentence = clean_str(sentence)
                k+=len(sentence)
                sentences[j] = sentence
                j += 1
            if k>=100:
                contents.append(sentences)
                ids.append(item['id'])
                labels.append(item['label'])
            else:
              continue

    labels = np.asarray(labels)
    labels = to_categorical(labels)

    content_ids = set(ids)

    id_train, id_test, x_train, x_val, y_train, y_val = train_test_split(
        ids, contents, labels,
        test_size=VALIDATION_SPLIT, random_state=42,
        stratify=labels)

    # Train and save the model
    SAVED_MODEL_DIR = 'saved_models'
    SAVED_MODEL_FILENAME = 'Defend_Weibo_model_content.h5'
    h = Defend()
    h.train(x_train, y_train, x_val, y_val,
            batch_size=20,
            epochs=30,
            embeddings_path='./zhs_wiki_glove.vectors.100d.txt',
            saved_model_dir=SAVED_MODEL_DIR,
            saved_model_filename=SAVED_MODEL_FILENAME)

And here is the error:

    Tensor("word_attention/Exp:0", shape=(?, 120), dtype=float32)
    Tensor("time_distributed_1/word_attention/Exp:0", shape=(?, ?), dtype=float32)
    Traceback (most recent call last):
    File"C:/Users/hqint/PycharmProjects/pythonProject/7460/dEFEND/fake_news_detection/dEFEND/defend_content.py", line 684, in <module>saved_model_filename=SAVED_MODEL_FILENAME)

File "C:/Users/hqint/PycharmProjects/pythonProject/7460/dEFEND/fake_news_detection/dEFEND/defend_content.py", line 380, in train embeddings_path=embeddings_path)

File "C:/Users/hqint/PycharmProjects/pythonProject/7460/dEFEND/fake_news_detection/dEFEND/defend_content.py", line 271, in _build_model
        pooling_layer = keras.layers.AveragePooling2D(inputs=content_input_, pool_size=2, strides=2, padding='valid', data_format='channels_last',name='average_pooling_content')

File "C:\Users\hqint\PycharmProjects\pythonProject\venv\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
        return func(*args, **kwargs)

File "C:\Users\hqint\PycharmProjects\pythonProject\venv\lib\site-packages\keras\layers\pooling.py", line 317, in __init__
        data_format, **kwargs)

File "C:\Users\hqint\PycharmProjects\pythonProject\venv\lib\site-packages\keras\layers\pooling.py", line 171, in __init__
        super(_Pooling2D, self).__init__(**kwargs)

File "C:\Users\hqint\PycharmProjects\pythonProject\venv\lib\site-packages\keras\engine\base_layer.py", line 128, in __init__
   raise TypeError('Keyword argument not understood:', kwarg)

TypeError: ('Keyword argument not understood:', 'inputs')

How can I fix this TypeError? I am using TensorFlow 1.13.1 and Keras 2.2.4. The error seems to come from the way the input tensor is passed to the average-pooling layer, but I don't know how to fix it.
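From reading the Keras functional API docs, my understanding is that a layer constructor only accepts configuration keywords; the input tensor is supplied by calling the layer on it afterwards, which is how every other layer in the snippet (Dense, Bidirectional, TimeDistributed) is invoked. A minimal sketch of what I think the corrected block would look like, reusing the names from _build_model above (the Flatten() is my own addition, since Dense expects a 2-D input, and the Model is built from the original content_input placeholder rather than the intermediate Lambda output):

    # Configure the pooling layer first, then call it on the tensor.
    # Passing inputs= to __init__ is what triggers
    # 'Keyword argument not understood'.
    pooled = AveragePooling2D(pool_size=2,
                              strides=2,
                              padding='valid',
                              data_format='channels_last',
                              name='average_pooling_content')(content_input_)
    flat = Flatten()(pooled)  # assumption: flatten so Dense gets a 2-D input
    preds = Dense(2, activation='softmax')(flat)

    # Build the Model from the Input placeholder, not the Lambda output.
    model = Model(inputs=content_input, outputs=preds)

Is this the right way to wire up the pooling layer, or is something else going on?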
