I've been following a MONAI-based liver segmentation codebase from https://github.com/amine0110/Liver-Segmentation-Using-Monai-and-PyTorch. However, when I run the `train` function, I get the following error:
----------
epoch 1/100
--------------------
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-28-22328907c897> in <cell line: 16>()
36
37 if __name__ == '__main__':
---> 38 train(model, data_in, loss_function, optimizer, 100, model_dir)
<ipython-input-26-6b8beda06825> in train(model, data_in, loss, optim, max_epochs, model_dir, test_interval, device)
77 print('-'*20)
78
---> 79 train_epoch_loss /= train_step
80 print(f'Epoch_loss: {train_epoch_loss:.4f}')
81 save_loss_train.append(train_epoch_loss)
ZeroDivisionError: float division by zero
Replacing the divisor with a small non-zero number (e.g. 0.01) avoids the error, but it distorts the model's results significantly. Is there a way to change the code so that it runs without significantly changing the test dice loss and test dice metric plots? The body of my `train` function is:
# Body of `train(model, data_in, loss, optim, max_epochs, model_dir,
# test_interval, device)` (signature as shown in the traceback). The `def`
# line is not part of this snippet, so the statements are written at top level.
#
# Each epoch: run one optimisation pass over the training loader, record the
# mean loss / Dice value, and every `test_interval` epochs evaluate on the test
# loader, saving the weights whenever the mean test Dice improves.

best_metric = -1
best_metric_epoch = -1

# Per-epoch histories; each list is re-saved to `model_dir` after every update.
save_loss_train = []
save_loss_test = []
save_metric_train = []
save_metric_test = []

train_loader, test_loader = data_in

for epoch in range(max_epochs):
    print("-" * 10)
    print(f"epoch {epoch + 1}/{max_epochs}")

    model.train()
    train_epoch_loss = 0
    train_step = 0
    epoch_metric_train = 0

    for batch_data in train_loader:
        train_step += 1

        volume = batch_data["vol"]
        label = batch_data["seg"]
        label = label != 0  # boolean mask: True wherever the label is non-zero
        volume, label = (volume.to(device), label.to(device))

        optim.zero_grad()
        outputs = model(volume)

        train_loss = loss(outputs, label)
        train_loss.backward()
        optim.step()

        train_epoch_loss += train_loss.item()
        # len(train_loader) is already the number of batches, so it must not
        # be divided by the batch size again.
        print(
            f"{train_step}/{len(train_loader)}, "
            f"Train_loss: {train_loss.item():.4f}")

        train_metric = dice_metric(outputs, label)
        epoch_metric_train += train_metric
        print(f'Train_dice: {train_metric:.4f}')

    print('-'*20)

    # The loop body never ran, so there is nothing to average. Fail with an
    # explicit message instead of an opaque ZeroDivisionError; substituting a
    # small non-zero divisor would only hide the fact that no data was loaded.
    if train_step == 0:
        raise ValueError(
            "train_loader yielded no batches; check how the training "
            "dataset was built (e.g. that its file lists are not empty)."
        )

    train_epoch_loss /= train_step
    print(f'Epoch_loss: {train_epoch_loss:.4f}')
    save_loss_train.append(train_epoch_loss)
    np.save(os.path.join(model_dir, 'loss_train.npy'), save_loss_train)

    epoch_metric_train /= train_step
    print(f'Epoch_metric: {epoch_metric_train:.4f}')
    save_metric_train.append(epoch_metric_train)
    np.save(os.path.join(model_dir, 'metric_train.npy'), save_metric_train)

    if (epoch + 1) % test_interval == 0:
        model.eval()
        with torch.no_grad():
            test_epoch_loss = 0
            epoch_metric_test = 0
            test_step = 0

            for test_data in test_loader:
                test_step += 1

                test_volume = test_data["vol"]
                test_label = test_data["seg"]
                test_label = test_label != 0
                test_volume, test_label = (
                    test_volume.to(device),
                    test_label.to(device),
                )

                test_outputs = model(test_volume)

                test_loss = loss(test_outputs, test_label)
                test_epoch_loss += test_loss.item()
                test_metric = dice_metric(test_outputs, test_label)
                epoch_metric_test += test_metric

            # Same guard as for training: an empty test loader cannot be averaged.
            if test_step == 0:
                raise ValueError(
                    "test_loader yielded no batches; check how the test "
                    "dataset was built (e.g. that its file lists are not empty)."
                )

            test_epoch_loss /= test_step
            print(f'test_loss_epoch: {test_epoch_loss:.4f}')
            save_loss_test.append(test_epoch_loss)
            np.save(os.path.join(model_dir, 'loss_test.npy'), save_loss_test)

            epoch_metric_test /= test_step
            print(f'test_dice_epoch: {epoch_metric_test:.4f}')
            save_metric_test.append(epoch_metric_test)
            np.save(os.path.join(model_dir, 'metric_test.npy'), save_metric_test)

            if epoch_metric_test > best_metric:
                best_metric = epoch_metric_test
                best_metric_epoch = epoch + 1
                torch.save(model.state_dict(), os.path.join(
                    model_dir, "best_metric_model.pth"))

            # Report the epoch mean, not the Dice value of the last test batch.
            print(
                f"current epoch: {epoch + 1} current mean dice: {epoch_metric_test:.4f}"
                f"\nbest mean dice: {best_metric:.4f} "
                f"at epoch: {best_metric_epoch}"
            )

print(
    f"train completed, best_metric: {best_metric:.4f} "
    f"at epoch: {best_metric_epoch}")
It looks like your train_loader is empty.
The only way `train_step` can be zero is if you never enter the for loop `for batch_data in train_loader:`, which means `train_loader` is empty. Make sure that you set your dataset up correctly.