Has anyone tried using a neural network classifier?

I am currently building a neural network classifier to practice and learn more about neural networks and PyTorch. And yes, I have heard before that classical machine learning methods are better than neural networks. I have already used other methods such as random forest and gradient boosting classifiers.

I have tried it myself, and even with a single batch and a simplified 2-layer neural network with no regularization, it is stuck at 54-55% test accuracy with my 3-way holdout. I did in fact use more batches later, but the results were the same or worse.

Is there something wrong with my preprocessing, or is the dataset simply not a good fit for a neural network classifier? Or have I just gained a practical example of classical ML methods doing better than a NN?

Relevant code (I removed the Ax Bayesian optimisation, as it didn't help much):

import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
from ax import optimize
from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.utils.notebook.plotting import render
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from torch import functional, nn, optim
import math
from torch.utils.tensorboard import SummaryWriter

# Module-level preprocessing helpers shared by the script below.
# NOTE(review): `rng` looks like a random seed, but no visible line uses it — confirm.
rng =0
# Turns the string feature columns listed in `str_cat` into numeric codes.
ordinalEncoder  = OrdinalEncoder()
# Turns the `status_group` target labels into integer class ids.
labelEncoder = LabelEncoder()
# NOTE(review): instantiated but never applied in the visible code, so the
# features reach the network unscaled — confirm whether scaling was intended.
standardScaler = StandardScaler()

# Column name -> pandas dtype for the text-valued feature columns.
# Every entry maps to the pandas "string" dtype.
# The original literal was missing its closing brace; all visible entries are
# kept, in their original order.
d = {
    "funder": "string",
    "installer": "string",
    "wpt_name": "string",
    "basin": "string",
    "subvillage": "string",
    "region": "string",
    "lga": "string",
    "ward": "string",
    "public_meeting": "string",
    "recorded_by": "string",
    "scheme_management": "string",
    "scheme_name": "string",
    "permit": "string",
    "extraction_type": "string",
    "extraction_type_group": "string",
    "extraction_type_class": "string",
    "management": "string",
    "management_group": "string",
    "payment": "string",
    "payment_type": "string",
    "water_quality": "string",
    "quality_group": "string",
    "quantity": "string",
    "quantity_group": "string",
    "source": "string",
    "source_type": "string",
    "source_class": "string",
    "waterpoint_type": "string",
    "waterpoint_type_group": "string",
    "date_recorded": "string",
}

# Names of the categorical text columns that get ordinal-encoded.
# The original literal was missing its closing bracket; the visible names are
# kept in their original order.
# NOTE(review): the paste looks truncated here — further column names may have
# followed "source_class". Confirm against the full notebook.
str_cat = [
    "wpt_name",
    "basin",
    "subvillage",
    "region",
    "lga",
    "ward",
    "recorded_by",
    "scheme_management",
    "scheme_name",
    "permit",
    "extraction_type",
    "extraction_type_class",
    "management",
    "payment",
    "payment_type",
    "water_quality",
    "quality_group",
    "quantity_group",
    "source",
    "source_type",
    "source_class",
]

# Mini-batch sizes used when the DataLoaders are built.
train_batch_size = 100
test_batch_size = 100

# Raw tables, read from paths relative to the working directory.
test_values = pd.read_csv("./test-set-values.csv")
train_values = pd.read_csv("./training-set-values.csv")
train_labels = pd.read_csv("./training-set-labels.csv")
length = train_values.shape[0]

# Target: status_group strings -> integer class ids.
train_labels["status_group"] = labelEncoder.fit_transform(
    train_labels.status_group.astype("string")
)

# Fit the category -> code mapping on the training data ONLY and reuse it for
# the test set. Re-fitting on the test set (as the original did) assigns
# different codes to the same category in the two tables. Categories that only
# occur in the test set are mapped to -1 instead of raising.
ordinalEncoder.set_params(handle_unknown="use_encoded_value", unknown_value=-1)
# `.astype("string")` replaces `.astype(d)`: passing a dtype mapping whose keys
# are not all columns of the selected frame raises KeyError, and every entry
# of `d` is "string" anyway, so the result is the same.
train_values[str_cat] = ordinalEncoder.fit_transform(
    train_values[str_cat].astype("string").fillna("")
)
test_values[str_cat] = ordinalEncoder.transform(
    test_values[str_cat].astype("string").fillna("")
)

# NOTE(review): no feature scaling is applied before the data reaches the
# network even though a StandardScaler is instantiated above — confirm whether
# that is intentional.
train_values = train_values.astype("float32").values
test_values = test_values.astype("float32").values
# Explicit int64: 'long' is platform dependent in NumPy, while
# nn.CrossEntropyLoss expects 64-bit integer class indices.
train_labels = train_labels.status_group.astype("int64").values

# Convert the four NumPy splits into torch tensors.
# NOTE(review): this statement is truncated in the visible source — the second
# argument to map() and the closing parenthesis are missing, so where the
# train/test split comes from cannot be confirmed from here.
x_train,x_test,y_train,y_test = map(torch.from_numpy, 

# Pair features with labels so the loaders yield (inputs, labels) tuples.
# NOTE: the name `train` is rebound later in the file by `def train(...)`;
# `train_loader` is already built by then, so it is unaffected.
train = torch.utils.data.TensorDataset(x_train,y_train)
test = torch.utils.data.TensorDataset(x_test,y_test)
# Both loaders iterate in stored order (shuffle is disabled for training too).
train_loader = torch.utils.data.DataLoader(train,batch_size=train_batch_size,shuffle = False)
test_loader = torch.utils.data.DataLoader(test,batch_size=test_batch_size,shuffle = False)
class Net(nn.Module):
    """Minimal feed-forward classifier: Linear(40 -> 3), ReLU, Linear(3 -> 3).

    This is the deliberately stripped-down variant of the model; the earlier
    experiments with wider hidden layers, dropout and batch normalisation are
    disabled.

    Args:
        hidden1: Accepted so existing call sites keep working, but currently
            unused — the layer sizes are fixed at 40 -> 3 -> 3.
    """

    def __init__(self, hidden1):
        # nn.Module.__init__ must run before any sub-module is assigned;
        # without it, `self.layer1 = nn.Linear(...)` raises AttributeError.
        super().__init__()
        self.layer1 = nn.Linear(40, 3)
        self.layer4 = nn.Linear(3, 3)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 3) for `x` of shape (batch, 40)."""
        x = self.activation(self.layer1(x))
        # No softmax here: nn.CrossEntropyLoss (used in train) expects logits.
        return self.layer4(x)
#base 0
# numberOfBatchesToCapture = 20

def train(net, parameterization, trainloader, numberOfBatchesToCapture):
    """Train `net` with Adam and cross-entropy loss, logging to TensorBoard.

    Args:
        net: Model to optimise in place.
        parameterization: Mapping that must contain ``"epochs"``. An optional
            ``"lr"`` entry overrides Adam's default learning rate.
        trainloader: Iterable of ``(inputs, labels)`` batches; must support
            ``len()``.
        numberOfBatchesToCapture: Zero-based index of the last batch used in
            each epoch (so ``N`` means ``N + 1`` batches). ``None`` never
            matches a batch index, so every batch is used.

    Returns:
        The same ``net`` instance after training.

    Relies on a module-level ``writer`` (a SummaryWriter) existing before this
    function is called.
    """
    # NOTE(review): the optimizer arguments were truncated in the original
    # source. Falling back to Adam's default learning rate (1e-3) unless the
    # caller supplies "lr" — confirm against the full notebook.
    optimizer = optim.Adam(
        net.parameters(),
        lr=parameterization.get("lr", 1e-3),
    )
    criterion = nn.CrossEntropyLoss()
    batches_per_epoch = len(trainloader)

    net.train()
    for epoch in range(parameterization['epochs']):
        loss_array = []
        for batchNumber, (inputs, labels) in enumerate(trainloader):
            # Standard optimisation step. Without zero_grad/backward/step the
            # weights never change and accuracy stays at its initial value.
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            loss_array.append(loss_value)
            # The global step must advance per batch; the original used the
            # epoch index here, which collapsed all batches of an epoch onto
            # a single step.
            writer.add_scalar('training loss',
                              loss_value,
                              epoch * batches_per_epoch + batchNumber)
            if batchNumber == numberOfBatchesToCapture:
                break  # stop after the batch with this zero-based index
        print("epoch: ", epoch)
        for loss_value in loss_array:
            print(" loss: ", loss_value)
    return net

def evaluate(net, testloader, numberOfBatchesToCapture=None):
    """Return the classification accuracy of `net` on `testloader`, in percent.

    Args:
        net: Callable mapping an input batch to per-class scores. When it is an
            ``nn.Module`` in training mode, it is switched to eval mode for the
            duration of the call (so dropout / batch norm behave
            deterministically) and switched back afterwards.
        testloader: Iterable of ``(inputs, labels)`` batches.
        numberOfBatchesToCapture: Zero-based index of the last batch to score
            (so ``N`` means ``N + 1`` batches). Defaults to ``None``, which
            scores every batch.

    Returns:
        Accuracy as a float in the range 0-100.

    Raises:
        ZeroDivisionError: If `testloader` yields no samples.
    """
    correct = 0
    total = 0
    restore_training = isinstance(net, torch.nn.Module) and net.training
    if restore_training:
        net.eval()
    try:
        with torch.no_grad():
            for j, (inputs, labels) in enumerate(testloader):
                outputs = net(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                if j == numberOfBatchesToCapture:
                    break  # stop after the batch with this zero-based index
    finally:
        if restore_training:
            net.train()

    accuracy = 100 * correct / total
    print('Accuracy of the network: %d %%' % accuracy)
    return accuracy

def train_evaluate(parameters):
    """Build a fresh ``Net``, train it, and return its accuracy on ``test_loader``.

    Args:
        parameters: Mapping with ``"hidden1"`` plus whatever ``train`` reads
            (``"epochs"``). ``"numberOfBatchesToCapture"`` is optional.

    Returns:
        The value returned by ``evaluate`` (accuracy in percent).
    """
    # train() and evaluate() both take a batch limit; the original call sites
    # omitted it and raised TypeError. When the key is absent, None is passed,
    # which never equals a batch index, so every batch is processed.
    batch_limit = parameters.get("numberOfBatchesToCapture")
    net = Net(parameters['hidden1'])
    net = train(net, parameters, train_loader, batch_limit)
    return evaluate(net, test_loader, batch_limit)

            # 'hidden2':1000, 

net = Net(parameters["hidden1"]
          # parameters['hidden2']
writer = SummaryWriter('runs/4')

net = train(net, parameters, train_loader,parameters["numberOfBatchesToCapture"])
evaluate(net, test_loader,parameters["numberOfBatchesToCapture"])

%load_ext tensorboard
%tensorboard --logdir=runs

My notebook with the rest of my attempts at this competition problem