I am currently building a neural network classifier to practice and learn more about neural networks and PyTorch. And yes, I have heard before that classical machine learning methods tend to beat neural networks on tabular data like this; I have already used other methods such as random forest and gradient boosting classifiers.
I tried it myself: even training on a single batch, with a simplified two-layer network and no regularization, the model is stuck at 54-55% test accuracy on my 3-way holdout. I did later use more batches, but the results were the same or worse.
Is there something wrong with my preprocessing, or does the dataset simply not suit a neural network classifier? Or have I just run into a practical example of classical ML methods doing better than neural networks?
Relevant code (I removed the Ax Bayesian optimisation, as it didn’t help much):
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter

rng = 0
# handle_unknown lets the test set contain categories never seen during training
ordinalEncoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
labelEncoder = LabelEncoder()
standardScaler = StandardScaler()
# categorical columns, encoded with the OrdinalEncoder below
str_cat = [
    "funder", "installer", "wpt_name", "basin", "subvillage", "region",
    "lga", "ward", "recorded_by", "public_meeting", "scheme_management",
    "scheme_name", "permit", "extraction_type", "extraction_type_group",
    "extraction_type_class", "management", "management_group", "payment",
    "payment_type", "water_quality", "quality_group", "quantity",
    "quantity_group", "source", "source_type", "source_class",
    "waterpoint_type", "waterpoint_type_group", "date_recorded",
]
# every categorical column is read as a pandas string dtype
d = {col: "string" for col in str_cat}
train_batch_size = 100
test_batch_size = 100
test_values = pd.read_csv("./test-set-values.csv")
train_values = pd.read_csv("./training-set-values.csv")
train_labels = pd.read_csv("./training-set-labels.csv")
length = train_values.shape[0]
train_labels['status_group'] = labelEncoder.fit_transform(train_labels.status_group.astype('string'))
train_values[str_cat] = ordinalEncoder.fit_transform(train_values[str_cat].astype(d).fillna(""))
# transform (not fit_transform) so test categories map to the same codes as in training
test_values[str_cat] = ordinalEncoder.transform(test_values[str_cat].astype(d).fillna(""))
train_values = train_values.astype('float32').values
test_values = test_values.astype('float32').values
train_labels = train_labels.status_group.astype('long').values
x_train, x_test, y_train, y_test = map(
    torch.from_numpy,
    train_test_split(train_values, train_labels, test_size=0.2, random_state=rng, shuffle=True),
)
train = torch.utils.data.TensorDataset(x_train, y_train)
test = torch.utils.data.TensorDataset(x_test, y_test)
# train_test_split already shuffled the rows, hence shuffle=False here
train_loader = torch.utils.data.DataLoader(train, batch_size=train_batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=test_batch_size, shuffle=False)
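# Note: standardScaler is instantiated above but never applied. A minimal
# sketch of actually using it (fit on the training split only, then
# transform both splits before rebuilding the tensors):
# x_train = torch.from_numpy(standardScaler.fit_transform(x_train.numpy()).astype('float32'))
# x_test = torch.from_numpy(standardScaler.transform(x_test.numpy()).astype('float32'))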
class Net(nn.Module):
    # def __init__(self, hidden1, hidden2, dropoutProbabilities1):
    # def __init__(self, hidden1, hidden2):
    def __init__(self, hidden1):
        super(Net, self).__init__()
        # self.layer1 = nn.Linear(38, hidden1)
        # self.layer2 = nn.Linear(hidden1, hidden2)
        # self.layer3 = nn.Linear(hidden2, 3)
        self.layer1 = nn.Linear(40, 3)  # note: hidden1 is unused in this configuration
        # self.layer2 = nn.Linear(hidden1, 3)  # layer 2 configuration if layer 3 is not used
        self.layer4 = nn.Linear(3, 3)
        self.activation = nn.ReLU()
        # self.dropout1 = nn.Dropout(p=dropoutProbabilities1)
        # self.dropout2 = nn.Dropout(p=dropoutProbabilities2)
        # self.batchNormalisation1 = nn.BatchNorm1d(hidden1)
        # self.batchNormalisation2 = nn.BatchNorm1d(hidden2)

    def forward(self, x):
        x = self.layer1(x)
        # x = self.batchNormalisation1(x)
        x = self.activation(x)
        # x = self.layer2(x)
        # x = self.activation(x)
        # x = self.dropout1(x)
        # x = self.batchNormalisation2(x)
        # x = self.layer3(x)
        # x = self.activation(x)
        # x = self.dropout1(x)
        x = self.layer4(x)
        return x
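# A runnable sketch of the commented-out wider variant above (layer1 -> hidden1
# -> 3, the "layer 2 configuration if layer 3 is not used"), kept as a separate
# class so it can be swapped in for Net:
class WideNet(nn.Module):
    def __init__(self, hidden1):
        super(WideNet, self).__init__()
        self.layer1 = nn.Linear(40, hidden1)
        self.layer2 = nn.Linear(hidden1, 3)
        self.activation = nn.ReLU()

    def forward(self, x):
        return self.layer2(self.activation(self.layer1(x)))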
# numberOfBatchesToCapture is 0-based: the loops below break after batch
# index numberOfBatchesToCapture, i.e. numberOfBatchesToCapture + 1 batches
# numberOfBatchesToCapture = 20
def train(net, parameterization, trainloader, numberOfBatchesToCapture):
    from IPython.display import clear_output, display
    optimizer = optim.Adam(
        net.parameters(),
        lr=parameterization['lr'],
        weight_decay=parameterization['weight_decay'],
        # maximize=True
    )
    criterion = nn.CrossEntropyLoss()
    for epoch in range(parameterization['epochs']):
        # for epoch in range(700):
        # display("Epoch: " + str(epoch))
        loss_array = []
        for batchNumber, data in enumerate(trainloader):
            inputs, labels = data
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss_array.append(float(loss.item()))
            loss.backward()
            optimizer.step()
            # log against the global batch index (batches seen so far)
            writer.add_scalar('training loss',
                              loss.item(),
                              epoch * len(trainloader) + batchNumber)
            if batchNumber == numberOfBatchesToCapture:
                break  # train on only numberOfBatchesToCapture + 1 batches
        clear_output(wait=True)
        print("epoch: ", epoch)
        for loss in loss_array:
            print(" loss: ", loss)
    return net
def evaluate(net, testloader, numberOfBatchesToCapture):
    correct = 0
    total = 0
    with torch.no_grad():
        for j, data in enumerate(testloader):
            inputs, labels = data
            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)
            # print("predicted: ", predicted)
            # print("labels: ", labels)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # print(str(correct) + "/" + str(total))
            if j == numberOfBatchesToCapture:
                break  # evaluate on only numberOfBatchesToCapture + 1 batches
    print('Accuracy of the network: %d %%' % (100 * correct / total))
    return 100 * correct / total
def train_evaluate(parameters):
    # net = Net(parameters["hidden1"], parameters['hidden2'], parameters['dropoutProbabilities1'])
    # net = Net(parameters["hidden1"], parameters['hidden2'])
    net = Net(parameters['hidden1'])
    net = train(net, parameters, train_loader, parameters["numberOfBatchesToCapture"])
    return evaluate(net, test_loader, parameters["numberOfBatchesToCapture"])
parameters={"lr":3e-4,'hidden1':1000,
# 'hidden2':1000,
'epochs':1000,
"weight_decay":0,
"numberOfBatchesToCapture":20,
}
net = Net(parameters["hidden1"]
# parameters['hidden2']
)
writer = SummaryWriter('runs/4')
net = train(net, parameters, train_loader,parameters["numberOfBatchesToCapture"])
evaluate(net, test_loader,parameters["numberOfBatchesToCapture"])
%load_ext tensorboard
%tensorboard --logdir=runs
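For context, here is a quick sketch (reusing the train_labels array from the preprocessing above) that computes the majority-class baseline, i.e. the accuracy of always predicting the most common label; a test accuracy close to it would suggest the network is not learning much beyond the dominant class:
_, counts = np.unique(train_labels, return_counts=True)
# fraction of rows belonging to the most frequent status_group label
print("majority-class baseline: %.2f%%" % (100 * counts.max() / counts.sum()))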
My notebook with the rest of my attempts at this competition problem.