# Using UCI data to achieve 0.1311 on the LB, just for fun.
import pandas as pd
SOURCE_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data"
TEST_URL = "https://s3.amazonaws.com/drivendata/data/2/public/5c9fa979-5a84-45d6-93b9-543d1a0efc41.csv"

# Labels assigned by position to both frames so their feature columns share names.
# NOTE(review): confirm both files list the four feature columns in this order.
FEATURE_COLUMNS = ["monthsF", "donations", "volumnCC", "monthL"]
PREDICTION_COLUMN = "Made Donation in March 2007"
OUTPUT_PATH = "using_uci_data.csv"


def _combined_key(frame):
    """Return one "-"-joined string of the feature values per row of *frame*."""
    text = frame[FEATURE_COLUMNS].astype(str)
    first, *rest = FEATURE_COLUMNS
    return text[first].str.cat([text[column] for column in rest], sep="-")


def build_submission(source, test):
    """Predict each test row as the mean target of identical source rows.

    Rows are matched on the exact combination of the four feature columns
    (compared as "-"-joined strings).

    Args:
        source: DataFrame with the ``FEATURE_COLUMNS`` and a ``target`` column.
        test: DataFrame with an ``id`` column and the ``FEATURE_COLUMNS``.

    Returns:
        DataFrame with columns ``id`` and ``Made Donation in March 2007``,
        one row per row of *test*, in the same order.
    """
    target = pd.to_numeric(source["target"])
    rates = target.groupby(_combined_key(source)).mean()

    # Series.map yields NaN for keys missing from `rates`; fall back to the
    # overall source mean instead of raising KeyError on unseen combinations.
    predictions = _combined_key(test).map(rates).fillna(target.mean())

    submission = test[["id"]].copy()
    submission[PREDICTION_COLUMN] = predictions.to_numpy()
    return submission


# Guarded so importing this module does not trigger downloads or file writes.
if __name__ == "__main__":
    source = pd.read_csv(SOURCE_URL)
    test = pd.read_csv(TEST_URL)
    source.columns = FEATURE_COLUMNS + ["target"]
    test.columns = ["id"] + FEATURE_COLUMNS
    build_submission(source, test).to_csv(OUTPUT_PATH, index=False)