# Using the UCI data to reach a score of 0.1311 on the LB -- just for fun.
import pandas as pd
# Column names assigned to the four feature columns of both data sets.
FEATURE_COLUMNS = ["monthsF", "donations", "volumnCC", "monthL"]


def combined_key(frame):
    """Return one "-"-joined string key per row of *frame*.

    The key is built from the four ``FEATURE_COLUMNS`` after converting each
    value to ``str``, e.g. ``"2-50-12500-98"``.  Two rows get the same key
    exactly when the string forms of all four features are identical.

    Args:
        frame: DataFrame containing every column in ``FEATURE_COLUMNS``.

    Returns:
        A Series of string keys sharing ``frame``'s index.
    """
    first, *rest = FEATURE_COLUMNS
    key = frame[first].astype(str)
    for column in rest:
        key = key + "-" + frame[column].astype(str)
    return key.rename("combined")


def lookup_donation_rates(source, test):
    """Return, for each row of *test*, the mean ``target`` of matching source rows.

    Rows match when their ``combined_key`` values are equal.  A test row whose
    key never occurs in *source* receives the overall mean of ``target``
    instead of raising or producing NaN.

    Args:
        source: DataFrame with ``FEATURE_COLUMNS`` and a ``target`` column.
        test: DataFrame with ``FEATURE_COLUMNS``.

    Returns:
        A float Series sharing ``test``'s index.
    """
    target = pd.to_numeric(source["target"])
    rate_by_key = target.groupby(combined_key(source)).mean()
    # Look up each individual key; mapping through the per-key means replaces
    # the original lambda, which returned the whole lookup dict for every row.
    rates = combined_key(test).map(rate_by_key)
    return rates.fillna(target.mean())


if __name__ == "__main__":
    source = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data")
    test = pd.read_csv("https://s3.amazonaws.com/drivendata/data/2/public/5c9fa979-5a84-45d6-93b9-543d1a0efc41.csv")
    source.columns = FEATURE_COLUMNS + ["target"]
    test.columns = ["id"] + FEATURE_COLUMNS
    test["Made Donation in March 2007"] = lookup_donation_rates(source, test)
    test[["id", "Made Donation in March 2007"]].to_csv("using_uci_data.csv", index=False)