2019 << previous post (getting the data) next>> |
Below is some R code to generate a valid submission file for a very simple strategy.
It is up to you to find the strategies that will give the highest returns.
In the next post we'll introduce the leaderboard where you can upload your strategies.
############################################################
#
# sample R code for the 2019 Melbourne Datathon that will
# generate a valid submission file.
#
############################################################
library(data.table)
#----------------------------------------------
# Define where the data is & load it.
# dataFolder must end with "/" because file names are appended
# to it with paste0(); the submission file is written to the
# same folder at the end of the script.
#----------------------------------------------
dataFolder <- "D:/buylowsellhigh/downloaded/"
theDataFile <- paste0(dataFolder,"melbdatathon2019_buylowsellhigh.csv")
# dt is the working data.table; the script reads the columns keys_pair,
# minutesSinceStart, Lpred7b and tradePrice from it
dt <- fread(theDataFile)
#-------------------------
# Strategy settings
#-------------------------
feePerc <- 0.0015 # fixed const for the competition; charged on both the buy and the sell price in the profit estimate
longThresh <- 0.002 # a row is an entry candidate when Lpred7b >= longThresh (equality counts)
longRemainWindow <- 12 # the number of hours to remain in a trade (converted to minutes when exit times are set)
#---------------------------------------------------
# longEntry is 1 on rows whose prediction reaches the
# entry threshold and 0 on every other row (rows with
# a missing prediction also get 0)
#---------------------------------------------------
dt[, longEntry := as.numeric(!is.na(Lpred7b) & Lpred7b >= longThresh)]
#----------------------
# Schedule the exits: every entry row produces one exit
# record for the same pair, longRemainWindow hours
# (x 60 minutes) after the entry time
#----------------------
exitLongs <- subset(dt, longEntry == 1,
                    select = c('keys_pair', 'minutesSinceStart'))
exitLongs[, `:=`(
  minutesSinceStart = minutesSinceStart + (60 * longRemainWindow),
  longExit = 1
)]
#-------------------------------
# Left-join the scheduled exits onto the full table
# (pair + time is the join key; every row of dt is kept)
#-------------------------------
myKeys <- c("keys_pair", "minutesSinceStart")
setkeyv(exitLongs, myKeys)
setkeyv(dt, myKeys)
dt <- merge(dt, exitLongs, by = myKeys, all.x = TRUE)
#--------------------------------------------------
# longExit becomes 0 when
#  - no exit was scheduled for the row (NA after the join), or
#  - the row is itself an entry (entry wins over exit)
#--------------------------------------------------
dt[is.na(longExit) | longEntry == 1, longExit := 0]
#-----------------------------------------------------
# Number the consecutive runs of longEntry values, make the
# run id specific to a pair, then count within each run:
#   inc  = running count of entries in the run
#   inc1 = running sum of inc (equals 1 only on the first entry)
#-----------------------------------------------------
setkeyv(dt, myKeys)
dt[, grp := rleid(longEntry)]
dt[, grp1 := paste0(keys_pair, grp)]
dt[, c("inc", "inc1") := {
  runCount <- cumsum(longEntry)
  list(runCount, cumsum(runCount))
}, by = grp1]
#--------------------------------------
# BUY on the first entry row of each run, 0 elsewhere
#--------------------------------------
dt[, buy := as.numeric(longEntry == 1 & inc1 == 1)]
#-----------------------------------------------------
# Same run numbering as for the entries, now on longExit:
#   inc  = running count of exits in the run
#   inc1 = running sum of inc (equals 1 only on the first exit)
#-----------------------------------------------------
dt[, grp := rleid(longExit)]
dt[, grp1 := paste0(keys_pair, grp)]
dt[, c("inc", "inc1") := {
  runCount <- cumsum(longExit)
  list(runCount, cumsum(runCount))
}, by = grp1]
#-------------------------------------
# SELL on the first exit row of each run, 0 elsewhere
#-------------------------------------
dt[, sell := as.numeric(longExit == 1 & inc1 == 1)]
#---------------------------------------------------
# Reduce to the event rows: exactly one of BUY / SELL is set
#---------------------------------------------------
d1 <- subset(
  dt,
  xor(buy == 1, sell == 1),
  select = c('keys_pair', 'minutesSinceStart', 'tradePrice', 'buy', 'sell')
)
#------------------------------------------
# Make sure each buy is followed by a sell:
# keep a row only when it differs from the previous event of the
# same pair, so repeated BUYs (or repeated SELLs) collapse to the
# first one and the remaining events alternate.
#
# The first event of a pair has no previous row. Without a fill
# value the lag is NA, the comparison is NA, and subset() drops the
# row - which silently discarded the first BUY (and therefore the
# first trade) of every pair. The fill values below act as a
# virtual previous SELL (buy = 0, sell = 1): a leading BUY is kept,
# a leading SELL with no open position is still removed.
#------------------------------------------
d1[, prevRowBuy := shift(buy, 1, type = "lag", fill = 0), by = keys_pair]
d1[, prevRowSell := shift(sell, 1, type = "lag", fill = 1), by = keys_pair]
d1 <- subset(d1, prevRowBuy != buy & prevRowSell != sell)
#--------------------------------------------------------------
# Pull the price and time of the following event of the same pair
# onto each row (for a buy row that is its sell), and measure how
# long the position is held
#--------------------------------------------------------------
d1[, `:=`(
  sellPrice = shift(tradePrice, 1, type = "lead"),
  minutesSinceStartExit = shift(minutesSinceStart, 1, type = "lead")
), by = keys_pair]
d1[, tradeLength := minutesSinceStartExit - minutesSinceStart]
#----------------------------------------------------------------
# The buy rows now carry the sell time & price, so the sell rows
# can go; a buy without a following event (no exit) is dropped too
#----------------------------------------------------------------
d1 <- subset(d1, !is.na(minutesSinceStartExit) & buy == 1)
#-------------------------------------------------------------------
# Long profit estimate per trade, as a fraction of the entry price:
# the fee is taken off the sell proceeds and added to the buy cost.
# (we only have prices for some pairs though; no stoploss is modelled)
#--------------------------------------------------------------------
d1[, tradeProfit := {
  sellProceeds <- sellPrice * (1 - feePerc)
  buyCost <- tradePrice * (1 + feePerc)
  (sellProceeds - buyCost) / tradePrice
}]
# distribution of the per-trade profit, with break-even marked in red
hist(d1$tradeProfit, breaks = 100, col = 'blue')
abline(v = 0, col = 'red')
#------------------------------
# Build the submission columns and write them as
# <dataFolder><stratName>.csv
#------------------------------
stratName <- 'demo_LongOnly'
d1[, `:=`(
  enterTime = minutesSinceStart,
  exitTime = minutesSinceStartExit,
  direction = 1,
  percentPair = 1,
  strategyName = stratName
)]
d1 <- subset(
  d1,
  select = c('keys_pair', 'enterTime', 'exitTime',
             'direction', 'percentPair', 'strategyName')
)
fwrite(d1, paste0(dataFolder, stratName, '.csv'))