Congratulations to Samuel Messina for winning the $1,000 milestone prize. The write-up of his technique can be found here.
A blog about playing with data and other, possibly unrelated, stuff
Buy Low Sell High - leaderboard and rule clarification
# Some R code to test that your submission file is valid.
#
# Reads the trades file, runs basic structural checks (required columns,
# numeric fields, no missing values, exit after entry) and then verifies that
# no keys_pair / strategyName / direction combination is ever more than 100%
# invested at a single time step. The outcome is left in `ermsg` ('OK' when
# every check passes) and is printed at the end.
library(data.table) # provides fread() and the DT[, col := ...] syntax used below

#define where the data is
fileToTest <- 'test_submission_file_1.csv'
dataFolder <- "D:/buylowsellhigh/downloaded/"
theDataFile <- paste0(dataFolder, fileToTest)

#read in the data
tradeData <- fread(theDataFile)

#set error message
ermsg <- 'OK'

#these are the column names required
requiredCols <- c('strategyName','keys_pair','enterTime','exitTime','direction','percentPair')

#do some basic checks; the first failing check decides the message
missing_cols <- setdiff(requiredCols, colnames(tradeData))
if (length(missing_cols) > 0) {
  ermsg <- paste('\n incorrect column names in the trades file. We are mising:\n',paste(missing_cols,collapse = ","))
} else if (nrow(tradeData) == 0) {
  # tested before the type checks so that a header-only file is reported as
  # empty rather than as having a badly typed column
  ermsg <- '\n the file contains no records'
} else if (is.character(tradeData$enterTime)) {
  # is.character() rather than class(x) == "character": the latter yields a
  # length > 1 condition (an error in recent R) for multi-class columns
  ermsg <- '\n the field enterTime contains non numeric values'
} else if (is.character(tradeData$exitTime)) {
  ermsg <- '\n the field exitTime contains non numeric values'
} else if (is.character(tradeData$direction)) {
  ermsg <- '\n the field direction contains non numeric values'
} else if (is.character(tradeData$percentPair)) {
  ermsg <- '\n the field percentPair contains non numeric values'
} else if (any(is.na(tradeData))) {
  ermsg <- '\n the file contains missing values'
} else if (any(tradeData$exitTime <= tradeData$enterTime)) {
  ermsg <- '\n the exit time needs to be after the entry time'
}

# The exposure check needs clean, numeric data, so it only runs when all the
# basic checks above have passed.
if (ermsg == 'OK') {
  # check that we are not over 100% in
  # a trade at any particular time
  timeStep <- 5

  # one key per pair / strategy / direction combination (column added by reference)
  tradeData[, xKey := paste(keys_pair, strategyName, direction, sep = "_")]
  allKeys <- unique(tradeData$xKey)

  for (thisKey in allKeys) {
    pairStrategyTradeData <- subset(tradeData, xKey == thisKey)
    E_numberOfTrades <- nrow(pairStrategyTradeData)

    #get the times we are in a trade: every time step from enterTime up to,
    #but not including, exitTime
    timesPerTrade <- lapply(seq_len(E_numberOfTrades), function(tradeNumber) {
      firstStep <- pairStrategyTradeData$enterTime[tradeNumber]
      # a trade shorter than one time step still occupies its entry step;
      # without the max() seq() would fail with a "wrong sign in 'by'" error
      lastStep <- max(firstStep, pairStrategyTradeData$exitTime[tradeNumber] - timeStep)
      seq(from = firstStep, to = lastStep, by = timeStep)
    })
    allTimes1 <- unlist(timesPerTrade)
    allPercent1 <- rep(pairStrategyTradeData$percentPair, times = lengths(timesPerTrade))

    #make sure the max percent at any one time is not > 1
    d <- data.frame(allTimes1, allPercent1)
    exposure <- aggregate(allPercent1 ~ allTimes1, data = d, FUN = sum)
    maxPercent <- max(exposure$allPercent1)
    if (maxPercent > 1) {
      # look the offending time up in the aggregated table: its row numbers do
      # not correspond to rows of the un-aggregated frame `d`
      aBadTime <- exposure$allTimes1[which(exposure$allPercent1 > 1)[1]]
      ermsg <- paste("\n you cannot be more than 100% in a trade at any one time:\n",thisKey,"\ntime=",aBadTime,"\npercent =",maxPercent)
      break # report the first violation found
    }
  } # thisKey
}

# report the outcome
if (ermsg == 'OK') {
  cat('\nthe file appears to be OK!')
} else {
  cat(ermsg)
}
# Extended validation of the trades table. This fragment expects `tradeData`
# (a data.table, see the `.N` / `by` call below) and `ermsg` to exist already;
# the first failing check stores its message in `ermsg`.
# NOTE(review): the if/else chain below is not closed within the visible part
# of the file and the last message looks cut off - confirm against the
# complete script.
# column names every submission must contain
requiredCols <- c('strategyName','keys_pair','enterTime','exitTime','direction','percentPair')
# the pair identifiers that must be covered: pair_1 ... pair_22
requiredPairs <- paste0('pair_',1:22)
# run the checks in order
if (!identical(sort(intersect(colnames(tradeData),requiredCols)) , sort(requiredCols))) {
# at least one required column is absent; list the missing ones in the message
missing_cols <- setdiff(requiredCols,colnames(tradeData))
ermsg <- paste('\n incorrect column names in the trades file. We are mising:\n',paste(missing_cols,collapse = ","))
} else if (class(tradeData$enterTime) == "character") {
# a column of class character is reported as containing non numeric values
ermsg <- '\n the field enterTime contains non numeric values'
} else if (class(tradeData$exitTime) == "character") {
ermsg <- '\n the field exitTime contains non numeric values'
} else if (class(tradeData$direction) == "character") {
ermsg <- '\n the field direction contains non numeric values'
} else if (class(tradeData$percentPair) == "character") {
ermsg <- '\n the field percentPair contains non numeric values'
} else if (nrow(tradeData) == 0){
ermsg <- '\n the file contains no records'
} else if(length(which(is.na(tradeData)==TRUE)) > 0) {
# any NA anywhere in the table fails the file
ermsg <- '\n the file contains missing values'
} else if (min(tradeData$exitTime - tradeData$enterTime) <= 0) {
# every trade must exit strictly after it enters
ermsg <- '\n the exit time needs to be after the entry time'
} else if (nrow(tradeData) < 50){
# fewer than 50 rows (trades) in total
ermsg <- '\n not enough trades'
} else if (max(tradeData$direction) > 1){
# direction values must lie between 0 and 1 ...
ermsg <- '\n trade direction > 1'
} else if (min(tradeData$direction) < 0){
ermsg <- '\n trade direction < 0'
} else if (length(unique(tradeData$direction)) > 2){
# ... and take at most two distinct values
# NOTE(review): a fractional value such as 0.5 would pass these three checks
ermsg <- '\n too many trade directions'
} else if (max(tradeData$enterTime %% 5) > 0) {
# enter and exit times must be multiples of 5
ermsg <- '\n wrong times'
} else if (max(tradeData$exitTime %% 5) > 0) {
ermsg <- '\n wrong times'
} else if (min(tradeData$percentPair) < 0) {
ermsg <- '\n negative percent pairs'
} else if ( length(intersect(unique(tradeData$keys_pair),requiredPairs)) == 0 ){
# none of the required pairs appears in keys_pair
ermsg <- '\n you need to predict for all pairs'
} else if ( !identical(sort(intersect(unique(tradeData$keys_pair),requiredPairs)),sort(requiredPairs))){
# at least one required pair is absent (this condition also covers the previous one)
ermsg <- '\n you need to predict for all pairs'
} else if (min(tradeData[,.N,by = c('keys_pair','direction')]$N) < 5) {
# row count per (keys_pair, direction) group; the smallest group needs at least 5 rows
# NOTE(review): only combinations present in the data form a group, so a
# pair/direction with zero trades is not detected here - confirm intent
ermsg <- '\n you need at least 5 trades pairs per '