In the previous post there was code to generate a submission file.
Submission files need to be valid so that we can process them; otherwise they will be rejected by the calculation engine. Below is some code that your file should pass to ensure it will not be rejected.
#----------------------------------------------------------
#
# some R code to test your submission file is valid
#
#----------------------------------------------------------
library(data.table)
# Where the submission lives: the folder (with trailing slash) and the file name.
dataFolder <- "D:/buylowsellhigh/downloaded/"
fileToTest <- 'test_submission_file_1.csv'
# dataFolder already ends in "/", so plain concatenation gives the full path
theDataFile <- paste(dataFolder, fileToTest, sep = "")
# Load the trades with fread (data.table is attached by the library() call above)
tradeData <- fread(theDataFile)
# Basic structural checks on the submission held in `tradeData`.
# The checks run in order; the first one that fails stores its message in
# `ermsg`, and the script then halts with that message. `ermsg` stays 'OK'
# when every check passes.
ermsg <- 'OK'
#these are the column names required
requiredCols <- c('strategyName','keys_pair','enterTime','exitTime','direction','percentPair')
#------------------------
#do some basic checks
#-----------------------
# required columns that are absent from the file (empty when all are present)
missing_cols <- setdiff(requiredCols, colnames(tradeData))
if (length(missing_cols) > 0) {
  ermsg <- paste('\n incorrect column names in the trades file. We are mising:\n',paste(missing_cols,collapse = ","))
} else if (is.character(tradeData$enterTime)) {
  # is.character() always returns a single TRUE/FALSE; comparing class() to a
  # string yields a longer vector for multi-class columns and breaks `if`
  ermsg <- '\n the field enterTime contains non numeric values'
} else if (is.character(tradeData$exitTime)) {
  ermsg <- '\n the field exitTime contains non numeric values'
} else if (is.character(tradeData$direction)) {
  ermsg <- '\n the field direction contains non numeric values'
} else if (is.character(tradeData$percentPair)) {
  ermsg <- '\n the field percentPair contains non numeric values'
} else if (nrow(tradeData) == 0) {
  ermsg <- '\n the file contains no records'
} else if (anyNA(tradeData)) {
  ermsg <- '\n the file contains missing values'
} else if (min(tradeData$exitTime - tradeData$enterTime) <= 0) {
  # every trade must exit strictly after it enters
  ermsg <- '\n the exit time needs to be after the entry time'
}
if (ermsg != 'OK') {
  stop(ermsg)
}
#-------------------------------------
# check that we are not over 100% in
# a trade at any particular time
#-------------------------------------
# For every keys_pair / strategyName / direction combination, each trade is
# expanded into the time slots it occupies (enterTime, enterTime + timeStep,
# ..., up to but excluding exitTime). The percentPair of all trades sharing a
# slot is summed, and the script stops if any slot sums to more than 1.
# NOTE(review): slots are matched on exact time values, so overlaps are only
# detected between trades whose times lie on the same timeStep grid.
timeStep <- 5
# adds the grouping key column to tradeData by reference
tradeData[, xKey := paste(keys_pair, strategyName, direction, sep = "_")]
allKeys <- unique(tradeData$xKey)
for (thisKey in allKeys) {
  pairStrategyTradeData <- subset(tradeData, xKey == thisKey)
  E_numberOfTrades <- nrow(pairStrategyTradeData)
  cat("\n", thisKey, E_numberOfTrades)
  #get the times we are in a trade (one vector of slot times per trade)
  slotTimes <- lapply(seq_len(E_numberOfTrades), function(tradeNumber) {
    firstSlot <- pairStrategyTradeData$enterTime[tradeNumber]
    # a trade shorter than one time step still occupies its entry slot;
    # without the max() seq() fails with "wrong sign in 'by' argument"
    lastSlot <- max(firstSlot, pairStrategyTradeData$exitTime[tradeNumber] - timeStep)
    seq(from = firstSlot, to = lastSlot, by = timeStep)
  })
  allTimes1 <- do.call(c, slotTimes)
  # repeat each trade's percentage once per slot it occupies
  allPercent1 <- rep(pairStrategyTradeData$percentPair, lengths(slotTimes))
  #make sure the max percent at any one time is not > 1
  d <- data.frame(allTimes1 = allTimes1, allPercent1 = allPercent1)
  percentByTime <- aggregate(allPercent1 ~ allTimes1, data = d, FUN = sum)
  posPercents <- percentByTime$allPercent1
  maxPercent <- max(posPercents)
  if (maxPercent > 1) {
    # first offending slot, looked up in the aggregated table the index refers to
    aBadTime <- percentByTime$allTimes1[which(posPercents > 1)[1]]
    ermsg <- paste("\n you cannot be more than 100% in a trade at any one time:\n",thisKey,"\ntime=",aBadTime,"\npercent =",maxPercent)
    stop(ermsg)
  }
} # thisKey
cat('\nthe file appears to be OK!')
Update 17th Oct 2019.
The file validator is now stricter. The basic checks section in the above code has been expanded to the following set of rules:
# Stricter replacement for the basic checks section. The checks run in order
# and the first one that fails stores its message in `ermsg`; `ermsg` is left
# untouched when every check passes, so it must be initialised beforehand and
# tested afterwards by the surrounding script.
requiredCols <- c('strategyName','keys_pair','enterTime','exitTime','direction','percentPair')
requiredPairs <- paste0('pair_',1:22)
#do some checks
# required columns that are absent from the file (empty when all are present)
missing_cols <- setdiff(requiredCols, colnames(tradeData))
if (length(missing_cols) > 0) {
  ermsg <- paste('\n incorrect column names in the trades file. We are mising:\n',paste(missing_cols,collapse = ","))
} else if (is.character(tradeData$enterTime)) {
  # is.character() always returns a single TRUE/FALSE; comparing class() to a
  # string yields a longer vector for multi-class columns and breaks `if`
  ermsg <- '\n the field enterTime contains non numeric values'
} else if (is.character(tradeData$exitTime)) {
  ermsg <- '\n the field exitTime contains non numeric values'
} else if (is.character(tradeData$direction)) {
  ermsg <- '\n the field direction contains non numeric values'
} else if (is.character(tradeData$percentPair)) {
  ermsg <- '\n the field percentPair contains non numeric values'
} else if (nrow(tradeData) == 0) {
  ermsg <- '\n the file contains no records'
} else if (anyNA(tradeData)) {
  ermsg <- '\n the file contains missing values'
} else if (min(tradeData$exitTime - tradeData$enterTime) <= 0) {
  # every trade must exit strictly after it enters
  ermsg <- '\n the exit time needs to be after the entry time'
} else if (nrow(tradeData) < 50) {
  ermsg <- '\n not enough trades'
} else if (max(tradeData$direction) > 1) {
  ermsg <- '\n trade direction > 1'
} else if (min(tradeData$direction) < 0) {
  ermsg <- '\n trade direction < 0'
} else if (length(unique(tradeData$direction)) > 2) {
  ermsg <- '\n too many trade directions'
} else if (max(tradeData$enterTime %% 5) > 0) {
  # enter and exit times must both be exact multiples of 5
  ermsg <- '\n wrong times'
} else if (max(tradeData$exitTime %% 5) > 0) {
  ermsg <- '\n wrong times'
} else if (min(tradeData$percentPair) < 0) {
  ermsg <- '\n negative percent pairs'
} else if (!all(requiredPairs %in% tradeData$keys_pair)) {
  # covers both "none of the required pairs" and "only some of them"
  ermsg <- '\n you need to predict for all pairs'
} else if (min(tradeData[, .N, by = c('keys_pair','direction')]$N) < 5) {
  # counts trades per keys_pair/direction combination present in the file
  ermsg <- '\n you need at least 5 trades per pair and direction'
}