Find and average all data between two date variables in node.js and mongodb

403 views Asked by At

I have a mongodb collection I made downloading all historical trades of Ethereum from GDAX. I'm currently trying to backtest trade algorithms, and I want to start with a simple ten day average. The Data in the collection looks something like this

[{
    "time": "2014-11-07T22:19:28.578544Z",
    "trade_id": 74,
    "price": "10.00000000",
    "size": "0.01000000",
    "side": "buy"
}, {
    "time": "2014-11-07T01:08:43.642366Z",
    "trade_id": 73,
    "price": "100.00000000",
    "size": "0.01000000",
    "side": "sell"
}]

The data is saved in ISO format, not entirely sure if it's string, or ISO date, but attempting to convert the start and end time variables results in the adding of Eastern Standard Time at the end, which the saved data does not have. The code below will do the following. First it looks for the highest trade_id in the collection. Then, starting with trade_id 1, it loops through each item in the collection, getting all items in a range 10 days prior to the date. Here is where I'm stuck. I've console.log'd the start and end date variables I've created which are 10 days apart, but when I query the collection using those variables I get no results. Using two random dates picked, in single quotes does work, so I don't see the difference in using two variables. Also, I would like to also calculate the average of all the price results in the same query if possible. Would appreciate any suggestions or help with this. Thanks.

var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://localhost:27017/EthHistory";

function getNextTradeID(maxID, i){
    mongo.collection("EthTestData").find({ 
        trade_id: i }).toArray(function(err, newid) { 
            if (err) {
                console.log(err);
                return;
            }
            i = i + 1;
            var startTimeAvg = new Date(Date.parse(newid[0].time));
            var endTimeAvg = new Date(startTimeAvg- (60*60*24*10*1000))
            getTenDayAverage(maxID, i, startTimeAvg, endTimeAvg);
    });
}

function getTenDayAverage(maxID, i, startTimeAvg, endTimeAvg) {
    console.log(startTimeAvg, endTimeAvg)
    mongo.collection("EthTestData").find({ 
        time:{ 
            $gte: (endTimeAvg), 
            $lte: (startTimeAvg) }}).toArray(function(err, result) {    
        if (err) {
            console.log(err);
            return;
        }
        console.log(result)
        tradeIDLoop(maxID, i);
    });
}

function tradeIDLoop(maxID, i) {        
    if (maxID < i) {
        mongo.close
    }
    else {
        getNextTradeID(maxID, i);
    }
};

function getMaxTradeID() {
    mongo.collection("EthTestData").find().sort({trade_id:-1}).limit(1).toArray(function(err, result) { 
        if (err) {
            console.log(err);
            return;
        }
        var i = 1  
        var maxID = Number(result[0].trade_id);    
        tradeIDLoop(maxID, i);        
    });   
};

MongoClient.connect(url, function(err, db) {
    if (err) {
        console.log(err);
        return;
    }
    mongo = db
    getMaxTradeID();   
        
});

Finally I want to update the item in the collection that I started the loop, with the result, and then move on to the next record. If this can be included with the 10 days of data select, and average, maybe someone can suggest how, otherwise I'll just use another query. Trying to make this as efficient as possible, since I have to do this for 19 million plus records

1

There are 1 answers

0
Superdawg On

Apparently the information is stored as ISOString. Using the .toISOString() function now gives results. However, the loop slows significantly, almost to a crawl after about 20k loops, once it's pulling a full 10 days of data. Perhaps this method won't be viable for backtesting.