Slow query performance: MongoDB with $lookup and $and/$or

267 views Asked by At

I'm trying to find a way to create an engine that translates GraphQL query filters to MongoDB aggregations while keeping the performance. Our application has the requisite of limiting the results from collection A by applying filters to collection B, C and even D sometimes.

For better understanding, here's a sample about how a filter is translated to MongoDB.

This:

{
  "filter": {
    "return": null,
    "AND": [{
      "customer_WITH": {
        "OR": [{
          "code": "CUSTOMER NAME"
        }, {
          "commercialName_LIKE": "CUSTOMER NAME"
        }, {
          "corporateName_LIKE": "CUSTOMER NAME"
        }]
      }
    }],
    "OR": [{
      "dispatcher_WITH": {
        "company_WITH": {
          "corporateName_LIKE": "COMPANY NAME"
        }
      }
    }, {
      "redispatcher_WITH": {
        "company_WITH": {
          "corporateName_LIKE": "COMPANY NAME"
        }
      }
    }],
    "reversal": null
  }
}

Gets translated to this:

[{
  "$match": {
    "return": {
      "$eq": null
    },
    "reversal": {
      "$eq": null
    },
    "company": {
      "$eq": ObjectId("xxxxxxxxxxxxxxxxxxxxxxxx")
    }
  }
}, {
  "$lookup": {
    "as": "dispatcher",
    "from": "shippers",
    "localField": "dispatcher",
    "foreignField": "_id"
  }
}, {
  "$unwind": {
    "path": "$dispatcher",
    "preserveNullAndEmptyArrays": true
  }
}, {
  "$lookup": {
    "as": "dispatcher.company",
    "from": "companies",
    "localField": "dispatcher.company",
    "foreignField": "_id"
  }
}, {
  "$unwind": {
    "path": "$dispatcher.company",
    "preserveNullAndEmptyArrays": true
  }
}, {
  "$lookup": {
    "as": "redispatcher",
    "from": "shippers",
    "localField": "redispatcher",
    "foreignField": "_id"
  }
}, {
  "$unwind": {
    "path": "$redispatcher",
    "preserveNullAndEmptyArrays": true
  }
}, {
  "$lookup": {
    "as": "redispatcher.company",
    "from": "companies",
    "localField": "redispatcher.company",
    "foreignField": "_id"
  }
}, {
  "$unwind": {
    "path": "$redispatcher.company",
    "preserveNullAndEmptyArrays": true
  }
}, {
  "$lookup": {
    "as": "customer",
    "from": "customers",
    "localField": "customer",
    "foreignField": "_id"
  }
}, {
  "$match": {
    "$or": [{
      "dispatcher.company.corporateName": {
        "$regex": /\sCOMPANY\sNAME/
      }
    }, {
      "redispatcher.company.corporateName": {
        "$regex": /\sCOMPANY\sNAME/
      }
    }],
    "$and": [{
      "$or": [{
        "customer.code": {
          "$eq": "CUSTOMER NAME"
        }
      }, {
        "customer.commercialName": {
          "$regex": /CUSTOMER\sNAME/
        }
      }, {
        "customer.corporateName": {
          "$regex": /CUSTOMER\sNAME/
        }
      }]
    }]
  }
}, {
  "$unwind": {
    "path": "$customer",
    "preserveNullAndEmptyArrays": true
  }
}, {
  "$group": {
    "_id": "$invoiceNo",
    "__rootId": {
      "$first": "$_id"
    },
    "company": {
      "$first": "$company"
    },
    "customer": {
      "$first": "$customer._id"
    },
    "dispatcher": {
      "$first": "$dispatcher._id"
    },
    "redispatcher": {
      "$first": "$redispatcher._id"
    },
    "driverPlate": {
      "$first": "$driverPlate"
    },
    "key": {
      "$first": "$key"
    },
    "activities": {
      "$first": "$activities"
    },
    "serialNo": {
      "$first": "$serialNo"
    },
    "invoiceNo": {
      "$first": "$invoiceNo"
    },
    "incidents": {
      "$first": "$incidents"
    },
    "deliveries": {
      "$first": "$deliveries"
    },
    "return": {
      "$first": "$return"
    }
  }
}, {
  "$project": {
    "_id": "$__rootId",
    "company": "$company",
    "customer": "$customer",
    "dispatcher": "$dispatcher",
    "redispatcher": "$redispatcher",
    "driverPlate": "$driverPlate",
    "key": "$key",
    "activities": "$activities",
    "serialNo": "$serialNo",
    "invoiceNo": "$invoiceNo",
    "incidents": "$incidents",
    "deliveries": "$deliveries",
    "return": "$return"
  }
}, {
  "$sort": {
    "invoiceNo": -1
  }
}, {
  "$limit": 51
}]

The engine is smart enough to reallocate to the first position $match properties that don't require $lookups and right after $lookups if they do, however if they are within a $and/$or condition block, then they are reallocated after the last $lookup, regardless of what properties are there.

I could scan for what is used inside the $and and deconstruct it into new reallocated $match phases, but I need to figure how to handle the $or operator: I can't apply the same desconstruction idea on it because this would invalidate the condition.

So my question is: Is there an alternative way to use the phase $lookup along with $and/$or and improve the performance drastically?

Creating more indexes won't help because they're not used for the $lookup. Moving up $match phases, as the MongoDB team would suggest is also not possible because it would break the conditions. So I'm out of ideas now.

Best regards.

0

There are 0 answers