0

I'm looking for some advice on how to improve the performance of my query.

I have this user model in mongoose and I'm indexing interested_cities.

const firebaseToken = new Schema({
    type: {
        type: String,
        default: "android",
        required: true,
        trim: true,
    },
    device_id: {
        type: String,
        required: true,
        trim: true,
    },
    fcm_token: {
        type: String,
        required: true,
        trim: true,
    },
});

const userSchema = new Schema({
    name: {
        type: String,
        required: true,
        trim: true,
    },
    interested_cities: {
        type: [{
            type: String,
            trim: true,
            lowercase: true,
            unique: true
        }],
        required: false,
        default: [],
    },
    push_notification_tokens_firebase: {
        type: [firebaseToken],
        required: false,
        default: [],
    },
});

userSchema.index({
    interested_cities: 1
});

What I'm looking for is to query the users who have 'A' or 'B' in their array of interested_cities.

I'm querying something like this. I only need the firebase fcm_token from the query.

const involvedUsers = await User.find(
    {
      $or: [
        { interested_cities: { $in: ['A', 'B'] } },
        { phone_number: { $in: adminPhoneNumbersList } },
      ],
    },
    {
      _id: 1,
      "push_notification_tokens_firebase.fcm_token": 1,
    }
  );

Currently, the query is taking 20 sec for 14k documents, which needs improvements. Any pointers would be appreciated.

Explain:

{
                "explainVersion": "1",
                "queryPlanner": {
                    "namespace": "production.users",
                    "indexFilterSet": false,
                    "parsedQuery": {
                        "interested_cities": {
                            "$in": [
                                "A",
                                "B"
                            ]
                        }
                    },
                    "maxIndexedOrSolutionsReached": false,
                    "maxIndexedAndSolutionsReached": false,
                    "maxScansToExplodeReached": false,
                    "winningPlan": {
                        "stage": "PROJECTION_DEFAULT",
                        "transformBy": {
                            "_id": 1,
                            "push_notification_tokens_firebase.fcm_token": 1
                        },
                        "inputStage": {
                            "stage": "FETCH",
                            "inputStage": {
                                "stage": "IXSCAN",
                                "keyPattern": {
                                    "interested_cities": 1
                                },
                                "indexName": "interested_cities_1",
                                "isMultiKey": true,
                                "multiKeyPaths": {
                                    "interested_cities": [
                                        "interested_cities"
                                    ]
                                },
                                "isUnique": false,
                                "isSparse": false,
                                "isPartial": false,
                                "indexVersion": 2,
                                "direction": "forward",
                                "indexBounds": {
                                    "interested_cities": [
                                        "[\"A\", \"A\"]",
                                        "[\"B\", \"B\"]"
                                    ]
                                }
                            }
                        }
                    },
                    "rejectedPlans": []
                }
                "executionStats": {
                    "executionSuccess": true,
                    "nReturned": 6497,
                    "executionTimeMillis": 48,
                    "totalKeysExamined": 0,
                    "totalDocsExamined": 14827,
                    "executionStages": {
                        "stage": "SUBPLAN",
                        "nReturned": 6497,
                        "executionTimeMillisEstimate": 46,
                        "works": 14829,
                        "advanced": 6497,
                        "needTime": 8331,
                        "needYield": 0,
                        "saveState": 14,
                        "restoreState": 14,
                        "isEOF": 1,
                        "inputStage": {
                            "stage": "PROJECTION_DEFAULT",
                            "nReturned": 6497,
                            "executionTimeMillisEstimate": 46,
                            "works": 14829,
                            "advanced": 6497,
                            "needTime": 8331,
                            "needYield": 0,
                            "saveState": 14,
                            "restoreState": 14,
                            "isEOF": 1,
                            "transformBy": {
                                "_id": 1,
                                "push_notification_tokens_firebase.fcm_token": 1
                            },
                            "inputStage": {
                                "stage": "COLLSCAN",
                                "filter": {
                                    "$or": [
                                        {
                                            "interested_cities": {
                                                "$in": [
                                                    "A",
                                                    "B"
                                                ]
                                            }
                                        },
                                        {
                                            "phone_number": {
                                                "$in": [
                                                    "phone numbers",
                                                    "phone number"
                                                ]
                                            }
                                        }
                                    ]
                                },
                                "nReturned": 6497,
                                "executionTimeMillisEstimate": 41,
                                "works": 14829,
                                "advanced": 6497,
                                "needTime": 8331,
                                "needYield": 0,
                                "saveState": 14,
                                "restoreState": 14,
                                "isEOF": 1,
                                "direction": "forward",
                                "docsExamined": 14827
                            }
                        }
                    },
                    "allPlansExecution": []
                }

10
  • what is len of adminPhoneNumbersList?? Commented May 16, 2022 at 13:22
  • It's around two to three numbers. So max 5 @niko Commented May 16, 2022 at 13:23
  • Mongo official docs on scan $in $or operator, add adminPhoneNumberslist as an index and both lists must be indexed else it ignores the interested_cities index. Commented May 16, 2022 at 13:31
  • this answer may give a guide: stackoverflow.com/questions/51898076/… Commented May 16, 2022 at 13:32
  • I tried removing phone_number from the query but still got the same amount of time. Commented May 16, 2022 at 13:33

1 Answer 1

1

Mongoose optimization:

By default, Mongoose queries return an instance of the Mongoose Document class. Documents are much heavier than vanilla JavaScript objects, because they have a lot of internal state for change tracking. Enabling the lean option tells Mongoose to skip instantiating a full Mongoose document and just give you the POJO.

https://mongoosejs.com/docs/tutorials/lean.html#using-lean

You can disable this behaviour on per-query basis by appending .lean() at the end. If your query is returning "a lot" of documents, this can really improve your speed. You should read more about lean() from the link above.


Query optimization:

When evaluating the clauses in the $or expression, MongoDB either performs a collection scan or, if all the clauses are supported by indexes, MongoDB performs index scans. That is, for MongoDB to use indexes to evaluate an $or expression, all the clauses in the $or expression must be supported by indexes. Otherwise, MongoDB will perform a collection scan.

https://www.mongodb.com/docs/manual/reference/operator/query/or/#-or-clauses-and-indexes

Query you shared looks like this:

const involvedUsers = await User.find({
  $or: [
    { interested_cities: { $in: citiesArr } },
    { phone_number: { $in: phonesArr } },
  ],
}, { _id: 1, "push_notification_tokens_firebase.fcm_token": 1 });

Based on the info above, you need to create following two indexes:

userSchema.index({ interested_cities: 1 });
userSchema.index({ phone_number: 1 });

This way, mongo will be able to "know" which documents are relevant, find them on disk, extract your projection ("_id" and "push_notification_tokens_firebase.fcm_token") and return it.

One step further in the optimization would be to create following indexes instead of the ones above:

userSchema.index({ interested_cities: 1, _id: 1, "push_notification_tokens_firebase.fcm_token": 1 });
userSchema.index({ phone_number: 1, _id: 1, "push_notification_tokens_firebase.fcm_token": 1 });

This way, mongo will have all the info it needs to fulfil your query from the indexes, meaning it will never even access the disk to fetch a document.

You can confirm this by running <your-query>.explain('executionStats') and confirming that totalDocsExamined is 0.

Read more about executionStats here: https://www.mongodb.com/docs/manual/reference/explain-results/#mongodb-data-explain.executionStats

I hope this helps!

Sign up to request clarification or add additional context in comments.

1 Comment

Hey Alex, Just following back to this, now this problem becoming worst so I re-tried your solution but in MongoDB you can not have index parallel array. so indexing intrested_cities and token array won't work as per your solution

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.