Setup
Imagine this document shape:
{
dateString: "2024-10-31", // type string
realDate: ISODate("2024-10-31"), // type date
}
Assume both fields are indexed separately.
Problem
If I create an aggregation with a $match stage followed by a $group stage, the type of the queried field makes a difference:
- on `dateString`, the `$group` stage causes `IXSCAN` -> `FETCH` -> `GROUP` actions (as seen in explain). This is considerably slower than querying on `realDate`.
- on `realDate`, the `$group` stage causes only `IXSCAN` -> `PROJECTION_COVERED`. This is orders of magnitude faster (on 500k records: 9 ms vs. 30 sec+).
Any ideas why that is and how to solve this?
Aggregations:
// This aggregation is slow despite the string field being indexed.
[
{
$match: {
dateString: {
$gte: "2024-10-01",
$lte: "2024-10-31"
}
}
},
{
$group: {
_id: "$dateString",
count: { $sum: 1 }
}
}
]
// This aggregation is fast because it's on an indexed date field.
[
{
$match: {
realDate: {
$gte: ISODate("2024-10-01"),
$lte: ISODate("2024-10-31")
}
}
},
{
$group: {
_id: {
$month: "$realDate"
},
count: {
$sum: 1
}
}
}
]
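Explain plan for the slow string-field aggregation (in the real collection the string field is named `completionDate`; it corresponds to `dateString` in the simplified example above):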
{
"explainVersion": "2",
"queryPlanner": {
"namespace": "autoixpert.reports",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"completionDate": {
"$lte": "2024-10-31"
}
},
{
"completionDate": {
"$gte": "2024-10-01"
}
}
]
},
"collation": {
"locale": "de",
"caseLevel": false,
"caseFirst": "off",
"strength": 3,
"numericOrdering": false,
"alternate": "non-ignorable",
"maxVariable": "punct",
"normalization": false,
"backwards": false,
"version": "57.1"
},
"queryHash": "B08363F9",
"planCacheKey": "B727189F",
"optimizedPipeline": true,
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"queryPlan": {
"stage": "GROUP",
"planNodeId": 4,
"inputStage": {
"stage": "FETCH",
"planNodeId": 2,
"inputStage": {
"stage": "IXSCAN",
"planNodeId": 1,
"keyPattern": { "completionDate": 1 },
"indexName": "completionDate_1",
"collation": {
"locale": "de",
"caseLevel": false,
"caseFirst": "off",
"strength": 3,
"numericOrdering": false,
"alternate": "non-ignorable",
"maxVariable": "punct",
"normalization": false,
"backwards": false,
"version": "57.1"
},
"isMultiKey": false,
"multiKeyPaths": {
"completionDate": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"completionDate": [
"[CollationKey(0x1612161a050e1412050e1214010e010e), CollationKey(0x1612161a050e1412050e1814010e010e)]"
]
}
}
}
},
"slotBasedPlan": {
"slots": "$$RESULT=s19 env: { s4 = 1730549863726 (NOW), s5 = Collator({\"locale\" : \"de\", \"caseLevel\" : false, \"caseFirst\" : \"off\", \"strength\" : 3, \"numericOrdering\" : false, \"alternate\" : \"non-ignorable\", \"maxVariable\" : \"punct\", \"normalization\" : false, \"backwards\" : false, \"version\" : \"57.1\"}) (collator), s7 = KS(3C1612161A050E1412050E1214010E010E000104), s1 = TimeZoneDatabase(Africa/Nairobi...Asia/Kolkata) (timeZoneDB), s8 = KS(3C1612161A050E1412050E1814010E010E00FE04), s3 = Timestamp(1730549861, 1) (CLUSTER_TIME), s2 = Nothing (SEARCH_META), s12 = {\"completionDate\" : 1} }",
"stages": "[4] project [s19 = newBsonObj(\"_id\", s16, \"count\", s17)] \n[4] group [s16] [s17 = sum(1)] spillSlots[s18] mergingExprs[sum(s18)] s5 \n[4] project [s16 = (s15 ?: null)] \n[2] nlj inner [] [s6, s9, s10, s11, s12] \n left \n [1] cfilter {(exists(s7) && exists(s8))} \n [1] ixseek s7 s8 s11 s6 s9 s10 [] @\"474a8370-3457-4ceb-b673-80c0b5ce9203\" @\"completionDate_1\" true \n right \n [2] limit 1ll \n [2] seek s6 s13 s14 s9 s10 s11 s12 [s15 = completionDate] @\"474a8370-3457-4ceb-b673-80c0b5ce9203\" true false \n"
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 27,
"executionTimeMillis": 5,
"totalKeysExamined": 221,
"totalDocsExamined": 221,
"executionStages": {
"stage": "project",
"planNodeId": 4,
"nReturned": 27,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"projections": {
"19": "newBsonObj(\"_id\", s16, \"count\", s17) "
},
"inputStage": {
"stage": "group",
"planNodeId": 4,
"nReturned": 27,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"groupBySlots": [16],
"expressions": {
"17": "sum(1) ",
"initExprs": { "17": null }
},
"mergingExprs": { "18": "sum(s18) " },
"usedDisk": false,
"spills": 0,
"spilledRecords": 0,
"spilledDataStorageSize": 0,
"inputStage": {
"stage": "project",
"planNodeId": 4,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"projections": {
"16": "(s15 ?: null) "
},
"inputStage": {
"stage": "nlj",
"planNodeId": 2,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"totalDocsExamined": 221,
"totalKeysExamined": 221,
"collectionScans": 0,
"collectionSeeks": 221,
"indexScans": 0,
"indexSeeks": 1,
"indexesUsed": ["completionDate_1"],
"innerOpens": 221,
"innerCloses": 1,
"outerProjects": [],
"outerCorrelated": [6, 9, 10, 11, 12],
"outerStage": {
"stage": "cfilter",
"planNodeId": 1,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"numTested": 1,
"filter": "(exists(s7) && exists(s8)) ",
"inputStage": {
"stage": "ixseek",
"planNodeId": 1,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 1,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"indexName": "completionDate_1",
"keysExamined": 221,
"seeks": 1,
"numReads": 222,
"indexKeySlot": 11,
"recordIdSlot": 6,
"snapshotIdSlot": 9,
"indexIdentSlot": 10,
"outputSlots": [],
"indexKeysToInclude": "00000000000000000000000000000000",
"seekKeyLow": "s7 ",
"seekKeyHigh": "s8 "
}
},
"innerStage": {
"stage": "limit",
"planNodeId": 2,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 221,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"limit": 1,
"inputStage": {
"stage": "seek",
"planNodeId": 2,
"nReturned": 221,
"executionTimeMillisEstimate": 0,
"opens": 221,
"closes": 1,
"saveState": 0,
"restoreState": 0,
"isEOF": 0,
"numReads": 221,
"recordSlot": 13,
"recordIdSlot": 14,
"seekKeySlot": 6,
"snapshotIdSlot": 9,
"indexIdentSlot": 10,
"indexKeySlot": 11,
"indexKeyPatternSlot": 12,
"fields": ["completionDate"],
"outputSlots": [15]
}
}
}
}
}
}
},
"command": {
"aggregate": "reports",
"pipeline": [
{
"$match": {
"completionDate": {
"$gte": "2024-10-01",
"$lte": "2024-10-31"
}
}
},
{
"$group": {
"_id": "$completionDate",
"count": { "$sum": 1 }
}
}
],
"cursor": {},
"collation": { "locale": "de" },
"maxTimeMS": 60000,
"$db": "autoixpert"
},
"serverInfo": {
"host": "795d0985cbad",
"port": 27017,
"version": "7.0.11",
"gitVersion": "f451220f0df2b9dfe073f1521837f8ec5c208a8c"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
"internalQueryFrameworkControl": "trySbeRestricted"
},
"ok": 1,
"$clusterTime": {
"clusterTime": {
"$timestamp": "7432655057092345857"
},
"signature": {
"hash": "HYWsaU9s9ulyEsmOYYHunIajzyM=",
"keyId": {
"low": 7,
"high": 1729592506,
"unsigned": false
}
}
},
"operationTime": {
"$timestamp": "7432655057092345857"
}
}
`$gte: "2024-10-01", $lte: "2024-10-31"` is already going to be significantly slower, even with an index, because it is still doing string comparisons. Date comparisons are effectively like comparing integers or floats.
`dateString` is covered by an index?