Use the $function operator, available since MongoDB 4.4 (released in 2020), to do this recursively, as you note. Given this input, which is a slightly expanded version of the one supplied in the question:
// Sample document: a slightly expanded version of the one in the question.
// It mixes nulls, empty objects and empty arrays at several nesting depths,
// including inside array elements. An explicit _id is set so the stored
// document matches the "_id" : 0 shown in the result further down.
var dd = {
    "_id": 0,
    "test": "some",
    "test2": { },
    "test3": {
        "some-key": { },
        "some-other-key": {
            "more-nested-data": true,
            "more-nested-emtpy": null,
            "emptyArr": [],
            "notEmptyArr": [
                "XXX",
                null,
                {"corn": "dog"},
                {"bad": null},
                {"other": {zip: null, empty: [], zap: "notNull"}}
            ]
        }
    }
};
// Store the sample document; insertOne() replaces the deprecated insert() helper.
db.foo.insertOne(dd);
then this pipeline:
// Replace each document with a copy of itself from which every null value,
// empty object and empty array has been removed, at any nesting depth.
// Containers that become empty after their contents are pruned are removed too.
db.foo.aggregate([
    {$replaceRoot: {newRoot: {$function: {
        /**
         * Prunes `obj` in place and returns it.
         * @param {Object} obj - the document being processed ($$CURRENT).
         * @returns {Object} the same object with empty/null members removed.
         */
        body: function(obj) {
            // Dates and regular expressions are objects with no enumerable
            // keys. They are real values, not empty sub-documents, so they
            // must be treated as leaves and never be walked or removed.
            var isOpaqueLeaf = function(value) {
                var tag = Object.prototype.toString.call(value);
                return tag === "[object Date]" || tag === "[object RegExp]";
            };

            // Examine holder[spot] (whose current content is `value`), prune
            // it recursively, and remove it from `holder` if nothing is left.
            var process = function(holder, spot, value) {
                var removeIt = false;

                // test for arrays FIRST since [] instanceof Object is true!
                if (Array.isArray(value)) {
                    // walk BACKWARDS: a splice() on this array only shifts
                    // elements that have already been visited.
                    for (var jj = value.length - 1; jj >= 0; jj--) {
                        process(value, jj, value[jj]);
                    }
                    removeIt = (value.length === 0);
                } else if (value instanceof Object && !isOpaqueLeaf(value)) {
                    walkObj(value);
                    removeIt = (Object.keys(value).length === 0);
                } else {
                    // loose comparison on purpose: matches null and undefined
                    removeIt = (value == null);
                }

                if (removeIt) {
                    if (Array.isArray(holder)) {
                        holder.splice(spot, 1); // snip out the val
                    } else if (holder instanceof Object) {
                        delete holder[spot];
                    }
                }
            };

            // Object.keys() takes a snapshot of the keys, so deleting
            // properties while iterating is safe.
            var walkObj = function(target) {
                Object.keys(target).forEach(function(k) {
                    process(target, k, target[k]);
                });
            };

            walkObj(obj); // entry point!
            return obj;
        },
        args: [ "$$CURRENT" ],
        lang: "js"
    }}
    }}
]);
produces this result:
{
"_id" : 0,
"test" : "some",
"test3" : {
"some-other-key" : {
"more-nested-data" : true,
"notEmptyArr" : [
"XXX",
{
"corn" : "dog"
},
{
"other" : {
"zap" : "notNull"
}
}
]
}
}
}
A convenient way to debug such complex functions is to declare them as variables outside of the pipeline and run data through them to simulate the documents (objects) coming out of the database, e.g.:
ff = function(obj) {
var process = function(holder, spot, value) {
var remove_it = false;
// test FIRST since [] instanceof Object is true!
if(Array.isArray(value)) {
...
printjson(ff(dd)); // use the same doc as above
You can put print() calls and other debugging aids into the code; when you are done, remove them and call the pipeline to process the real data as follows:
// Run the real collection through the function that was debugged above as a
// standalone variable.
db.foo.aggregate([
    {
        "$replaceRoot": {
            "newRoot": {
                "$function": {
                    "body": ff,               // the debugged function goes here
                    "args": [ "$$CURRENT" ],  // pass the current document in
                    "lang": "js"
                }
            }
        }
    }
]);