$lookup 中附加连接条件（使用 pipeline）导致性能严重下降

编程入门行业动态更新时间:2024-10-23 15:30:22

$lookup 中附加连接条件（使用<a href="https://www.elefans.com/category/jswz/34/1771259.html">管道</a>）导致性能严重下降

$lookup 中附加连接条件（使用 pipeline）导致性能严重下降

在做代码审查时，我决定通过改写一个聚合查询来提高现有查询的性能。原来的聚合是这样的：

    .aggregate([
        //difference starts here
        {
            "$lookup": {
                "from": "sessions",
                "localField": "_id",
                "foreignField": "_client",
                "as": "sessions"
            }
        },
        {
            $unwind: "$sessions"
        },
        {
            $match: {
                "sessions.deleted_at": null
            }
        },
        //difference ends here
        {
            $project: {
                name: client_name_concater,
                email: '$email',
                phone: '$phone',
                address: addressConcater,
                updated_at: '$updated_at',
            }
        }
    ]);

改成这样：

    .aggregate([
    //difference starts here
    {
        $lookup: {
            from: 'sessions',
            let: {
                id: "$_id"
            },
            pipeline: [
                {
                    $match: {
                        $expr: {
                            $and:
                                [
                                    {
                                        $eq: ["$_client", "$$id"]
                                    }, {
                                    $eq: ["$deleted_at", null]
                                },
                                ]
                        }
                    }
                }
            ],
            as: 'sessions'
        }
    },
    {
        $match: {
            "sessions": {$ne: []}
        }
    },
    //difference ends here
        {
            $project: {
                name: client_name_concater,
                email: '$email',
                phone: '$phone',
                address: addressConcater,
                updated_at: '$updated_at',
            }
        }
    ]);

我原以为第二种写法应该更好，因为它少了一个阶段，但实际的性能差异恰恰相反，而且非常悬殊：第一个查询的平均运行时间约为 40ms，而第二个查询的运行时间在 3.5 到 5 秒之间，慢了 100 倍以上。被连接的集合（sessions）约有 120 个文档，而当前集合约有 152 个文档。即使考虑到数据量，这样的耗时可以接受，可为什么两者之间会有这么大的差别？它们做的基本上不是同一件事吗？我们只是把附加的连接条件和主连接条件一起放进了 pipeline 而已。我是不是漏掉了什么？

查询中用到的一些函数或变量大多是静态值或字符串拼接，不应影响 $lookup 部分。

谢谢

编辑：

为版本1添加了查询计划：

{
        "stages": [
            {
                "$cursor": {
                    "query": {
                        "$and": [
                            {
                                "deleted_at": null
                            },
                            {}
                        ]
                    },
                    "fields": {
                        "email": 1,
                        "phone": 1,
                        "updated_at": 1,
                        "_id": 1
                    },
                    "queryPlanner": {
                        "plannerVersion": 1,
                        "namespace": "test.clients",
                        "indexFilterSet": false,
                        "parsedQuery": {
                            "deleted_at": {
                                "$eq": null
                            }
                        },
                        "winningPlan": {
                            "stage": "COLLSCAN",
                            "filter": {
                                "deleted_at": {
                                    "$eq": null
                                }
                            },
                            "direction": "forward"
                        },
                        "rejectedPlans": []
                    }
                }
            },
            {
                "$lookup": {
                    "from": "sessions",
                    "as": "sessions",
                    "localField": "_id",
                    "foreignField": "_client",
                    "unwinding": {
                        "preserveNullAndEmptyArrays": false
                    }
                }
            },
            {
                "$project": {
                    "_id": true,
                    "email": "$email",
                    "phone": "$phone",
                    "updated_at": "$updated_at"
                }
            }
        ],
        "ok": 1
    }

对于版本2：

{
        "stages": [
            {
                "$cursor": {
                    "query": {
                        "deleted_at": null
                    },
                    "fields": {
                        "email": 1,
                        "phone": 1,
                        "sessions": 1,
                        "updated_at": 1,
                        "_id": 1
                    },
                    "queryPlanner": {
                        "plannerVersion": 1,
                        "namespace": "test.clients",
                        "indexFilterSet": false,
                        "parsedQuery": {
                            "deleted_at": {
                                "$eq": null
                            }
                        },
                        "winningPlan": {
                            "stage": "COLLSCAN",
                            "filter": {
                                "deleted_at": {
                                    "$eq": null
                                }
                            },
                            "direction": "forward"
                        },
                        "rejectedPlans": []
                    }
                }
            },
            {
                "$lookup": {
                    "from": "sessions",
                    "as": "sessions",
                    "let": {
                        "id": "$_id"
                    },
                    "pipeline": [
                        {
                            "$match": {
                                "$expr": {
                                    "$and": [
                                        {
                                            "$eq": [
                                                "$_client",
                                                "$$id"
                                            ]
                                        },
                                        {
                                            "$eq": [
                                                "$deleted_at",
                                                null
                                            ]
                                        }
                                    ]
                                }
                            }
                        }
                    ]
                }
            },
            {
                "$match": {
                    "sessions": {
                        "$not": {
                            "$eq": []
                        }
                    }
                }
            },
            {
                "$project": {
                    "_id": true,
                    "email": "$email",
                    "phone": "$phone",
                    "updated_at": "$updated_at"
                }
            }
        ],
        "ok": 1
    }

注意，被连接的 sessions 集合中有一些属性的数据量非常大（是一些导入的数据），所以我猜这些数据可能以某种方式影响了查询处理的数据量？但即便如此，为什么两个 $lookup 版本之间会有差别呢？

回答如下：

第二个版本会针对被连接集合中的每个文档额外执行一次聚合管道。

官方文档中说：