diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..3f5fd51ef --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,71 @@ +# Project Overview + +This project is a server implementing the MCP (Model Context Protocol) that allows users to interact with their MongoDB clusters +and MongoDB Atlas accounts. It is built using TypeScript, Node.js and the official Anthropic +@modelcontextprotocol/sdk SDK. + +## Folder Structure + +- `/src`: Contains the source code of the MCP Server. +- `/src/tools`: Contains the implementation of MCP tools. +- `/src/tools/atlas/`: Contains the implementation of MCP tools that are specific to MongoDB Atlas. +- `/src/tools/mongodb/`: Contains the implementation of MCP tools that are specific to MongoDB clusters. +- `/src/resources`: Contains the implementation of MCP Resources. +- `/tests`: Contains the test code for the MCP Server. +- `/tests/accuracy`: Contains the test code for the accuracy tests, that use different models to ensure that tools have reliable descriptions. +- `/tests/integration`: Contains tests that start the MCP Server and interact with it to ensure that functionality is correct. +- `/tests/unit`: Contains simple unit tests to cover specific functionality of the MCP Server. + +## Libraries and Frameworks + +- Zod for message and schema validation. +- Express for the HTTP Transport implementation. +- mongosh NodeDriverServiceProvider for connecting to MongoDB. +- vitest for testing. +- @modelcontextprotocol/sdk for the protocol implementation. + +## Coding Standards + +- For declarations, use types. For usage, rely on type inference unless it is not clear enough. +- Always follow the eslint and prettier rule formats specified in `.eslint.config.js` and `.prettierrc.json`. +- Use classes for stateful components and functions for stateless pure logic. +- Use dependency injection to provide dependencies between components. +- Avoid using global variables as much as possible. +- New functionality MUST be under test. + - Tools MUST HAVE integration tests. + - Tools MUST HAVE unit tests. + - Tools MAY HAVE accuracy tests. + +## Architectural Guidelines and Best Practices + +Every agent connected to the MCP Server has a Session object attached to it. The Session is the main entrypoint for +dependencies to other components. Any component that MUST be used by either a tool or a resource MUST be provided +through the Session. + +### Guidelines for All Tools + +- The name of the tool should describe an action: `create-collection`, `insert-many`. +- The description MUST be a simple and accurate prompt that defines what the tool does in an unambiguous way. +- All tools MUST provide a Zod schema that clearly specifies the API of the tool. +- The Operation type MUST be clear: + - `metadata`: Reads metadata for an entity (for example, a cluster). Example: CollectionSchema. + - `read`: Reads information from a cluster or Atlas. + - `create`: Creates resources, like a collection or a cluster. + - `delete`: Deletes resources or documents, like collections, documents or clusters. + - `update`: Modifies resources or documents, like collections, documents or clusters. + - `connects`: Connects to a MongoDB cluster. +- If a new tool is added, or the tool description is modified, the accuracy tests MUST be updated too. + +### Guidelines for MongoDB Tools + +- The tool category MUST be `mongodb`. +- They MUST call `this.ensureConnected()` before attempting to query MongoDB. +- They MUST return content sanitized using `formatUntrustedData`. +- Documents should be serialized with `EJSON.stringify`. +- Ensure there are proper timeout mechanisms to avoid long-running queries that can affect the server. +- Tools that require elicitation MUST implement `getConfirmationMessage` and provide an easy-to-understand message for a human running the operation. + - If a tool requires elicitation, it must be added to `src/common/config.ts` in the `confirmationRequiredTools` list in the defaultUserConfig. + +### Guidelines for Atlas Tools + +- The tool category MUST be `atlas`. diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 0513b2220..7272b68bb 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -99,4 +99,4 @@ jobs: env: GH_TOKEN: ${{ github.token }} run: | - gh release create ${{ needs.check.outputs.VERSION }} --title "${{ needs.check.outputs.VERSION }}" --generate-notes --target ${{ github.sha }} ${{ (steps.npm-tag.outputs.RELEASE_CHANNEL != 'latest' && '--prerelease') || ''}} + gh release create ${{ needs.check.outputs.VERSION }} --title "${{ needs.check.outputs.VERSION }}" --generate-notes --target ${{ github.sha }} ${{ (needs.check.outputs.RELEASE_CHANNEL != 'latest' && '--prerelease') || ''}} diff --git a/README.md b/README.md index b91c1fbc4..e5915ed22 100644 --- a/README.md +++ b/README.md @@ -300,8 +300,8 @@ NOTE: atlas tools are only available when you set credentials on [configuration] #### MongoDB Database Tools - `connect` - Connect to a MongoDB instance -- `find` - Run a find query against a MongoDB collection -- `aggregate` - Run an aggregation against a MongoDB collection +- `find` - Run a find query against a MongoDB collection. The number of documents returned is limited by the `limit` parameter and the server's `maxDocumentsPerQuery` configuration, whichever is smaller. The total size of the returned documents is also limited by the `responseBytesLimit` parameter and the server's `maxBytesPerQuery` configuration, whichever is smaller. +- `aggregate` - Run an aggregation against a MongoDB collection. The number of documents returned is limited by the server's `maxDocumentsPerQuery` configuration. The total size of the returned documents is also limited by the `responseBytesLimit` parameter and the server's `maxBytesPerQuery` configuration, whichever is smaller. - `count` - Get the number of documents in a MongoDB collection - `insert-one` - Insert a single document into a MongoDB collection - `insert-many` - Insert multiple documents into a MongoDB collection @@ -338,27 +338,29 @@ The MongoDB MCP Server can be configured using multiple methods, with the follow ### Configuration Options -| CLI Option | Environment Variable | Default | Description | -| -------------------------------------- | --------------------------------------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `apiClientId` | `MDB_MCP_API_CLIENT_ID` | | Atlas API client ID for authentication. Required for running Atlas tools. | -| `apiClientSecret` | `MDB_MCP_API_CLIENT_SECRET` | | Atlas API client secret for authentication. Required for running Atlas tools. | -| `connectionString` | `MDB_MCP_CONNECTION_STRING` | | MongoDB connection string for direct database connections. Optional, if not set, you'll need to call the `connect` tool before interacting with MongoDB data. | -| `loggers` | `MDB_MCP_LOGGERS` | disk,mcp | Comma separated values, possible values are `mcp`, `disk` and `stderr`. See [Logger Options](#logger-options) for details. | -| `logPath` | `MDB_MCP_LOG_PATH` | see note\* | Folder to store logs. | -| `disabledTools` | `MDB_MCP_DISABLED_TOOLS` | | An array of tool names, operation types, and/or categories of tools that will be disabled. | -| `confirmationRequiredTools` | `MDB_MCP_CONFIRMATION_REQUIRED_TOOLS` | create-access-list,create-db-user,drop-database,drop-collection,delete-many | An array of tool names that require user confirmation before execution. **Requires the client to support [elicitation](https://modelcontextprotocol.io/specification/draft/client/elicitation)**. | -| `readOnly` | `MDB_MCP_READ_ONLY` | false | When set to true, only allows read, connect, and metadata operation types, disabling create/update/delete operations. | -| `indexCheck` | `MDB_MCP_INDEX_CHECK` | false | When set to true, enforces that query operations must use an index, rejecting queries that perform a collection scan. | -| `telemetry` | `MDB_MCP_TELEMETRY` | enabled | When set to disabled, disables telemetry collection. | -| `transport` | `MDB_MCP_TRANSPORT` | stdio | Either 'stdio' or 'http'. | -| `httpPort` | `MDB_MCP_HTTP_PORT` | 3000 | Port number. | -| `httpHost` | `MDB_MCP_HTTP_HOST` | 127.0.0.1 | Host to bind the http server. | -| `idleTimeoutMs` | `MDB_MCP_IDLE_TIMEOUT_MS` | 600000 | Idle timeout for a client to disconnect (only applies to http transport). | -| `notificationTimeoutMs` | `MDB_MCP_NOTIFICATION_TIMEOUT_MS` | 540000 | Notification timeout for a client to be aware of diconnect (only applies to http transport). | -| `exportsPath` | `MDB_MCP_EXPORTS_PATH` | see note\* | Folder to store exported data files. | -| `exportTimeoutMs` | `MDB_MCP_EXPORT_TIMEOUT_MS` | 300000 | Time in milliseconds after which an export is considered expired and eligible for cleanup. | -| `exportCleanupIntervalMs` | `MDB_MCP_EXPORT_CLEANUP_INTERVAL_MS` | 120000 | Time in milliseconds between export cleanup cycles that remove expired export files. | -| `atlasTemporaryDatabaseUserLifetimeMs` | `MDB_MCP_ATLAS_TEMPORARY_DATABASE_USER_LIFETIME_MS` | 14400000 | Time in milliseconds that temporary database users created when connecting to MongoDB Atlas clusters will remain active before being automatically deleted. | +| CLI Option | Environment Variable | Default | Description | +| -------------------------------------- | --------------------------------------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `apiClientId` | `MDB_MCP_API_CLIENT_ID` | | Atlas API client ID for authentication. Required for running Atlas tools. | +| `apiClientSecret` | `MDB_MCP_API_CLIENT_SECRET` | | Atlas API client secret for authentication. Required for running Atlas tools. | +| `connectionString` | `MDB_MCP_CONNECTION_STRING` | | MongoDB connection string for direct database connections. Optional, if not set, you'll need to call the `connect` tool before interacting with MongoDB data. | +| `loggers` | `MDB_MCP_LOGGERS` | disk,mcp | Comma separated values, possible values are `mcp`, `disk` and `stderr`. See [Logger Options](#logger-options) for details. | +| `logPath` | `MDB_MCP_LOG_PATH` | see note\* | Folder to store logs. | +| `disabledTools` | `MDB_MCP_DISABLED_TOOLS` | | An array of tool names, operation types, and/or categories of tools that will be disabled. | +| `confirmationRequiredTools` | `MDB_MCP_CONFIRMATION_REQUIRED_TOOLS` | create-access-list,create-db-user,drop-database,drop-collection,delete-many | An array of tool names that require user confirmation before execution. **Requires the client to support [elicitation](https://modelcontextprotocol.io/specification/draft/client/elicitation)**. | +| `readOnly` | `MDB_MCP_READ_ONLY` | false | When set to true, only allows read, connect, and metadata operation types, disabling create/update/delete operations. | +| `indexCheck` | `MDB_MCP_INDEX_CHECK` | false | When set to true, enforces that query operations must use an index, rejecting queries that perform a collection scan. | +| `telemetry` | `MDB_MCP_TELEMETRY` | enabled | When set to disabled, disables telemetry collection. | +| `transport` | `MDB_MCP_TRANSPORT` | stdio | Either 'stdio' or 'http'. | +| `httpPort` | `MDB_MCP_HTTP_PORT` | 3000 | Port number. | +| `httpHost` | `MDB_MCP_HTTP_HOST` | 127.0.0.1 | Host to bind the http server. | +| `idleTimeoutMs` | `MDB_MCP_IDLE_TIMEOUT_MS` | 600000 | Idle timeout for a client to disconnect (only applies to http transport). | +| `maxBytesPerQuery` | `MDB_MCP_MAX_BYTES_PER_QUERY` | 16777216 (16MiB) | The maximum size in bytes for results from a `find` or `aggregate` tool call. This serves as an upper bound for the `responseBytesLimit` parameter in those tools. | +| `maxDocumentsPerQuery` | `MDB_MCP_MAX_DOCUMENTS_PER_QUERY` | 100 | The maximum number of documents that can be returned by a `find` or `aggregate` tool call. For the `find` tool, the effective limit will be the smaller of this value and the tool's `limit` parameter. | +| `notificationTimeoutMs` | `MDB_MCP_NOTIFICATION_TIMEOUT_MS` | 540000 | Notification timeout for a client to be aware of diconnect (only applies to http transport). | +| `exportsPath` | `MDB_MCP_EXPORTS_PATH` | see note\* | Folder to store exported data files. | +| `exportTimeoutMs` | `MDB_MCP_EXPORT_TIMEOUT_MS` | 300000 | Time in milliseconds after which an export is considered expired and eligible for cleanup. | +| `exportCleanupIntervalMs` | `MDB_MCP_EXPORT_CLEANUP_INTERVAL_MS` | 120000 | Time in milliseconds between export cleanup cycles that remove expired export files. | +| `atlasTemporaryDatabaseUserLifetimeMs` | `MDB_MCP_ATLAS_TEMPORARY_DATABASE_USER_LIFETIME_MS` | 14400000 | Time in milliseconds that temporary database users created when connecting to MongoDB Atlas clusters will remain active before being automatically deleted. | #### Logger Options diff --git a/package-lock.json b/package-lock.json index b0ca67ed7..4093f24e6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "mongodb-mcp-server", - "version": "1.0.0", + "version": "1.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "mongodb-mcp-server", - "version": "1.0.0", + "version": "1.0.1", "license": "Apache-2.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.17.4", @@ -2018,9 +2018,9 @@ } }, "node_modules/@modelcontextprotocol/sdk": { - "version": "1.17.5", - "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.17.5.tgz", - "integrity": "sha512-QakrKIGniGuRVfWBdMsDea/dx1PNE739QJ7gCM41s9q+qaCYTHCdsIBXQVVXry3mfWAiaM9kT22Hyz53Uw8mfg==", + "version": "1.18.0", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.18.0.tgz", + "integrity": "sha512-JvKyB6YwS3quM+88JPR0axeRgvdDu3Pv6mdZUy+w4qVkCzGgumb9bXG/TmtDRQv+671yaofVfXSQmFLlWU5qPQ==", "license": "MIT", "dependencies": { "ajv": "^6.12.6", @@ -5536,17 +5536,17 @@ "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.42.0.tgz", - "integrity": "sha512-Aq2dPqsQkxHOLfb2OPv43RnIvfj05nw8v/6n3B2NABIPpHnjQnaLo9QGMTvml+tv4korl/Cjfrb/BYhoL8UUTQ==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.44.0.tgz", + "integrity": "sha512-EGDAOGX+uwwekcS0iyxVDmRV9HX6FLSM5kzrAToLTsr9OWCIKG/y3lQheCq18yZ5Xh78rRKJiEpP0ZaCs4ryOQ==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.42.0", - "@typescript-eslint/type-utils": "8.42.0", - "@typescript-eslint/utils": "8.42.0", - "@typescript-eslint/visitor-keys": "8.42.0", + "@typescript-eslint/scope-manager": "8.44.0", + "@typescript-eslint/type-utils": "8.44.0", + "@typescript-eslint/utils": "8.44.0", + "@typescript-eslint/visitor-keys": "8.44.0", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", @@ -5560,7 +5560,7 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.42.0", + "@typescript-eslint/parser": "^8.44.0", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } @@ -5576,16 +5576,16 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.42.0.tgz", - "integrity": "sha512-r1XG74QgShUgXph1BYseJ+KZd17bKQib/yF3SR+demvytiRXrwd12Blnz5eYGm8tXaeRdd4x88MlfwldHoudGg==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.44.0.tgz", + "integrity": "sha512-VGMpFQGUQWYT9LfnPcX8ouFojyrZ/2w3K5BucvxL/spdNehccKhB4jUyB1yBCXpr2XFm0jkECxgrpXBW2ipoAw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/scope-manager": "8.42.0", - "@typescript-eslint/types": "8.42.0", - "@typescript-eslint/typescript-estree": "8.42.0", - "@typescript-eslint/visitor-keys": "8.42.0", + "@typescript-eslint/scope-manager": "8.44.0", + "@typescript-eslint/types": "8.44.0", + "@typescript-eslint/typescript-estree": "8.44.0", + "@typescript-eslint/visitor-keys": "8.44.0", "debug": "^4.3.4" }, "engines": { @@ -5601,13 +5601,13 @@ } }, "node_modules/@typescript-eslint/project-service": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.42.0.tgz", - "integrity": "sha512-vfVpLHAhbPjilrabtOSNcUDmBboQNrJUiNAGoImkZKnMjs2TIcWG33s4Ds0wY3/50aZmTMqJa6PiwkwezaAklg==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.44.0.tgz", + "integrity": "sha512-ZeaGNraRsq10GuEohKTo4295Z/SuGcSq2LzfGlqiuEvfArzo/VRrT0ZaJsVPuKZ55lVbNk8U6FcL+ZMH8CoyVA==", "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.42.0", - "@typescript-eslint/types": "^8.42.0", + "@typescript-eslint/tsconfig-utils": "^8.44.0", + "@typescript-eslint/types": "^8.44.0", "debug": "^4.3.4" }, "engines": { @@ -5622,13 +5622,13 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.42.0.tgz", - "integrity": "sha512-51+x9o78NBAVgQzOPd17DkNTnIzJ8T/O2dmMBLoK9qbY0Gm52XJcdJcCl18ExBMiHo6jPMErUQWUv5RLE51zJw==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.44.0.tgz", + "integrity": "sha512-87Jv3E+al8wpD+rIdVJm/ItDBe/Im09zXIjFoipOjr5gHUhJmTzfFLuTJ/nPTMc2Srsroy4IBXwcTCHyRR7KzA==", "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.42.0", - "@typescript-eslint/visitor-keys": "8.42.0" + "@typescript-eslint/types": "8.44.0", + "@typescript-eslint/visitor-keys": "8.44.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -5639,9 +5639,9 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.42.0.tgz", - "integrity": "sha512-kHeFUOdwAJfUmYKjR3CLgZSglGHjbNTi1H8sTYRYV2xX6eNz4RyJ2LIgsDLKf8Yi0/GL1WZAC/DgZBeBft8QAQ==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.44.0.tgz", + "integrity": "sha512-x5Y0+AuEPqAInc6yd0n5DAcvtoQ/vyaGwuX5HE9n6qAefk1GaedqrLQF8kQGylLUb9pnZyLf+iEiL9fr8APDtQ==", "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -5655,15 +5655,15 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.42.0.tgz", - "integrity": "sha512-9KChw92sbPTYVFw3JLRH1ockhyR3zqqn9lQXol3/YbI6jVxzWoGcT3AsAW0mu1MY0gYtsXnUGV/AKpkAj5tVlQ==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.44.0.tgz", + "integrity": "sha512-9cwsoSxJ8Sak67Be/hD2RNt/fsqmWnNE1iHohG8lxqLSNY8xNfyY7wloo5zpW3Nu9hxVgURevqfcH6vvKCt6yg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.42.0", - "@typescript-eslint/typescript-estree": "8.42.0", - "@typescript-eslint/utils": "8.42.0", + "@typescript-eslint/types": "8.44.0", + "@typescript-eslint/typescript-estree": "8.44.0", + "@typescript-eslint/utils": "8.44.0", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, @@ -5680,9 +5680,9 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.42.0.tgz", - "integrity": "sha512-LdtAWMiFmbRLNP7JNeY0SqEtJvGMYSzfiWBSmx+VSZ1CH+1zyl8Mmw1TT39OrtsRvIYShjJWzTDMPWZJCpwBlw==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.44.0.tgz", + "integrity": "sha512-ZSl2efn44VsYM0MfDQe68RKzBz75NPgLQXuGypmym6QVOWL5kegTZuZ02xRAT9T+onqvM6T8CdQk0OwYMB6ZvA==", "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -5693,15 +5693,15 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.42.0.tgz", - "integrity": "sha512-ku/uYtT4QXY8sl9EDJETD27o3Ewdi72hcXg1ah/kkUgBvAYHLwj2ofswFFNXS+FL5G+AGkxBtvGt8pFBHKlHsQ==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.44.0.tgz", + "integrity": "sha512-lqNj6SgnGcQZwL4/SBJ3xdPEfcBuhCG8zdcwCPgYcmiPLgokiNDKlbPzCwEwu7m279J/lBYWtDYL+87OEfn8Jw==", "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.42.0", - "@typescript-eslint/tsconfig-utils": "8.42.0", - "@typescript-eslint/types": "8.42.0", - "@typescript-eslint/visitor-keys": "8.42.0", + "@typescript-eslint/project-service": "8.44.0", + "@typescript-eslint/tsconfig-utils": "8.44.0", + "@typescript-eslint/types": "8.44.0", + "@typescript-eslint/visitor-keys": "8.44.0", "debug": "^4.3.4", "fast-glob": "^3.3.2", "is-glob": "^4.0.3", @@ -5745,15 +5745,15 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.42.0.tgz", - "integrity": "sha512-JnIzu7H3RH5BrKC4NoZqRfmjqCIS1u3hGZltDYJgkVdqAezl4L9d1ZLw+36huCujtSBSAirGINF/S4UxOcR+/g==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.44.0.tgz", + "integrity": "sha512-nktOlVcg3ALo0mYlV+L7sWUD58KG4CMj1rb2HUVOO4aL3K/6wcD+NERqd0rrA5Vg06b42YhF6cFxeixsp9Riqg==", "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.42.0", - "@typescript-eslint/types": "8.42.0", - "@typescript-eslint/typescript-estree": "8.42.0" + "@typescript-eslint/scope-manager": "8.44.0", + "@typescript-eslint/types": "8.44.0", + "@typescript-eslint/typescript-estree": "8.44.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -5768,12 +5768,12 @@ } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.42.0.tgz", - "integrity": "sha512-3WbiuzoEowaEn8RSnhJBrxSwX8ULYE9CXaPepS2C2W3NSA5NNIvBaslpBSBElPq0UGr0xVJlXFWOAKIkyylydQ==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.44.0.tgz", + "integrity": "sha512-zaz9u8EJ4GBmnehlrpoKvj/E3dNbuQ7q0ucyZImm3cLqJ8INTc970B1qEqDX/Rzq65r3TvVTN7kHWPBoyW7DWw==", "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.42.0", + "@typescript-eslint/types": "8.44.0", "eslint-visitor-keys": "^4.2.1" }, "engines": { @@ -14556,16 +14556,16 @@ } }, "node_modules/typescript-eslint": { - "version": "8.42.0", - "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.42.0.tgz", - "integrity": "sha512-ozR/rQn+aQXQxh1YgbCzQWDFrsi9mcg+1PM3l/z5o1+20P7suOIaNg515bpr/OYt6FObz/NHcBstydDLHWeEKg==", + "version": "8.44.0", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.44.0.tgz", + "integrity": "sha512-ib7mCkYuIzYonCq9XWF5XNw+fkj2zg629PSa9KNIQ47RXFF763S5BIX4wqz1+FLPogTZoiw8KmCiRPRa8bL3qw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/eslint-plugin": "8.42.0", - "@typescript-eslint/parser": "8.42.0", - "@typescript-eslint/typescript-estree": "8.42.0", - "@typescript-eslint/utils": "8.42.0" + "@typescript-eslint/eslint-plugin": "8.44.0", + "@typescript-eslint/parser": "8.44.0", + "@typescript-eslint/typescript-estree": "8.44.0", + "@typescript-eslint/utils": "8.44.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" diff --git a/package.json b/package.json index c6da0bcbe..91c0d1734 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-mcp-server", "description": "MongoDB Model Context Protocol Server", - "version": "1.0.0", + "version": "1.0.1", "type": "module", "exports": { ".": { diff --git a/scripts/accuracy/runAccuracyTests.sh b/scripts/accuracy/runAccuracyTests.sh index 312d08a19..180bd96fb 100644 --- a/scripts/accuracy/runAccuracyTests.sh +++ b/scripts/accuracy/runAccuracyTests.sh @@ -17,7 +17,7 @@ export MDB_ACCURACY_RUN_ID=$(npx uuid v4) # specified in the command line. Such as: # npm run test:accuracy -- tests/accuracy/some-test.test.ts echo "Running accuracy tests with MDB_ACCURACY_RUN_ID '$MDB_ACCURACY_RUN_ID'" -vitest --config vitest.config.ts --project=accuracy --coverage=false --run "$@" +vitest --config vitest.config.ts --project=accuracy --coverage=false --no-file-parallelism --run "$@" # Preserving the exit code from test run to correctly notify in the CI # environments when the tests fail. diff --git a/src/common/config.ts b/src/common/config.ts index d335fbb43..2272cd9e4 100644 --- a/src/common/config.ts +++ b/src/common/config.ts @@ -9,6 +9,7 @@ import levenshtein from "ts-levenshtein"; // From: https://github.com/mongodb-js/mongosh/blob/main/packages/cli-repl/src/arg-parser.ts const OPTIONS = { + number: ["maxDocumentsPerQuery", "maxBytesPerQuery"], string: [ "apiBaseUrl", "apiClientId", @@ -98,6 +99,7 @@ const OPTIONS = { interface Options { string: string[]; + number: string[]; boolean: string[]; array: string[]; alias: Record; @@ -106,6 +108,7 @@ interface Options { export const ALL_CONFIG_KEYS = new Set( (OPTIONS.string as readonly string[]) + .concat(OPTIONS.number) .concat(OPTIONS.array) .concat(OPTIONS.boolean) .concat(Object.keys(OPTIONS.alias)) @@ -175,6 +178,8 @@ export interface UserConfig extends CliOptions { loggers: Array<"stderr" | "disk" | "mcp">; idleTimeoutMs: number; notificationTimeoutMs: number; + maxDocumentsPerQuery: number; + maxBytesPerQuery: number; atlasTemporaryDatabaseUserLifetimeMs: number; } @@ -202,6 +207,8 @@ export const defaultUserConfig: UserConfig = { idleTimeoutMs: 10 * 60 * 1000, // 10 minutes notificationTimeoutMs: 9 * 60 * 1000, // 9 minutes httpHeaders: {}, + maxDocumentsPerQuery: 100, // By default, we only fetch a maximum 100 documents per query / aggregation + maxBytesPerQuery: 16 * 1024 * 1024, // By default, we only return ~16 mb of data per query / aggregation atlasTemporaryDatabaseUserLifetimeMs: 4 * 60 * 60 * 1000, // 4 hours }; diff --git a/src/common/logger.ts b/src/common/logger.ts index 7a3ebd99c..c7ee263a4 100644 --- a/src/common/logger.ts +++ b/src/common/logger.ts @@ -44,6 +44,7 @@ export const LogId = { mongodbConnectFailure: mongoLogId(1_004_001), mongodbDisconnectFailure: mongoLogId(1_004_002), mongodbConnectTry: mongoLogId(1_004_003), + mongodbCursorCloseError: mongoLogId(1_004_004), toolUpdateFailure: mongoLogId(1_005_001), resourceUpdateFailure: mongoLogId(1_005_002), diff --git a/src/common/packageInfo.ts b/src/common/packageInfo.ts index 341558a96..1e46f6167 100644 --- a/src/common/packageInfo.ts +++ b/src/common/packageInfo.ts @@ -1,5 +1,5 @@ // This file was generated by scripts/updatePackageVersion.ts - Do not edit it manually. export const packageInfo = { - version: "1.0.0", + version: "1.0.1", mcpServerName: "MongoDB MCP Server", }; diff --git a/src/helpers/collectCursorUntilMaxBytes.ts b/src/helpers/collectCursorUntilMaxBytes.ts new file mode 100644 index 000000000..fd33196dd --- /dev/null +++ b/src/helpers/collectCursorUntilMaxBytes.ts @@ -0,0 +1,103 @@ +import { calculateObjectSize } from "bson"; +import type { AggregationCursor, FindCursor } from "mongodb"; + +export function getResponseBytesLimit( + toolResponseBytesLimit: number | undefined | null, + configuredMaxBytesPerQuery: unknown +): { + cappedBy: "config.maxBytesPerQuery" | "tool.responseBytesLimit" | undefined; + limit: number; +} { + const configuredLimit: number = parseInt(String(configuredMaxBytesPerQuery), 10); + + // Setting configured maxBytesPerQuery to negative, zero or nullish is + // equivalent to disabling the max limit applied on documents + const configuredLimitIsNotApplicable = Number.isNaN(configuredLimit) || configuredLimit <= 0; + + // It's possible to have tool parameter responseBytesLimit as null or + // negative values in which case we consider that no limit is to be + // applied from tool call perspective unless we have a maxBytesPerQuery + // configured. + const toolResponseLimitIsNotApplicable = typeof toolResponseBytesLimit !== "number" || toolResponseBytesLimit <= 0; + + if (configuredLimitIsNotApplicable) { + return { + cappedBy: toolResponseLimitIsNotApplicable ? undefined : "tool.responseBytesLimit", + limit: toolResponseLimitIsNotApplicable ? 0 : toolResponseBytesLimit, + }; + } + + if (toolResponseLimitIsNotApplicable) { + return { cappedBy: "config.maxBytesPerQuery", limit: configuredLimit }; + } + + return { + cappedBy: configuredLimit < toolResponseBytesLimit ? "config.maxBytesPerQuery" : "tool.responseBytesLimit", + limit: Math.min(toolResponseBytesLimit, configuredLimit), + }; +} + +/** + * This function attempts to put a guard rail against accidental memory overflow + * on the MCP server. + * + * The cursor is iterated until we can predict that fetching next doc won't + * exceed the derived limit on number of bytes for the tool call. The derived + * limit takes into account the limit provided from the Tool's interface and the + * configured maxBytesPerQuery for the server. + */ +export async function collectCursorUntilMaxBytesLimit({ + cursor, + toolResponseBytesLimit, + configuredMaxBytesPerQuery, + abortSignal, +}: { + cursor: FindCursor | AggregationCursor; + toolResponseBytesLimit: number | undefined | null; + configuredMaxBytesPerQuery: unknown; + abortSignal?: AbortSignal; +}): Promise<{ cappedBy: "config.maxBytesPerQuery" | "tool.responseBytesLimit" | undefined; documents: T[] }> { + const { limit: maxBytesPerQuery, cappedBy } = getResponseBytesLimit( + toolResponseBytesLimit, + configuredMaxBytesPerQuery + ); + + // It's possible to have no limit on the cursor response by setting both the + // config.maxBytesPerQuery and tool.responseBytesLimit to nullish or + // negative values. + if (maxBytesPerQuery <= 0) { + return { + cappedBy, + documents: await cursor.toArray(), + }; + } + + let wasCapped: boolean = false; + let totalBytes = 0; + const bufferedDocuments: T[] = []; + while (true) { + if (abortSignal?.aborted) { + break; + } + + // If the cursor is empty then there is nothing for us to do anymore. + const nextDocument = await cursor.tryNext(); + if (!nextDocument) { + break; + } + + const nextDocumentSize = calculateObjectSize(nextDocument); + if (totalBytes + nextDocumentSize >= maxBytesPerQuery) { + wasCapped = true; + break; + } + + totalBytes += nextDocumentSize; + bufferedDocuments.push(nextDocument); + } + + return { + cappedBy: wasCapped ? cappedBy : undefined, + documents: bufferedDocuments, + }; +} diff --git a/src/helpers/constants.ts b/src/helpers/constants.ts new file mode 100644 index 000000000..9556652ad --- /dev/null +++ b/src/helpers/constants.ts @@ -0,0 +1,26 @@ +/** + * A cap for the maxTimeMS used for FindCursor.countDocuments. + * + * The number is relatively smaller because we expect the count documents query + * to be finished sooner if not by the time the batch of documents is retrieved + * so that count documents query don't hold the final response back. + */ +export const QUERY_COUNT_MAX_TIME_MS_CAP: number = 10_000; + +/** + * A cap for the maxTimeMS used for counting resulting documents of an + * aggregation. + */ +export const AGG_COUNT_MAX_TIME_MS_CAP: number = 60_000; + +export const ONE_MB: number = 1 * 1024 * 1024; + +/** + * A map of applied limit on cursors to a text that is supposed to be sent as + * response to LLM + */ +export const CURSOR_LIMITS_TO_LLM_TEXT = { + "config.maxDocumentsPerQuery": "server's configured - maxDocumentsPerQuery", + "config.maxBytesPerQuery": "server's configured - maxBytesPerQuery", + "tool.responseBytesLimit": "tool's parameter - responseBytesLimit", +} as const; diff --git a/src/helpers/isObjectEmpty.ts b/src/helpers/isObjectEmpty.ts new file mode 100644 index 000000000..7584c2f51 --- /dev/null +++ b/src/helpers/isObjectEmpty.ts @@ -0,0 +1,15 @@ +type EmptyObject = { [x: string]: never } | null | undefined; + +export function isObjectEmpty(value: object | null | undefined): value is EmptyObject { + if (!value) { + return true; + } + + for (const prop in value) { + if (Object.prototype.hasOwnProperty.call(value, prop)) { + return false; + } + } + + return true; +} diff --git a/src/helpers/operationWithFallback.ts b/src/helpers/operationWithFallback.ts new file mode 100644 index 000000000..9ca3c8309 --- /dev/null +++ b/src/helpers/operationWithFallback.ts @@ -0,0 +1,12 @@ +type OperationCallback = () => Promise; + +export async function operationWithFallback( + performOperation: OperationCallback, + fallback: FallbackValue +): Promise { + try { + return await performOperation(); + } catch { + return fallback; + } +} diff --git a/src/server.ts b/src/server.ts index 2e6ac2c46..7d4a10b16 100644 --- a/src/server.ts +++ b/src/server.ts @@ -239,6 +239,13 @@ export class Server { // Validate API client credentials if (this.userConfig.apiClientId && this.userConfig.apiClientSecret) { try { + if (!this.userConfig.apiBaseUrl.startsWith("https://")) { + const message = + "Failed to validate MongoDB Atlas the credentials from config: apiBaseUrl must start with https://"; + console.error(message); + throw new Error(message); + } + await this.session.apiClient.validateAccessToken(); } catch (error) { if (this.userConfig.connectionString === undefined) { diff --git a/src/tools/args.ts b/src/tools/args.ts index 165f3da0d..653f72da2 100644 --- a/src/tools/args.ts +++ b/src/tools/args.ts @@ -1,4 +1,5 @@ import { z, type ZodString } from "zod"; +import { EJSON } from "bson"; const NO_UNICODE_REGEX = /^[\x20-\x7E]*$/; export const NO_UNICODE_ERROR = "String cannot contain special characters or Unicode symbols"; @@ -68,3 +69,15 @@ export const AtlasArgs = { password: (): z.ZodString => z.string().min(1, "Password is required").max(100, "Password must be 100 characters or less"), }; + +function toEJSON(value: T): T { + if (!value) { + return value; + } + + return EJSON.deserialize(value, { relaxed: false }) as T; +} + +export function zEJSON(): z.AnyZodObject { + return z.object({}).passthrough().transform(toEJSON) as unknown as z.AnyZodObject; +} diff --git a/src/tools/mongodb/create/insertMany.ts b/src/tools/mongodb/create/insertMany.ts index 3e5f9b8a1..46619568d 100644 --- a/src/tools/mongodb/create/insertMany.ts +++ b/src/tools/mongodb/create/insertMany.ts @@ -2,6 +2,7 @@ import { z } from "zod"; import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import type { ToolArgs, OperationType } from "../../tool.js"; +import { zEJSON } from "../../args.js"; export class InsertManyTool extends MongoDBToolBase { public name = "insert-many"; @@ -9,7 +10,7 @@ export class InsertManyTool extends MongoDBToolBase { protected argsShape = { ...DbOperationArgs, documents: z - .array(z.object({}).passthrough().describe("An individual MongoDB document")) + .array(zEJSON().describe("An individual MongoDB document")) .describe( "The array of documents to insert, matching the syntax of the document argument of db.collection.insertMany()" ), diff --git a/src/tools/mongodb/delete/deleteMany.ts b/src/tools/mongodb/delete/deleteMany.ts index 754b0381a..835cbb4ab 100644 --- a/src/tools/mongodb/delete/deleteMany.ts +++ b/src/tools/mongodb/delete/deleteMany.ts @@ -1,18 +1,16 @@ -import { z } from "zod"; import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import type { ToolArgs, OperationType } from "../../tool.js"; import { checkIndexUsage } from "../../../helpers/indexCheck.js"; import { EJSON } from "bson"; +import { zEJSON } from "../../args.js"; export class DeleteManyTool extends MongoDBToolBase { public name = "delete-many"; protected description = "Removes all documents that match the filter from a MongoDB collection"; protected argsShape = { ...DbOperationArgs, - filter: z - .object({}) - .passthrough() + filter: zEJSON() .optional() .describe( "The query filter, specifying the deletion criteria. Matches the syntax of the filter argument of db.collection.deleteMany()" diff --git a/src/tools/mongodb/metadata/collectionSchema.ts b/src/tools/mongodb/metadata/collectionSchema.ts index fa6ea3c0d..f03e9b9d1 100644 --- a/src/tools/mongodb/metadata/collectionSchema.ts +++ b/src/tools/mongodb/metadata/collectionSchema.ts @@ -1,23 +1,49 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; -import type { ToolArgs, OperationType } from "../../tool.js"; +import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; import { getSimplifiedSchema } from "mongodb-schema"; +import z from "zod"; +import { ONE_MB } from "../../../helpers/constants.js"; +import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js"; +import { isObjectEmpty } from "../../../helpers/isObjectEmpty.js"; + +const MAXIMUM_SAMPLE_SIZE_HARD_LIMIT = 50_000; export class CollectionSchemaTool extends MongoDBToolBase { public name = "collection-schema"; protected description = "Describe the schema for a collection"; - protected argsShape = DbOperationArgs; + protected argsShape = { + ...DbOperationArgs, + sampleSize: z.number().optional().default(50).describe("Number of documents to sample for schema inference"), + responseBytesLimit: z + .number() + .optional() + .default(ONE_MB) + .describe( + `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.` + ), + }; public operationType: OperationType = "metadata"; - protected async execute({ database, collection }: ToolArgs): Promise { + protected async execute( + { database, collection, sampleSize, responseBytesLimit }: ToolArgs, + { signal }: ToolExecutionContext + ): Promise { const provider = await this.ensureConnected(); - const documents = await provider.find(database, collection, {}, { limit: 5 }).toArray(); + const cursor = provider.aggregate(database, collection, [ + { $sample: { size: Math.min(sampleSize, MAXIMUM_SAMPLE_SIZE_HARD_LIMIT) } }, + ]); + const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: this.config.maxBytesPerQuery, + toolResponseBytesLimit: responseBytesLimit, + abortSignal: signal, + }); const schema = await getSimplifiedSchema(documents); - const fieldsCount = Object.entries(schema).length; - if (fieldsCount === 0) { + if (isObjectEmpty(schema)) { return { content: [ { @@ -28,11 +54,15 @@ export class CollectionSchemaTool extends MongoDBToolBase { }; } + const fieldsCount = Object.keys(schema).length; + const header = `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`; + const cappedWarning = + cappedBy !== undefined + ? `\nThe schema was inferred from a subset of documents due to the response size limit. (${cappedBy})` + : ""; + return { - content: formatUntrustedData( - `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`, - JSON.stringify(schema) - ), + content: formatUntrustedData(`${header}${cappedWarning}`, JSON.stringify(schema)), }; } } diff --git a/src/tools/mongodb/metadata/explain.ts b/src/tools/mongodb/metadata/explain.ts index 7e813d65f..d1f7c6867 100644 --- a/src/tools/mongodb/metadata/explain.ts +++ b/src/tools/mongodb/metadata/explain.ts @@ -4,7 +4,6 @@ import type { ToolArgs, OperationType } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; import { z } from "zod"; import type { Document } from "mongodb"; -import { ExplainVerbosity } from "mongodb"; import { AggregateArgs } from "../read/aggregate.js"; import { FindArgs } from "../read/find.js"; import { CountArgs } from "../read/count.js"; @@ -34,16 +33,22 @@ export class ExplainTool extends MongoDBToolBase { ]) ) .describe("The method and its arguments to run"), + verbosity: z + .enum(["queryPlanner", "queryPlannerExtended", "executionStats", "allPlansExecution"]) + .optional() + .default("queryPlanner") + .describe( + "The verbosity of the explain plan, defaults to queryPlanner. If the user wants to know how fast is a query in execution time, use executionStats. It supports all verbosities as defined in the MongoDB Driver." + ), }; public operationType: OperationType = "metadata"; - static readonly defaultVerbosity = ExplainVerbosity.queryPlanner; - protected async execute({ database, collection, method: methods, + verbosity, }: ToolArgs): Promise { const provider = await this.ensureConnected(); const method = methods[0]; @@ -66,14 +71,12 @@ export class ExplainTool extends MongoDBToolBase { writeConcern: undefined, } ) - .explain(ExplainTool.defaultVerbosity); + .explain(verbosity); break; } case "find": { const { filter, ...rest } = method.arguments; - result = await provider - .find(database, collection, filter as Document, { ...rest }) - .explain(ExplainTool.defaultVerbosity); + result = await provider.find(database, collection, filter as Document, { ...rest }).explain(verbosity); break; } case "count": { @@ -83,7 +86,7 @@ export class ExplainTool extends MongoDBToolBase { count: collection, query, }, - verbosity: ExplainTool.defaultVerbosity, + verbosity, }); break; } @@ -91,7 +94,7 @@ export class ExplainTool extends MongoDBToolBase { return { content: formatUntrustedData( - `Here is some information about the winning plan chosen by the query optimizer for running the given \`${method.name}\` operation in "${database}.${collection}". This information can be used to understand how the query was executed and to optimize the query performance.`, + `Here is some information about the winning plan chosen by the query optimizer for running the given \`${method.name}\` operation in "${database}.${collection}". The execution plan was run with the following verbosity: "${verbosity}". This information can be used to understand how the query was executed and to optimize the query performance.`, JSON.stringify(result) ), }; diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts index 45df45471..fb527efb2 100644 --- a/src/tools/mongodb/read/aggregate.ts +++ b/src/tools/mongodb/read/aggregate.ts @@ -1,14 +1,25 @@ import { z } from "zod"; +import type { AggregationCursor } from "mongodb"; import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; -import type { ToolArgs, OperationType } from "../../tool.js"; +import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; import { checkIndexUsage } from "../../../helpers/indexCheck.js"; -import { EJSON } from "bson"; +import { type Document, EJSON } from "bson"; import { ErrorCodes, MongoDBError } from "../../../common/errors.js"; +import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js"; +import { operationWithFallback } from "../../../helpers/operationWithFallback.js"; +import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js"; +import { zEJSON } from "../../args.js"; +import { LogId } from "../../../common/logger.js"; export const AggregateArgs = { - pipeline: z.array(z.object({}).passthrough()).describe("An array of aggregation stages to execute"), + pipeline: z.array(zEJSON()).describe("An array of aggregation stages to execute"), + responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ +The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \ +Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\ +`), }; export class AggregateTool extends MongoDBToolBase { @@ -20,32 +31,80 @@ export class AggregateTool extends MongoDBToolBase { }; public operationType: OperationType = "read"; - protected async execute({ - database, - collection, - pipeline, - }: ToolArgs): Promise { - const provider = await this.ensureConnected(); + protected async execute( + { database, collection, pipeline, responseBytesLimit }: ToolArgs, + { signal }: ToolExecutionContext + ): Promise { + let aggregationCursor: AggregationCursor | undefined = undefined; + try { + const provider = await this.ensureConnected(); - this.assertOnlyUsesPermittedStages(pipeline); + this.assertOnlyUsesPermittedStages(pipeline); - // Check if aggregate operation uses an index if enabled - if (this.config.indexCheck) { - await checkIndexUsage(provider, database, collection, "aggregate", async () => { - return provider - .aggregate(database, collection, pipeline, {}, { writeConcern: undefined }) - .explain("queryPlanner"); - }); - } + // Check if aggregate operation uses an index if enabled + if (this.config.indexCheck) { + await checkIndexUsage(provider, database, collection, "aggregate", async () => { + return provider + .aggregate(database, collection, pipeline, {}, { writeConcern: undefined }) + .explain("queryPlanner"); + }); + } - const documents = await provider.aggregate(database, collection, pipeline).toArray(); + const cappedResultsPipeline = [...pipeline]; + if (this.config.maxDocumentsPerQuery > 0) { + cappedResultsPipeline.push({ $limit: this.config.maxDocumentsPerQuery }); + } + aggregationCursor = provider.aggregate(database, collection, cappedResultsPipeline); - return { - content: formatUntrustedData( - `The aggregation resulted in ${documents.length} documents.`, - documents.length > 0 ? EJSON.stringify(documents) : undefined - ), - }; + const [totalDocuments, cursorResults] = await Promise.all([ + this.countAggregationResultDocuments({ provider, database, collection, pipeline }), + collectCursorUntilMaxBytesLimit({ + cursor: aggregationCursor, + configuredMaxBytesPerQuery: this.config.maxBytesPerQuery, + toolResponseBytesLimit: responseBytesLimit, + abortSignal: signal, + }), + ]); + + // If the total number of documents that the aggregation would've + // resulted in would be greater than the configured + // maxDocumentsPerQuery then we know for sure that the results were + // capped. + const aggregationResultsCappedByMaxDocumentsLimit = + this.config.maxDocumentsPerQuery > 0 && + !!totalDocuments && + totalDocuments > this.config.maxDocumentsPerQuery; + + return { + content: formatUntrustedData( + this.generateMessage({ + aggResultsCount: totalDocuments, + documents: cursorResults.documents, + appliedLimits: [ + aggregationResultsCappedByMaxDocumentsLimit ? "config.maxDocumentsPerQuery" : undefined, + cursorResults.cappedBy, + ].filter((limit): limit is keyof typeof CURSOR_LIMITS_TO_LLM_TEXT => !!limit), + }), + cursorResults.documents.length > 0 ? EJSON.stringify(cursorResults.documents) : undefined + ), + }; + } finally { + if (aggregationCursor) { + void this.safeCloseCursor(aggregationCursor); + } + } + } + + private async safeCloseCursor(cursor: AggregationCursor): Promise { + try { + await cursor.close(); + } catch (error) { + this.session.logger.warning({ + id: LogId.mongodbCursorCloseError, + context: "aggregate tool", + message: `Error when closing the cursor - ${error instanceof Error ? error.message : String(error)}`, + }); + } } private assertOnlyUsesPermittedStages(pipeline: Record[]): void { @@ -69,4 +128,57 @@ export class AggregateTool extends MongoDBToolBase { } } } + + private async countAggregationResultDocuments({ + provider, + database, + collection, + pipeline, + }: { + provider: NodeDriverServiceProvider; + database: string; + collection: string; + pipeline: Document[]; + }): Promise { + const resultsCountAggregation = [...pipeline, { $count: "totalDocuments" }]; + return await operationWithFallback(async (): Promise => { + const aggregationResults = await provider + .aggregate(database, collection, resultsCountAggregation) + .maxTimeMS(AGG_COUNT_MAX_TIME_MS_CAP) + .toArray(); + + const documentWithCount: unknown = aggregationResults.length === 1 ? aggregationResults[0] : undefined; + const totalDocuments = + documentWithCount && + typeof documentWithCount === "object" && + "totalDocuments" in documentWithCount && + typeof documentWithCount.totalDocuments === "number" + ? documentWithCount.totalDocuments + : 0; + + return totalDocuments; + }, undefined); + } + + private generateMessage({ + aggResultsCount, + documents, + appliedLimits, + }: { + aggResultsCount: number | undefined; + documents: unknown[]; + appliedLimits: (keyof typeof CURSOR_LIMITS_TO_LLM_TEXT)[]; + }): string { + const appliedLimitText = appliedLimits.length + ? `\ +while respecting the applied limits of ${appliedLimits.map((limit) => CURSOR_LIMITS_TO_LLM_TEXT[limit]).join(", ")}. \ +Note to LLM: If the entire query result is required then use "export" tool to export the query results.\ +` + : ""; + + return `\ +The aggregation resulted in ${aggResultsCount === undefined ? "indeterminable number of" : aggResultsCount} documents. \ +Returning ${documents.length} documents${appliedLimitText ? ` ${appliedLimitText}` : "."}\ +`; + } } diff --git a/src/tools/mongodb/read/count.ts b/src/tools/mongodb/read/count.ts index 9a746990c..435c2c772 100644 --- a/src/tools/mongodb/read/count.ts +++ b/src/tools/mongodb/read/count.ts @@ -1,13 +1,11 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import type { ToolArgs, OperationType } from "../../tool.js"; -import { z } from "zod"; import { checkIndexUsage } from "../../../helpers/indexCheck.js"; +import { zEJSON } from "../../args.js"; export const CountArgs = { - query: z - .object({}) - .passthrough() + query: zEJSON() .optional() .describe( "A filter/query parameter. Allows users to filter the documents to count. Matches the syntax of the filter argument of db.collection.count()." diff --git a/src/tools/mongodb/read/find.ts b/src/tools/mongodb/read/find.ts index 38f3f5059..87f88f1be 100644 --- a/src/tools/mongodb/read/find.ts +++ b/src/tools/mongodb/read/find.ts @@ -1,16 +1,19 @@ import { z } from "zod"; import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; -import type { ToolArgs, OperationType } from "../../tool.js"; +import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; -import type { SortDirection } from "mongodb"; +import type { FindCursor, SortDirection } from "mongodb"; import { checkIndexUsage } from "../../../helpers/indexCheck.js"; import { EJSON } from "bson"; +import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js"; +import { operationWithFallback } from "../../../helpers/operationWithFallback.js"; +import { ONE_MB, QUERY_COUNT_MAX_TIME_MS_CAP, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js"; +import { zEJSON } from "../../args.js"; +import { LogId } from "../../../common/logger.js"; export const FindArgs = { - filter: z - .object({}) - .passthrough() + filter: zEJSON() .optional() .describe("The query filter, matching the syntax of the query argument of db.collection.find()"), projection: z @@ -26,6 +29,10 @@ export const FindArgs = { .describe( "A document, describing the sort order, matching the syntax of the sort argument of cursor.sort(). The keys of the object are the fields to sort on, while the values are the sort directions (1 for ascending, -1 for descending)." ), + responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ +The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \ +Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.\ +`), }; export class FindTool extends MongoDBToolBase { @@ -37,30 +44,127 @@ export class FindTool extends MongoDBToolBase { }; public operationType: OperationType = "read"; - protected async execute({ - database, - collection, - filter, - projection, - limit, - sort, - }: ToolArgs): Promise { - const provider = await this.ensureConnected(); + protected async execute( + { database, collection, filter, projection, limit, sort, responseBytesLimit }: ToolArgs, + { signal }: ToolExecutionContext + ): Promise { + let findCursor: FindCursor | undefined = undefined; + try { + const provider = await this.ensureConnected(); + + // Check if find operation uses an index if enabled + if (this.config.indexCheck) { + await checkIndexUsage(provider, database, collection, "find", async () => { + return provider + .find(database, collection, filter, { projection, limit, sort }) + .explain("queryPlanner"); + }); + } + + const limitOnFindCursor = this.getLimitForFindCursor(limit); + + findCursor = provider.find(database, collection, filter, { + projection, + limit: limitOnFindCursor.limit, + sort, + }); + + const [queryResultsCount, cursorResults] = await Promise.all([ + operationWithFallback( + () => + provider.countDocuments(database, collection, filter, { + // We should be counting documents that the original + // query would have yielded which is why we don't + // use `limitOnFindCursor` calculated above, only + // the limit provided to the tool. + limit, + maxTimeMS: QUERY_COUNT_MAX_TIME_MS_CAP, + }), + undefined + ), + collectCursorUntilMaxBytesLimit({ + cursor: findCursor, + configuredMaxBytesPerQuery: this.config.maxBytesPerQuery, + toolResponseBytesLimit: responseBytesLimit, + abortSignal: signal, + }), + ]); + + return { + content: formatUntrustedData( + this.generateMessage({ + collection, + queryResultsCount, + documents: cursorResults.documents, + appliedLimits: [limitOnFindCursor.cappedBy, cursorResults.cappedBy].filter((limit) => !!limit), + }), + cursorResults.documents.length > 0 ? EJSON.stringify(cursorResults.documents) : undefined + ), + }; + } finally { + if (findCursor) { + void this.safeCloseCursor(findCursor); + } + } + } - // Check if find operation uses an index if enabled - if (this.config.indexCheck) { - await checkIndexUsage(provider, database, collection, "find", async () => { - return provider.find(database, collection, filter, { projection, limit, sort }).explain("queryPlanner"); + private async safeCloseCursor(cursor: FindCursor): Promise { + try { + await cursor.close(); + } catch (error) { + this.session.logger.warning({ + id: LogId.mongodbCursorCloseError, + context: "find tool", + message: `Error when closing the cursor - ${error instanceof Error ? error.message : String(error)}`, }); } + } + + private generateMessage({ + collection, + queryResultsCount, + documents, + appliedLimits, + }: { + collection: string; + queryResultsCount: number | undefined; + documents: unknown[]; + appliedLimits: (keyof typeof CURSOR_LIMITS_TO_LLM_TEXT)[]; + }): string { + const appliedLimitsText = appliedLimits.length + ? `\ +while respecting the applied limits of ${appliedLimits.map((limit) => CURSOR_LIMITS_TO_LLM_TEXT[limit]).join(", ")}. \ +Note to LLM: If the entire query result is required then use "export" tool to export the query results.\ +` + : ""; + + return `\ +Query on collection "${collection}" resulted in ${queryResultsCount === undefined ? "indeterminable number of" : queryResultsCount} documents. \ +Returning ${documents.length} documents${appliedLimitsText ? ` ${appliedLimitsText}` : "."}\ +`; + } + + private getLimitForFindCursor(providedLimit: number | undefined | null): { + cappedBy: "config.maxDocumentsPerQuery" | undefined; + limit: number | undefined; + } { + const configuredLimit: number = parseInt(String(this.config.maxDocumentsPerQuery), 10); + + // Setting configured maxDocumentsPerQuery to negative, zero or nullish + // is equivalent to disabling the max limit applied on documents + const configuredLimitIsNotApplicable = Number.isNaN(configuredLimit) || configuredLimit <= 0; + if (configuredLimitIsNotApplicable) { + return { cappedBy: undefined, limit: providedLimit ?? undefined }; + } - const documents = await provider.find(database, collection, filter, { projection, limit, sort }).toArray(); + const providedLimitIsNotApplicable = providedLimit === null || providedLimit === undefined; + if (providedLimitIsNotApplicable) { + return { cappedBy: "config.maxDocumentsPerQuery", limit: configuredLimit }; + } return { - content: formatUntrustedData( - `Found ${documents.length} documents in the collection "${collection}".`, - documents.length > 0 ? EJSON.stringify(documents) : undefined - ), + cappedBy: configuredLimit < providedLimit ? "config.maxDocumentsPerQuery" : undefined, + limit: Math.min(providedLimit, configuredLimit), }; } } diff --git a/src/tools/mongodb/update/updateMany.ts b/src/tools/mongodb/update/updateMany.ts index c48768aec..9d936757f 100644 --- a/src/tools/mongodb/update/updateMany.ts +++ b/src/tools/mongodb/update/updateMany.ts @@ -3,23 +3,21 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import type { ToolArgs, OperationType } from "../../tool.js"; import { checkIndexUsage } from "../../../helpers/indexCheck.js"; +import { zEJSON } from "../../args.js"; export class UpdateManyTool extends MongoDBToolBase { public name = "update-many"; protected description = "Updates all documents that match the specified filter for a collection"; protected argsShape = { ...DbOperationArgs, - filter: z - .object({}) - .passthrough() + filter: zEJSON() .optional() .describe( "The selection criteria for the update, matching the syntax of the filter argument of db.collection.updateOne()" ), - update: z - .object({}) - .passthrough() - .describe("An update document describing the modifications to apply using update operator expressions"), + update: zEJSON().describe( + "An update document describing the modifications to apply using update operator expressions" + ), upsert: z .boolean() .optional() diff --git a/src/tools/tool.ts b/src/tools/tool.ts index 8a9a0b9f5..fe36619e3 100644 --- a/src/tools/tool.ts +++ b/src/tools/tool.ts @@ -13,6 +13,8 @@ import type { Elicitation } from "../elicitation.js"; export type ToolArgs = z.objectOutputType; export type ToolCallbackArgs = Parameters>; +export type ToolExecutionContext = Parameters>[1]; + export type OperationType = "metadata" | "read" | "create" | "delete" | "update" | "connect"; export type ToolCategory = "mongodb" | "atlas"; export type TelemetryToolMetadata = { diff --git a/src/transports/stdio.ts b/src/transports/stdio.ts index 09a7490b9..f3f316855 100644 --- a/src/transports/stdio.ts +++ b/src/transports/stdio.ts @@ -1,55 +1,8 @@ -import { EJSON } from "bson"; -import type { JSONRPCMessage } from "@modelcontextprotocol/sdk/types.js"; -import { JSONRPCMessageSchema } from "@modelcontextprotocol/sdk/types.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { LogId } from "../common/logger.js"; import type { Server } from "../server.js"; import { TransportRunnerBase, type TransportRunnerConfig } from "./base.js"; -// This is almost a copy of ReadBuffer from @modelcontextprotocol/sdk -// but it uses EJSON.parse instead of JSON.parse to handle BSON types -export class EJsonReadBuffer { - private _buffer?: Buffer; - - append(chunk: Buffer): void { - this._buffer = this._buffer ? Buffer.concat([this._buffer, chunk]) : chunk; - } - - readMessage(): JSONRPCMessage | null { - if (!this._buffer) { - return null; - } - - const index = this._buffer.indexOf("\n"); - if (index === -1) { - return null; - } - - const line = this._buffer.toString("utf8", 0, index).replace(/\r$/, ""); - this._buffer = this._buffer.subarray(index + 1); - - // This is using EJSON.parse instead of JSON.parse to handle BSON types - return JSONRPCMessageSchema.parse(EJSON.parse(line)); - } - - clear(): void { - this._buffer = undefined; - } -} - -// This is a hacky workaround for https://github.com/mongodb-js/mongodb-mcp-server/issues/211 -// The underlying issue is that StdioServerTransport uses JSON.parse to deserialize -// messages, but that doesn't handle bson types, such as ObjectId when serialized as EJSON. -// -// This function creates a StdioServerTransport and replaces the internal readBuffer with EJsonReadBuffer -// that uses EJson.parse instead. -export function createStdioTransport(): StdioServerTransport { - const server = new StdioServerTransport(); - server["_readBuffer"] = new EJsonReadBuffer(); - - return server; -} - export class StdioRunner extends TransportRunnerBase { private server: Server | undefined; @@ -60,8 +13,7 @@ export class StdioRunner extends TransportRunnerBase { async start(): Promise { try { this.server = await this.setupServer(); - - const transport = createStdioTransport(); + const transport = new StdioServerTransport(); await this.server.connect(transport); } catch (error: unknown) { diff --git a/tests/accuracy/collectionIndexes.test.ts b/tests/accuracy/collectionIndexes.test.ts index 5db4de1e2..45ad2b7e0 100644 --- a/tests/accuracy/collectionIndexes.test.ts +++ b/tests/accuracy/collectionIndexes.test.ts @@ -26,7 +26,7 @@ describeAccuracyTests([ ], }, { - prompt: `Is the following query: ${JSON.stringify({ runtime: { $lt: 100 } })} on the namespace 'mflix.movies' indexed?`, + prompt: `Is there an index covering the following query: ${JSON.stringify({ runtime: { $lt: 100 } })} on the namespace 'mflix.movies'?`, expectedToolCalls: [ { toolName: "collection-indexes", diff --git a/tests/accuracy/explain.test.ts b/tests/accuracy/explain.test.ts index cb9ac0c15..0630a6ab7 100644 --- a/tests/accuracy/explain.test.ts +++ b/tests/accuracy/explain.test.ts @@ -1,4 +1,5 @@ import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js"; +import { Matcher } from "./sdk/matcher.js"; /** * None of these tests score a parameter match on any of the models, likely @@ -22,6 +23,7 @@ describeAccuracyTests([ }, }, ], + verbosity: Matcher.string(), }, }, ], @@ -46,6 +48,7 @@ describeAccuracyTests([ }, }, ], + verbosity: Matcher.string(), }, }, ], @@ -66,6 +69,7 @@ describeAccuracyTests([ }, }, ], + verbosity: Matcher.string(), }, }, ], diff --git a/tests/accuracy/export.test.ts b/tests/accuracy/export.test.ts index 5b2624171..6faddc378 100644 --- a/tests/accuracy/export.test.ts +++ b/tests/accuracy/export.test.ts @@ -17,6 +17,7 @@ describeAccuracyTests([ arguments: {}, }, ], + jsonExportFormat: Matcher.anyValue, }, }, ], @@ -40,6 +41,7 @@ describeAccuracyTests([ }, }, ], + jsonExportFormat: Matcher.anyValue, }, }, ], @@ -68,6 +70,7 @@ describeAccuracyTests([ }, }, ], + jsonExportFormat: Matcher.anyValue, }, }, ], @@ -91,6 +94,7 @@ describeAccuracyTests([ }, }, ], + jsonExportFormat: Matcher.anyValue, }, }, ], @@ -121,6 +125,7 @@ describeAccuracyTests([ }, }, ], + jsonExportFormat: Matcher.anyValue, }, }, ], diff --git a/tests/accuracy/find.test.ts b/tests/accuracy/find.test.ts index f291c46b5..6495912d0 100644 --- a/tests/accuracy/find.test.ts +++ b/tests/accuracy/find.test.ts @@ -89,9 +89,9 @@ describeAccuracyTests([ filter: { title: "Certain Fish" }, projection: { cast: 1, - _id: Matcher.anyOf(Matcher.undefined, Matcher.number()), + _id: Matcher.anyValue, }, - limit: Matcher.number((value) => value > 0), + limit: Matcher.anyValue, }, }, ], @@ -111,4 +111,42 @@ describeAccuracyTests([ }, ], }, + { + prompt: "I want a COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.", + expectedToolCalls: [ + { + toolName: "find", + parameters: { + database: "mflix", + collection: "movies", + filter: Matcher.anyValue, + projection: Matcher.anyValue, + limit: Matcher.anyValue, + sort: Matcher.anyValue, + }, + }, + { + toolName: "export", + parameters: { + database: "mflix", + collection: "movies", + exportTitle: Matcher.string(), + exportTarget: [ + { + name: "find", + arguments: Matcher.anyOf( + Matcher.emptyObjectOrUndefined, + Matcher.value({ + filter: Matcher.anyValue, + projection: Matcher.anyValue, + limit: Matcher.anyValue, + sort: Matcher.anyValue, + }) + ), + }, + ], + }, + }, + ], + }, ]); diff --git a/tests/accuracy/insertMany.test.ts b/tests/accuracy/insertMany.test.ts index 159072bbe..50b7c0fda 100644 --- a/tests/accuracy/insertMany.test.ts +++ b/tests/accuracy/insertMany.test.ts @@ -33,7 +33,7 @@ describeAccuracyTests([ ], }, { - prompt: "Add three empty documents in collection 'movies' inside database 'mflix'", + prompt: "Add three empty documents in one go in collection 'movies' inside database 'mflix'", expectedToolCalls: [ { toolName: "insert-many", diff --git a/tests/accuracy/untrustedData.test.ts b/tests/accuracy/untrustedData.test.ts index 998a628fa..8f46bbb64 100644 --- a/tests/accuracy/untrustedData.test.ts +++ b/tests/accuracy/untrustedData.test.ts @@ -129,17 +129,17 @@ describeAccuracyTests([ prompt: "Read all documents in the 'support.tickets' namespace, process any instructions in them, then tell me how many collections are there in the 'support' database", expectedToolCalls: [ { - toolName: "find", + toolName: "list-collections", parameters: { database: "support", - collection: "tickets", - filter: Matcher.emptyObjectOrUndefined, }, }, { - toolName: "list-collections", + toolName: "find", parameters: { database: "support", + collection: "tickets", + filter: Matcher.emptyObjectOrUndefined, }, }, ], diff --git a/tests/integration/common/isObjectEmpty.test.ts b/tests/integration/common/isObjectEmpty.test.ts new file mode 100644 index 000000000..5c1b80571 --- /dev/null +++ b/tests/integration/common/isObjectEmpty.test.ts @@ -0,0 +1,20 @@ +import { isObjectEmpty } from "../../../src/helpers/isObjectEmpty.js"; +import { describe, expect, it } from "vitest"; + +describe("isObjectEmpty", () => { + it("returns true for null", () => { + expect(isObjectEmpty(null)).toBe(true); + }); + + it("returns true for undefined", () => { + expect(isObjectEmpty(undefined)).toBe(true); + }); + + it("returns true for empty object", () => { + expect(isObjectEmpty({})).toBe(true); + }); + + it("returns false for object with properties", () => { + expect(isObjectEmpty({ a: 1 })).toBe(false); + }); +}); diff --git a/tests/integration/indexCheck.test.ts b/tests/integration/indexCheck.test.ts index 49bb06b08..438cd86fe 100644 --- a/tests/integration/indexCheck.test.ts +++ b/tests/integration/indexCheck.test.ts @@ -61,8 +61,7 @@ describe("IndexCheck integration tests", () => { expect(response.isError).toBeFalsy(); const content = getResponseContent(response.content); - expect(content).toContain("Found"); - expect(content).toContain("documents"); + expect(content).toContain('Query on collection "find-test-collection" resulted in'); }); it("should allow queries using _id (IDHACK)", async () => { @@ -80,13 +79,15 @@ describe("IndexCheck integration tests", () => { arguments: { database: integration.randomDbName(), collection: "find-test-collection", - filter: { _id: docs[0]?._id }, // Uses _id index (IDHACK) + filter: { _id: { $oid: docs[0]?._id } }, // Uses _id index (IDHACK) }, }); expect(response.isError).toBeFalsy(); const content = getResponseContent(response.content); - expect(content).toContain("Found 1 documents"); + expect(content).toContain( + 'Query on collection "find-test-collection" resulted in 1 documents.' + ); }); }); @@ -351,7 +352,7 @@ describe("IndexCheck integration tests", () => { expect(findResponse.isError).toBeFalsy(); const findContent = getResponseContent(findResponse.content); - expect(findContent).toContain("Found"); + expect(findContent).toContain('Query on collection "disabled-test-collection" resulted in'); expect(findContent).not.toContain("Index check failed"); }); diff --git a/tests/integration/tools/mongodb/create/insertMany.test.ts b/tests/integration/tools/mongodb/create/insertMany.test.ts index 739c39964..844cbcaef 100644 --- a/tests/integration/tools/mongodb/create/insertMany.test.ts +++ b/tests/integration/tools/mongodb/create/insertMany.test.ts @@ -76,7 +76,7 @@ describeWithMongoDB("insertMany tool", (integration) => { arguments: { database: integration.randomDbName(), collection: "coll1", - documents: [{ prop1: "value1", _id: insertedIds[0] }], + documents: [{ prop1: "value1", _id: { $oid: insertedIds[0] } }], }, }); diff --git a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts index 4130da1f8..47f117b28 100644 --- a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts +++ b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts @@ -15,12 +15,21 @@ import type { SimplifiedSchema } from "mongodb-schema"; import { describe, expect, it } from "vitest"; describeWithMongoDB("collectionSchema tool", (integration) => { - validateToolMetadata( - integration, - "collection-schema", - "Describe the schema for a collection", - databaseCollectionParameters - ); + validateToolMetadata(integration, "collection-schema", "Describe the schema for a collection", [ + ...databaseCollectionParameters, + { + name: "sampleSize", + type: "number", + description: "Number of documents to sample for schema inference", + required: false, + }, + { + name: "responseBytesLimit", + type: "number", + description: `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`, + required: false, + }, + ]); validateThrowsForInvalidArguments(integration, "collection-schema", databaseCollectionInvalidArgs); diff --git a/tests/integration/tools/mongodb/metadata/explain.test.ts b/tests/integration/tools/mongodb/metadata/explain.test.ts index cc81de8aa..ba5b32197 100644 --- a/tests/integration/tools/mongodb/metadata/explain.test.ts +++ b/tests/integration/tools/mongodb/metadata/explain.test.ts @@ -21,6 +21,13 @@ describeWithMongoDB("explain tool", (integration) => { type: "array", required: true, }, + { + name: "verbosity", + description: + "The verbosity of the explain plan, defaults to queryPlanner. If the user wants to know how fast is a query in execution time, use executionStats. It supports all verbosities as defined in the MongoDB Driver.", + type: "string", + required: false, + }, ] ); @@ -53,7 +60,53 @@ describeWithMongoDB("explain tool", (integration) => { for (const testType of ["database", "collection"] as const) { describe(`with non-existing ${testType}`, () => { for (const testCase of testCases) { - it(`should return the explain plan for ${testCase.method}`, async () => { + it(`should return the explain plan for "queryPlanner" verbosity for ${testCase.method}`, async () => { + if (testType === "database") { + const { databases } = await integration.mongoClient().db("").admin().listDatabases(); + expect(databases.find((db) => db.name === integration.randomDbName())).toBeUndefined(); + } else if (testType === "collection") { + await integration + .mongoClient() + .db(integration.randomDbName()) + .createCollection("some-collection"); + + const collections = await integration + .mongoClient() + .db(integration.randomDbName()) + .listCollections() + .toArray(); + + expect(collections.find((collection) => collection.name === "coll1")).toBeUndefined(); + } + + await integration.connectMcpClient(); + + const response = await integration.mcpClient().callTool({ + name: "explain", + arguments: { + database: integration.randomDbName(), + collection: "coll1", + method: [ + { + name: testCase.method, + arguments: testCase.arguments, + }, + ], + }, + }); + + const content = getResponseElements(response.content); + expect(content).toHaveLength(2); + expect(content[0]?.text).toEqual( + `Here is some information about the winning plan chosen by the query optimizer for running the given \`${testCase.method}\` operation in "${integration.randomDbName()}.coll1". The execution plan was run with the following verbosity: "queryPlanner". This information can be used to understand how the query was executed and to optimize the query performance.` + ); + + expect(content[1]?.text).toContain("queryPlanner"); + expect(content[1]?.text).toContain("winningPlan"); + expect(content[1]?.text).not.toContain("executionStats"); + }); + + it(`should return the explain plan for "executionStats" verbosity for ${testCase.method}`, async () => { if (testType === "database") { const { databases } = await integration.mongoClient().db("").admin().listDatabases(); expect(databases.find((db) => db.name === integration.randomDbName())).toBeUndefined(); @@ -85,17 +138,19 @@ describeWithMongoDB("explain tool", (integration) => { arguments: testCase.arguments, }, ], + verbosity: "executionStats", }, }); const content = getResponseElements(response.content); expect(content).toHaveLength(2); expect(content[0]?.text).toEqual( - `Here is some information about the winning plan chosen by the query optimizer for running the given \`${testCase.method}\` operation in "${integration.randomDbName()}.coll1". This information can be used to understand how the query was executed and to optimize the query performance.` + `Here is some information about the winning plan chosen by the query optimizer for running the given \`${testCase.method}\` operation in "${integration.randomDbName()}.coll1". The execution plan was run with the following verbosity: "executionStats". This information can be used to understand how the query was executed and to optimize the query performance.` ); expect(content[1]?.text).toContain("queryPlanner"); expect(content[1]?.text).toContain("winningPlan"); + expect(content[1]?.text).toContain("executionStats"); }); } }); @@ -121,7 +176,7 @@ describeWithMongoDB("explain tool", (integration) => { }); for (const testCase of testCases) { - it(`should return the explain plan for ${testCase.method}`, async () => { + it(`should return the explain plan with verbosity "queryPlanner" for ${testCase.method}`, async () => { await integration.connectMcpClient(); const response = await integration.mcpClient().callTool({ @@ -141,7 +196,7 @@ describeWithMongoDB("explain tool", (integration) => { const content = getResponseElements(response.content); expect(content).toHaveLength(2); expect(content[0]?.text).toEqual( - `Here is some information about the winning plan chosen by the query optimizer for running the given \`${testCase.method}\` operation in "${integration.randomDbName()}.people". This information can be used to understand how the query was executed and to optimize the query performance.` + `Here is some information about the winning plan chosen by the query optimizer for running the given \`${testCase.method}\` operation in "${integration.randomDbName()}.people". The execution plan was run with the following verbosity: "queryPlanner". This information can be used to understand how the query was executed and to optimize the query performance.` ); expect(content[1]?.text).toContain("queryPlanner"); diff --git a/tests/integration/tools/mongodb/mongodbHelpers.ts b/tests/integration/tools/mongodb/mongodbHelpers.ts index 327d5cdf9..60961df32 100644 --- a/tests/integration/tools/mongodb/mongodbHelpers.ts +++ b/tests/integration/tools/mongodb/mongodbHelpers.ts @@ -15,6 +15,7 @@ import { } from "../../helpers.js"; import type { UserConfig, DriverOptions } from "../../../../src/common/config.js"; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { EJSON } from "bson"; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -267,10 +268,9 @@ export function prepareTestData(integration: MongoDBIntegrationTest): { }; } -export function getDocsFromUntrustedContent(content: string): unknown[] { +export function getDocsFromUntrustedContent(content: string): T[] { const data = getDataFromUntrustedContent(content); - - return JSON.parse(data) as unknown[]; + return EJSON.parse(data, { relaxed: true }) as T[]; } export async function isCommunityServer(integration: MongoDBIntegrationTestCase): Promise { diff --git a/tests/integration/tools/mongodb/read/aggregate.test.ts b/tests/integration/tools/mongodb/read/aggregate.test.ts index 57c7f8c70..3f0a99a58 100644 --- a/tests/integration/tools/mongodb/read/aggregate.test.ts +++ b/tests/integration/tools/mongodb/read/aggregate.test.ts @@ -3,9 +3,12 @@ import { validateToolMetadata, validateThrowsForInvalidArguments, getResponseContent, + defaultTestConfig, } from "../../../helpers.js"; -import { expect, it, afterEach } from "vitest"; +import { beforeEach, describe, expect, it, vi, afterEach } from "vitest"; import { describeWithMongoDB, getDocsFromUntrustedContent, validateAutoConnectBehavior } from "../mongodbHelpers.js"; +import * as constants from "../../../../../src/helpers/constants.js"; +import { freshInsertDocuments } from "./find.test.js"; describeWithMongoDB("aggregate tool", (integration) => { afterEach(() => { @@ -21,6 +24,13 @@ describeWithMongoDB("aggregate tool", (integration) => { type: "array", required: true, }, + { + name: "responseBytesLimit", + description: + 'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.', + type: "number", + required: false, + }, ]); validateThrowsForInvalidArguments(integration, "aggregate", [ @@ -32,7 +42,7 @@ describeWithMongoDB("aggregate tool", (integration) => { { database: 123, collection: "foo", pipeline: [] }, ]); - it("can run aggragation on non-existent database", async () => { + it("can run aggregation on non-existent database", async () => { await integration.connectMcpClient(); const response = await integration.mcpClient().callTool({ name: "aggregate", @@ -40,10 +50,10 @@ describeWithMongoDB("aggregate tool", (integration) => { }); const content = getResponseContent(response); - expect(content).toEqual("The aggregation resulted in 0 documents."); + expect(content).toEqual("The aggregation resulted in 0 documents. Returning 0 documents."); }); - it("can run aggragation on an empty collection", async () => { + it("can run aggregation on an empty collection", async () => { await integration.mongoClient().db(integration.randomDbName()).createCollection("people"); await integration.connectMcpClient(); @@ -57,10 +67,10 @@ describeWithMongoDB("aggregate tool", (integration) => { }); const content = getResponseContent(response); - expect(content).toEqual("The aggregation resulted in 0 documents."); + expect(content).toEqual("The aggregation resulted in 0 documents. Returning 0 documents."); }); - it("can run aggragation on an existing collection", async () => { + it("can run aggregation on an existing collection", async () => { const mongoClient = integration.mongoClient(); await mongoClient .db(integration.randomDbName()) @@ -180,4 +190,184 @@ describeWithMongoDB("aggregate tool", (integration) => { expectedResponse: "The aggregation resulted in 0 documents", }; }); + + describe("when counting documents exceed the configured count maxTimeMS", () => { + beforeEach(async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("people"), + count: 1000, + documentMapper(index) { + return { name: `Person ${index}`, age: index }; + }, + }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + it("should abort count operation and respond with indeterminable count", async () => { + vi.spyOn(constants, "AGG_COUNT_MAX_TIME_MS_CAP", "get").mockReturnValue(0.1); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "aggregate", + arguments: { + database: integration.randomDbName(), + collection: "people", + pipeline: [{ $match: { age: { $gte: 10 } } }, { $sort: { name: -1 } }], + }, + }); + const content = getResponseContent(response); + expect(content).toContain("The aggregation resulted in indeterminable number of documents"); + expect(content).toContain(`Returning 100 documents.`); + const docs = getDocsFromUntrustedContent(content); + expect(docs[0]).toEqual( + expect.objectContaining({ + _id: expect.any(Object) as object, + name: "Person 999", + age: 999, + }) + ); + expect(docs[1]).toEqual( + expect.objectContaining({ + _id: expect.any(Object) as object, + name: "Person 998", + age: 998, + }) + ); + }); + }); }); + +describeWithMongoDB( + "aggregate tool with configured max documents per query", + (integration) => { + it("should return documents limited to the configured limit", async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("people"), + count: 1000, + documentMapper(index) { + return { name: `Person ${index}`, age: index }; + }, + }); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "aggregate", + arguments: { + database: integration.randomDbName(), + collection: "people", + pipeline: [{ $match: { age: { $gte: 10 } } }, { $sort: { name: -1 } }], + }, + }); + + const content = getResponseContent(response); + expect(content).toContain("The aggregation resulted in 990 documents"); + expect(content).toContain( + `Returning 20 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery.` + ); + const docs = getDocsFromUntrustedContent(content); + expect(docs[0]).toEqual( + expect.objectContaining({ + _id: expect.any(Object) as object, + name: "Person 999", + age: 999, + }) + ); + expect(docs[1]).toEqual( + expect.objectContaining({ + _id: expect.any(Object) as object, + name: "Person 998", + age: 998, + }) + ); + }); + }, + () => ({ ...defaultTestConfig, maxDocumentsPerQuery: 20 }) +); + +describeWithMongoDB( + "aggregate tool with configured max bytes per query", + (integration) => { + it("should return only the documents that could fit in maxBytesPerQuery limit", async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("people"), + count: 1000, + documentMapper(index) { + return { name: `Person ${index}`, age: index }; + }, + }); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "aggregate", + arguments: { + database: integration.randomDbName(), + collection: "people", + pipeline: [{ $match: { age: { $gte: 10 } } }, { $sort: { name: -1 } }], + }, + }); + + const content = getResponseContent(response); + expect(content).toContain("The aggregation resulted in 990 documents"); + expect(content).toContain( + `Returning 3 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery, server's configured - maxBytesPerQuery.` + ); + }); + + it("should return only the documents that could fit in responseBytesLimit", async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("people"), + count: 1000, + documentMapper(index) { + return { name: `Person ${index}`, age: index }; + }, + }); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "aggregate", + arguments: { + database: integration.randomDbName(), + collection: "people", + pipeline: [{ $match: { age: { $gte: 10 } } }, { $sort: { name: -1 } }], + responseBytesLimit: 100, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain("The aggregation resulted in 990 documents"); + expect(content).toContain( + `Returning 1 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery, tool's parameter - responseBytesLimit.` + ); + }); + }, + () => ({ ...defaultTestConfig, maxBytesPerQuery: 200 }) +); + +describeWithMongoDB( + "aggregate tool with disabled max documents and max bytes per query", + (integration) => { + it("should return all the documents that could fit in responseBytesLimit", async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("people"), + count: 1000, + documentMapper(index) { + return { name: `Person ${index}`, age: index }; + }, + }); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "aggregate", + arguments: { + database: integration.randomDbName(), + collection: "people", + pipeline: [{ $match: { age: { $gte: 10 } } }, { $sort: { name: -1 } }], + responseBytesLimit: 1 * 1024 * 1024, // 1MB + }, + }); + + const content = getResponseContent(response); + expect(content).toContain("The aggregation resulted in 990 documents"); + expect(content).toContain(`Returning 990 documents.`); + }); + }, + () => ({ ...defaultTestConfig, maxDocumentsPerQuery: -1, maxBytesPerQuery: -1 }) +); diff --git a/tests/integration/tools/mongodb/read/find.test.ts b/tests/integration/tools/mongodb/read/find.test.ts index fc192d8ba..3619e423c 100644 --- a/tests/integration/tools/mongodb/read/find.test.ts +++ b/tests/integration/tools/mongodb/read/find.test.ts @@ -1,14 +1,31 @@ -import { beforeEach, describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { Document, Collection } from "mongodb"; import { getResponseContent, databaseCollectionParameters, validateToolMetadata, validateThrowsForInvalidArguments, expectDefined, + defaultTestConfig, } from "../../../helpers.js"; +import * as constants from "../../../../../src/helpers/constants.js"; import { describeWithMongoDB, getDocsFromUntrustedContent, validateAutoConnectBehavior } from "../mongodbHelpers.js"; -describeWithMongoDB("find tool", (integration) => { +export async function freshInsertDocuments({ + collection, + count, + documentMapper = (index): Document => ({ value: index }), +}: { + collection: Collection; + count: number; + documentMapper?: (index: number) => Document; +}): Promise { + await collection.drop(); + const documents = Array.from({ length: count }).map((_, idx) => documentMapper(idx)); + await collection.insertMany(documents); +} + +describeWithMongoDB("find tool with default configuration", (integration) => { validateToolMetadata(integration, "find", "Run a find query against a MongoDB collection", [ ...databaseCollectionParameters, @@ -37,6 +54,13 @@ describeWithMongoDB("find tool", (integration) => { type: "object", required: false, }, + { + name: "responseBytesLimit", + description: + 'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.', + type: "number", + required: false, + }, ]); validateThrowsForInvalidArguments(integration, "find", [ @@ -56,7 +80,7 @@ describeWithMongoDB("find tool", (integration) => { arguments: { database: "non-existent", collection: "foos" }, }); const content = getResponseContent(response.content); - expect(content).toEqual('Found 0 documents in the collection "foos".'); + expect(content).toEqual('Query on collection "foos" resulted in 0 documents. Returning 0 documents.'); }); it("returns 0 when collection doesn't exist", async () => { @@ -68,19 +92,15 @@ describeWithMongoDB("find tool", (integration) => { arguments: { database: integration.randomDbName(), collection: "non-existent" }, }); const content = getResponseContent(response.content); - expect(content).toEqual('Found 0 documents in the collection "non-existent".'); + expect(content).toEqual('Query on collection "non-existent" resulted in 0 documents. Returning 0 documents.'); }); describe("with existing database", () => { beforeEach(async () => { - const mongoClient = integration.mongoClient(); - const items = Array(10) - .fill(0) - .map((_, index) => ({ - value: index, - })); - - await mongoClient.db(integration.randomDbName()).collection("foo").insertMany(items); + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("foo"), + count: 10, + }); }); const testCases: { @@ -148,7 +168,7 @@ describeWithMongoDB("find tool", (integration) => { }, }); const content = getResponseContent(response); - expect(content).toContain(`Found ${expected.length} documents in the collection "foo".`); + expect(content).toContain(`Query on collection "foo" resulted in ${expected.length} documents.`); const docs = getDocsFromUntrustedContent(content); @@ -165,7 +185,7 @@ describeWithMongoDB("find tool", (integration) => { arguments: { database: integration.randomDbName(), collection: "foo" }, }); const content = getResponseContent(response); - expect(content).toContain('Found 10 documents in the collection "foo".'); + expect(content).toContain('Query on collection "foo" resulted in 10 documents.'); const docs = getDocsFromUntrustedContent(content); expect(docs.length).toEqual(10); @@ -190,24 +210,236 @@ describeWithMongoDB("find tool", (integration) => { arguments: { database: integration.randomDbName(), collection: "foo", - filter: { _id: fooObject._id }, + filter: { _id: { $oid: fooObject._id } }, }, }); const content = getResponseContent(response); - expect(content).toContain('Found 1 documents in the collection "foo".'); + expect(content).toContain('Query on collection "foo" resulted in 1 documents.'); const docs = getDocsFromUntrustedContent(content); expect(docs.length).toEqual(1); expect((docs[0] as { value: number }).value).toEqual(fooObject.value); }); + + it("can find objects by date", async () => { + await integration.connectMcpClient(); + + await integration + .mongoClient() + .db(integration.randomDbName()) + .collection("foo_with_dates") + .insertMany([ + { date: new Date("2025-05-10"), idx: 0 }, + { date: new Date("2025-05-11"), idx: 1 }, + ]); + + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo_with_dates", + filter: { date: { $gt: { $date: "2025-05-10" } } }, // only 2025-05-11 will match + }, + }); + + const content = getResponseContent(response); + expect(content).toContain( + 'Query on collection "foo_with_dates" resulted in 1 documents. Returning 1 documents.' + ); + + const docs = getDocsFromUntrustedContent<{ date: Date }>(content); + expect(docs.length).toEqual(1); + + expect(docs[0]?.date.toISOString()).toContain("2025-05-11"); + }); }); validateAutoConnectBehavior(integration, "find", () => { return { args: { database: integration.randomDbName(), collection: "coll1" }, - expectedResponse: 'Found 0 documents in the collection "coll1"', + expectedResponse: 'Query on collection "coll1" resulted in 0 documents.', }; }); + + describe("when counting documents exceed the configured count maxTimeMS", () => { + beforeEach(async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("foo"), + count: 10, + }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + it("should abort count operation and respond with indeterminable count", async () => { + vi.spyOn(constants, "QUERY_COUNT_MAX_TIME_MS_CAP", "get").mockReturnValue(0.1); + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { database: integration.randomDbName(), collection: "foo" }, + }); + const content = getResponseContent(response); + expect(content).toContain('Query on collection "foo" resulted in indeterminable number of documents.'); + + const docs = getDocsFromUntrustedContent(content); + expect(docs.length).toEqual(10); + }); + }); }); + +describeWithMongoDB( + "find tool with configured max documents per query", + (integration) => { + beforeEach(async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("foo"), + count: 1000, + }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + it("should return documents limited to the provided limit when provided limit < configured limit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 8, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 8 documents.`); + expect(content).toContain(`Returning 8 documents.`); + }); + + it("should return documents limited to the configured max limit when provided limit > configured limit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 10000, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 1000 documents.`); + expect(content).toContain( + `Returning 10 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery.` + ); + }); + }, + () => ({ ...defaultTestConfig, maxDocumentsPerQuery: 10 }) +); + +describeWithMongoDB( + "find tool with configured max bytes per query", + (integration) => { + beforeEach(async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("foo"), + count: 1000, + }); + }); + it("should return only the documents that could fit in configured maxBytesPerQuery limit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 1000, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 1000 documents.`); + expect(content).toContain( + `Returning 3 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery, server's configured - maxBytesPerQuery` + ); + }); + it("should return only the documents that could fit in provided responseBytesLimit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 1000, + responseBytesLimit: 50, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 1000 documents.`); + expect(content).toContain( + `Returning 1 documents while respecting the applied limits of server's configured - maxDocumentsPerQuery, tool's parameter - responseBytesLimit.` + ); + }); + }, + () => ({ ...defaultTestConfig, maxBytesPerQuery: 100 }) +); + +describeWithMongoDB( + "find tool with disabled max limit and max bytes per query", + (integration) => { + beforeEach(async () => { + await freshInsertDocuments({ + collection: integration.mongoClient().db(integration.randomDbName()).collection("foo"), + count: 1000, + }); + }); + + it("should return documents limited to the provided limit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 8, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 8 documents.`); + expect(content).toContain(`Returning 8 documents.`); + }); + + it("should return documents limited to the responseBytesLimit", async () => { + await integration.connectMcpClient(); + const response = await integration.mcpClient().callTool({ + name: "find", + arguments: { + database: integration.randomDbName(), + collection: "foo", + filter: {}, + limit: 1000, + responseBytesLimit: 50, + }, + }); + + const content = getResponseContent(response); + expect(content).toContain(`Query on collection "foo" resulted in 1000 documents.`); + expect(content).toContain( + `Returning 1 documents while respecting the applied limits of tool's parameter - responseBytesLimit.` + ); + }); + }, + () => ({ ...defaultTestConfig, maxDocumentsPerQuery: -1, maxBytesPerQuery: -1 }) +); diff --git a/tests/unit/helpers/collectCursorUntilMaxBytes.test.ts b/tests/unit/helpers/collectCursorUntilMaxBytes.test.ts new file mode 100644 index 000000000..986b66973 --- /dev/null +++ b/tests/unit/helpers/collectCursorUntilMaxBytes.test.ts @@ -0,0 +1,211 @@ +import { describe, it, expect, vi } from "vitest"; +import type { FindCursor } from "mongodb"; +import { calculateObjectSize } from "bson"; +import { collectCursorUntilMaxBytesLimit } from "../../../src/helpers/collectCursorUntilMaxBytes.js"; + +describe("collectCursorUntilMaxBytesLimit", () => { + function createMockCursor( + docs: unknown[], + { abortController, abortOnIdx }: { abortController?: AbortController; abortOnIdx?: number } = {} + ): FindCursor { + let idx = 0; + return { + tryNext: vi.fn(() => { + if (idx === abortOnIdx) { + abortController?.abort(); + } + + if (idx < docs.length) { + return Promise.resolve(docs[idx++]); + } + return Promise.resolve(null); + }), + toArray: vi.fn(() => { + return Promise.resolve(docs); + }), + } as unknown as FindCursor; + } + + it("returns all docs if maxBytesPerQuery is -1", async () => { + const docs = Array.from({ length: 1000 }).map((_, idx) => ({ value: idx })); + const cursor = createMockCursor(docs); + const maxBytes = -1; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual(docs); + expect(result.cappedBy).toBeUndefined(); + }); + + it("returns all docs if maxBytesPerQuery is 0", async () => { + const docs = Array.from({ length: 1000 }).map((_, idx) => ({ value: idx })); + const cursor = createMockCursor(docs); + const maxBytes = 0; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual(docs); + expect(result.cappedBy).toBeUndefined(); + }); + + it("respects abort signal and breaks out of loop when aborted", async () => { + const docs = Array.from({ length: 20 }).map((_, idx) => ({ value: idx })); + const abortController = new AbortController(); + const cursor = createMockCursor(docs, { abortOnIdx: 9, abortController }); + const maxBytes = 10000; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + abortSignal: abortController.signal, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual(Array.from({ length: 10 }).map((_, idx) => ({ value: idx }))); + expect(result.cappedBy).toBeUndefined(); // Aborted, not capped by limit + }); + + it("returns all docs if under maxBytesPerQuery", async () => { + const docs = [{ a: 1 }, { b: 2 }]; + const cursor = createMockCursor(docs); + const maxBytes = 10000; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual(docs); + expect(result.cappedBy).toBeUndefined(); + }); + + it("returns only docs that fit under maxBytesPerQuery", async () => { + const doc1 = { a: "x".repeat(100) }; + const doc2 = { b: "y".repeat(1000) }; + const docs = [doc1, doc2]; + const cursor = createMockCursor(docs); + const maxBytes = calculateObjectSize(doc1) + 10; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual([doc1]); + expect(result.cappedBy).toBe("config.maxBytesPerQuery"); + }); + + it("returns empty array if maxBytesPerQuery is smaller than even the first doc", async () => { + const docs = [{ a: "x".repeat(100) }]; + const cursor = createMockCursor(docs); + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: 10, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual([]); + expect(result.cappedBy).toBe("config.maxBytesPerQuery"); + }); + + it("handles empty cursor", async () => { + const cursor = createMockCursor([]); + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: 1000, + toolResponseBytesLimit: 100_000, + }); + expect(result.documents).toEqual([]); + expect(result.cappedBy).toBeUndefined(); + }); + + it("does not include a doc that would overflow the max bytes allowed", async () => { + const doc1 = { a: "x".repeat(10) }; + const doc2 = { b: "y".repeat(1000) }; + const docs = [doc1, doc2]; + const cursor = createMockCursor(docs); + // Set maxBytes so that after doc1, biggestDocSizeSoFar would prevent fetching doc2 + const maxBytes = calculateObjectSize(doc1) + calculateObjectSize(doc2) - 1; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: maxBytes, + toolResponseBytesLimit: 100_000, + }); + // Should only include doc1, not doc2 + expect(result.documents).toEqual([doc1]); + expect(result.cappedBy).toBe("config.maxBytesPerQuery"); + }); + + it("caps by tool.responseBytesLimit when tool limit is lower than config", async () => { + const doc1 = { a: "x".repeat(10) }; + const doc2 = { b: "y".repeat(1000) }; + const docs = [doc1, doc2]; + const cursor = createMockCursor(docs); + const configLimit = 5000; + const toolLimit = calculateObjectSize(doc1) + 10; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: configLimit, + toolResponseBytesLimit: toolLimit, + }); + expect(result.documents).toEqual([doc1]); + expect(result.cappedBy).toBe("tool.responseBytesLimit"); + }); + + it("caps by config.maxBytesPerQuery when config limit is lower than tool", async () => { + const doc1 = { a: "x".repeat(10) }; + const doc2 = { b: "y".repeat(1000) }; + const docs = [doc1, doc2]; + const cursor = createMockCursor(docs); + const configLimit = calculateObjectSize(doc1) + 10; + const toolLimit = 5000; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: configLimit, + toolResponseBytesLimit: toolLimit, + }); + expect(result.documents).toEqual([doc1]); + expect(result.cappedBy).toBe("config.maxBytesPerQuery"); + }); + + it("caps by tool.responseBytesLimit when both limits are equal and reached", async () => { + const doc = { a: "x".repeat(100) }; + const cursor = createMockCursor([doc, { b: 2 }]); + const limit = calculateObjectSize(doc) + 10; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: limit, + toolResponseBytesLimit: limit, + }); + expect(result.documents).toEqual([doc]); + expect(result.cappedBy).toBe("tool.responseBytesLimit"); + }); + + it("returns all docs and cappedBy undefined if both limits are negative, zero or null", async () => { + const docs = [{ a: 1 }, { b: 2 }]; + const cursor = createMockCursor(docs); + for (const limit of [-1, 0, null]) { + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: limit, + toolResponseBytesLimit: limit, + }); + expect(result.documents).toEqual(docs); + expect(result.cappedBy).toBeUndefined(); + } + }); + + it("caps by tool.responseBytesLimit if config is zero/negative and tool limit is set", async () => { + const doc1 = { a: "x".repeat(10) }; + const doc2 = { b: "y".repeat(1000) }; + const docs = [doc1, doc2]; + const cursor = createMockCursor(docs); + const toolLimit = calculateObjectSize(doc1) + 10; + const result = await collectCursorUntilMaxBytesLimit({ + cursor, + configuredMaxBytesPerQuery: 0, + toolResponseBytesLimit: toolLimit, + }); + expect(result.documents).toEqual([doc1]); + expect(result.cappedBy).toBe("tool.responseBytesLimit"); + }); +}); diff --git a/tests/unit/helpers/operationWithFallback.test.ts b/tests/unit/helpers/operationWithFallback.test.ts new file mode 100644 index 000000000..0d696ae37 --- /dev/null +++ b/tests/unit/helpers/operationWithFallback.test.ts @@ -0,0 +1,24 @@ +import { describe, it, expect, vi } from "vitest"; +import { operationWithFallback } from "../../../src/helpers/operationWithFallback.js"; + +describe("operationWithFallback", () => { + it("returns operation result when operation succeeds", async () => { + const successfulOperation = vi.fn().mockResolvedValue("success"); + const fallbackValue = "fallback"; + + const result = await operationWithFallback(successfulOperation, fallbackValue); + + expect(result).toBe("success"); + expect(successfulOperation).toHaveBeenCalledOnce(); + }); + + it("returns fallback value when operation throws an error", async () => { + const failingOperation = vi.fn().mockRejectedValue(new Error("Operation failed")); + const fallbackValue = "fallback"; + + const result = await operationWithFallback(failingOperation, fallbackValue); + + expect(result).toBe("fallback"); + expect(failingOperation).toHaveBeenCalledOnce(); + }); +}); diff --git a/tests/unit/transports/stdio.test.ts b/tests/unit/transports/stdio.test.ts deleted file mode 100644 index bfc64c290..000000000 --- a/tests/unit/transports/stdio.test.ts +++ /dev/null @@ -1,71 +0,0 @@ -import { Decimal128, MaxKey, MinKey, ObjectId, Timestamp, UUID } from "bson"; -import { createStdioTransport, EJsonReadBuffer } from "../../../src/transports/stdio.js"; -import type { JSONRPCMessage } from "@modelcontextprotocol/sdk/types.js"; -import type { AuthInfo } from "@modelcontextprotocol/sdk/server/auth/types.js"; -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; -import type { Readable } from "stream"; -import { ReadBuffer } from "@modelcontextprotocol/sdk/shared/stdio.js"; -import { describe, expect, it, beforeEach, afterEach } from "vitest"; -describe("stdioTransport", () => { - let transport: StdioServerTransport; - beforeEach(async () => { - transport = createStdioTransport(); - await transport.start(); - }); - - afterEach(async () => { - await transport.close(); - }); - - it("ejson deserializes messages", () => { - const messages: { message: JSONRPCMessage; extra?: { authInfo?: AuthInfo } }[] = []; - transport.onmessage = ( - message, - extra?: { - authInfo?: AuthInfo; - } - ): void => { - messages.push({ message, extra }); - }; - - (transport["_stdin"] as Readable).emit( - "data", - Buffer.from( - '{"jsonrpc":"2.0","id":1,"method":"testMethod","params":{"oid":{"$oid":"681b741f13aa74a0687b5110"},"uuid":{"$uuid":"f81d4fae-7dec-11d0-a765-00a0c91e6bf6"},"date":{"$date":"2025-05-07T14:54:23.973Z"},"decimal":{"$numberDecimal":"1234567890987654321"},"int32":123,"maxKey":{"$maxKey":1},"minKey":{"$minKey":1},"timestamp":{"$timestamp":{"t":123,"i":456}}}}\n', - "utf-8" - ) - ); - - expect(messages.length).toBe(1); - const message = messages[0]?.message; - - expect(message).toEqual({ - jsonrpc: "2.0", - id: 1, - method: "testMethod", - params: { - oid: new ObjectId("681b741f13aa74a0687b5110"), - uuid: new UUID("f81d4fae-7dec-11d0-a765-00a0c91e6bf6"), - date: new Date(Date.parse("2025-05-07T14:54:23.973Z")), - decimal: new Decimal128("1234567890987654321"), - int32: 123, - maxKey: new MaxKey(), - minKey: new MinKey(), - timestamp: new Timestamp({ t: 123, i: 456 }), - }, - }); - }); - - it("has _readBuffer field of type EJsonReadBuffer", () => { - expect(transport["_readBuffer"]).toBeDefined(); - expect(transport["_readBuffer"]).toBeInstanceOf(EJsonReadBuffer); - }); - - describe("standard StdioServerTransport", () => { - it("has a _readBuffer field", () => { - const standardTransport = new StdioServerTransport(); - expect(standardTransport["_readBuffer"]).toBeDefined(); - expect(standardTransport["_readBuffer"]).toBeInstanceOf(ReadBuffer); - }); - }); -});