Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions docs/components/vectordbs/dbs/oracledb.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
[Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/) is designed for AI workloads and lets you query data based on semantics rather than keywords. A key benefit is the ability to combine semantic search on unstructured data with relational search on business data in a single system.

## Overview

The `oracledb` provider integrates mem0 with Oracle Database’s AI Vector Search feature. It stores embeddings in an Oracle table that uses the `VECTOR` data type and leverages `CREATE VECTOR INDEX` for fast similarity search.

This backend uses the official `python-oracledb` driver. You’ll need a database user with privileges to create tables and vector indexes.

Install the required library:
```bash
python -m pip install -U oracledb
```

### Usage

<CodeGroup>
```python Python
import os
from mem0 import Memory

# Placeholder value; set a real OpenAI API key before running the example.
os.environ["OPENAI_API_KEY"] = "sk-xx"

# Select the `oracledb` vector store provider and pass its connection settings.
config = {
"vector_store": {
"provider": "oracledb",
"config": {
# Fill in the database user, password and DSN; they are left blank here.
"connection_params": {
"user": "",
"password": "",
"dsn": ""
},
}
}
}

# Build a Memory instance from the configuration above.
m = Memory.from_config(config)
# A short sample conversation to store.
messages = [
{"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"},
{"role": "assistant", "content": "How about thriller movies? They can be quite engaging."},
{"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."},
{"role": "assistant", "content": "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future."}
]
# Add the conversation for user "alice", tagged with a "movies" category.
m.add(messages, user_id="alice", metadata={"category": "movies"})
```
</CodeGroup>

## Configuration

### Config

Here are the parameters available for configuring Oracle AI Vector Search:

| Parameter | Description | Default Value |
| --- | --- | --- |
| `connection_params` | Oracle connection settings for `python-oracledb` (`user`, `password`, `dsn` or more). See https://python-oracledb.readthedocs.io/en/latest/user_guide/connection_handling.html | `None` |
| `use_connection_pool` | If `True`, a connection pool is created from `connection_params`; if `False`, a single connection is created | `True` |
| `client` | An existing `oracledb.Connection` or `oracledb.ConnectionPool` to use instead of building one from `connection_params` | `None` |
| `collection_name` | Name of the Oracle table used to store vectors and payloads | `mem0` |
| `embedding_model_dims` | Dimension of your embedding vectors (must be > 0) | `1536` |
| `distance_metric` | Distance function used for indexing and search. One of `EUCLIDEAN`, `EUCLIDEAN_SQUARED`, `COSINE`, `DOT`, `HAMMING`, `MANHATTAN` | `COSINE` |
| `do_create_index` | Whether to create a vector index on the collection | `True` |
| `index_type` | Vector index type. `HNSW` or `IVF` | `HNSW` |
| `index_name` | Name of the vector index. Defaults to `<collection_name>_VEC_IDX` | `None` |
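| `index_parameters` | Optional dictionary of extra `CREATE VECTOR INDEX` parameters. Allowed keys are `neighbors` and `efconstruction` for `HNSW`, and `neighbor_partitions`, `samples_per_partition` and `min_vectors_per_partition` for `IVF`; all values must be integers | `None` |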
| `index_accuracy` | Target index accuracy (1–100), used with `WITH TARGET ACCURACY <n>` | `None` |

**Notes**
- When you provide a pre-built `client`, mem0 will use it instead of creating a new connection or pool.
- Do not include the index “type” in `index_parameters`; set it via `index_type`. See Oracle docs: https://docs.oracle.com/en/database/oracle/oracle-database/26/sqlrf/create-vector-index.html
- `connection_params` must follow `python-oracledb` conventions (`user`, `password`, `dsn` and more).
- The returned `score` from searches is Oracle’s `VECTOR_DISTANCE` (lower means closer/more similar).
3 changes: 2 additions & 1 deletion docs/docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@
"components/vectordbs/dbs/s3_vectors",
"components/vectordbs/dbs/databricks",
"components/vectordbs/dbs/neptune_analytics",
"components/vectordbs/dbs/turbopuffer"
"components/vectordbs/dbs/turbopuffer",
"components/vectordbs/dbs/oracledb"
]
}
]
Expand Down
159 changes: 159 additions & 0 deletions mem0/configs/vector_stores/oracledb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Pydantic configuration for the Oracle AI Vector Search integration."""

import re
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, model_validator


def _quote_identifier(name: str) -> str:
    """Return ``name`` with each dot-separated part wrapped in double quotes.

    Surrounding whitespace is stripped first. Every part may already be
    double-quoted or may be bare text containing neither ``"`` nor ``.``.

    Raises:
        ValueError: If the stripped name does not have that shape.
    """
    name = name.strip()

    # Check the overall shape before splitting the name into its parts.
    if re.match(r'^(?:"[^"]+"|[^".]+)(?:\.(?:"[^"]+"|[^".]+))*$', name) is None:
        raise ValueError(f"Identifier name {name} is not valid.")

    quoted_parts = []
    for hit in re.finditer(r'"([^"]+)"|([^".]+)', name):
        # Exactly one group is set: group 1 for a quoted part, group 2 for a bare one.
        text = hit.group(1) or hit.group(2)
        quoted_parts.append('"' + text + '"')

    return ".".join(quoted_parts)


# Accepted values for `distance_metric` (compared after upper-casing).
ALLOWED_DISTANCE_METRICS = {
    "EUCLIDEAN",
    "EUCLIDEAN_SQUARED",
    "COSINE",
    "DOT",
    "HAMMING",
    "MANHATTAN",
}

# Accepted values for `index_type` (compared after upper-casing).
ALLOWED_INDEX_TYPES = {
    "HNSW",
    "IVF",
}

# Keys accepted in `index_parameters` when the index type is HNSW.
ALLOWED_HNSW_INDEX_PARAMETERS = {
    "neighbors",
    "efconstruction",
}

# Keys accepted in `index_parameters` when the index type is IVF.
ALLOWED_IVF_INDEX_PARAMETERS = {
    "neighbor_partitions",
    "samples_per_partition",
    "min_vectors_per_partition",
}


def _validate_int_parameter(parameters: dict, key: str, min_value: int, max_value: Optional[int] = None) -> None:
    """Check that ``parameters[key]``, when present, is an integer within bounds.

    Args:
        parameters: Dictionary holding the index parameters; updated in place.
        key: Name of the parameter to check. A missing key is accepted silently.
        min_value: Smallest accepted value (inclusive).
        max_value: Largest accepted value (inclusive), or ``None`` for no upper bound.

    Raises:
        ValueError: If the value is not an integer (booleans are rejected too)
            or lies outside the accepted range.
    """
    if key not in parameters:
        return

    candidate = parameters[key]

    # bool is a subclass of int, so it has to be excluded explicitly.
    is_plain_int = isinstance(candidate, int) and not isinstance(candidate, bool)
    if not is_plain_int:
        raise ValueError(f"`index_parameters.{key}` must be an integer")

    below_minimum = candidate < min_value
    if below_minimum:
        raise ValueError(f"`index_parameters.{key}` must be at least {min_value}")

    above_maximum = max_value is not None and candidate > max_value
    if above_maximum:
        raise ValueError(f"`index_parameters.{key}` must be at most {max_value}")

    # Store the value back as a plain int.
    parameters[key] = int(candidate)


def _validate_index_parameters(index_type: str, index_parameters: Optional[dict]) -> Optional[dict]:
    """Validate the index parameters for ``index_type`` and return a checked copy.

    Args:
        index_type: ``"HNSW"`` selects the HNSW rules; any other value selects
            the IVF rules.
        index_parameters: Dictionary of parameters, or ``None``.

    Returns:
        ``None`` when ``index_parameters`` is ``None``; otherwise a new
        dictionary with the validated entries. The input is not modified.

    Raises:
        ValueError: If the input is not a dictionary, has non-string keys, has
            keys the index type does not support, or has out-of-range values.
    """
    if index_parameters is None:
        return None
    if not isinstance(index_parameters, dict):
        raise ValueError("`index_parameters` must be a dictionary")

    # Work on a shallow copy so the caller's dictionary stays untouched.
    checked = dict(index_parameters)

    non_string_keys = sorted(repr(name) for name in checked if not isinstance(name, str))
    if non_string_keys:
        raise ValueError("`index_parameters` keys must be strings: {}".format(", ".join(non_string_keys)))

    is_hnsw = index_type == "HNSW"
    permitted = ALLOWED_HNSW_INDEX_PARAMETERS if is_hnsw else ALLOWED_IVF_INDEX_PARAMETERS
    unsupported = sorted(set(checked) - permitted)
    if unsupported:
        raise ValueError(
            "`index_parameters` contains unsupported keys for {}: {}".format(index_type, ", ".join(unsupported))
        )

    # (key, inclusive minimum, inclusive maximum or None) for each known parameter.
    if is_hnsw:
        bounds = (
            ("neighbors", 2, 2048),
            ("efconstruction", 1, 65535),
        )
    else:
        bounds = (
            ("neighbor_partitions", 1, 10000000),
            ("samples_per_partition", 1, None),
            ("min_vectors_per_partition", 0, None),
        )
    for name, lowest, highest in bounds:
        _validate_int_parameter(checked, name, lowest, highest)

    return checked


class OracleAIVectorSearchConfig(BaseModel):
    """Configuration required to connect to an Oracle database with vector search enabled.

    At least one of ``connection_params`` and ``client`` must be supplied.
    After validation, ``distance_metric`` and ``index_type`` are upper-cased,
    ``collection_name`` and ``index_name`` are double-quoted identifiers, and
    ``index_parameters`` holds a validated copy of the supplied dictionary.
    Unknown fields are rejected.
    """

    connection_params: Optional[dict] = Field(None, description="Database connection parameters, including auth.")
    use_connection_pool: Optional[bool] = Field(
        True,
        description="Whether to create a connection pool (True) or a single connection (False) from `connection_params`",
    )

    client: Optional[Any] = Field(
        None, description="Oracle Connection or ConnectionPool (overrides connection string and individual parameters)"
    )

    collection_name: Optional[str] = Field("mem0", description="Default name for the collection")
    embedding_model_dims: int = Field(1536, description="Dimension of the embedding vectors")
    distance_metric: Optional[str] = Field(
        "COSINE",
        description="Similarity metric: EUCLIDEAN, EUCLIDEAN_SQUARED, COSINE, DOT, HAMMING or MANHATTAN. Defaults to COSINE",
    )

    do_create_index: Optional[bool] = Field(True, description="Optional whether to create index")
    index_type: Optional[str] = Field("HNSW", description="Optional index type, HNSW or IVF")
    index_name: Optional[str] = Field(None, description="Optional custom name for the vector index")
    index_parameters: Optional[dict] = Field(
        None,
        description="Optional structured CREATE VECTOR INDEX parameters",
    )
    index_accuracy: Optional[int] = Field(None, description="Optional index accuracy")

    @model_validator(mode="before")
    @classmethod
    def validate_extra_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Reject input keys that are not declared fields of this model.

        Raises:
            ValueError: If ``values`` contains keys other than the model fields.
        """
        # A "before" validator receives the raw input, which is not guaranteed
        # to be a dict; leave anything else for pydantic itself to handle.
        if not isinstance(values, dict):
            return values

        allowed_fields = set(cls.model_fields.keys())
        extra_fields = set(values.keys()) - allowed_fields
        if extra_fields:
            raise ValueError(
                "Extra fields not allowed: {}. Please input only the following fields: {}".format(
                    ", ".join(sorted(extra_fields)), ", ".join(sorted(allowed_fields))
                )
            )
        return values

    @model_validator(mode="after")
    def _validate_model(self):
        """Normalise attributes and validate identifiers/metrics.

        Raises:
            ValueError: If neither ``connection_params`` nor ``client`` is set,
                if a required option is ``None``, or if any option is outside
                its accepted values.
        """

        if not self.connection_params and not self.client:
            raise ValueError("Must provide at least one of `connection_params` and `client`")

        if self.distance_metric is None:
            raise ValueError("`distance_metric` must not be None")
        distance_metric = self.distance_metric.upper()
        if distance_metric not in ALLOWED_DISTANCE_METRICS:
            raise ValueError(f"`distance_metric` must be one of: {ALLOWED_DISTANCE_METRICS}")
        self.distance_metric = distance_metric

        if self.index_type is None:
            raise ValueError("`index_type` must not be None")
        index_type = self.index_type.upper()
        if index_type not in ALLOWED_INDEX_TYPES:
            raise ValueError(f"`index_type` must be one of: {ALLOWED_INDEX_TYPES}")
        self.index_type = index_type

        # Fail with a clear message instead of an AttributeError from `.strip()`.
        if self.collection_name is None:
            raise ValueError("`collection_name` must not be None")
        collection_name = _quote_identifier(self.collection_name)

        if self.index_name is None:
            # Derive the default from the already-quoted collection name by
            # placing the suffix inside the closing quote of its last part.
            # Appending to the raw name would break for quoted names such as
            # '"My Table"', which would become the invalid '"My Table"_VEC_IDX'.
            index_name = f'{collection_name[:-1]}_VEC_IDX"'
        else:
            index_name = _quote_identifier(self.index_name)

        self.collection_name = collection_name
        self.index_name = index_name

        self.index_parameters = _validate_index_parameters(self.index_type, self.index_parameters)

        # Compare against None explicitly: a truthiness test would let 0 through.
        if self.index_accuracy is not None and not (1 <= self.index_accuracy <= 100):
            raise ValueError("`index_accuracy` must be between 1 and 100")

        if self.embedding_model_dims <= 0:
            raise ValueError("`embedding_model_dims` must be bigger than 0")

        return self

    def canonical_index_parameters(self) -> Optional[dict]:
        """Return a freshly validated copy of ``index_parameters`` (or ``None``)."""
        return _validate_index_parameters(self.index_type, self.index_parameters)
6 changes: 5 additions & 1 deletion mem0/utils/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class VectorStoreFactory:
"cassandra": "mem0.vector_stores.cassandra.CassandraDB",
"neptune": "mem0.vector_stores.neptune_analytics.NeptuneAnalyticsVector",
"turbopuffer": "mem0.vector_stores.turbopuffer.TurbopufferDB",
"oracledb": "mem0.vector_stores.oracledb.OracleAIVectorSearch",
}

@classmethod
Expand Down Expand Up @@ -219,7 +220,10 @@ class RerankerFactory:
# Provider mappings with their config classes
provider_to_class = {
"cohere": ("mem0.reranker.cohere_reranker.CohereReranker", CohereRerankerConfig),
"sentence_transformer": ("mem0.reranker.sentence_transformer_reranker.SentenceTransformerReranker", SentenceTransformerRerankerConfig),
"sentence_transformer": (
"mem0.reranker.sentence_transformer_reranker.SentenceTransformerReranker",
SentenceTransformerRerankerConfig,
),
"zero_entropy": ("mem0.reranker.zero_entropy_reranker.ZeroEntropyReranker", ZeroEntropyRerankerConfig),
"llm_reranker": ("mem0.reranker.llm_reranker.LLMReranker", LLMRerankerConfig),
"huggingface": ("mem0.reranker.huggingface_reranker.HuggingFaceReranker", HuggingFaceRerankerConfig),
Expand Down
1 change: 1 addition & 0 deletions mem0/vector_stores/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class VectorStoreConfig(BaseModel):
"langchain": "LangchainConfig",
"s3_vectors": "S3VectorsConfig",
"turbopuffer": "TurbopufferConfig",
"oracledb": "OracleAIVectorSearchConfig",
}

@model_validator(mode="after")
Expand Down
Loading