PineconeSparseEmbeddings#

class langchain_pinecone.embeddings.PineconeSparseEmbeddings[source]#

Bases: PineconeEmbeddings

PineconeSparseEmbeddings embedding model.

Example

from langchain_pinecone import PineconeSparseEmbeddings
from langchain_core.documents import Document

# Initialize sparse embeddings
sparse_embeddings = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")

# Embed a single query (returns SparseValues)
query_embedding = sparse_embeddings.embed_query("What is machine learning?")
# query_embedding contains SparseValues with indices and values

# Embed multiple documents
docs = ["Document 1 content", "Document 2 content"]
doc_embeddings = sparse_embeddings.embed_documents(docs)

# Use with an index configured for sparse vectors
from pinecone import Pinecone

pc = Pinecone(api_key="your-api-key")

# Create index with sparse embeddings support
if not pc.has_index("sparse-index"):
    pc.create_index_for_model(
        name="sparse-index",
        cloud="aws",
        region="us-east-1",
        embed={
            "model": "pinecone-sparse-english-v0",
            "field_map": {"text": "chunk_text"},
            "metric": "dotproduct",
            "read_parameters": {},
            "write_parameters": {}
        }
    )

index = pc.Index("sparse-index")

# IMPORTANT: Use PineconeSparseVectorStore for sparse vectors
# The regular PineconeVectorStore won't work with sparse embeddings
from langchain_pinecone.vectorstores_sparse import PineconeSparseVectorStore

# Initialize sparse vector store with sparse embeddings
vector_store = PineconeSparseVectorStore(
    index=index,
    embedding=sparse_embeddings
)

# Add documents
from uuid import uuid4

documents = [
    Document(page_content="Machine learning is awesome", metadata={"source": "article"}),
    Document(page_content="Neural networks power modern AI", metadata={"source": "book"})
]

# Generate unique IDs for each document
uuids = [str(uuid4()) for _ in range(len(documents))]

# Add documents to the vector store
vector_store.add_documents(documents=documents, ids=uuids)

# Search for similar documents
results = vector_store.similarity_search("machine learning", k=2)
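
# Each hit comes back as a LangChain Document, so matches can be inspected directly
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")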

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError (pydantic_core.ValidationError) if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

param batch_size: int | None = None#

Batch size for embedding documents.

param dimension: int | None = None#
param document_params: Dict [Optional]#

Parameters for embedding documents.

param model: str [Required]#

Model to use, for example 'multilingual-e5-large'.

param pinecone_api_key: SecretStr [Optional] (alias 'api_key')#

Pinecone API key.

If not provided, will look for the PINECONE_API_KEY environment variable.

param query_params: Dict [Optional]#

Parameters for embedding query.

param show_progress_bar: bool = False#

Whether to show a progress bar while embedding documents.
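
For illustration, the optional parameters above can be supplied at construction. A minimal sketch; the query_params and document_params keys shown ("input_type", "truncate") are assumed Pinecone Inference options, not guaranteed by this class:

from langchain_pinecone import PineconeSparseEmbeddings

# Illustrative values only; "input_type" and "truncate" are assumptions
# based on Pinecone's hosted-model parameters.
sparse_embeddings = PineconeSparseEmbeddings(
    model="pinecone-sparse-english-v0",
    batch_size=64,
    query_params={"input_type": "query", "truncate": "END"},
    document_params={"input_type": "passage", "truncate": "END"},
)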
async aembed_documents(texts: List[str]) → List[SparseValues][source]#

Asynchronously embed search docs with sparse embeddings.

Parameters:

texts (List[str])

Return type:

List[SparseValues]

async aembed_query(text: str) → SparseValues[source]#

Asynchronously embed query text with sparse embeddings.

Parameters:

text (str)

Return type:

SparseValues
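
A minimal async sketch covering both aembed_documents and aembed_query, assuming PINECONE_API_KEY is set in the environment:

import asyncio

from langchain_pinecone import PineconeSparseEmbeddings

async def main() -> None:
    sparse_embeddings = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")

    # Embed several documents in one awaited call
    doc_embeddings = await sparse_embeddings.aembed_documents(
        ["Document 1 content", "Document 2 content"]
    )

    # Embed a single query
    query_embedding = await sparse_embeddings.aembed_query("What is machine learning?")

    print(len(doc_embeddings), len(query_embedding.indices))

asyncio.run(main())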

embed_documents(texts: List[str]) → List[SparseValues][source]#

Embed search docs with sparse embeddings.

Parameters:

texts (List[str])

Return type:

List[SparseValues]

embed_query(text: str) → SparseValues[source]#

Embed query text with sparse embeddings.

Parameters:

text (str)

Return type:

SparseValues
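
Each SparseValues result stores the sparse vector as two parallel lists, indices and values. A short inspection sketch:

from langchain_pinecone import PineconeSparseEmbeddings

sparse_embeddings = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")
embedding = sparse_embeddings.embed_query("What is machine learning?")

# Pair each token index with its weight (first five shown)
for index, value in zip(embedding.indices[:5], embedding.values[:5]):
    print(index, value)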

property async_client: PineconeAsyncio#

Lazily initialize the async client.
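
A small sketch of the lazy construction; not a required usage pattern, since the async embedding methods use this client internally:

sparse_embeddings = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")

# The PineconeAsyncio client is only constructed on first attribute access
client = sparse_embeddings.async_client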