This overview covers text-based embedding models. LangChain does not currently support multimodal embeddings.
Embedding models transform raw text, such as a sentence, a paragraph, or a tweet, into a fixed-length vector of numbers that captures its semantic meaning. These vectors allow machines to compare and search text by meaning rather than by exact words. In practice, texts with similar ideas end up close together in the vector space. For example, instead of matching only the literal phrase “machine learning”, an embedding-based search can surface documents that discuss related concepts even when the wording differs.
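Closeness in that vector space is commonly measured with cosine similarity. Here is a minimal illustrative sketch, not tied to any particular provider (numpy is assumed only for the arithmetic, and embed stands for any embedding call shown later on this page):

import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between two embedding vectors (1.0 = same direction)."""
    a_arr, b_arr = np.asarray(a), np.asarray(b)
    return float(a_arr @ b_arr / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr)))

# With a real embedding model, related texts score higher, e.g.:
# cosine_similarity(embed("machine learning"), embed("neural networks"))
# tends to exceed
# cosine_similarity(embed("machine learning"), embed("chocolate cake"))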
LangChain provides a standard interface for text embedding models (e.g., OpenAI, Cohere, Hugging Face) via the Embeddings interface. Two main methods are available:
embed_documents(texts: List[str]) → List[List[float]]: Embeds a list of documents.
embed_query(text: str) → List[float]: Embeds a single query.
The interface allows queries and documents to be embedded with different strategies, though most providers handle them the same way in practice.
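For example, here is a minimal, self-contained sketch using the deterministic fake embedding that ships with langchain-core, so it runs without an API key; any of the provider instances below expose the same two methods:

from langchain_core.embeddings import DeterministicFakeEmbedding

embeddings = DeterministicFakeEmbedding(size=4096)

# Embed several documents at once
doc_vectors = embeddings.embed_documents(
    ["Paris is the capital of France.", "The Eiffel Tower is in Paris."]
)
# Embed a single query
query_vector = embeddings.embed_query("Where is the Eiffel Tower?")

print(len(doc_vectors))     # 2 -- one vector per document
print(len(doc_vectors[0]))  # 4096 -- the configured vector size
print(len(query_vector))    # 4096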
pip install -qU langchain-openai

import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
pip install -qU "langchain[azure]"
import getpass
import os

if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Enter API key for Azure: ")

from langchain_openai import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)
pip install -qU langchain-google-genai
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
pip install -qU langchain-google-vertexai
from langchain_google_vertexai import VertexAIEmbeddings

embeddings = VertexAIEmbeddings(model="text-embedding-005")
pip install -qU langchain-aws
from langchain_aws import BedrockEmbeddings

embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")
pip install -qU langchain-huggingface
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
pip install -qU langchain-ollama
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")
pip install -qU langchain-cohere
import getpass
import os

if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter API key for Cohere: ")

from langchain_cohere import CohereEmbeddings

embeddings = CohereEmbeddings(model="embed-english-v3.0")
pip install -qU langchain-mistralai
import getpass
import os

if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for MistralAI: ")

from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")
pip install -qU langchain-nomic
import getpass
import os

if not os.environ.get("NOMIC_API_KEY"):
    os.environ["NOMIC_API_KEY"] = getpass.getpass("Enter API key for Nomic: ")

from langchain_nomic import NomicEmbeddings

embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
pip install -qU langchain-nvidia-ai-endpoints
import getpass
import os

if not os.environ.get("NVIDIA_API_KEY"):
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter API key for NVIDIA: ")

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embeddings = NVIDIAEmbeddings(model="NV-Embed-QA")
pip install -qU langchain-voyageai
import getpass
import os

if not os.environ.get("VOYAGE_API_KEY"):
    os.environ["VOYAGE_API_KEY"] = getpass.getpass("Enter API key for Voyage AI: ")

from langchain_voyageai import VoyageAIEmbeddings

embeddings = VoyageAIEmbeddings(model="voyage-3")
pip install -qU langchain-ibm
import getpass
import os

if not os.environ.get("WATSONX_APIKEY"):
    os.environ["WATSONX_APIKEY"] = getpass.getpass("Enter API key for IBM watsonx: ")

from langchain_ibm import WatsonxEmbeddings

embeddings = WatsonxEmbeddings(
    model_id="ibm/slate-125m-english-rtrvr",
    url="https://us-south.ml.cloud.ibm.com",
    project_id="<WATSONX PROJECT_ID>",
)
pip install -qU langchain-core
from langchain_core.embeddings import DeterministicFakeEmbedding

embeddings = DeterministicFakeEmbedding(size=4096)
pip install -qU "langchain[langchain-xai]"
import getpassimport osif not os.environ.get("XAI_API_KEY"): os.environ["XAI_API_KEY"] = getpass.getpass("Enter API key for xAI: ")from langchain.chat_models import init_chat_modelmodel = init_chat_model("grok-2", model_provider="xai")
pip install -qU "langchain[langchain-perplexity]"
import getpassimport osif not os.environ.get("PPLX_API_KEY"): os.environ["PPLX_API_KEY"] = getpass.getpass("Enter API key for Perplexity: ")from langchain.chat_models import init_chat_modelmodel = init_chat_model("llama-3.1-sonar-small-128k-online", model_provider="perplexity")
pip install -qU "langchain[langchain-deepseek]"
import getpassimport osif not os.environ.get("DEEPSEEK_API_KEY"): os.environ["DEEPSEEK_API_KEY"] = getpass.getpass("Enter API key for DeepSeek: ")from langchain.chat_models import init_chat_modelmodel = init_chat_model("deepseek-chat", model_provider="deepseek")
Embeddings can be stored or temporarily cached to avoid needing to recompute them. Caching embeddings can be done using a CacheBackedEmbeddings. This wrapper stores embeddings in a key-value store: the text is hashed, and the hash is used as the key in the cache. The main supported way to initialize a CacheBackedEmbeddings is from_bytes_store, which takes the following parameters:
underlying_embedder: The embedder to use for embedding.
document_embedding_cache: Any ByteStore for caching document embeddings.
batch_size: (optional, defaults to None) The number of documents to embed between store updates.
namespace: (optional, defaults to "") The namespace to use for the document cache. Helps avoid collisions (e.g., set it to the embedding model name).
query_embedding_cache: (optional, defaults to None) A ByteStore for caching query embeddings, or True to reuse the same store as document_embedding_cache.
import time

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# Create your underlying embeddings model
underlying_embeddings = ...  # e.g., OpenAIEmbeddings(), HuggingFaceEmbeddings(), etc.

# LocalFileStore persists embeddings to the local filesystem.
# This isn't for production use, but is useful for local development.
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,
    store,
    namespace=underlying_embeddings.model,
    query_embedding_cache=True,  # query embeddings are not cached unless this is set
)

# Example: caching a query embedding
tic = time.time()
print(cached_embedder.embed_query("Hello, world!"))
print(f"First call took: {time.time() - tic:.2f} seconds")

# Subsequent calls for the same text hit the cache
tic = time.time()
print(cached_embedder.embed_query("Hello, world!"))
print(f"Second call took: {time.time() - tic:.2f} seconds")
In production, you would typically use a more robust, persistent store, such as a database or cloud storage. See the stores integrations for available options.
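The CacheBackedEmbeddings wrapper itself is store-agnostic: any ByteStore implementation can be plugged in. As a sketch, the in-memory store from langchain-core (non-persistent, so only suitable for tests) drops in the same way a database-backed store would:

from langchain.embeddings import CacheBackedEmbeddings
from langchain_core.stores import InMemoryByteStore

store = InMemoryByteStore()  # non-persistent; swap in a persistent ByteStore for production
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,  # the same underlying embeddings model as above
    store,
    namespace=underlying_embeddings.model,
)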