File size: 1,217 Bytes
03ac85a
addff1b
03ac85a
addff1b
03ac85a
 
 
addff1b
 
 
 
 
03ac85a
 
addff1b
03ac85a
addff1b
 
 
 
03ac85a
 
addff1b
 
 
 
 
03ac85a
 
 
 
addff1b
 
 
 
 
03ac85a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from langchain_chroma import Chroma
from models import load_embeddings
import streamlit as st

# Directory passed to Chroma as its persist_directory. The path is relative,
# so it resolves against the process's current working directory.
CHROMA_DIR = "tmp/chroma_store"

def load_vectorstore(provider="ollama", embeddings_model="tinyllama"):
    """
    Open the Chroma vectorstore stored under ``CHROMA_DIR``.

    Args:
        provider: Embeddings provider name, forwarded to ``load_embeddings``.
        embeddings_model: Embeddings model name, forwarded to ``load_embeddings``.

    Returns:
        A ``Chroma`` instance using ``CHROMA_DIR`` as its persist directory and
        the loaded embeddings as its embedding function.
    """
    # Resolve the embeddings first: if this fails, no directory is created.
    embedding_fn = load_embeddings(provider, embeddings_model)

    # Make sure the persistence directory exists before Chroma opens it.
    os.makedirs(CHROMA_DIR, exist_ok=True)

    store = Chroma(embedding_function=embedding_fn, persist_directory=CHROMA_DIR)
    return store


@st.cache_resource
def get_retriever(provider="ollama", embeddings_model="tinyllama"):
    """
    Build a similarity-search retriever over the vectorstore.

    The function is wrapped in ``st.cache_resource``, so Streamlit manages
    caching of the returned retriever.

    Args:
        provider: Embeddings provider name, forwarded to ``load_vectorstore``.
        embeddings_model: Embeddings model name, forwarded to ``load_vectorstore``.

    Returns:
        The retriever produced by ``as_retriever`` with ``search_type`` set to
        ``"similarity"`` and ``k`` set to 3.
    """
    search_options = {"k": 3}
    store = load_vectorstore(provider, embeddings_model)
    retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs=search_options,
    )
    return retriever


def add_document(text, metadata=None, provider="ollama", embeddings_model="tinyllama"):
    """
    Insert one text document into the vectorstore.

    No explicit persist call is made here; the store is opened with a
    persist directory and Chroma is relied on to save the data itself.

    Args:
        text: The document text to add.
        metadata: Optional metadata mapping for the document. A falsy value
            (``None`` or an empty mapping) is replaced by an empty dict.
        provider: Embeddings provider name, forwarded to ``load_vectorstore``.
        embeddings_model: Embeddings model name, forwarded to ``load_vectorstore``.
    """
    store = load_vectorstore(provider, embeddings_model)

    # Fall back to an empty dict so the metadata list always has one entry
    # matching the single text.
    if metadata:
        doc_metadata = metadata
    else:
        doc_metadata = {}

    store.add_texts([text], metadatas=[doc_metadata])