Documentation Index
Fetch the complete documentation index at: https://docs.upsonic.ai/llms.txt
Use this file to discover all available pages before exploring further.
Overview
KnowledgeBase supports full document lifecycle management after initial setup. You can add new sources, insert raw text, remove documents, refresh changed files, update metadata, and delete by filter — all without recreating the knowledge base.
Adding Sources Dynamically
Use add_source() to add new files or directories to an existing knowledge base:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="dynamic_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./dynamic_db")
))
kb = KnowledgeBase(
sources=["initial_docs/"],
embedding_provider=embedding,
vectordb=vectordb
)
# Later, add more sources
document_ids = kb.add_source("new_report.pdf")
print(f"Added documents: {document_ids}")
# Add with custom metadata
document_ids = kb.add_source(
"quarterly_update.pdf",
metadata={"quarter": "Q4", "year": "2024", "department": "engineering"}
)
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="What are the key findings from the quarterly update?",
context=[kb]
)
result = agent.do(task)
print(result)
Adding Raw Text
Use add_text() to insert text content directly:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="text_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./text_db")
))
kb = KnowledgeBase(
sources=["handbook.pdf"],
embedding_provider=embedding,
vectordb=vectordb
)
# Add text from an API response, database query, or user input
doc_id = kb.add_text(
text="The board approved a 15% budget increase for R&D in fiscal year 2025.",
document_name="board_decision_2025",
metadata={"type": "decision", "date": "2025-01-10"}
)
print(f"Added document: {doc_id}")
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="What budget decisions were made for 2025?",
context=[kb]
)
result = agent.do(task)
print(result)
add_text() is idempotent — if the same text content is added twice, the duplicate is automatically skipped based on content hash.
Removing Documents
Remove a document and all its chunks by document ID:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="remove_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./remove_db")
))
kb = KnowledgeBase(
sources=["docs/"],
embedding_provider=embedding,
vectordb=vectordb
)
# Add a document and get its ID
doc_ids = kb.add_source("outdated_policy.pdf")
# Later, remove it
if doc_ids:
success = kb.remove_document(doc_ids[0])
print(f"Removed: {success}")
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="What policies are currently active?",
context=[kb]
)
result = agent.do(task)
print(result)
Use delete_by_filter() to delete all chunks matching a metadata filter — useful for bulk cleanup:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="filter_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./filter_db")
))
kb = KnowledgeBase(
sources=["docs/"],
embedding_provider=embedding,
vectordb=vectordb
)
# Remove all chunks from a specific document name
success = kb.delete_by_filter({"document_name": "deprecated_guide.pdf"})
print(f"Deleted by filter: {success}")
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="Summarize the current documentation",
context=[kb]
)
result = agent.do(task)
print(result)
Refreshing Changed Sources
Re-scan all sources for changes and re-index modified documents:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="refresh_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./refresh_db")
))
kb = KnowledgeBase(
sources=["docs/"],
embedding_provider=embedding,
vectordb=vectordb
)
# After files on disk have changed, refresh the index
stats = kb.refresh()
print(f"Refresh stats: {stats}")
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="What are the latest changes in the documentation?",
context=[kb]
)
result = agent.do(task)
print(result)
Use update_document_metadata() to update metadata for all chunks of a specific document:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
vectordb = ChromaProvider(ChromaConfig(
collection_name="metadata_kb",
vector_size=1536,
connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./metadata_db")
))
kb = KnowledgeBase(
sources=["contracts/"],
embedding_provider=embedding,
vectordb=vectordb
)
# Add a document
doc_ids = kb.add_source("contract_draft.pdf")
# Update its metadata (e.g., mark as approved)
if doc_ids:
success = kb.update_document_metadata(
document_id=doc_ids[0],
metadata_updates={"status": "approved", "approved_by": "legal_team"}
)
print(f"Metadata updated: {success}")
agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
description="Which contracts have been approved?",
context=[kb],
vector_search_filter={"status": "approved"}
)
result = agent.do(task)
print(result)
Method Reference
| Method | Async Version | Description |
|---|---|---|
| `add_source(source, loader, splitter, metadata)` | `aadd_source(...)` | Add file/directory source |
| `add_text(text, metadata, document_name, splitter)` | `aadd_text(...)` | Add raw text content |
| `remove_document(document_id)` | `aremove_document(...)` | Remove a document and all its chunks |
| `delete_by_filter(metadata_filter)` | `adelete_by_filter(...)` | Delete chunks by metadata filter |
| `refresh()` | `arefresh()` | Re-scan and re-index changed sources |
| `update_document_metadata(document_id, metadata_updates)` | `aupdate_document_metadata(...)` | Update metadata for a document’s chunks |