Lab: Build a RAG Knowledge Assistant

Build a knowledge assistant that answers questions about your DevOps documentation using Retrieval-Augmented Generation.

Duration: 1-2 hours
Level: Intermediate
Prerequisites: Python 3.10+ and an OpenAI API key (the steps below use OpenAI; swapping in Anthropic Claude is one of the challenge extensions)

What You'll Build

A CLI assistant that:

Loads your Markdown documentation
Creates vector embeddings and stores them
Answers questions using relevant document context
Cites sources in its answers

Step 1: Project Setup

mkdir rag-assistant && cd rag-assistant
python -m venv venv
source venv/bin/activate

pip install langchain langchain-community langchain-openai \
            chromadb sentence-transformers rich

Step 2: Create the Document Loader

# loader.py
from pathlib import Path
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema import Document


def load_markdown_docs(docs_dir: str) -> list[Document]:
    """Load every Markdown file found under a directory.

    The tree rooted at ``docs_dir`` is searched recursively for ``*.md``
    files. The ``source`` metadata of each loaded document is set to the
    file's path relative to ``docs_dir``.

    Files that fail to load are reported on stdout and skipped, so one bad
    file does not abort the whole run.

    Args:
        docs_dir: Root directory to search.

    Returns:
        The loaded documents, ordered by file path. The list is empty when
        ``docs_dir`` is not a directory or holds no loadable Markdown files.
    """
    docs = []
    docs_path = Path(docs_dir)

    if docs_path.is_dir():
        # rglob() yields paths in no particular order; sort so repeated runs
        # see the files in the same sequence. Entries that merely *look* like
        # Markdown files (e.g. a directory named "notes.md") are dropped.
        md_files = sorted(
            path for path in docs_path.rglob("*.md") if path.is_file()
        )
    else:
        # Without this, a mistyped path looks exactly like an empty folder.
        print(f"Warning: {docs_dir} is not a directory")
        md_files = []

    for md_file in md_files:
        try:
            loader = TextLoader(str(md_file), encoding="utf-8")
            file_docs = loader.load()
            # Add source metadata
            for doc in file_docs:
                doc.metadata["source"] = str(
                    md_file.relative_to(docs_path)
                )
            docs.extend(file_docs)
        except Exception as e:
            # Deliberately broad: best effort, report and keep going.
            print(f"Error loading {md_file}: {e}")

    print(f"Loaded {len(docs)} documents")
    return docs


def chunk_documents(
    documents: list[Document],
    chunk_size: int = 1000,
    overlap: int = 200
) -> list[Document]:
    """Break documents into overlapping chunks.

    Args:
        documents: Documents to split.
        chunk_size: Passed to the splitter as ``chunk_size``.
        overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        The chunks produced by the splitter.
    """
    # Markdown H2 and H3 headings first, then blank lines, newlines, spaces.
    split_points = ["\n## ", "\n### ", "\n\n", "\n", " "]
    pieces = RecursiveCharacterTextSplitter(
        separators=split_points,
        chunk_overlap=overlap,
        chunk_size=chunk_size,
    ).split_documents(documents)
    print(f"Created {len(pieces)} chunks")
    return pieces

Step 3: Build the Vector Store

# vectorstore.py
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

# Relative path handed to Chroma as ``persist_directory`` by both
# create_vectorstore() and load_vectorstore().
PERSIST_DIR = "./chroma_db"

def create_vectorstore(
    chunks: list[Document]
) -> Chroma:
    """Embed chunks and persist them in the ``devops_docs`` collection.

    Every chunk is stored under a deterministic ID of the form
    ``"<source>#<n>"``, where ``<source>`` is the chunk's ``source``
    metadata (``"unknown"`` if absent) and ``<n>`` is its position among
    the chunks that share that source.

    Args:
        chunks: Document chunks to embed and store.

    Returns:
        The Chroma vector store the chunks were written to.
    """
    # Use a free, local embedding model
    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2"
    )

    # Without explicit IDs each run gets freshly generated ones, so running
    # the indexer twice stores every chunk twice and retrieval returns
    # duplicates. Stable IDs make a re-run overwrite instead.
    # NOTE(review): this relies on the Chroma wrapper upserting by ID. Chunks
    # left over from files that shrank or were deleted are not removed.
    next_index: dict[str, int] = {}
    chunk_ids = []
    for chunk in chunks:
        source = chunk.metadata.get("source", "unknown")
        position = next_index.get(source, 0)
        next_index[source] = position + 1
        chunk_ids.append(f"{source}#{position}")

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=PERSIST_DIR,
        collection_name="devops_docs"
    )

    print(f"Vector store created with {len(chunks)} chunks")
    return vectorstore


def load_vectorstore() -> Chroma:
    """Open the persisted ``devops_docs`` collection.

    Returns:
        A Chroma vector store bound to ``PERSIST_DIR``.
    """
    # Same embedding model name that create_vectorstore() indexes with.
    store_options = {
        "persist_directory": PERSIST_DIR,
        "embedding_function": HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        ),
        "collection_name": "devops_docs",
    }
    return Chroma(**store_options)

Step 4: Create the RAG Chain

# rag_chain.py
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma

# Prompt text for the QA chain. create_rag_chain() wraps it in a
# PromptTemplate whose input variables are {context} and {question}.
PROMPT_TEMPLATE = """You are a DevOps and AIOps expert assistant.
Answer the question based on the provided context from our 
documentation. If the context doesn't contain the answer, 
say so honestly.

Context:
{context}

Question: {question}

Provide a clear, concise answer. Include code examples if 
relevant. Cite the source document when possible.

Answer:"""


def create_rag_chain(vectorstore: Chroma):
    """Build a retrieval QA chain on top of the vector store.

    Retrieval uses MMR search (``k=4`` results from ``fetch_k=8``
    candidates). The retrieved chunks are stuffed into ``PROMPT_TEMPLATE``
    and answered by ``gpt-4o-mini`` at temperature 0. The chain also
    returns the retrieved source documents.

    Each chunk is rendered into the context with its ``source`` metadata,
    so every retrieved document must carry a ``source`` key.

    Args:
        vectorstore: Vector store to retrieve context from.

    Returns:
        The configured ``RetrievalQA`` chain.
    """

    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4, "fetch_k": 8}
    )

    prompt = PromptTemplate(
        template=PROMPT_TEMPLATE,
        input_variables=["context", "question"]
    )

    # The prompt asks the model to cite the source document, but by default
    # only each chunk's text is placed into {context}. Render the file name
    # next to the text so there is something to cite.
    document_prompt = PromptTemplate(
        template="Source: {source}\n{page_content}",
        input_variables=["page_content", "source"]
    )

    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type_kwargs={
            "prompt": prompt,
            "document_prompt": document_prompt,
        },
        return_source_documents=True,
    )

    return chain

Step 5: Interactive CLI

# main.py
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel

from loader import load_markdown_docs, chunk_documents
from vectorstore import create_vectorstore, load_vectorstore
from rag_chain import create_rag_chain

import os
import sys

# Shared Rich console; index_docs() and chat() use it for output and input.
console = Console()


def index_docs(docs_dir: str):
    """Load, chunk and index the documentation under a directory.

    Args:
        docs_dir: Directory handed to ``load_markdown_docs``.
    """
    console.print("[bold]Indexing documents...[/bold]")
    docs = load_markdown_docs(docs_dir)
    chunks = chunk_documents(docs)
    if not chunks:
        # Nothing to embed: leave the store alone rather than report a
        # successful index that contains no documents.
        console.print(
            "[yellow]No documents to index; vector store unchanged.[/yellow]"
        )
        return
    create_vectorstore(chunks)
    console.print("[green]Indexing complete![/green]")


def chat():
    """Run the interactive question/answer loop.

    Loads the persisted vector store, builds the RAG chain, then reads
    questions until the user types ``quit``, ``exit`` or ``q``, or ends the
    input with Ctrl-D / Ctrl-C. Blank input is ignored. Each answer is
    followed by the sorted, de-duplicated list of source files.
    """
    console.print(Panel(
        "[bold]DevOps Knowledge Assistant[/bold]\n"
        "Ask questions about your documentation.\n"
        "Type 'quit' to exit.",
        title="RAG Assistant"
    ))

    vectorstore = load_vectorstore()
    chain = create_rag_chain(vectorstore)

    while True:
        try:
            question = console.input("\n[bold cyan]Question:[/bold cyan] ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt means "leave", not "crash".
            console.print()
            break

        # Tolerate stray whitespace and don't spend an LLM call on nothing.
        question = question.strip()
        if not question:
            continue
        if question.lower() in ("quit", "exit", "q"):
            break

        with console.status("Thinking..."):
            result = chain.invoke({"query": question})

        # Display answer
        console.print("\n[bold green]Answer:[/bold green]")
        console.print(Markdown(result["result"]))

        # Display sources (sorted so the listing is stable between runs)
        sources = sorted({
            doc.metadata.get("source", "unknown")
            for doc in result["source_documents"]
        })
        console.print(f"\n[dim]Sources: {', '.join(sources)}[/dim]")


if __name__ == "__main__":
    # `python main.py index [DOCS_DIR]` builds the index (DOCS_DIR defaults
    # to ./docs); any other invocation starts the chat loop.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "index":
        target_dir = cli_args[1] if len(cli_args) > 1 else "./docs"
        index_docs(target_dir)
    else:
        chat()

Step 6: Run It

# Set your API key
export OPENAI_API_KEY="sk-..."

# Index your docs (point to your documentation folder)
python main.py index ../website/docs

# Start chatting
python main.py

Example Session

┌─────────────────────────────────┐
│       RAG Assistant             │
│ Ask questions about your docs.  │
│ Type 'quit' to exit.            │
└─────────────────────────────────┘

Question: How do I set up GPU monitoring?

Answer:
To set up GPU monitoring, install the NVIDIA DCGM Exporter 
alongside the GPU Operator. Key metrics to track include:
- `DCGM_FI_DEV_GPU_UTIL` — GPU utilization
- `DCGM_FI_DEV_FB_USED` — Memory usage
...

Sources: ai-infra/gpu-cluster-setup.md

Challenge Extensions

Add Anthropic Claude — swap OpenAI for Claude as the LLM
Web UI — build a Streamlit or Gradio frontend
Incremental indexing — only re-index changed files
Conversation memory — maintain chat history for follow-up questions

Next Steps

Prompt Engineering — Write better prompts
AIOps Monitoring Lab — Build monitoring systems

What You'll Build

Step 1: Project Setup

Step 2: Create the Document Loader

Step 3: Build the Vector Store

Step 4: Create the RAG Chain

Step 5: Interactive CLI

Step 6: Run It

Example Session

Challenge Extensions

Next Steps