MODULO 4.8

💾 Persistencia e Banco de Dados

SQLite, modelos SQLAlchemy e a implementacao da persistencia total.

6
Topicos
~35
Minutos
Avanc.
Nivel
Pratico
Tipo
1

🗄️ Modelos de Dados

Estrutura das tabelas principais do NotebookLMX.

from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey, Float
from sqlalchemy.orm import relationship
from app.database import Base

class Notebook(Base):
    __tablename__ = "notebooks"

    id = Column(Integer, primary_key=True, index=True)
    title = Column(String(255), nullable=False)
    description = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, onupdate=datetime.utcnow)

    # Relacionamentos
    sources = relationship("Source", back_populates="notebook", cascade="all, delete")
    messages = relationship("Message", back_populates="notebook", cascade="all, delete")
    artifacts = relationship("Artifact", back_populates="notebook", cascade="all, delete")
2

📚 Modelo Source

Fontes de dados (PDFs, textos) vinculadas aos notebooks.

class Source(Base):
    __tablename__ = "sources"

    id = Column(Integer, primary_key=True, index=True)
    notebook_id = Column(Integer, ForeignKey("notebooks.id"), nullable=False)

    # Metadados do arquivo
    filename = Column(String(255), nullable=False)
    file_type = Column(String(50))  # pdf, txt, url
    file_size = Column(Integer)
    file_path = Column(String(500))

    # Conteudo processado
    extracted_text = Column(Text)
    summary = Column(Text)
    token_count = Column(Integer)

    # Auditoria
    uploaded_at = Column(DateTime, default=datetime.utcnow)
    processed_at = Column(DateTime)
    status = Column(String(20), default="pending")  # pending, processing, ready, error

    notebook = relationship("Notebook", back_populates="sources")
3

💬 Modelo Message (Auditoria)

Historico completo de interacoes para rastreabilidade.

class Message(Base):
    __tablename__ = "messages"

    id = Column(Integer, primary_key=True, index=True)
    notebook_id = Column(Integer, ForeignKey("notebooks.id"), nullable=False)
    request_id = Column(String(36), unique=True, index=True)

    # Conteudo
    role = Column(String(20))  # user, assistant
    content = Column(Text, nullable=False)

    # Metadados GIPM - Persistencia Total
    prompt_sent = Column(Text)  # Prompt completo enviado
    model_used = Column(String(50))
    persona_used = Column(String(50))

    # Metricas
    tokens_input = Column(Integer)
    tokens_output = Column(Integer)
    cost_usd = Column(Float)
    latency_ms = Column(Integer)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    notebook = relationship("Notebook", back_populates="messages")
4

🔧 Configuracao SQLAlchemy

Setup do banco de dados com SQLite.

# app/database.py
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from app.config import settings

SQLALCHEMY_DATABASE_URL = settings.DATABASE_URL  # sqlite:///./notebooklmx.db

engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    connect_args={"check_same_thread": False}  # SQLite only
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()

# Dependency para injetar sessao
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
5

📝 Repository Pattern

Camada de acesso a dados desacoplada.

class NotebookRepository:
    def __init__(self, db: Session):
        self.db = db

    async def create(self, data: NotebookCreate) -> Notebook:
        notebook = Notebook(**data.dict())
        self.db.add(notebook)
        self.db.commit()
        self.db.refresh(notebook)
        return notebook

    async def get_by_id(self, id: int) -> Optional[Notebook]:
        return self.db.query(Notebook).filter(Notebook.id == id).first()

    async def list_all(self, skip: int = 0, limit: int = 100) -> List[Notebook]:
        return self.db.query(Notebook).offset(skip).limit(limit).all()

    async def delete(self, id: int) -> bool:
        notebook = await self.get_by_id(id)
        if notebook:
            self.db.delete(notebook)
            self.db.commit()
            return True
        return False
6

📊 Queries de Auditoria

Consultas para rastreabilidade e analytics.

class AuditRepository:
    """Queries para rastreabilidade GIPM"""

    async def get_usage_by_notebook(self, notebook_id: int) -> UsageStats:
        result = self.db.query(
            func.count(Message.id).label("total_messages"),
            func.sum(Message.tokens_input).label("total_tokens_in"),
            func.sum(Message.tokens_output).label("total_tokens_out"),
            func.sum(Message.cost_usd).label("total_cost")
        ).filter(Message.notebook_id == notebook_id).first()

        return UsageStats(**result._asdict())

    async def get_daily_costs(self, days: int = 30) -> List[DailyCost]:
        return self.db.query(
            func.date(Message.created_at).label("date"),
            func.sum(Message.cost_usd).label("cost")
        ).group_by(func.date(Message.created_at)
        ).order_by(func.date(Message.created_at).desc()
        ).limit(days).all()

    async def get_request_trace(self, request_id: str) -> RequestTrace:
        """Rastreia uma requisicao completa"""
        message = self.db.query(Message).filter(
            Message.request_id == request_id
        ).first()
        return RequestTrace.from_message(message)

📝 Resumo do Modulo

Modelos - Notebook, Source, Message, Artifact
SQLite - Banco leve e portavel
Repository - Acesso a dados desacoplado
Auditoria - Queries para rastreabilidade