Skip to content

Source Model

Stores immutable source content from which facts are extracted.

Class: FactDb::Models::Source

source = FactDb::Models::Source.new(
  content: "Document content...",
  kind: "email",
  captured_at: Time.current
)

Attributes

| Attribute | Type | Description |
|-----------|------|-------------|
| id | Integer | Primary key |
| content_hash | String | SHA256 hash of the content, used for deduplication |
| kind | String | Content kind (email, document, etc.) |
| content | Text | Original unmodified text content |
| title | String | Optional title |
| source_uri | String | Original location |
| metadata | Hash | Additional metadata (JSONB) |
| embedding | Vector | Semantic search vector |
| captured_at | DateTime | When content was captured |
| created_at | DateTime | Record creation time |

Associations

has_many :fact_sources
has_many :facts, through: :fact_sources

Callbacks

before_create :compute_hash
before_create :generate_embedding

Instance Methods

compute_hash

def compute_hash

Computes SHA256 hash of content for deduplication.

generate_embedding

def generate_embedding

Generates embedding vector using configured generator.

Class Methods

find_or_create_by_text

def self.find_or_create_by_text(text, **attributes)

Finds an existing source whose content hash matches the given text, or creates a new source from the text and the supplied attributes.

Example:

source = Source.find_or_create_by_text(
  "Document text",
  kind: "document",
  captured_at: Time.current
)

Scopes

by_kind

scope :by_kind, ->(kind) { where(kind: kind) }

Filter by content kind.

Source.by_kind('email')

captured_between

scope :captured_between, ->(from, to) {
  where(captured_at: from..to)
}

Filter by capture date range.

Source.captured_between(1.week.ago, Time.current)

search_text

scope :search_text, ->(query) {
  where("content @@ plainto_tsquery(?)", query)
}

Full-text search over the content column.

Source.search_text("quarterly earnings")

Usage Examples

Create Source

source = Source.create!(
  content: "Important document...",
  kind: "document",
  title: "Q4 Report",
  source_uri: "https://example.com/report.pdf",
  captured_at: Time.current,
  metadata: {
    author: "Jane Smith",
    department: "Finance"
  }
)

Find by Hash

hash = Digest::SHA256.hexdigest("Document text")
source = Source.find_by(content_hash: hash)
source.facts.each do |fact|
  puts fact.text
end
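
Semantic Search

# Requires embedding. Note: query_embedding is interpolated directly into the SQL
# string below, so it must be a trusted vector literal, never raw user input.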
similar = Source
  .where.not(embedding: nil)
  .order(Arel.sql("embedding <=> '#{query_embedding}'"))
  .limit(10)