Rails Models¶
The ragdoll-rails gem provides a comprehensive set of ActiveRecord models for managing documents, content, embeddings, and search functionality within your Rails application.
Model Overview¶
The Ragdoll Rails models follow Rails conventions and provide a clean, intuitive API for document management and search:
erDiagram
Document ||--o{ Content : contains
Content ||--o{ Embedding : has
Document ||--o{ DocumentMetadata : has
User ||--o{ Document : owns
Document ||--o{ SearchResult : appears_in
Document {
uuid id
string title
text description
string file_path
string content_type
integer file_size
string status
json metadata
uuid user_id
timestamps created_at
timestamps updated_at
}
Content {
uuid id
uuid document_id
text content
integer chunk_index
integer start_position
integer end_position
string content_type
json metadata
timestamps created_at
}
Embedding {
uuid id
uuid content_id
vector embedding
string model_name
integer dimensions
timestamps created_at
}
Core Models¶
Ragdoll::Document¶
The central model representing uploaded documents and their metadata.
Attributes¶
# Declares the typed attributes of Ragdoll::Document together with their defaults.
class Ragdoll::Document < ApplicationRecord
# Core attributes
# The id default is a lambda that returns SecureRandom.uuid.
attribute :id, :uuid, default: -> { SecureRandom.uuid }
attribute :title, :string
attribute :description, :text
attribute :file_path, :string
attribute :content_type, :string
attribute :file_size, :integer
# Status defaults to 'pending'.
attribute :status, :string, default: 'pending'
# JSON metadata; defaults to an empty hash.
attribute :metadata, :json, default: {}
attribute :user_id, :uuid
# Processing attributes
attribute :processed_at, :datetime
# JSON list of processing errors; defaults to an empty array.
attribute :processing_errors, :json, default: []
# NOTE(review): presumably a digest of the file contents — confirm how it is computed.
attribute :content_hash, :string
# Search attributes
# NOTE(review): :tsvector does not appear to be a type Rails registers for the
# attribute API by default — confirm the gem registers it.
attribute :search_vector, :tsvector
# NOTE(review): if this value is used as a PostgreSQL text search configuration,
# the built-in configurations are named like 'english', not 'en' — TODO confirm.
attribute :language, :string, default: 'en'
end
Associations¶
# Associations for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  # Owner of the document. Polymorphic so any model can own documents, and
  # optional so documents without an owner are allowed.
  # NOTE(review): a polymorphic belongs_to also needs a user_type column next
  # to user_id — confirm the schema provides it.
  belongs_to :user, polymorphic: true, optional: true

  # Content chunks are destroyed with the document; embeddings are reached
  # through those chunks.
  has_many :contents, dependent: :destroy
  has_many :embeddings, through: :contents

  # File attachments
  has_one_attached :file
  has_many_attached :images

  # Rails singularizes "metadata" to "metadatum", so without an explicit
  # class_name this association would look for a DocumentMetadatum constant
  # instead of the DocumentMetadata model.
  has_many :document_metadata,
           class_name: 'Ragdoll::DocumentMetadata',
           dependent: :destroy
end
Scopes and Finders¶
# Query scopes for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  # Status scopes
  scope :pending, -> { where(status: 'pending') }
  scope :processing, -> { where(status: 'processing') }
  scope :processed, -> { where(status: 'processed') }
  scope :failed, -> { where(status: 'failed') }

  # Content type scopes
  scope :pdfs, -> { where(content_type: 'application/pdf') }
  # ActiveRecord's hash conditions do not accept a Regexp, so the MIME-type
  # prefix is matched with a LIKE pattern. Going through arel_table keeps the
  # column qualified with this model's table, which matters when the query is
  # joined to contents (that table has a content_type column too).
  scope :images, -> { where(arel_table[:content_type].matches('image/%')) }
  scope :text_files, -> { where(arel_table[:content_type].matches('text/%')) }

  # Date scopes
  # Documents created within the last `days` days (default 7).
  scope :recent, ->(days = 7) { where(created_at: days.days.ago..) }
  # Documents created at any time on the given date.
  scope :by_date, ->(date) { where(created_at: date.all_day) }

  # User scopes
  scope :by_user, ->(user) { where(user: user) }
  # A nested hash in where() is read as "table metadata, column visibility",
  # not as a JSON key lookup, so the JSON field is extracted explicitly with
  # PostgreSQL's ->> operator.
  scope :public_documents, lambda {
    where("#{quoted_table_name}.metadata ->> 'visibility' = ?", 'public')
  }
end
Instance Methods¶
# Instance-level helpers for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  # Status methods

  # @return [Boolean] true while the document is waiting to be processed
  def pending?
    status == 'pending'
  end

  # @return [Boolean] true while the document is being processed
  def processing?
    status == 'processing'
  end

  # @return [Boolean] true once processing has finished successfully
  def processed?
    status == 'processed'
  end

  # @return [Boolean] true when processing ended with an error
  def failed?
    status == 'failed'
  end

  # Content methods

  # Joins every chunk's text in chunk_index order, one chunk per line.
  # The separator must be double-quoted: '\n' in single quotes is a literal
  # backslash followed by "n", not a newline.
  #
  # @return [String]
  def full_content
    contents.order(:chunk_index).pluck(:content).join("\n")
  end

  # @param length [Integer] maximum length of the preview
  # @return [String] the full content truncated to `length`
  def content_preview(length = 500)
    full_content.truncate(length)
  end

  # File methods

  # @return [String, nil] lower-cased extension of file_path (including the
  #   dot), or nil when no file_path is set
  def file_extension
    File.extname(file_path).downcase if file_path
  end

  # @return [String] file_size formatted by number_to_human_size
  def human_file_size
    ActionController::Base.helpers.number_to_human_size(file_size)
  end

  # Metadata methods

  # @param key [String, Symbol] metadata key; looked up as a string
  # @return [Object, nil] the stored value
  def get_metadata(key)
    metadata[key.to_s]
  end

  # Stores a single metadata entry and saves the record.
  #
  # @param key [String, Symbol]
  # @param value [Object]
  # @raise [ActiveRecord::RecordInvalid] when the record fails validation
  def set_metadata(key, value)
    self.metadata = metadata.merge(key.to_s => value)
    save!
  end

  # Merges several metadata entries (keys are stringified) and saves the record.
  #
  # @param hash [Hash]
  # @raise [ActiveRecord::RecordInvalid] when the record fails validation
  def add_metadata(hash)
    self.metadata = metadata.merge(hash.stringify_keys)
    save!
  end
end
Class Methods¶
# Class-level search and bulk-operation entry points for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  class << self
    # Search methods

    # Runs Ragdoll::SearchService for the query and returns its result.
    def search(query, options = {})
      service = Ragdoll::SearchService.new(query, options)
      service.call
    end

    # Runs Ragdoll::SemanticSearchService with the given limit and threshold.
    def semantic_search(query, limit: 10, threshold: 0.7)
      service = Ragdoll::SemanticSearchService.new(
        query,
        limit: limit,
        threshold: threshold
      )
      service.call
    end

    # Full-text match of the query against search_vector, capped at `limit` rows.
    def keyword_search(query, limit: 10)
      matches = where("search_vector @@ plainto_tsquery(?)", query)
      matches.limit(limit)
    end

    # Bulk operations

    # Enqueues a ProcessDocumentJob for each document whose id is in the list.
    def bulk_process(document_ids)
      selected = where(id: document_ids)
      selected.find_each { |doc| Ragdoll::ProcessDocumentJob.perform_later(doc) }
    end

    # Enqueues an IndexDocumentJob for every processed document.
    def reindex_all
      processed.find_each { |doc| Ragdoll::IndexDocumentJob.perform_later(doc) }
    end
  end
end
Ragdoll::Content¶
Represents chunked content extracted from documents.
Attributes¶
# Declares the typed attributes of Ragdoll::Content together with their defaults.
class Ragdoll::Content < ApplicationRecord
# The id default is a lambda that returns SecureRandom.uuid.
attribute :id, :uuid, default: -> { SecureRandom.uuid }
attribute :document_id, :uuid
attribute :content, :text
# NOTE(review): chunk_index presumably orders chunks within a document, and
# start_position/end_position presumably locate the chunk in the source text —
# confirm units (characters vs bytes) against the chunker.
attribute :chunk_index, :integer
attribute :start_position, :integer
attribute :end_position, :integer
# Content type defaults to 'text/plain'.
attribute :content_type, :string, default: 'text/plain'
# JSON metadata; defaults to an empty hash.
attribute :metadata, :json, default: {}
attribute :language, :string
attribute :word_count, :integer
attribute :character_count, :integer
end
Associations and Methods¶
# Associations, scopes and helpers for a single content chunk.
class Ragdoll::Content < ApplicationRecord
  belongs_to :document
  has_many :embeddings, dependent: :destroy

  # Scopes
  scope :by_document, ->(document) { where(document: document) }
  scope :ordered, -> { order(:chunk_index) }
  scope :with_embeddings, -> { joins(:embeddings) }

  # Instance methods

  # The chunk text truncated to `length` (default 150).
  def summary(length = 150)
    text = content
    text.truncate(length)
  end

  # Whether this chunk has at least one embedding.
  def has_embedding?
    embeddings.any?
  end

  # The first embedding of this chunk, if any.
  def primary_embedding
    embeddings.first
  end

  # Content analysis

  # Recomputes word_count and character_count from the text and saves,
  # raising if the record is invalid.
  def calculate_statistics!
    text = content
    update!(
      word_count: text.split.size,
      character_count: text.length
    )
  end
end
Ragdoll::Embedding¶
Stores vector embeddings for content chunks.
Attributes¶
# Declares the typed attributes of Ragdoll::Embedding.
class Ragdoll::Embedding < ApplicationRecord
# The id default is a lambda that returns SecureRandom.uuid.
attribute :id, :uuid, default: -> { SecureRandom.uuid }
attribute :content_id, :uuid
# NOTE(review): the :vector type is presumably supplied by a pgvector
# integration rather than by Rails itself — confirm which library registers it.
attribute :embedding, :vector
# NOTE(review): an attribute called model_name likely collides with the
# model_name method ActiveModel defines on every record — verify this loads
# without a DangerousAttributeError.
attribute :model_name, :string
attribute :dimensions, :integer
attribute :created_at, :datetime
end
Methods¶
# Associations, validations and vector-similarity queries for embeddings.
#
# All similarity queries use pgvector's cosine-distance operator (<=>), where
# cosine similarity = 1 - cosine distance.
class Ragdoll::Embedding < ApplicationRecord
  belongs_to :content
  has_one :document, through: :content

  # Validations
  validates :embedding, presence: true
  validates :model_name, presence: true
  validates :dimensions, presence: true, numericality: { greater_than: 0 }

  # Scopes
  scope :by_model, ->(model) { where(model_name: model) }
  scope :recent, ->(days = 30) { where(created_at: days.days.ago..) }

  # Converts a vector into pgvector's text form ("[1.0,2.0,...]").
  # Binding a Ruby Array to a "?" placeholder would expand it to a
  # comma-separated list, which is not a valid vector literal.
  #
  # @param vector [Array<Numeric>, String] components, or an existing literal
  # @return [String]
  # @raise [ArgumentError, TypeError] when a component is not numeric
  def self.vector_literal(vector)
    return vector if vector.is_a?(String)

    "[#{vector.to_a.map { |component| Float(component) }.join(',')}]"
  end

  # Search methods

  # Embeddings whose cosine similarity to `vector` exceeds `threshold`,
  # nearest first.
  #
  # The original compared an L2 distance (<->) against a cosine threshold and
  # passed a bind value to order(), which does not take binds. The distance
  # expression is now sanitized once and reused for both filter and ordering.
  #
  # @param vector [Array<Numeric>, String]
  # @param limit [Integer]
  # @param threshold [Float] minimum cosine similarity
  # @return [ActiveRecord::Relation]
  def self.similar_to(vector, limit: 10, threshold: 0.7)
    distance = sanitize_sql_array(['embedding <=> ?::vector', vector_literal(vector)])

    where("#{distance} < ?", 1 - threshold)
      .order(Arel.sql(distance))
      .limit(limit)
  end

  # Embeddings with an extra `similarity` column, most similar first.
  #
  # select() does not take bind values either, and `<=>` yields a distance,
  # so the original exposed a distance as "similarity" and returned the least
  # similar rows first. Ordering by ascending distance is the same as
  # descending similarity.
  #
  # @param vector [Array<Numeric>, String]
  # @param limit [Integer]
  # @return [ActiveRecord::Relation]
  def self.cosine_similarity(vector, limit: 10)
    distance = sanitize_sql_array(['embedding <=> ?::vector', vector_literal(vector)])

    select("*, 1 - (#{distance}) AS similarity")
      .order(Arel.sql(distance))
      .limit(limit)
  end

  # Instance methods

  # Cosine similarity between this row's stored embedding and `other_vector`,
  # computed in the database.
  #
  # The original passed the vector as select_value's second argument (a log
  # name, not a bind) and had no FROM clause, so `embedding` was unresolved.
  #
  # @param other_vector [Array<Numeric>, String]
  # @return [Float, nil] nil when the record is not persisted
  def similarity_to(other_vector)
    expression = self.class.sanitize_sql_array(
      ['1 - (embedding <=> ?::vector)', self.class.vector_literal(other_vector)]
    )
    self.class.unscoped.where(id: id).pick(Arel.sql(expression))
  end

  # Euclidean norm of the embedding.
  #
  # @return [Float]
  def vector_magnitude
    Math.sqrt(embedding.sum { |x| x * x })
  end
end
Searchable Concern¶
The `Ragdoll::Searchable` concern can be included in your own models to add search capabilities:
# Example model that opts in to Ragdoll search via the Searchable concern.
class Article < ApplicationRecord
  include Ragdoll::Searchable

  # Searchable settings: which fields hold the content, title and metadata,
  # the chunk size, and whether processing is automatic.
  ragdoll_searchable do |settings|
    settings.content_field = :body
    settings.title_field = :title
    settings.metadata_fields = %i[category tags author]
    settings.chunk_size = 1000
    settings.auto_process = true
  end
end
Searchable Configuration¶
# Example model showing the full set of Ragdoll::Searchable settings.
class BlogPost < ApplicationRecord
  include Ragdoll::Searchable

  ragdoll_searchable do |config|
    # Content configuration
    config.content_field = :content
    config.title_field = :title
    config.description_field = :excerpt

    # Processing configuration
    config.chunk_size = 800
    config.chunk_overlap = 100
    config.auto_process = true
    config.process_on_create = true
    config.process_on_update = true

    # Metadata configuration
    config.metadata_fields = %i[category tags status]
    # Extra metadata computed per record. The lookups are nil-safe so that a
    # post without an author or without content yields nil / 0 instead of
    # raising NoMethodError.
    config.custom_metadata = lambda { |record|
      {
        author: record.user&.name,
        published_at: record.published_at,
        word_count: record.content.to_s.split.size
      }
    }

    # Search configuration
    config.searchable = true
    config.enable_suggestions = true
    config.boost_factor = 1.0
  end
end
Generated Methods¶
When you include `Ragdoll::Searchable`, your model gains these methods:
# Example calls on a model that includes Ragdoll::Searchable (BlogPost here).
# Search methods
# Class-level entry points; semantic_search also accepts a limit.
BlogPost.search("machine learning")
BlogPost.semantic_search("AI algorithms", limit: 5)
# Processing methods
# NOTE(review): presumably process_for_search! runs the initial processing and
# reindex! refreshes an already-processed record — confirm against the concern.
post = BlogPost.create!(title: "My Post", content: "Content here")
post.process_for_search!
post.reindex!
# Status methods
# Predicates for the record's Ragdoll processing state.
post.ragdoll_processed?
post.ragdoll_processing?
post.ragdoll_failed?
# Content methods
# NOTE(review): presumably these return the record's content chunks, their
# embeddings, and a preview string — confirm return types.
post.ragdoll_contents
post.ragdoll_embeddings
post.ragdoll_search_preview
Model Callbacks¶
Document Callbacks¶
# Lifecycle callbacks for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  # Processing callbacks
  after_create :enqueue_processing, if: :should_auto_process?
  after_update :enqueue_reprocessing, if: :file_changed?
  before_destroy :cleanup_attachments

  # Search callbacks
  after_save :update_search_vector, if: :title_or_description_changed?
  after_touch :touch_embeddings

  private

  # Auto-process only when a file is attached and the configuration allows it.
  def should_auto_process?
    file.attached? && Ragdoll.configuration.auto_process_documents
  end

  # True when the stored path changed in this save, or a new file was assigned.
  #
  # The original called file.attachment.changed?, which raises NoMethodError
  # when nothing is attached and is false once the attachment has been saved.
  # Active Storage keeps pending attachment changes in attachment_changes
  # until the transaction commits, so the key is still present here.
  def file_changed?
    saved_change_to_file_path? || attachment_changes.key?('file')
  end

  def title_or_description_changed?
    saved_change_to_title? || saved_change_to_description?
  end

  # Purges attached blobs in the background before the record is destroyed.
  def cleanup_attachments
    file.purge_later if file.attached?
    images.purge_later if images.attached?
  end

  # Recomputes search_vector in the database from title and description.
  #
  # The original handed update_column a Ruby string containing the SQL text,
  # which is written as a literal value rather than evaluated, and it
  # interpolated user-controlled title/description into that text. Here
  # to_tsvector runs server-side with bound parameters. update_all skips
  # callbacks, so this does not re-trigger after_save.
  #
  # NOTE(review): `language` must name an existing PostgreSQL text search
  # configuration (e.g. 'english'); confirm the stored values qualify.
  def update_search_vector
    text = [title, description].compact.join(' ')

    self.class.unscoped.where(id: id).update_all(
      ['search_vector = to_tsvector(?::regconfig, ?)', language, text]
    )
  end

  # Propagates a touch on the document to all of its embeddings.
  def touch_embeddings
    embeddings.touch_all
  end
end
Validations¶
Document Validations¶
# Validation rules for Ragdoll::Document.
class Ragdoll::Document < ApplicationRecord
  # Required fields
  validates :title, presence: true, length: { maximum: 255 }
  validates :status,
            presence: true,
            inclusion: { in: %w[pending processing processed failed] }

  # File validations
  validates :file, presence: true, on: :create
  validate :file_type_allowed
  validate :file_size_within_limit

  # Content validations
  validates :content_type, presence: true
  validates :file_size, presence: true, numericality: { greater_than: 0 }

  # User validations
  validates :user, presence: true, if: :require_user?

  private

  # Adds an error when the attached file's MIME type is not in the configured
  # allow-list. Skipped when no file is attached.
  def file_type_allowed
    return unless file.attached?

    permitted = Ragdoll.configuration.allowed_file_types
    mime = file.content_type
    return if permitted.include?(mime)

    errors.add(:file, "type #{mime} is not allowed")
  end

  # Adds an error when the attached file is larger than the configured
  # maximum. Skipped when no file is attached.
  def file_size_within_limit
    return unless file.attached?

    ceiling = Ragdoll.configuration.max_file_size
    return unless file.byte_size > ceiling

    errors.add(:file, "size exceeds maximum allowed (#{ceiling} bytes)")
  end

  # Whether the configuration demands that every document has a user.
  def require_user?
    Ragdoll.configuration.require_user_association
  end
end
Custom Model Extensions¶
Adding Custom Attributes¶
# Migration adding the custom category, priority and expiry columns
# (with indexes on category and priority) to ragdoll_documents.
class AddCustomFieldsToRagdollDocuments < ActiveRecord::Migration[7.0]
  def change
    change_table :ragdoll_documents do |t|
      t.string :category
      t.integer :priority, default: 0
      t.datetime :expires_at

      t.index :category
      t.index :priority
    end
  end
end
# Extend the model with behaviour for the custom category, priority and
# expires_at columns.
Ragdoll::Document.class_eval do
  # Add custom validations
  validates :category, presence: true
  validates :priority, inclusion: { in: 0..10 }

  # Add custom scopes
  scope :by_category, ->(cat) { where(category: cat) }
  scope :high_priority, -> { where(priority: 8..10) }
  # Documents with no expiry, or whose expiry is still in the future.
  scope :active, -> { where('expires_at IS NULL OR expires_at > ?', Time.current) }

  # Add custom methods

  # @return [Boolean] true when expires_at is set and already in the past;
  #   always a strict boolean (the original returned nil without an expiry)
  def expired?
    expires_at.present? && expires_at < Time.current
  end

  # @return [Boolean] true for priority 8 and above. A nil priority (possible
  #   on an unsaved or not-yet-validated record) counts as not high priority
  #   instead of raising NoMethodError.
  def high_priority?
    priority.to_i >= 8
  end
end
Custom Metadata Handling¶
# Typed accessors backed by the JSON metadata column.
class Ragdoll::Document < ApplicationRecord
  # Defines a reader and a writer for one metadata key.
  #
  # The reader coerces the stored value to `type`. Values can be strings
  # (after a round-trip through JSON) or the original Ruby objects (right
  # after assignment, before a reload), so both cases are handled.
  #
  # @param name [Symbol, String] accessor name, also used as the metadata key
  # @param type [Symbol] one of :string, :integer, :float, :boolean, :date,
  #   :datetime; any other value returns the raw stored value
  # @return [void]
  def self.define_metadata_accessor(name, type = :string)
    key = name.to_s

    define_method(name) do
      value = (metadata || {})[key]
      case type
      when :integer then value&.to_i
      when :float then value&.to_f
      # `!!value` treated the strings "false" and "0" as true; the Rails
      # boolean caster understands them. nil and "" still read as false.
      when :boolean then ActiveModel::Type::Boolean.new.cast(value) == true
      # Date.parse / DateTime.parse raise TypeError for non-strings, which
      # broke reads right after assigning a Date/Time object.
      when :date then value.is_a?(String) ? Date.parse(value) : value&.to_date
      when :datetime then value.is_a?(String) ? DateTime.parse(value) : value&.to_datetime
      else value
      end
    end

    define_method("#{name}=") do |value|
      # Assign a new hash rather than mutating in place, so the change is
      # tracked as dirty and a nil metadata column is tolerated.
      self.metadata = (metadata || {}).merge(key => value)
    end
  end

  # Define typed metadata fields
  define_metadata_accessor :department
  define_metadata_accessor :confidence_score, :float
  define_metadata_accessor :is_public, :boolean
  define_metadata_accessor :published_at, :datetime
end
# Usage
# Each assignment goes through a writer generated by define_metadata_accessor,
# which stores the value in the metadata hash under the accessor's name.
document.department = "Engineering"
document.confidence_score = 0.95
document.is_public = true
document.published_at = 1.week.ago
# Persist the metadata changes; save! raises if the record is invalid.
document.save!
Performance Considerations¶
Database Indexes¶
# Create custom indexes for better performance
#
# PostgreSQL refuses CREATE INDEX CONCURRENTLY inside a transaction block, and
# Rails wraps each migration in one by default, so the DDL transaction is
# disabled for this migration. As a consequence a failure part-way through
# leaves the indexes created so far in place.
class AddPerformanceIndexesToRagdollModels < ActiveRecord::Migration[7.0]
  disable_ddl_transaction!

  def change
    # Document indexes
    add_index :ragdoll_documents, [:user_id, :status]
    add_index :ragdoll_documents, [:content_type, :created_at]
    add_index :ragdoll_documents, :file_size

    # Content indexes
    add_index :ragdoll_contents, [:document_id, :chunk_index]
    add_index :ragdoll_contents, :word_count

    # Embedding indexes
    add_index :ragdoll_embeddings, [:content_id, :model_name]
    add_index :ragdoll_embeddings, :created_at

    # Vector similarity index (requires pgvector). A raw execute cannot be
    # reversed automatically, so both directions are given explicitly.
    reversible do |dir|
      dir.up do
        execute <<~SQL
          CREATE INDEX CONCURRENTLY IF NOT EXISTS ragdoll_embeddings_embedding_idx
          ON ragdoll_embeddings USING ivfflat (embedding vector_cosine_ops)
          WITH (lists = 100)
        SQL
      end

      dir.down do
        execute 'DROP INDEX CONCURRENTLY IF EXISTS ragdoll_embeddings_embedding_idx'
      end
    end
  end
end
Query Optimization¶
# Query helpers for loading search data.
class Ragdoll::Document < ApplicationRecord
  # Documents with contents, embeddings and user loaded, plus a content_count
  # column counting each document's content rows.
  scope :with_search_data, lambda {
    left_joins(:contents)
      .includes(:contents, :embeddings, :user)
      .select('ragdoll_documents.*, COUNT(ragdoll_contents.id) as content_count')
      .group('ragdoll_documents.id')
  }

  # At most three content chunks, with their embeddings loaded alongside.
  def preload_search_content
    chunks = contents.includes(:embeddings)
    chunks.limit(3)
  end

  # content_preview, cached under a per-document key for one hour.
  def cached_content_preview
    cache_key = "document:#{id}:preview"
    Rails.cache.fetch(cache_key, expires_in: 1.hour) { content_preview }
  end
end
This comprehensive model documentation provides everything you need to work with Ragdoll Rails models effectively, from basic usage to advanced customization and performance optimization.