Long-term Memory

Long-term memory is HTM's durable PostgreSQL storage layer. This guide covers database operations, maintenance, performance optimization, and advanced queries.

Architecture Overview

Long-term memory provides:

  • Permanent storage for all memories
  • Vector embeddings via pgvector
  • Full-text search via PostgreSQL's tsvector
  • Time-series optimization via TimescaleDB hypertables
  • Relationship graphs for knowledge connections
  • Audit logging for all operations

(Diagram: Long-term Memory Architecture)

Database Schema

Nodes Table

The primary storage for memories:

CREATE TABLE nodes (
  id BIGSERIAL PRIMARY KEY,
  key TEXT NOT NULL UNIQUE,
  value TEXT NOT NULL,
  type TEXT,
  category TEXT,
  importance FLOAT DEFAULT 1.0,
  token_count INTEGER DEFAULT 0,
  robot_id TEXT NOT NULL,
  embedding vector(1536),  -- pgvector type
  created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
  last_accessed TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
  in_working_memory BOOLEAN DEFAULT TRUE
);

-- Indexes
CREATE INDEX idx_nodes_robot_id ON nodes(robot_id);
CREATE INDEX idx_nodes_type ON nodes(type);
CREATE INDEX idx_nodes_created_at ON nodes(created_at);
CREATE INDEX idx_nodes_embedding ON nodes USING hnsw(embedding vector_cosine_ops);
CREATE INDEX idx_nodes_fulltext ON nodes USING gin(to_tsvector('english', value));
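
With these indexes in place, the nodes table supports both semantic and keyword retrieval. Two illustrative query shapes (the $1 parameter stands in for a 1536-dimension query embedding supplied by your application):

-- Nearest neighbours by cosine distance (served by idx_nodes_embedding);
-- $1 is the query embedding as a vector literal, e.g. '[0.013, -0.027, ...]'
SELECT key, value, embedding <=> $1::vector AS distance
FROM nodes
ORDER BY embedding <=> $1::vector
LIMIT 5;

-- Keyword search (served by idx_nodes_fulltext)
SELECT key, value
FROM nodes
WHERE to_tsvector('english', value) @@ plainto_tsquery('english', 'database migration')
LIMIT 10;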

Relationships Table

Tracks connections between nodes:

CREATE TABLE relationships (
  id BIGSERIAL PRIMARY KEY,
  from_node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
  to_node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
  relationship_type TEXT,
  strength FLOAT DEFAULT 1.0,
  created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
  UNIQUE(from_node_id, to_node_id, relationship_type)
);

Tags Table

Flexible categorization:

CREATE TABLE tags (
  id BIGSERIAL PRIMARY KEY,
  node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
  tag TEXT NOT NULL,
  created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
  UNIQUE(node_id, tag)
);

CREATE INDEX idx_tags_tag ON tags(tag);

Operations Log Table (Hypertable)

Audit trail with time-series optimization:

CREATE TABLE operations_log (
  time TIMESTAMPTZ NOT NULL,
  operation TEXT NOT NULL,
  node_id BIGINT,
  robot_id TEXT NOT NULL,
  details JSONB,
  PRIMARY KEY (time, operation, robot_id)
);

-- Convert to hypertable
SELECT create_hypertable('operations_log', 'time');
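
Once operations_log is a hypertable, TimescaleDB's time_bucket() makes interval rollups cheap. For example, hourly operation counts over the last day:

SELECT time_bucket('1 hour', time) AS hour,
       operation,
       COUNT(*) AS ops
FROM operations_log
WHERE time > NOW() - INTERVAL '1 day'
GROUP BY hour, operation
ORDER BY hour DESC;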

Robots Table

Robot registry:

CREATE TABLE robots (
  id TEXT PRIMARY KEY,
  name TEXT NOT NULL,
  created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
  last_active TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

Database Operations

Direct Database Queries

While HTM provides a high-level API, you can query the database directly:

require 'pg'

# Get connection config
config = HTM::Database.default_config

# Execute raw queries
conn = PG.connect(config)

# Query nodes
result = conn.exec("SELECT * FROM nodes WHERE type = 'decision' LIMIT 10")
result.each do |row|
  puts "#{row['key']}: #{row['value']}"
end

# Query with parameters
result = conn.exec_params(
  "SELECT * FROM nodes WHERE robot_id = $1 AND importance >= $2",
  ["your-robot-id", 8.0]
)

conn.close

Using LongTermMemory Directly

Access the long-term memory layer (advanced usage):

# Access via HTM instance
ltm = htm.long_term_memory

# Retrieve a node by ID
node = ltm.retrieve(123)  # Returns Node model or nil

# The recommended way to add memories is via HTM
node_id = htm.remember(
  "Test memory",
  tags: ["test"],
  metadata: { category: "fact" }
)

# Retrieve the node
node = ltm.retrieve(node_id)
puts "Content: #{node.content}"
puts "Created: #{node.created_at}"

Memory Statistics

Get statistics using ActiveRecord:

# Total nodes
total_nodes = HTM::Models::Node.count
puts "Total nodes: #{total_nodes}"

# Nodes by robot via robot_nodes join table
robot_counts = HTM::Models::RobotNode
  .group(:robot_id)
  .count
puts "Nodes by robot:"
robot_counts.each do |robot_id, count|
  robot = HTM::Models::Robot.find(robot_id)
  puts "  #{robot.name}: #{count} nodes"
end

# Total tags
puts "Total tags: #{HTM::Models::Tag.count}"

# Time range
oldest = HTM::Models::Node.minimum(:created_at)
newest = HTM::Models::Node.maximum(:created_at)
puts "Oldest memory: #{oldest}"
puts "Newest memory: #{newest}"

# Active robots
puts "Active robots: #{HTM::Models::Robot.count}"
HTM::Models::Robot.order(last_active_at: :desc).each do |robot|
  puts "  #{robot.name}: last active #{robot.last_active_at}"
end

Advanced Queries

Query by Date Range

# Get all memories from a specific month
start_date = Time.new(2024, 1, 1)
end_date = Time.new(2024, 1, 31, 23, 59, 59)

config = HTM::Database.default_config
conn = PG.connect(config)

result = conn.exec_params(
  <<~SQL,
    SELECT key, value, type, importance, created_at
    FROM nodes
    WHERE created_at BETWEEN $1 AND $2
    ORDER BY created_at DESC
  SQL
  [start_date, end_date]
)

result.each do |row|
  puts "#{row['created_at']}: #{row['value'][0..50]}..."
end

conn.close

Query by Type and Importance

# Find critical decisions
conn = PG.connect(HTM::Database.default_config)

result = conn.exec_params(
  <<~SQL,
    SELECT key, value, importance, created_at
    FROM nodes
    WHERE type = $1 AND importance >= $2
    ORDER BY importance DESC, created_at DESC
  SQL
  ['decision', 8.0]
)

puts "Critical decisions:"
result.each do |row|
  puts "- [#{row['importance']}] #{row['value'][0..100]}..."
end

conn.close

Query Relationships

# Find all nodes related to a specific node
conn = PG.connect(HTM::Database.default_config)

result = conn.exec_params(
  <<~SQL,
    SELECT n.key, n.value, n.type, r.relationship_type
    FROM nodes n
    JOIN relationships r ON n.id = r.to_node_id
    JOIN nodes source ON r.from_node_id = source.id
    WHERE source.key = $1
  SQL
  ['decision_001']
)

puts "Related nodes:"
result.each do |row|
  puts "- [#{row['type']}] #{row['value'][0..50]}... (#{row['relationship_type']})"
end

conn.close

Query by Tags

# Find all nodes with specific tag
conn = PG.connect(HTM::Database.default_config)

result = conn.exec_params(
  <<~SQL,
    SELECT DISTINCT n.key, n.value, n.type, n.importance
    FROM nodes n
    JOIN tags t ON n.id = t.node_id
    WHERE t.tag = $1
    ORDER BY n.importance DESC
  SQL
  ['architecture']
)

puts "Architecture-related memories:"
result.each do |row|
  puts "- [#{row['importance']}] #{row['value'][0..80]}..."
end

conn.close

Most Active Robots

# Find robots with most contributions
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT r.name, r.id, COUNT(n.id) as memory_count
    FROM robots r
    LEFT JOIN nodes n ON r.id = n.robot_id
    GROUP BY r.id, r.name
    ORDER BY memory_count DESC
  SQL
)

puts "Robot contributions:"
result.each do |row|
  puts "#{row['name']}: #{row['memory_count']} memories"
end

conn.close

Time-Based Activity

# Get activity by day
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT DATE(created_at) as date, COUNT(*) as count
    FROM nodes
    WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY DATE(created_at)
    ORDER BY date DESC
  SQL
)

puts "Activity last 30 days:"
result.each do |row|
  puts "#{row['date']}: #{row['count']} memories"
end

conn.close

Database Maintenance

Vacuuming

PostgreSQL requires periodic vacuuming:

# Manual vacuum
conn = PG.connect(HTM::Database.default_config)
conn.exec("VACUUM ANALYZE nodes")
conn.exec("VACUUM ANALYZE relationships")
conn.exec("VACUUM ANALYZE tags")
conn.close

puts "Vacuum completed"

Reindexing

Rebuild indexes for optimal performance:

conn = PG.connect(HTM::Database.default_config)

# Reindex vector index
conn.exec("REINDEX INDEX idx_nodes_embedding")

# Reindex full-text
conn.exec("REINDEX INDEX idx_nodes_fulltext")

conn.close

puts "Reindexing completed"

Compression (TimescaleDB)

TimescaleDB can compress old data:

# Enable compression on operations_log hypertable
conn = PG.connect(HTM::Database.default_config)

conn.exec(
  <<~SQL
    ALTER TABLE operations_log SET (
      timescaledb.compress,
      timescaledb.compress_segmentby = 'robot_id'
    )
  SQL
)

# Add compression policy (compress data older than 7 days)
conn.exec(
  <<~SQL
    SELECT add_compression_policy('operations_log', INTERVAL '7 days')
  SQL
)

conn.close

puts "Compression policy enabled"

Cleanup Old Logs

# Delete operations logs older than 90 days
conn = PG.connect(HTM::Database.default_config)

result = conn.exec_params(
  "DELETE FROM operations_log WHERE time < $1",
  [Time.now - (90 * 24 * 3600)]
)

puts "Deleted #{result.cmd_tuples} old log entries"
conn.close

Performance Optimization

Analyzing Query Performance

# Explain query plan
conn = PG.connect(HTM::Database.default_config)

query = <<~SQL
  SELECT * FROM nodes
  WHERE type = 'decision'
  AND importance >= 8.0
  ORDER BY created_at DESC
  LIMIT 10
SQL

# Get query plan
result = conn.exec("EXPLAIN ANALYZE #{query}")
puts result.values.flatten
conn.close

Index Usage Statistics

# Check index usage
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT
      schemaname,
      relname AS tablename,
      indexrelname AS indexname,
      idx_scan as scans,
      idx_tup_read as tuples_read,
      idx_tup_fetch as tuples_fetched
    FROM pg_stat_user_indexes
    WHERE schemaname = 'public'
    ORDER BY idx_scan DESC
  SQL
)

puts "Index usage statistics:"
result.each do |row|
  puts "#{row['indexname']}: #{row['scans']} scans, #{row['tuples_read']} tuples"
end

conn.close

Table Size Analysis

# Check table sizes
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT
      tablename,
      pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
    FROM pg_tables
    WHERE schemaname = 'public'
    ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
  SQL
)

puts "Table sizes:"
result.each do |row|
  puts "#{row['tablename']}: #{row['size']}"
end

conn.close

Optimizing Vector Searches

# HNSW index parameters can be tuned
# (This is done during index creation, shown for reference)

# m: max connections per layer (default: 16)
# ef_construction: construction time/accuracy tradeoff (default: 64)

# Example (run during schema setup):
# CREATE INDEX idx_nodes_embedding ON nodes
#   USING hnsw(embedding vector_cosine_ops)
#   WITH (m = 16, ef_construction = 64);

# For queries, you can adjust ef_search:
conn = PG.connect(HTM::Database.default_config)

# Higher ef_search = more accurate but slower
conn.exec("SET hnsw.ef_search = 100")

# Now run vector searches...

conn.close
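
Putting it together, a nearest-neighbour query against the HNSW index looks like the sketch below. The query embedding here is a random placeholder; in practice it comes from the same embedding provider that populated the embedding column:

conn = PG.connect(HTM::Database.default_config)
conn.exec("SET hnsw.ef_search = 100")

# Placeholder: a real query vector must have 1536 dimensions to match the column
query_embedding = "[#{Array.new(1536) { rand.round(6) }.join(',')}]"

result = conn.exec_params(
  <<~SQL,
    SELECT key, value, embedding <=> $1::vector AS distance
    FROM nodes
    ORDER BY embedding <=> $1::vector
    LIMIT 5
  SQL
  [query_embedding]
)

result.each do |row|
  puts "#{row['distance']}: #{row['key']}"
end

conn.close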

Backup and Restore

Backup Database

# Full database backup
pg_dump -h localhost -U user -d database -F c -f htm_backup.dump

# Backup just the schema
pg_dump -h localhost -U user -d database -s -f htm_schema.sql

# Backup just the data
pg_dump -h localhost -U user -d database -a -f htm_data.sql

Restore Database

# Restore from custom format
pg_restore -h localhost -U user -d database htm_backup.dump

# Restore from SQL format
psql -h localhost -U user -d database -f htm_schema.sql
psql -h localhost -U user -d database -f htm_data.sql

Backup Ruby Script

require 'open3'

def backup_database
  config = HTM::Database.default_config

  timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
  backup_file = "htm_backup_#{timestamp}.dump"

  # Build the command as an argument array to avoid shell-quoting issues.
  # (:port is assumed to be present in the connection config alongside :host.)
  cmd = [
    "pg_dump",
    "-h", config[:host],
    "-p", config[:port].to_s,
    "-U", config[:user],
    "-d", config[:dbname],
    "-F", "c",  # Custom format
    "-f", backup_file
  ]

  # Set password via the environment so it never appears in the process list
  env = { "PGPASSWORD" => config[:password] }

  stdout, stderr, status = Open3.capture3(env, *cmd)

  if status.success?
    puts "Backup created: #{backup_file}"
    backup_file
  else
    raise "Backup failed: #{stderr}"
  end
end

# Usage
backup_database

Monitoring and Observability

Connection Pooling

HTM uses connection pooling internally, but you can monitor it:

# Check active connections
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT
      count(*) as total,
      count(*) FILTER (WHERE state = 'active') as active,
      count(*) FILTER (WHERE state = 'idle') as idle
    FROM pg_stat_activity
    WHERE datname = current_database()
  SQL
)

puts "Connections: #{result.first['total']}"
puts "  Active: #{result.first['active']}"
puts "  Idle: #{result.first['idle']}"

conn.close

Slow Query Log

Enable slow query logging in PostgreSQL:

-- In postgresql.conf or via SQL
ALTER DATABASE your_database SET log_min_duration_statement = 1000;  -- Log queries > 1s
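
For ongoing analysis, the pg_stat_statements extension (if installed and preloaded on your server) aggregates timing per normalized query. A sketch assuming PostgreSQL 13+ column names:

# Top statements by mean execution time (requires pg_stat_statements)
conn = PG.connect(HTM::Database.default_config)

result = conn.exec(
  <<~SQL
    SELECT query, calls, mean_exec_time
    FROM pg_stat_statements
    ORDER BY mean_exec_time DESC
    LIMIT 10
  SQL
)

puts "Slowest statements (mean ms):"
result.each do |row|
  puts "#{row['mean_exec_time']} ms (#{row['calls']} calls): #{row['query'][0..60]}"
end

conn.close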

Custom Monitoring

class DatabaseMonitor
  def initialize(htm)
    @htm = htm
    @config = HTM::Database.default_config
  end

  def health_check
    conn = PG.connect(@config)

    # Check connectivity
    result = conn.exec("SELECT 1")

    # Check table accessibility
    conn.exec("SELECT COUNT(*) FROM nodes")
    conn.exec("SELECT COUNT(*) FROM relationships")

    conn.close

    { status: :healthy, message: "All checks passed" }
  rescue => e
    { status: :error, message: e.message }
  end

  def performance_report
    conn = PG.connect(@config)

    report = {}

    # Query counts
    result = conn.exec("SELECT COUNT(*) FROM nodes")
    report[:total_nodes] = result.first['count'].to_i

    # Table sizes
    result = conn.exec(
      <<~SQL
        SELECT pg_size_pretty(pg_total_relation_size('nodes')) as size
      SQL
    )
    report[:nodes_size] = result.first['size']

    # Cache hit ratio
    result = conn.exec(
      <<~SQL
        SELECT
          sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
        FROM pg_statio_user_tables
        WHERE schemaname = 'public'
      SQL
    )
    report[:cache_hit_ratio] = result.first['ratio'].to_f

    conn.close
    report
  end

  def alert_if_unhealthy
    health = health_check

    if health[:status] != :healthy
      # Send alert (email, Slack, etc.)
      warn "Database unhealthy: #{health[:message]}"
    end
  end
end

monitor = DatabaseMonitor.new(htm)
puts monitor.health_check
puts monitor.performance_report

Best Practices

1. Use Prepared Statements

# Good: Use parameterized queries
conn.exec_params(
  "SELECT * FROM nodes WHERE robot_id = $1 AND type = $2",
  [robot_id, type]
)

# Avoid: String interpolation (SQL injection risk)
# conn.exec("SELECT * FROM nodes WHERE robot_id = '#{robot_id}'")

2. Connection Management

# Good: Use HTM's internal connection handling
htm.remember(...)  # Manages connections automatically

# Advanced: Manual connections, always close
conn = PG.connect(config)
begin
  # Do work
ensure
  conn.close
end

3. Batch Operations

# Good: Use transactions for multiple operations
conn = PG.connect(config)
conn.transaction do |c|
  100.times do |i|
    c.exec_params("INSERT INTO nodes (...) VALUES ($1, $2)", [key, value])
  end
end
conn.close
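
For very large batches, PostgreSQL's COPY protocol is usually faster than row-by-row INSERTs. A sketch using the pg gem's copy_data (the column list is illustrative and must match your actual schema):

conn = PG.connect(HTM::Database.default_config)

# Stream rows through COPY; each put_copy_data call sends one CSV line
conn.copy_data("COPY nodes (key, value, robot_id) FROM STDIN WITH (FORMAT csv)") do
  100.times do |i|
    conn.put_copy_data("bulk_key_#{i},bulk value #{i},your-robot-id\n")
  end
end

conn.close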

4. Regular Maintenance

# Schedule regular maintenance with the whenever gem (cron jobs)
# In config/schedule.rb (evaluated by the `whenever` command; no require needed)
every 1.day, at: '2:00 am' do
  runner "HTM::Database.vacuum_analyze"
end

every 1.week, at: '3:00 am' do
  runner "HTM::Database.reindex"
end

5. Monitor Growth

# Track database growth over time
require 'json'

class GrowthTracker
  def initialize(htm)
    @htm = htm
    @log_file = "database_growth.log"
  end

  def log_stats
    stats = @htm.memory_stats

    entry = {
      timestamp: Time.now,
      total_nodes: stats[:total_nodes],
      database_size: stats[:database_size]
    }

    File.open(@log_file, 'a') do |f|
      f.puts entry.to_json
    end
  end
end

# Run daily
tracker = GrowthTracker.new(htm)
tracker.log_stats

Troubleshooting

Connection Issues

# Test connection
begin
  conn = PG.connect(HTM::Database.default_config)
  puts "Connection successful"
  conn.close
rescue PG::Error => e
  puts "Connection failed: #{e.message}"
  puts "Check HTM_DBURL environment variable"
end

Slow Queries

# Enable query timing
conn = PG.connect(HTM::Database.default_config)

start = Time.now
result = conn.exec("SELECT * FROM nodes WHERE type = 'decision'")
elapsed = Time.now - start

puts "Query returned #{result.ntuples} rows in #{elapsed}s"

if elapsed > 1.0
  puts "Slow query detected! Consider:"
  puts "- Adding indexes"
  puts "- Using LIMIT"
  puts "- Narrowing date range"
end

conn.close

Disk Space Issues

# Check disk usage
conn = PG.connect(HTM::Database.default_config)

result = conn.exec("SELECT pg_database_size(current_database()) as size")
size_gb = result.first['size'].to_i / (1024.0 ** 3)

puts "Database size: #{size_gb.round(2)} GB"

if size_gb > 10
  puts "Large database. Consider:"
  puts "- Archiving old nodes"
  puts "- Enabling compression"
  puts "- Cleaning up operations_log"
end

conn.close

Complete Example

require 'htm'

# Initialize HTM
htm = HTM.new(robot_name: "Database Admin")

# Add some test data
puts "Adding test data..."
node_ids = []
10.times do |i|
  node_id = htm.remember(
    "Test memory number #{i}",
    tags: ["test", "batch:#{i / 5}"],
    metadata: { category: "fact", index: i }
  )
  node_ids << node_id
end

# Get statistics using ActiveRecord
puts "\n=== Database Statistics ==="
puts "Total nodes: #{HTM::Models::Node.count}"
puts "Total tags: #{HTM::Models::Tag.count}"
puts "Active robots: #{HTM::Models::Robot.count}"

# Query by tag using ActiveRecord
puts "\n=== Query by Tag ==="
test_tag = HTM::Models::Tag.find_by(name: "test")
if test_tag
  tagged_nodes = test_tag.nodes
  puts "Found #{tagged_nodes.count} nodes with tag 'test'"
  tagged_nodes.each do |node|
    puts "- #{node.id}: #{node.content}"
  end
end

# Performance check
puts "\n=== Performance Metrics ==="
puts "Table sizes:"
puts "  nodes: #{HTM::Models::Node.count} records"
puts "  tags: #{HTM::Models::Tag.count} records"
puts "  node_tags: #{HTM::Models::NodeTag.count} records"

# Cleanup test data (soft delete)
puts "\n=== Cleanup ==="
node_ids.each do |node_id|
  htm.forget(node_id)  # Soft delete by default
end
puts "Test data soft-deleted (recoverable)"