Class: FactDb::Facts

Inherits:
Object
  • Object
show all
Defined in:
lib/fact_db.rb

Constant Summary collapse

FORMATS =

Available output formats for LLM consumption

%i[raw json triples cypher text].freeze
STRATEGIES =

Available retrieval strategies

%i[auto semantic fulltext graph temporal hybrid].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config: nil) ⇒ Facts

Returns a new instance of Facts.



71
72
73
74
75
76
77
78
79
80
81
# File 'lib/fact_db.rb', line 71

# Builds the facade and wires up every collaborator it delegates to.
#
# @param config [Object, nil] configuration to use; when nil (or otherwise
#   falsy) FactDb.config is used instead
def initialize(config: nil)
  @config = config || FactDb.config
  # The connection is established before any service or pipeline is built.
  # NOTE(review): presumably the collaborators below expect a live
  # connection — confirm before reordering.
  Database.establish_connection!(@config)

  # Every service and pipeline receives the same configuration object.
  @source_service = Services::SourceService.new(@config)
  @entity_service = Services::EntityService.new(@config)
  @fact_service = Services::FactService.new(@config)
  @extraction_pipeline = Pipeline::ExtractionPipeline.new(@config)
  @resolution_pipeline = Pipeline::ResolutionPipeline.new(@config)
  # build_transformers is defined outside this excerpt.
  @transformers = build_transformers
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the configuration held by this instance.
#
# @return [Object] the value assigned to @config in #initialize (the
#   +config:+ argument, or FactDb.config when none was given)
def config
  @config
end

#entity_service ⇒ Object (readonly)

Returns the value of attribute entity_service.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the entity service.
#
# @return [Services::EntityService] the instance built in #initialize
def entity_service
  @entity_service
end

#extraction_pipeline ⇒ Object (readonly)

Returns the value of attribute extraction_pipeline.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the extraction pipeline.
#
# @return [Pipeline::ExtractionPipeline] the instance built in #initialize
def extraction_pipeline
  @extraction_pipeline
end

#fact_service ⇒ Object (readonly)

Returns the value of attribute fact_service.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the fact service.
#
# @return [Services::FactService] the instance built in #initialize
def fact_service
  @fact_service
end

#resolution_pipeline ⇒ Object (readonly)

Returns the value of attribute resolution_pipeline.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the resolution pipeline.
#
# @return [Pipeline::ResolutionPipeline] the instance built in #initialize
def resolution_pipeline
  @resolution_pipeline
end

#source_service ⇒ Object (readonly)

Returns the value of attribute source_service.



68
69
70
# File 'lib/fact_db.rb', line 68

# Read-only accessor for the source service.
#
# @return [Services::SourceService] the instance built in #initialize
def source_service
  @source_service
end

Instance Method Details

#at(date) ⇒ Temporal::QueryBuilder

Temporal query builder - query at a specific point in time

Examples:

facts.at("2024-01-15").query("Paula's role", format: :cypher)
facts.at("2024-01-15").facts_for(entity_id)
facts.at("2024-01-15").compare_to("2024-06-15")

Parameters:

  • date (Date, Time, String)

    Point in time

Returns:

  • (Temporal::QueryBuilder)



154
155
156
# File 'lib/fact_db.rb', line 154

# Starts a temporal query pinned to a single point in time.
#
# @param date [Date, Time, String] point in time; normalised by parse_date
# @return [Temporal::QueryBuilder] builder constructed with this object and
#   the parsed date
def at(date)
  moment = parse_date(date)
  Temporal::QueryBuilder.new(self, moment)
end

#batch_extract(source_ids, extractor: @config.default_extractor, parallel: true) ⇒ Array<Hash>

Batch extract facts from multiple sources

Parameters:

  • source_ids (Array<Integer>)

    Source IDs to process

  • extractor (Symbol) (defaults to: @config.default_extractor)

    Extractor type (:manual, :llm, :rule_based)

  • parallel (Boolean) (defaults to: true)

    Whether to use parallel processing

Returns:

  • (Array<Hash>)

    Results with extracted facts per source



254
255
256
257
258
259
260
261
# File 'lib/fact_db.rb', line 254

# Runs fact extraction over several sources in one call.
#
# @param source_ids [Array<Integer>] ids looked up via Models::Source.where(id:)
# @param extractor [Symbol] extractor type; defaults to @config.default_extractor
# @param parallel [Boolean] truthy selects process_parallel, falsy selects process
# @return [Array<Hash>] whatever the extraction pipeline returns for the sources
def batch_extract(source_ids, extractor: @config.default_extractor, parallel: true)
  records = Models::Source.where(id: source_ids).to_a
  return @extraction_pipeline.process_parallel(records, extractor: extractor) if parallel

  @extraction_pipeline.process(records, extractor: extractor)
end

#batch_resolve_entities(names, kind: nil) ⇒ Array<Hash>

Batch resolve entity names

Parameters:

  • names (Array<String>)

    Entity names to resolve

  • kind (Symbol, nil) (defaults to: nil)

    Entity kind filter

Returns:

  • (Array<Hash>)

    Resolution results



268
269
270
# File 'lib/fact_db.rb', line 268

# Resolves several entity names in one call by delegating to the
# resolution pipeline.
#
# @param names [Array<String>] entity names to resolve
# @param kind [Symbol, nil] entity kind filter, forwarded unchanged
# @return [Array<Hash>] the pipeline's resolution results
def batch_resolve_entities(names, kind: nil)
  resolver = @resolution_pipeline
  resolver.resolve_entities(names, kind: kind)
end

#current_facts_for(entity_id, format: :json) ⇒ Array, ...

Get currently valid facts about an entity

Parameters:

  • entity_id (Integer)

    Entity ID

  • format (Symbol) (defaults to: :json)

    Output format

Returns:

  • (Array, String, Hash)

    Results in requested format



128
129
130
131
# File 'lib/fact_db.rb', line 128

# Fetches the facts the fact service reports as current for one entity and
# renders them in the requested format.
#
# @param entity_id [Integer] entity to look up
# @param format [Symbol] output format handed to transform_results
# @return [Array, String, Hash] results in the requested format
def current_facts_for(entity_id, format: :json)
  current = @fact_service.current_facts(entity: entity_id)
  label = "entity_#{entity_id}"
  transform_results(current, topic: label, format: format)
end

#detect_fact_conflicts(entity_ids) ⇒ Array<Hash>

Detect fact conflicts for multiple entities

Parameters:

  • entity_ids (Array<Integer>)

    Entity IDs to check

Returns:

  • (Array<Hash>)

    Conflict detection results



276
277
278
# File 'lib/fact_db.rb', line 276

# Checks several entities for conflicting facts by delegating to the
# resolution pipeline.
#
# @param entity_ids [Array<Integer>] entity ids to check
# @return [Array<Hash>] the pipeline's conflict detection results
def detect_fact_conflicts(entity_ids)
  pipeline = @resolution_pipeline
  pipeline.detect_conflicts(entity_ids)
end

#diff(topic = nil, from:, to:) ⇒ Hash

Compare what changed between two dates

Parameters:

  • topic (String, nil) (defaults to: nil)

    Topic to compare (nil for all facts)

  • from (Date, Time, String)

    Start date

  • to (Date, Time, String)

    End date

Returns:

  • (Hash)

    Differences with :added, :removed, :unchanged keys



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/fact_db.rb', line 164

# Compares the canonical facts valid at two dates.
#
# Both dates go through parse_date, the fact service is queried once per
# date, and facts are matched between the two result sets by comparable_key.
#
# @param topic [String, nil] topic to compare (nil for all facts)
# @param from [Date, Time, String] start date
# @param to [Date, Time, String] end date
# @return [Hash] :topic, the parsed :from and :to dates, and the :added,
#   :removed and :unchanged facts
def diff(topic = nil, from:, to:)
  start_date = parse_date(from)
  end_date = parse_date(to)

  earlier = @fact_service.query(topic: topic, at: start_date, status: :canonical)
  later = @fact_service.query(topic: topic, at: end_date, status: :canonical)

  earlier_keys = facts_to_comparable(earlier)
  later_keys = facts_to_comparable(later)

  # Present only at the later date.
  added = later.reject { |fact| earlier_keys.include?(comparable_key(fact)) }
  # Present only at the earlier date.
  removed = earlier.reject { |fact| later_keys.include?(comparable_key(fact)) }
  # Present at both dates; reported from the earlier result set.
  unchanged = earlier.select { |fact| later_keys.include?(comparable_key(fact)) }

  {
    topic: topic,
    from: start_date,
    to: end_date,
    added: added,
    removed: removed,
    unchanged: unchanged
  }
end

#extract_facts(source_id, extractor: @config.default_extractor) ⇒ Object

Extract facts from source



96
97
98
# File 'lib/fact_db.rb', line 96

# Extracts facts from a single source by delegating to the fact service.
#
# @param source_id [Object] identifier of the source, forwarded unchanged
# @param extractor [Symbol] extractor type; defaults to @config.default_extractor
# @return [Object] whatever the fact service's extract_from_source returns
def extract_facts(source_id, extractor: @config.default_extractor)
  service = @fact_service
  service.extract_from_source(source_id, extractor: extractor)
end

#facts_at(at, entity: nil, topic: nil, format: :json) ⇒ Array, ...

Get facts valid at a specific point in time

Parameters:

  • at (Date, Time, String)

    Point in time

  • entity (Integer, nil) (defaults to: nil)

    Entity ID to filter by

  • topic (String, nil) (defaults to: nil)

    Topic to search for

  • format (Symbol) (defaults to: :json)

    Output format

Returns:

  • (Array, String, Hash)

    Results in requested format



140
141
142
143
# File 'lib/fact_db.rb', line 140

# Fetches the facts valid at a point in time and renders them in the
# requested format.
#
# @param at [Date, Time, String] point in time, forwarded to the fact service as given
# @param entity [Integer, nil] entity id to filter by
# @param topic [String, nil] topic to search for; when absent the result is
#   labelled "facts_at_<at>"
# @param format [Symbol] output format handed to transform_results
# @return [Array, String, Hash] results in the requested format
def facts_at(at, entity: nil, topic: nil, format: :json)
  matches = @fact_service.facts_at(at, entity: entity, topic: topic)
  label = topic || "facts_at_#{at}"
  transform_results(matches, topic: label, format: format)
end

#ingest(content, kind:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil) ⇒ Object

Ingest raw content



84
85
86
87
88
89
90
91
92
93
# File 'lib/fact_db.rb', line 84

# Ingests raw content by creating a source record through the source service.
#
# @param content [Object] raw content, passed as the positional argument
# @param kind [Object] kind of content (required)
# @param captured_at [Object] capture timestamp; defaults to Time.current
# @param metadata [Hash] metadata stored with the source
# @param title [String, nil] optional title
# @param source_uri [String, nil] optional URI of the origin
# @return [Object] whatever the source service's create returns
def ingest(content, kind:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
  @source_service.create(
    content,
    kind: kind,
    captured_at: captured_at,
    # Spelled out like the neighbouring pairs so the call does not depend on
    # value-omission shorthand.
    metadata: metadata,
    title: title,
    source_uri: source_uri
  )
end

#introspect(topic = nil) ⇒ Hash

Introspect the schema - what does the layer know about?

Parameters:

  • topic (String, nil) (defaults to: nil)

    Optional topic to introspect specifically

Returns:

  • (Hash)

    Schema information or topic-specific coverage



188
189
190
# File 'lib/fact_db.rb', line 188

# Reports what the layer knows: the overall schema, or coverage for one topic.
#
# @param topic [String, nil] optional topic; any truthy value selects the
#   topic-specific report
# @return [Hash] result of introspect_topic when a topic is given, otherwise
#   of introspect_schema
def introspect(topic = nil)
  return introspect_schema unless topic

  introspect_topic(topic)
end

#query_facts(topic: nil, at: nil, entity: nil, status: :canonical, format: :json) ⇒ Array, ...

Query facts with temporal and entity filtering

Parameters:

  • topic (String, nil) (defaults to: nil)

    Topic to search for

  • at (Date, Time, String, nil) (defaults to: nil)

    Point in time for temporal query

  • entity (Integer, nil) (defaults to: nil)

    Entity ID to filter by

  • status (Symbol) (defaults to: :canonical)

    Fact status (:canonical, :superseded, :synthesized, :all)

  • format (Symbol) (defaults to: :json)

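    Output format (:json, :triples, :cypher, :text, :prolog). NOTE(review): the FORMATS constant lists :raw and does not list :prolog — confirm which set is current.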

Returns:

  • (Array, String, Hash)

    Results in requested format



108
109
110
111
# File 'lib/fact_db.rb', line 108

# Queries facts with optional topic, time and entity filters and renders the
# matches in the requested format.
#
# @param topic [String, nil] topic to search for
# @param at [Date, Time, String, nil] point in time, forwarded as given
# @param entity [Integer, nil] entity id to filter by
# @param status [Symbol] fact status filter
# @param format [Symbol] output format handed to transform_results
# @return [Array, String, Hash] results in the requested format
def query_facts(topic: nil, at: nil, entity: nil, status: :canonical, format: :json)
  filters = { topic: topic, at: at, entity: entity, status: status }
  matches = @fact_service.query(**filters)
  transform_results(matches, topic: topic, format: format)
end

#resolve_entity(name, kind: nil) ⇒ Object

Resolve a name to an entity



114
115
116
# File 'lib/fact_db.rb', line 114

# Resolves a name to an entity by delegating to the entity service.
#
# @param name [String] name to resolve
# @param kind [Symbol, nil] entity kind filter, forwarded unchanged
# @return [Object] whatever the entity service's resolve returns
def resolve_entity(name, kind: nil)
  resolver = @entity_service
  resolver.resolve(name, kind: kind)
end

#suggest_queries(topic) ⇒ Array<String>

Suggest queries based on what’s stored for a topic

Parameters:

  • topic (String)

    Topic to get suggestions for

Returns:

  • (Array<String>)

    Suggested queries



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/fact_db.rb', line 196

# Suggests follow-up queries based on what is stored for a topic.
#
# The topic is resolved with #resolve_entity; suggestions are then derived
# from the entity's kind, its relationship types and its fact statistics.
#
# @param topic [String] topic to get suggestions for
# @return [Array<String>] suggested queries; empty when the topic does not
#   resolve or the resolution carries no entity
def suggest_queries(topic)
  resolved = resolve_entity(topic)
  return [] unless resolved

  # The resolver may return either a wrapper exposing #entity or the entity itself.
  entity = resolved.respond_to?(:entity) ? resolved.entity : resolved
  # Without this guard a wrapper holding no entity reached entity.id below
  # and raised NoMethodError.
  return [] unless entity

  suggestions = []
  suggestions << "current status" if entity.respond_to?(:kind) && entity.kind == "person"

  # Relationship-driven suggestions.
  relationships = @entity_service.relationship_types_for(entity.id)
  # NOTE(review): :object is an unusual relationship type next to :works_at —
  # confirm it is intended.
  suggestions << "employment history" if %i[works_at object].any? { |type| relationships.include?(type) }
  suggestions << "team members" if relationships.include?(:works_with)
  suggestions << "reporting chain" if relationships.include?(:reports_to)

  # Coverage-driven suggestions; a missing count is treated like zero.
  fact_stats = @fact_service.fact_stats(entity.id)
  suggestions << "timeline" if fact_stats[:canonical]&.positive?
  suggestions << "historical changes" if fact_stats[:superseded]&.positive?

  suggestions
end

#suggest_strategies(query_text) ⇒ Array<Hash>

Suggest retrieval strategies for a query

Parameters:

  • query_text (String)

    The query

Returns:

  • (Array<Hash>)

    Strategy options with descriptions



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/fact_db.rb', line 224

# Suggests retrieval strategies for a query from keywords found in its text.
#
# Each rule pairs a keyword pattern with the strategy it suggests. Matching
# strategies are returned in rule order, and :hybrid is always appended last.
#
# @param query_text [String, nil] the query; nil is treated as an empty query
# @return [Array<Hash>] strategy options, each with :strategy and :description
def suggest_strategies(query_text)
  # to_s keeps a nil query from raising on match?; it then yields only :hybrid.
  text = query_text.to_s

  keyword_rules = [
    # Temporal keywords
    [/\b(yesterday|last\s+week|last\s+month|ago|since|before|after|between)\b/i,
     :temporal, "Filter by date range"],
    # Semantic intent
    [/\b(about|related|similar|like)\b/i, :semantic, "Search by semantic similarity"],
    # Entity focus
    [/\b(who|what|where)\b/i, :graph, "Traverse from entity node"]
  ]

  strategies = keyword_rules
               .select { |pattern, _strategy, _description| text.match?(pattern) }
               .map { |_pattern, strategy, description| { strategy: strategy, description: description } }

  # Default: hybrid
  strategies << { strategy: :hybrid, description: "Combine multiple strategies" }
end

#timeline_for(entity_id, from: nil, to: nil) ⇒ Object

Build a timeline for an entity



119
120
121
# File 'lib/fact_db.rb', line 119

# Builds a timeline for an entity by delegating to the fact service.
#
# @param entity_id [Object] entity to build the timeline for
# @param from [Object, nil] optional lower bound, forwarded unchanged
# @param to [Object, nil] optional upper bound, forwarded unchanged
# @return [Object] whatever the fact service's timeline returns
def timeline_for(entity_id, from: nil, to: nil)
  window = { from: from, to: to }
  @fact_service.timeline(entity_id: entity_id, **window)
end