Skip to content

Entity Management

Entities represent real-world things mentioned in facts - people, organizations, places, and more.

Creating Entities

Basic Creation

facts = FactDb.new

person = facts.entity_service.create(
  "Paula Chen",
  type: :person
)

With Aliases

person = facts.entity_service.create(
  "Paula Chen",
  type: :person,
  aliases: ["Paula", "P. Chen", "Chen, Paula"]
)

With Metadata

person = facts.entity_service.create(
  "Paula Chen",
  type: :person,
  aliases: ["Paula"],
  metadata: {
    employee_id: "E12345",
    department: "Engineering",
    start_date: "2024-01-10"
  }
)

Entity Types

| Type | Description | Examples |
|------|-------------|----------|
| :person | Individual people | Paula Chen, John Smith |
| :organization | Companies, teams | Microsoft, Platform Team |
| :place | Locations | San Francisco, Building A |
| :product | Products, services | Windows 11, Azure |
| :event | Named events | Q4 Earnings, Annual Review |

# Custom types are also supported
entity = facts.entity_service.create(
  "TPS Report",
  type: :document_type
)

Managing Aliases

Add Alias

facts.entity_service.add_alias(
  entity.id,
  "P. Chen",
  type: :abbreviation,
  confidence: 0.95
)

Alias Types

| Type | Description |
|------|-------------|
| nickname | Informal names |
| abbreviation | Shortened forms |
| formal | Formal/legal names |
| maiden_name | Previous names |
| trading_name | Business aliases |

List Aliases

entity.entity_aliases.each do |alias_record|
  puts "#{alias_record.name} (#{alias_record.type})"
  puts "  Confidence: #{alias_record.confidence}"
end

Remove Alias

facts.entity_service.remove_alias(entity.id, "Old Name")

Entity Resolution

Basic Resolution

# Resolve a name to an entity
entity = facts.resolve_entity("Paula Chen")

# Returns existing entity or nil if not found

Type-Constrained Resolution

# Only match person entities
person = facts.resolve_entity("Paula", type: :person)

# Only match organizations
org = facts.resolve_entity("Microsoft", type: :organization)

Resolution Strategies

The resolver tries in order:

  1. Exact match on canonical name
  2. Alias match on registered aliases
  3. Fuzzy match using Levenshtein distance
# Configure fuzzy matching
FactDb.configure do |config|
  config.fuzzy_match_threshold = 0.85  # 85% similarity required
end

Batch Resolution

names = ["Paula Chen", "John Smith", "Microsoft", "Seattle"]

results = facts.batch_resolve_entities(names)

results.each do |result|
  status = result[:status]  # :resolved, :not_found, :error
  entity = result[:entity]
  puts "#{result[:name]}: #{status} -> #{entity&.name}"
end

Merging Entities

When duplicate entities are discovered:

# Merge entity2 into entity1 (entity1 is kept)
facts.entity_service.merge(entity1.id, entity2.id)

# After merge:
entity2.reload
entity2.resolution_status  # => "merged"
entity2.canonical_id     # => entity1.id

What Happens on Merge

  1. Entity2's status changes to "merged"
  2. Entity2 points to entity1 via canonical_id
  3. Entity2's aliases are copied to entity1
  4. All facts mentioning entity2 now also reference entity1

Auto-Merge

Configure automatic merging for high-confidence matches:

FactDb.configure do |config|
  config.auto_merge_threshold = 0.95  # Auto-merge at 95% similarity
end

Updating Entities

Update Canonical Name

facts.entity_service.update(
  entity.id,
  name: "Paula M. Chen"
)

Update Metadata

facts.entity_service.update(
  entity.id,
  metadata: entity.metadata.merge(title: "Senior Principal Engineer")
)

Change Type

# Reclassify entity type
facts.entity_service.update(
  entity.id,
  type: :organization
)

Resolution Status

| Status | Description |
|--------|-------------|
| unresolved | Entity created but not confirmed |
| resolved | Entity identity confirmed |
| merged | Entity merged into another |

Mark as Resolved

facts.entity_service.update(
  entity.id,
  resolution_status: :resolved
)

Find Unresolved

unresolved = FactDb::Models::Entity
  .where(resolution_status: 'unresolved')
  .order(created_at: :desc)

Querying Entities

Find by ID

entity = facts.entity_service.find(entity_id)

Search by Name

entities = facts.entity_service.search("Paula")

Filter by Type

people = FactDb::Models::Entity
  .where(type: 'person')
  .where.not(resolution_status: 'merged')

Find Entities in Source

# Find all entities mentioned in a source
entities = facts.entity_service.in_source(source.id)
# Entities mentioned in facts about Paula
related = facts.entity_service.related_to(paula.id)

Search entities by meaning:

# Find entities similar to a description
similar = facts.entity_service.semantic_search(
  "software engineering leadership",
  type: :person,
  limit: 10
)

Best Practices

1. Use Comprehensive Aliases

entity = facts.entity_service.create(
  "International Business Machines Corporation",
  type: :organization,
  aliases: [
    "IBM",
    "Big Blue",
    "International Business Machines",
    "IBM Corp",
    "IBM Corporation"
  ]
)

2. Store Relevant Metadata

person = facts.entity_service.create(
  "Paula Chen",
  type: :person,
  metadata: {
    # Stable identifiers
    employee_id: "E12345",
    linkedin_url: "linkedin.com/in/paulachen",

    # Useful context
    department: "Engineering",
    location: "San Francisco"
  }
)

3. Review Unresolved Entities

# Periodically review unresolved entities created more than a week ago
stale_unresolved = FactDb::Models::Entity
  .where(resolution_status: 'unresolved')
  .where('created_at < ?', 1.week.ago)

stale_unresolved.each do |candidate|
  # Search by name; more than one hit is reported as a potential duplicate
  matches = facts.entity_service.search(candidate.name)
  puts "Potential duplicate: #{candidate.name}" if matches.count > 1
end

4. Handle Merged Entities

# When querying, exclude merged entities
active_entities = FactDb::Models::Entity
  .where.not(resolution_status: 'merged')

# Or follow the merge chain
#
# Walks from the given entity to the end of its merge chain by repeatedly
# loading the record referenced by +canonical_id+.
#
# @param entity [#canonical_id] the entity to start from
# @return [Object] the first entity in the chain whose +canonical_id+ is nil
#   (the argument itself when it has no +canonical_id+)
# @raise [RuntimeError] if the chain loops back onto an id already visited
def canonical_entity(entity)
  visited_ids = []

  while (target_id = entity.canonical_id)
    # A chain that revisits an id would never terminate; fail loudly
    # instead of spinning forever.
    if visited_ids.include?(target_id)
      raise "Circular merge chain detected at entity #{target_id}"
    end

    visited_ids << target_id
    entity = FactDb::Models::Entity.find(target_id)
  end

  entity
end

5. Validate Entity Types

# Entity types accepted by create_entity.
VALID_TYPES = %i[person organization place product event].freeze

# Creates an entity only when its type is one of VALID_TYPES.
#
# The type is normalized to a Symbol once, and that normalized value is both
# validated and forwarded, so "person" and :person behave the same.
#
# @param name [String] canonical name passed through to the entity service
# @param type [Symbol, String] entity type; must normalize to a VALID_TYPES member
# @return [Object] whatever facts.entity_service.create returns
# @raise [ArgumentError] if type is not in VALID_TYPES or cannot be converted
#   to a Symbol (e.g. nil)
def create_entity(name, type:)
  # Values without #to_sym (nil, numbers) are invalid types, not a crash.
  normalized_type = type.respond_to?(:to_sym) ? type.to_sym : nil

  unless VALID_TYPES.include?(normalized_type)
    raise ArgumentError, "Invalid entity type: #{type.inspect}"
  end

  facts.entity_service.create(name, type: normalized_type)
end