Skip to content

CLI Examples

Common usage patterns and workflows for the Ragdoll CLI.

Getting Started

Initial Setup

# Install the CLI
gem install ragdoll-cli

# Configure database connection
ragdoll config set database.url "postgresql://user:pass@localhost/ragdoll"

# Configure LLM provider
ragdoll config set llm.provider "openai"
ragdoll config set llm.api_key "sk-your-key-here"

# Test the setup
ragdoll health

First Document Upload

# Upload a single document
ragdoll update ~/Documents/research-paper.pdf

# Upload with metadata
ragdoll update ~/Documents/report.pdf \
  --metadata "author=Jane Doe" \
  --metadata "department=Research" \
  --metadata "category=quarterly-report"

# Upload and extract images
ragdoll update ~/Documents/presentation.pptx --extract-images

Document Management

Bulk Document Processing

# Process entire directory
ragdoll update ~/Documents/papers --recursive

# Process specific file types
find ~/Documents -name "*.pdf" -exec ragdoll update {} \;

# Process with custom metadata based on path
for file in ~/Documents/research/*.pdf; do
  ragdoll update "$file" --metadata "category=research"
done

Updating Existing Documents

# Force update (even if file hasn't changed)
ragdoll update document.pdf --force

# Update with new metadata
ragdoll update document.pdf \
  --metadata "status=reviewed" \
  --metadata "last-updated=$(date)"

Removing Documents

# Delete by file path
ragdoll delete ~/Documents/old-document.pdf --by-path

# Delete by document ID
ragdoll list documents --format json | jq '.[] | select(.title | contains("obsolete")) | .id' | \
  xargs -I {} ragdoll delete {} --by-id --confirm

Search Operations

Basic Searches

# Simple search
ragdoll search "machine learning algorithms"

# Search with result limit
ragdoll search "Python programming" --limit 5

# Search with similarity threshold
ragdoll search "neural networks" --threshold 0.8

Advanced Search Queries

# Search with JSON output for processing
ragdoll search "data science" --format json > search-results.json

# Search and extract specific fields
ragdoll search "AI research" --format json | \
  jq '.[] | {title: .title, score: .similarity_score, path: .file_path}'

# Search with metadata inclusion
ragdoll search "quarterly report" --include-metadata --format json | \
  jq '.[] | select(.metadata.department == "Research")'

Search Workflows

# Create a search report
cat << 'EOF' > search-report.sh
#!/bin/bash
QUERY="$1"
echo "# Search Results for: $QUERY"
echo "Generated: $(date)"
echo ""
ragdoll search "$QUERY" --include-metadata --format json | \
  jq -r '.[] | "## \(.title)\n**Score:** \(.similarity_score)\n**Path:** \(.file_path)\n**Summary:** \(.summary // "No summary available")\n"'
EOF
chmod +x search-report.sh

# Usage
./search-report.sh "artificial intelligence" > ai-search-report.md

System Monitoring

Health Monitoring

# Basic health check
ragdoll health

# Detailed health check with timing
time ragdoll health --verbose

# Monitor system status continuously
ragdoll status --watch --interval 30

Performance Monitoring

# Monitor search performance
echo "machine learning,$(time (ragdoll search "machine learning" --limit 1 >/dev/null) 2>&1 | grep real)"

# Batch performance testing
for query in "AI" "ML" "data science" "algorithms"; do
  echo -n "$query: "
  time ragdoll search "$query" --limit 5 >/dev/null
done 2>&1 | grep real

System Statistics

# Basic statistics
ragdoll stats

# Detailed statistics with formatting
ragdoll stats --detailed --format json | jq '{
  documents: .total_documents,
  embeddings: .total_embeddings,
  storage: .storage_size,
  last_update: .last_updated
}'

Batch Operations

Processing Multiple Files

# Process files in parallel
find ~/Documents -name "*.pdf" -print0 | \
  xargs -0 -P 4 -I {} ragdoll update {}

# Process with progress tracking
total=$(find ~/Documents -name "*.pdf" | wc -l)
count=0
find ~/Documents -name "*.pdf" | while read file; do
  count=$((count + 1))
  echo "Processing $count/$total: $file"
  ragdoll update "$file"
done

Bulk Metadata Updates

# Add category metadata to all PDFs in research directory
find ~/Documents/research -name "*.pdf" | while read file; do
  ragdoll update "$file" --metadata "category=research"
done

# Update metadata based on file location
for dir in ~/Documents/*/; do
  category=$(basename "$dir")
  find "$dir" -name "*.pdf" | while read file; do
    ragdoll update "$file" --metadata "category=$category"
  done
done

Integration Examples

Backup and Sync

# Backup document list
ragdoll list documents --format json > documents-backup-$(date +%Y%m%d).json

# Sync documents from backup
jq -r '.[].file_path' documents-backup.json | while read path; do
  if [ -f "$path" ]; then
    ragdoll update "$path"
  fi
done

Web Scraping Integration

# Download and process web content
curl -s "https://example.com/article" | \
  html2text > temp-article.txt && \
  ragdoll update temp-article.txt \
    --metadata "source=web" \
    --metadata "url=https://example.com/article" && \
  rm temp-article.txt

Git Integration

# Process files changed in git
git diff --name-only HEAD~1 HEAD | \
  grep -E '\.(pdf|docx|txt|md)$' | \
  xargs -I {} ragdoll update {}

# Add git metadata
for file in $(git ls-files '*.md'); do
  commit=$(git log -1 --format="%H" -- "$file")
  author=$(git log -1 --format="%an" -- "$file")
  ragdoll update "$file" \
    --metadata "git-commit=$commit" \
    --metadata "git-author=$author"
done

Automation Scripts

Daily Processing Script

#!/bin/bash
# daily-ragdoll-sync.sh

LOG_FILE="$HOME/.ragdoll/logs/daily-sync.log"
echo "$(date): Starting daily Ragdoll sync" >> "$LOG_FILE"

# Check system health
if ! ragdoll health >/dev/null 2>&1; then
  echo "$(date): Health check failed!" >> "$LOG_FILE"
  exit 1
fi

# Process new documents
find ~/Documents -name "*.pdf" -mtime -1 | while read file; do
  echo "$(date): Processing $file" >> "$LOG_FILE"
  ragdoll update "$file" 2>&1 | tee -a "$LOG_FILE"
done

# Clean up old logs
find ~/.ragdoll/logs -name "*.log" -mtime +7 -delete

echo "$(date): Daily sync completed" >> "$LOG_FILE"

Search API Wrapper

#!/bin/bash
# ragdoll-api.sh - Simple HTTP API wrapper

PORT=${PORT:-8080}

handle_search() {
  local query=$(echo "$1" | sed 's/+/ /g')
  ragdoll search "$query" --format json
}

while true; do
  echo -e "HTTP/1.1 200 OK\nContent-Type: application/json\n" | nc -l "$PORT" -c "
    read method path protocol
    if [[ \$path =~ ^/search\\?q=(.+) ]]; then
      handle_search \${BASH_REMATCH[1]}
    else
      echo '{\"error\": \"Not found\"}'
    fi
  "
done

Troubleshooting Workflows

Diagnostic Script

#!/bin/bash
# ragdoll-diagnostics.sh

echo "=== Ragdoll CLI Diagnostics ==="
echo "Timestamp: $(date)"
echo "CLI Version: $(ragdoll --version)"
echo ""

echo "=== Configuration ==="
ragdoll config list
echo ""

echo "=== Health Check ==="
ragdoll health --verbose
echo ""

echo "=== System Stats ==="
ragdoll stats --detailed
echo ""

echo "=== Recent Log Entries ==="
tail -n 20 ~/.ragdoll/logs/ragdoll.log 2>/dev/null || echo "No log file found"

Performance Benchmark

#!/bin/bash
# benchmark-ragdoll.sh

QUERIES=("artificial intelligence" "machine learning" "data science" "neural networks" "algorithms")

echo "=== Ragdoll Performance Benchmark ==="
echo "Timestamp: $(date)"
echo ""

for query in "${QUERIES[@]}"; do
  echo "Testing query: '$query'"
  time ragdoll search "$query" --limit 10 >/dev/null
  echo ""
done

These examples provide a comprehensive foundation for using the Ragdoll CLI effectively in various scenarios and workflows.