# File: ira/config/config.yaml.example

# Example configuration file for the intelligent research system
# Rename this file to config.yaml and fill in your API keys and settings
# API keys (alternatively, set environment variables)
api_keys:
  openai: "your-openai-api-key"  # Or set OPENAI_API_KEY environment variable
  jina: "your-jina-api-key"  # Or set JINA_API_KEY environment variable
  serper: "your-serper-api-key"  # Or set SERPER_API_KEY environment variable
  google: "your-google-api-key"  # Or set GOOGLE_API_KEY environment variable
  anthropic: "your-anthropic-api-key"  # Or set ANTHROPIC_API_KEY environment variable
  openrouter: "your-openrouter-api-key"  # Or set OPENROUTER_API_KEY environment variable
  groq: "your-groq-api-key"  # Or set GROQ_API_KEY environment variable
  newsapi: "your-newsapi-key"  # Or set NEWSAPI_API_KEY environment variable
  core: "your-core-api-key"  # Or set CORE_API_KEY environment variable
  github: "your-github-api-key"  # Or set GITHUB_API_KEY environment variable
  stackexchange: "your-stackexchange-api-key"  # Or set STACKEXCHANGE_API_KEY environment variable
# LLM model configurations
# Each entry maps a model alias to its provider and sampling/connection settings.
models:
  gpt-3.5-turbo:
    provider: "openai"
    temperature: 0.7
    max_tokens: 1000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  gpt-4:
    provider: "openai"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  claude-2:
    provider: "anthropic"
    temperature: 0.7
    max_tokens: 1500
    top_p: 1.0
    endpoint: null  # Use default Anthropic endpoint
  azure-gpt-4:
    provider: "azure"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: "https://your-azure-endpoint.openai.azure.com"
    deployment_name: "your-deployment-name"
    api_version: "2023-05-15"  # keep quoted — a bare value would parse as a YAML date
  local-llama:
    provider: "ollama"
    temperature: 0.8
    max_tokens: 1000
    endpoint: "http://localhost:11434/api/generate"
    model_name: "llama2"
  llama-3.1-8b-instant:
    provider: "groq"
    model_name: "llama-3.1-8b-instant"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  llama-3.3-70b-versatile:
    provider: "groq"
    model_name: "llama-3.3-70b-versatile"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  openrouter-mixtral:
    provider: "openrouter"
    model_name: "mistralai/mixtral-8x7b-instruct"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
  openrouter-claude:
    provider: "openrouter"
    model_name: "anthropic/claude-3-opus"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
# Default model to use if not specified for a module
default_model: "llama-3.1-8b-instant"  # Using Groq's Llama 3.1 8B model for testing

# Module-specific model assignments (keys must match aliases defined under `models`,
# except for special handlers such as the Jina reranker)
module_models:
  # Query processing module
  query_processing:
    enhance_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for query enhancement
    classify_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for classification
    generate_search_queries: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for generating search queries
  # Search strategy module
  search_strategy:
    develop_strategy: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for developing search strategies
    target_selection: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for target selection
  # Document ranking module
  document_ranking:
    rerank_documents: "jina-reranker"  # Use Jina's reranker for document reranking
  # Report generation module
  report_generation:
    synthesize_report: "llama-3.3-70b-versatile"  # Use Groq's Llama 3.3 70B for report synthesis
    format_report: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for formatting
# Search engine configurations
search_engines:
  google:
    enabled: true
    max_results: 10
  serper:
    enabled: true
    max_results: 10
  jina:
    enabled: true
    max_results: 10
  scholar:
    enabled: false
    max_results: 5
  arxiv:
    enabled: false
    max_results: 5
  news:
    enabled: true
    max_results: 10
    days_back: 7
    use_headlines: false  # Set to true to use top headlines endpoint
    country: "us"  # Country code for top headlines
    language: "en"  # Language code
  openalex:
    enabled: true
    max_results: 10
    filter_open_access: false  # Set to true to only return open access publications
  core:
    enabled: true
    max_results: 10
    full_text: true  # Set to true to search in full text of papers
  github:
    enabled: true
    max_results: 10
    sort: "best_match"  # Options: best_match, stars, forks, updated
  stackexchange:
    enabled: true
    max_results: 10
    site: "stackoverflow"  # Default site (stackoverflow, serverfault, superuser, etc.)
    sort: "relevance"  # Options: relevance, votes, creation, activity
# Jina AI specific configurations
jina:
  reranker:
    model: "jina-reranker-v2-base-multilingual"  # Default reranker model
    top_n: 10  # Default number of top results to return
# UI configuration
ui:
  theme: "light"  # light or dark
  port: 7860
  share: false
  title: "Intelligent Research System"
  description: "An automated system for finding, filtering, and synthesizing information"
# Academic search settings
academic_search:
  email: "user@example.com"  # Used for Unpaywall and OpenAlex APIs
  # OpenAlex settings
  openalex:
    default_sort: "relevance_score:desc"  # Other options: cited_by_count:desc, publication_date:desc
  # Unpaywall settings — explicit empty mapping (a bare key would load as null)
  unpaywall: {}  # No specific settings needed
  # CORE settings — explicit empty mapping (a bare key would load as null)
  core: {}  # No specific settings needed
# System settings
system:
  cache_dir: "data/cache"
  results_dir: "data/results"
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL