# ira/config/config.yaml

# Example configuration file for the intelligent research system
# Rename this file to config.yaml and fill in your API keys and settings
# API keys (alternatively, set environment variables)
api_keys:
  openai: "your-openai-api-key"  # Or set OPENAI_API_KEY environment variable
  jina: "your-jina-api-key"  # Or set JINA_API_KEY environment variable
  serper: "your-serper-api-key"  # Or set SERPER_API_KEY environment variable
  google: "your-google-api-key"  # Or set GOOGLE_API_KEY environment variable
  anthropic: "your-anthropic-api-key"  # Or set ANTHROPIC_API_KEY environment variable
  openrouter: "your-openrouter-api-key"  # Or set OPENROUTER_API_KEY environment variable
  groq: "your-groq-api-key"  # Or set GROQ_API_KEY environment variable
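
# As the inline comments note, each key can instead be supplied through an
# environment variable. A minimal shell example (assuming the application
# reads these standard variable names):
#   export GROQ_API_KEY="your-groq-api-key"
#   export OPENAI_API_KEY="your-openai-api-key"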
# LLM model configurations
models:
  gpt-3.5-turbo:
    provider: "openai"
    temperature: 0.7
    max_tokens: 1000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  gpt-4:
    provider: "openai"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  claude-2:
    provider: "anthropic"
    temperature: 0.7
    max_tokens: 1500
    top_p: 1.0
    endpoint: null  # Use default Anthropic endpoint
  azure-gpt-4:
    provider: "azure"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: "https://your-azure-endpoint.openai.azure.com"
    deployment_name: "your-deployment-name"
    api_version: "2023-05-15"
  local-llama:
    provider: "ollama"
    temperature: 0.8
    max_tokens: 1000
    endpoint: "http://localhost:11434/api/generate"
    model_name: "llama2"
  llama-3.1-8b-instant:
    provider: "groq"
    model_name: "llama-3.1-8b-instant"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  llama-3.3-70b-versatile:
    provider: "groq"
    model_name: "llama-3.3-70b-versatile"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  openrouter-mixtral:
    provider: "openrouter"
    model_name: "mistralai/mixtral-8x7b-instruct"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
  openrouter-claude:
    provider: "openrouter"
    model_name: "anthropic/claude-3-opus"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
  gemini-2.0-flash:
    provider: "gemini"
    model_name: "gemini-2.0-flash"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
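
# To register another model, add an entry under "models" following the same
# pattern. The block below is a commented-out illustration only; the name,
# provider, and parameter values are placeholders, not required by the system:
#   my-custom-model:
#     provider: "openai"
#     model_name: "my-custom-model"
#     temperature: 0.7
#     max_tokens: 1024
#     top_p: 1.0
#     endpoint: null  # null falls back to the provider's default endpoint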
# Default model to use if not specified for a module
default_model: "llama-3.1-8b-instant" # Using Groq's Llama 3.1 8B model for testing
# Module-specific model assignments
module_models:
  # Query processing module
  query_processing:
    enhance_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for query enhancement
    classify_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for classification
    generate_search_queries: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for generating search queries
  # Search strategy module
  search_strategy:
    develop_strategy: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for developing search strategies
    target_selection: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for target selection
  # Document ranking module
  document_ranking:
    rerank_documents: "jina-reranker"  # Use Jina's reranker for document reranking
  # Report generation module
  report_generation:
    synthesize_report: "gemini-2.0-flash"  # Use Google's Gemini 2.0 Flash for report synthesis
    format_report: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for formatting
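
# Note: the assignments above are expected to reference model names defined
# under "models" (e.g. "llama-3.1-8b-instant", "gemini-2.0-flash");
# "jina-reranker" is the exception and is presumably resolved through the
# Jina reranker settings further below.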
# Search engine configurations
search_engines:
  google:
    enabled: true
    max_results: 10
  serper:
    enabled: true
    max_results: 10
  jina:
    enabled: true
    max_results: 10
  scholar:
    enabled: false
    max_results: 5
  arxiv:
    enabled: false
    max_results: 5
# Jina AI specific configurations
jina:
  reranker:
    model: "jina-reranker-v2-base-multilingual"  # Default reranker model
    top_n: 10  # Default number of top results to return
# UI configuration
ui:
  theme: "light"  # light or dark
  port: 7860
  share: false
  title: "Intelligent Research System"
  description: "An automated system for finding, filtering, and synthesizing information"
# System settings
system:
  cache_dir: "data/cache"
  results_dir: "data/results"
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
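
# To sanity-check this file after editing, any YAML parser will do, e.g.
# (assuming PyYAML is installed and the file is saved as config/config.yaml):
#   python -c "import yaml; yaml.safe_load(open('config/config.yaml')); print('OK')"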