# File: ira/config/config.yaml.example

# Example configuration file for the intelligent research system
# Rename this file to config.yaml and fill in your API keys and settings
# API keys (alternatively, set environment variables)
api_keys:
  openai: "your-openai-api-key"  # Or set OPENAI_API_KEY environment variable
  jina: "your-jina-api-key"  # Or set JINA_API_KEY environment variable
  serper: "your-serper-api-key"  # Or set SERPER_API_KEY environment variable
  google: "your-google-api-key"  # Or set GOOGLE_API_KEY environment variable
  anthropic: "your-anthropic-api-key"  # Or set ANTHROPIC_API_KEY environment variable
  openrouter: "your-openrouter-api-key"  # Or set OPENROUTER_API_KEY environment variable
  groq: "your-groq-api-key"  # Or set GROQ_API_KEY environment variable
  newsapi: "your-newsapi-key"  # Or set NEWSAPI_API_KEY environment variable
  core: "your-core-api-key"  # Or set CORE_API_KEY environment variable
  github: "your-github-api-key"  # Or set GITHUB_API_KEY environment variable
  stackexchange: "your-stackexchange-api-key"  # Or set STACKEXCHANGE_API_KEY environment variable
# LLM model configurations
# Each entry maps a model alias to its provider and sampling/connection settings.
models:
  gpt-3.5-turbo:
    provider: "openai"
    temperature: 0.7
    max_tokens: 1000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  gpt-4:
    provider: "openai"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint
  claude-2:
    provider: "anthropic"
    temperature: 0.7
    max_tokens: 1500
    top_p: 1.0
    endpoint: null  # Use default Anthropic endpoint
  azure-gpt-4:
    provider: "azure"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: "https://your-azure-endpoint.openai.azure.com"
    deployment_name: "your-deployment-name"
    api_version: "2023-05-15"  # keep quoted — a bare value would parse as a YAML date
  local-llama:
    provider: "ollama"
    temperature: 0.8
    max_tokens: 1000
    endpoint: "http://localhost:11434/api/generate"
    model_name: "llama2"
  llama-3.1-8b-instant:
    provider: "groq"
    model_name: "llama-3.1-8b-instant"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  llama-3.3-70b-versatile:
    provider: "groq"
    model_name: "llama-3.3-70b-versatile"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"
  openrouter-mixtral:
    provider: "openrouter"
    model_name: "mistralai/mixtral-8x7b-instruct"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
  openrouter-claude:
    provider: "openrouter"
    model_name: "anthropic/claude-3-opus"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"
# Default model to use if not specified for a module
default_model: "llama-3.1-8b-instant"  # Using Groq's Llama 3.1 8B model for testing

# Module-specific model assignments (keys must match aliases defined under `models`,
# except for special handlers such as the Jina reranker)
module_models:
  # Query processing module
  query_processing:
    enhance_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for query enhancement
    classify_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for classification
    generate_search_queries: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for generating search queries
  # Search strategy module
  search_strategy:
    develop_strategy: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for developing search strategies
    target_selection: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for target selection
  # Document ranking module
  document_ranking:
    rerank_documents: "jina-reranker"  # Use Jina's reranker for document reranking
  # Report generation module
  report_generation:
    synthesize_report: "llama-3.3-70b-versatile"  # Use Groq's Llama 3.3 70B for report synthesis
    format_report: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for formatting
# Search engine configurations
search_engines:
  google:
    enabled: true
    max_results: 10
  serper:
    enabled: true
    max_results: 10
  jina:
    enabled: true
    max_results: 10
  scholar:
    enabled: false
    max_results: 5
  arxiv:
    enabled: false
    max_results: 5
  news:
    enabled: true
    max_results: 10
    days_back: 7
    use_headlines: false  # Set to true to use top headlines endpoint
    country: "us"  # Country code for top headlines
    language: "en"  # Language code
  openalex:
    enabled: true
    max_results: 10
    filter_open_access: false  # Set to true to only return open access publications
  core:
    enabled: true
    max_results: 10
    full_text: true  # Set to true to search in full text of papers
  github:
    enabled: true
    max_results: 10
    sort: "best_match"  # Options: best_match, stars, forks, updated
  stackexchange:
    enabled: true
    max_results: 10
    site: "stackoverflow"  # Default site (stackoverflow, serverfault, superuser, etc.)
    sort: "relevance"  # Options: relevance, votes, creation, activity
# Jina AI specific configurations
jina:
  reranker:
    model: "jina-reranker-v2-base-multilingual"  # Default reranker model
    top_n: 10  # Default number of top results to return
# UI configuration
ui:
  theme: "light"  # light or dark
  port: 7860
  share: false
  title: "Intelligent Research System"
  description: "An automated system for finding, filtering, and synthesizing information"
# Academic search settings
academic_search:
  email: "user@example.com"  # Used for Unpaywall and OpenAlex APIs
  # OpenAlex settings
  openalex:
    default_sort: "relevance_score:desc"  # Other options: cited_by_count:desc, publication_date:desc
  # Unpaywall settings — explicit empty mapping (a bare key would load as null)
  unpaywall: {}  # No specific settings needed
  # CORE settings — explicit empty mapping (a bare key would load as null)
  core: {}  # No specific settings needed
# System settings
system:
  cache_dir: "data/cache"
  results_dir: "data/results"
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL