# Example configuration file for the intelligent research system
# Rename this file to config.yaml and fill in your API keys and settings

# API keys (alternatively, set environment variables)
# Each entry maps a provider name to its key; the trailing comment names the
# environment variable that can be set instead. Keep real keys out of
# version control.
api_keys:
  openai: "your-openai-api-key"  # Or set OPENAI_API_KEY environment variable
  jina: "your-jina-api-key"  # Or set JINA_API_KEY environment variable
  serper: "your-serper-api-key"  # Or set SERPER_API_KEY environment variable
  google: "your-google-api-key"  # Or set GOOGLE_API_KEY environment variable
  anthropic: "your-anthropic-api-key"  # Or set ANTHROPIC_API_KEY environment variable
  openrouter: "your-openrouter-api-key"  # Or set OPENROUTER_API_KEY environment variable
  groq: "your-groq-api-key"  # Or set GROQ_API_KEY environment variable
  newsapi: "your-newsapi-key"  # Or set NEWSAPI_API_KEY environment variable
  core: "your-core-api-key"  # Or set CORE_API_KEY environment variable
  github: "your-github-api-key"  # Or set GITHUB_API_KEY environment variable
  stackexchange: "your-stackexchange-api-key"  # Or set STACKEXCHANGE_API_KEY environment variable

# LLM model configurations
# Each key under `models` is a model identifier; `default_model` and
# `module_models` refer to models by these identifiers.
models:
  gpt-3.5-turbo:
    provider: "openai"
    temperature: 0.7
    max_tokens: 1000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint

  gpt-4:
    provider: "openai"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: null  # Use default OpenAI endpoint

  claude-2:
    provider: "anthropic"
    temperature: 0.7
    max_tokens: 1500
    top_p: 1.0
    endpoint: null  # Use default Anthropic endpoint

  azure-gpt-4:
    provider: "azure"
    temperature: 0.5
    max_tokens: 2000
    top_p: 1.0
    endpoint: "https://your-azure-endpoint.openai.azure.com"
    deployment_name: "your-deployment-name"
    api_version: "2023-05-15"  # Quoted so it stays a string, not a date

  local-llama:
    provider: "ollama"
    temperature: 0.8
    max_tokens: 1000
    endpoint: "http://localhost:11434/api/generate"
    model_name: "llama2"

  llama-3.1-8b-instant:
    provider: "groq"
    model_name: "llama-3.1-8b-instant"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"

  llama-3.3-70b-versatile:
    provider: "groq"
    model_name: "llama-3.3-70b-versatile"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://api.groq.com/openai/v1"

  openrouter-mixtral:
    provider: "openrouter"
    model_name: "mistralai/mixtral-8x7b-instruct"
    temperature: 0.7
    max_tokens: 1024
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"

  openrouter-claude:
    provider: "openrouter"
    model_name: "anthropic/claude-3-opus"
    temperature: 0.5
    max_tokens: 2048
    top_p: 1.0
    endpoint: "https://openrouter.ai/api/v1"

# Default model to use if not specified for a module
# The value is one of the identifiers defined under `models`.
default_model: "llama-3.1-8b-instant"  # Using Groq's Llama 3.1 8B model for testing

# Module-specific model assignments
# Layout: module name -> task name -> model identifier.
module_models:
  # Query processing module
  query_processing:
    enhance_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for query enhancement
    classify_query: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for classification
    generate_search_queries: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for generating search queries

  # Search strategy module
  search_strategy:
    develop_strategy: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for developing search strategies
    target_selection: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for target selection

  # Document ranking module
  document_ranking:
    # NOTE(review): "jina-reranker" has no entry under `models`; presumably it
    # is resolved through the top-level `jina.reranker` settings - confirm.
    rerank_documents: "jina-reranker"  # Use Jina's reranker for document reranking

  # Report generation module
  report_generation:
    synthesize_report: "llama-3.3-70b-versatile"  # Use Groq's Llama 3.3 70B for report synthesis
    format_report: "llama-3.1-8b-instant"  # Use Groq's Llama 3.1 8B for formatting

# Search engine configurations
# Each engine has an `enabled` switch and a `max_results` cap; some engines
# take additional engine-specific options.
search_engines:
  google:
    enabled: true
    max_results: 10

  serper:
    enabled: true
    max_results: 10

  jina:
    enabled: true
    max_results: 10

  scholar:
    enabled: false
    max_results: 5

  arxiv:
    enabled: false
    max_results: 5

  news:
    enabled: true
    max_results: 10
    days_back: 7
    use_headlines: false  # Set to true to use top headlines endpoint
    country: "us"  # Country code for top headlines
    language: "en"  # Language code

  openalex:
    enabled: true
    max_results: 10
    filter_open_access: false  # Set to true to only return open access publications

  core:
    enabled: true
    max_results: 10
    full_text: true  # Set to true to search in full text of papers

  github:
    enabled: true
    max_results: 10
    sort: "best_match"  # Options: best_match, stars, forks, updated

  stackexchange:
    enabled: true
    max_results: 10
    site: "stackoverflow"  # Default site (stackoverflow, serverfault, superuser, etc.)
    sort: "relevance"  # Options: relevance, votes, creation, activity

# Jina AI specific configurations
jina:
  reranker:
    model: "jina-reranker-v2-base-multilingual"  # Default reranker model
    top_n: 10  # Default number of top results to return

# UI configuration
ui:
  theme: "light"  # light or dark
  port: 7860
  share: false
  title: "Intelligent Research System"
  description: "An automated system for finding, filtering, and synthesizing information"

# Academic search settings
# NOTE(review): the per-service sections are placed under `academic_search`
# based on the comment grouping - confirm against the config loader.
academic_search:
  email: "user@example.com"  # Used for Unpaywall and OpenAlex APIs

  # OpenAlex settings
  openalex:
    default_sort: "relevance_score:desc"  # Other options: cited_by_count:desc, publication_date:desc

  # Unpaywall settings
  # No specific settings needed; explicit empty mapping instead of a bare key
  # (a bare `unpaywall:` would load as null).
  unpaywall: {}

  # CORE settings
  # No specific settings needed; explicit empty mapping instead of a bare key
  # (a bare `core:` would load as null).
  core: {}

# System settings
system:
  cache_dir: "data/cache"
  results_dir: "data/results"
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL