"""Segment a markdown file into chunks with Jina AI's Segmenter API."""

import json
import os
import sys

import requests

# Get your Jina AI API key for free: https://jina.ai/?sui=apikey
JINA_API_KEY = os.getenv('JINA_API_KEY')

# Endpoint the segmentation request is posted to.
_SEGMENTER_URL = 'https://segment.jina.ai/'

# Upper bound (seconds) on connecting to / waiting for the API; without a
# timeout `requests` waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def segment_markdown(file_path):
    """Segment a markdown file using Jina AI's Segmenter API.

    The file is read as UTF-8 and posted to the Segmenter endpoint with the
    ``cl100k_base`` tokenizer, asking for chunks (not tokens) of at most
    1000 units each.

    Args:
        file_path (str): Path to the markdown file.

    Returns:
        dict | None: The decoded JSON response containing the segments, or
        ``None`` if reading the file, performing the request, or decoding
        the response failed. On failure the error is printed to stderr.
    """
    try:
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            markdown_content = file.read()

        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        }
        # Only authenticate when a key is configured; otherwise the header
        # would carry the literal string "Bearer None".
        if JINA_API_KEY:
            headers['Authorization'] = f'Bearer {JINA_API_KEY}'

        data = {
            'content': markdown_content,
            'tokenizer': 'cl100k_base',
            'return_tokens': False,
            'return_chunks': True,
            'max_chunk_length': 1000,
        }

        response = requests.post(
            _SEGMENTER_URL,
            headers=headers,
            json=data,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # Turn 4xx/5xx responses into exceptions handled below.
        response.raise_for_status()

        return response.json()
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on None rather than
        # an exception. Report on stderr so stdout stays clean for JSON.
        print(f'Error segmenting markdown: {str(e)}', file=sys.stderr)
        return None


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: python markdown_segmenter.py FILE_PATH')
        sys.exit(1)

    segments = segment_markdown(sys.argv[1])
    if segments is None:
        # The error was already reported; signal failure to the shell.
        sys.exit(1)

    print(json.dumps(segments, indent=2))