#!/usr/bin/env python3
"""
═══════════════════════════════════════════════════════════════
📄 GOOGLE DOCS FETCHER - ICHIGRIDEA PIPELINE
═══════════════════════════════════════════════════════════════
Récupère les specs depuis Google Docs et calcule les hashes.

NOTE: Ce script est conçu pour être appelé par Claude via
      google_drive_fetch() dans le contexte Claude.ai

Usage (standalone - nécessite credentials):
    python gdocs_fetcher.py --list
    python gdocs_fetcher.py --fetch <doc_id>
    python gdocs_fetcher.py --hash <doc_id>

Usage (via Claude):
    Claude utilise google_drive_fetch(doc_id) directement
"""

import hashlib
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional


class GDocsFetcher:
    """Handles retrieval, caching and hashing of Google Docs spec documents.

    Designed to be driven by Claude via ``google_drive_fetch(doc_id)``;
    standalone use would require Google API credentials (not handled here).
    """

    # Google Docs IDs of the known spec documents (taken from the transcript).
    KNOWN_DOCS = {
        "INDEX": "1GoX8Cx28c9HEeqxJyTvyIQCAjbRmdz9wfGfy9gjOM3E",
        "PART_01": "13PGxes2QbHG-GsyDHv5bqLzPpzzdEa5PRmb6AWwzSQs",
        "PART_02": "1QlcprKJrc-_sv0HlxAJN0Bzl7c2LQuxEJ3B5geCwyEc",
        "PART_03": "1rLfH4PhVRgVeEB5Y_O3VJzlBjAWxicHXT7OHE6-sICA",
        "PART_04": "1qojpPS7cK-z-9Sqc-aPpUoeX3X9ON7hyABuyNcE6sQQ",
        "PART_05": "1DVpPRKk4jD8tPsZMmDTZIRoxIPQ93GCy89bNoons9Pc",
        "PART_06": "1S9W1VOPWwutGlkz5uL4S73vGS3PVjyd7a_k-zJHjb_0",
        "PART_07": "1mGASCSKSn7yGsZyEQXsnVmGHkXlj5RzxOYJN-bxp6k4",
        "PART_08": "1TB3GgJKNAH131wGXJVCOBpiiU3DWlQcR71o3r_b5U1g",
        "PART_09": "1JIX4vMzDvIv6KZjEkiMxzA8F8CpXj8tjHqiB_B37nl8",
        "PART_10": "1Xy_0R-YiMrRX69CfqLkLCjyTvrw3619PT9_B0_0PZ1U",
        "FILTRES_ICHIMOKU": "1cUWBC5mBj1kXV8DTf1yLCF7YYYJsyxGyLFUqTiH-_UM"
    }

    # Section range covered by each document.
    SECTIONS_BY_DOC = {
        "PART_01": "S001-S100",
        "PART_02": "S100-S200",
        "PART_03": "S200-S300",
        "PART_04": "S300-S370",
        "PART_05": "S370-S420",
        "PART_06": "S420-S460",
        "PART_07": "S460-S560",
        "PART_08": "S560-S640",
        "PART_09": "S640-S730",
        "PART_10": "S730-S760",
        "FILTRES_ICHIMOKU": "F01-F57"
    }

    def __init__(self, cache_dir: str = "pipeline/cache"):
        """Create the fetcher and ensure *cache_dir* exists on disk."""
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def get_doc_url(self, doc_name: str) -> str:
        """Return the edit URL of a known document, or "" if unknown."""
        doc_id = self.KNOWN_DOCS.get(doc_name)
        if doc_id:
            return f"https://docs.google.com/document/d/{doc_id}/edit"
        return ""

    def get_doc_id(self, doc_name: str) -> str:
        """Return the Google Docs ID of a known document, or "" if unknown."""
        return self.KNOWN_DOCS.get(doc_name, "")

    def list_docs(self) -> Dict[str, Dict[str, str]]:
        """Return ``{name: {"id", "url", "sections"}}`` for every known document.

        Note: the return annotation was previously ``Dict[str, str]``, which
        did not match the nested-dict values actually produced.
        """
        return {
            name: {
                "id": doc_id,
                "url": f"https://docs.google.com/document/d/{doc_id}/edit",
                "sections": self.SECTIONS_BY_DOC.get(name, "")
            }
            for name, doc_id in self.KNOWN_DOCS.items()
        }

    def calculate_spec_hash(self, content: str) -> str:
        """Return the SHA-256 hex digest of the normalized content.

        Normalization collapses all whitespace runs to a single space,
        strips the ends, and lowercases — so cosmetic edits to a spec
        do not change its hash.
        """
        normalized = content.strip()
        normalized = re.sub(r'\s+', ' ', normalized)  # collapse whitespace
        normalized = normalized.lower()  # case-insensitive

        return hashlib.sha256(normalized.encode('utf-8')).hexdigest()

    def extract_sections_from_content(self, content: str) -> List[Dict]:
        """Extract section entries (id, name, hash) from spec content.

        Deduplicates by section id: the bare ``S\\d{2,3}`` pattern also
        matches inside Markdown headers (``## S001: Name``), which the
        third pattern matches again — previously yielding duplicates.
        The first occurrence wins.
        """
        # Patterns for section headings (S001, S100, F01, ...).
        # Adjust to match the real spec format if it changes.
        patterns = [
            r'(S\d{2,3})\s*[-:]\s*([^\n]+)',       # S001: Name
            r'(F\d{2})\s*[-:]\s*([^\n]+)',         # F01: Name
            r'##\s*(S\d{2,3})\s*[-:]\s*([^\n]+)',  # ## S001: Name (Markdown)
        ]

        sections: List[Dict] = []
        seen_ids = set()
        for pattern in patterns:
            for section_id, name in re.findall(pattern, content, re.MULTILINE):
                clean_id = section_id.strip()
                if clean_id in seen_ids:
                    continue  # already captured by an earlier pattern
                seen_ids.add(clean_id)
                sections.append({
                    "section_id": clean_id,
                    "name": name.strip(),
                    # Hash the raw (unstripped) capture, as before.
                    "hash": self.calculate_spec_hash(f"{section_id}:{name}")
                })

        return sections

    def cache_content(self, doc_name: str, content: str) -> str:
        """Write a document's content (and its hash) to the cache.

        Returns the path of the cached ``.txt`` file as a string.
        """
        cache_file = self.cache_dir / f"{doc_name}.txt"
        cache_file.write_text(content, encoding="utf-8")

        # Store the normalized hash alongside, for cheap change detection.
        content_hash = self.calculate_spec_hash(content)
        hash_file = self.cache_dir / f"{doc_name}.hash"
        hash_file.write_text(content_hash)

        return str(cache_file)

    def get_cached_content(self, doc_name: str) -> Optional[str]:
        """Return the cached content of a document, or None if not cached."""
        cache_file = self.cache_dir / f"{doc_name}.txt"

        if cache_file.exists():
            return cache_file.read_text(encoding="utf-8")

        return None

    def get_cached_hash(self, doc_name: str) -> Optional[str]:
        """Return the cached hash of a document, or None if not cached."""
        hash_file = self.cache_dir / f"{doc_name}.hash"

        if hash_file.exists():
            return hash_file.read_text().strip()

        return None

    def find_doc_for_section(self, section_id: str) -> Optional[str]:
        """Return the document name containing *section_id*, or None.

        Accepts ids like ``S109`` or ``F13``; anything else returns None.
        """
        match = re.match(r'([SF])(\d+)', section_id)
        if not match:
            return None

        prefix, num_str = match.groups()
        num = int(num_str)

        # All F-sections live in the single filters document.
        if prefix == "F":
            return "FILTRES_ICHIMOKU"

        # Half-open numeric ranges [start, end) for each PART_XX document.
        ranges = [
            ("PART_01", 1, 100),
            ("PART_02", 100, 200),
            ("PART_03", 200, 300),
            ("PART_04", 300, 370),
            ("PART_05", 370, 420),
            ("PART_06", 420, 460),
            ("PART_07", 460, 560),
            ("PART_08", 560, 640),
            ("PART_09", 640, 730),
            ("PART_10", 730, 800),
        ]

        for doc_name, start, end in ranges:
            if start <= num < end:
                return doc_name

        return None

    def generate_fetch_instructions(self, section_id: str) -> Dict:
        """Build the instructions Claude needs to fetch a given section.

        Returns ``{"error": ...}`` when the section maps to no known doc.
        """
        doc_name = self.find_doc_for_section(section_id)

        if not doc_name:
            return {"error": f"Section {section_id} non trouvée dans les docs connus"}

        doc_id = self.get_doc_id(doc_name)

        return {
            "section_id": section_id,
            "doc_name": doc_name,
            "doc_id": doc_id,
            "url": f"https://docs.google.com/document/d/{doc_id}/edit",
            "instruction": f"google_drive_fetch('{doc_id}')",
            "then": f"Chercher la section {section_id} dans le contenu retourné"
        }

    def export_docs_map(self, output_file: str = "GDOCS_MAP.json") -> str:
        """Export the document map as JSON; returns the output file path.

        Uses a timezone-aware UTC timestamp (``datetime.utcnow`` is
        deprecated since Python 3.12) while keeping the trailing-Z format.
        """
        docs_map = {
            "version": "1.0.0",
            "exported_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "documents": self.list_docs(),
            "usage": {
                "claude": "Utiliser google_drive_fetch(doc_id) pour récupérer le contenu",
                "standalone": "Nécessite les credentials Google API"
            }
        }

        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(docs_map, f, indent=2, ensure_ascii=False)

        print(f"✅ Map exportée: {output_file}")
        return output_file

def main():
    """CLI entry point: handles --list, --find <id>, --export; else prints help."""
    import sys

    fetcher = GDocsFetcher()
    argv = sys.argv

    if "--list" in argv:
        print("📄 Documents Google Docs connus:")
        print("-" * 70)
        for doc_name, meta in fetcher.list_docs().items():
            print(f"  {doc_name:20} | {meta['sections']:15} | {meta['id'][:20]}...")
        return

    if "--find" in argv:
        try:
            target = argv[argv.index("--find") + 1]
        except IndexError:
            # --find was the last argument: no section id supplied.
            print("Usage: python gdocs_fetcher.py --find S109")
            return

        info = fetcher.generate_fetch_instructions(target)
        if "error" in info:
            print(f"❌ {info['error']}")
            return

        print(f"📄 Section {target}:")
        print(f"   Document: {info['doc_name']}")
        print(f"   URL: {info['url']}")
        print(f"\n   Pour Claude:")
        print(f"   {info['instruction']}")
        return

    if "--export" in argv:
        fetcher.export_docs_map()
        return

    print(__doc__)


if __name__ == "__main__":
    # Run the CLI only when executed directly, not when imported.
    main()
