#!/usr/bin/env python3
"""
═══════════════════════════════════════════════════════════════
📊 EXCEL PARSER - ICHIGRIDEA PIPELINE
═══════════════════════════════════════════════════════════════
Parse EA_Master_UNIFIED.xlsx et extrait les sections/modules.

Usage:
    python excel_parser.py --parse
    python excel_parser.py --sections
    python excel_parser.py --export-json
    python excel_parser.py --validate
"""

import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

try:
    import openpyxl
    from openpyxl import load_workbook
    HAS_OPENPYXL = True
except ImportError:
    HAS_OPENPYXL = False
    print("⚠️ openpyxl non installé. Run: pip install openpyxl")


class ExcelParser:
    """Parse le fichier EA_Master_UNIFIED.xlsx"""
    
    # Colonnes attendues (ajuster selon la structure réelle)
    EXPECTED_COLUMNS = {
        "Section_ID": ["Section", "ID", "Section_ID", "Code"],
        "Module_Name": ["Module", "Nom", "Name", "Module_Name"],
        "Description": ["Description", "Desc", "Details"],
        "Status": ["Status", "Statut", "État", "State"],
        "Priority": ["Priority", "Priorité", "Prio"],
        "Dependencies": ["Dependencies", "Dépendances", "Deps", "Requires"],
        "Google_Doc_ID": ["Google_Doc", "Doc_ID", "Spec_URL"],
        "Spec_SHA256": ["Spec_Hash", "Spec_SHA256", "Hash_Spec"],
        "Code_SHA256": ["Code_Hash", "Code_SHA256", "Hash_Code"],
        "Generated_Date": ["Generated", "Gen_Date", "Created"],
        "Session_ID": ["Session", "Session_ID"]
    }
    
    def __init__(self, excel_path: str = "EA_Master_UNIFIED.xlsx"):
        self.excel_path = Path(excel_path)
        self.workbook = None
        self.sections = []
        self.column_map = {}
        
    def load(self) -> bool:
        """Charge le fichier Excel"""
        if not HAS_OPENPYXL:
            print("❌ openpyxl requis. pip install openpyxl")
            return False
            
        if not self.excel_path.exists():
            print(f"❌ Fichier non trouvé: {self.excel_path}")
            return False
        
        try:
            self.workbook = load_workbook(self.excel_path, read_only=True, data_only=True)
            print(f"✅ Chargé: {self.excel_path}")
            print(f"   Feuilles: {self.workbook.sheetnames}")
            return True
        except Exception as e:
            print(f"❌ Erreur chargement: {e}")
            return False
    
    def _find_column(self, header_row: tuple, target_names: List[str]) -> Optional[int]:
        """Trouve l'index d'une colonne par ses noms possibles"""
        for idx, cell in enumerate(header_row):
            if cell and str(cell).strip() in target_names:
                return idx
        return None
    
    def _map_columns(self, sheet) -> Dict[str, int]:
        """Mappe les colonnes du sheet"""
        column_map = {}
        
        # Lire la première ligne (headers)
        header_row = next(sheet.iter_rows(min_row=1, max_row=1, values_only=True))
        
        for field, possible_names in self.EXPECTED_COLUMNS.items():
            idx = self._find_column(header_row, possible_names)
            if idx is not None:
                column_map[field] = idx
        
        return column_map
    
    def parse_sections(self, sheet_name: str = None) -> List[Dict]:
        """Parse les sections d'une feuille"""
        if not self.workbook:
            self.load()
        
        if not self.workbook:
            return []
        
        # Utiliser la première feuille par défaut
        if sheet_name is None:
            sheet_name = self.workbook.sheetnames[0]
        
        if sheet_name not in self.workbook.sheetnames:
            print(f"❌ Feuille non trouvée: {sheet_name}")
            return []
        
        sheet = self.workbook[sheet_name]
        self.column_map = self._map_columns(sheet)
        
        print(f"📊 Parsing feuille: {sheet_name}")
        print(f"   Colonnes trouvées: {list(self.column_map.keys())}")
        
        sections = []
        
        for row_idx, row in enumerate(sheet.iter_rows(min_row=2, values_only=True), start=2):
            if not row or not row[0]:  # Skip empty rows
                continue
            
            section = {
                "row": row_idx,
                "raw": list(row)
            }
            
            # Extraire les champs mappés
            for field, col_idx in self.column_map.items():
                if col_idx < len(row):
                    value = row[col_idx]
                    section[field] = str(value).strip() if value else ""
            
            # Valider que c'est une section valide
            section_id = section.get("Section_ID", "")
            if section_id and (section_id.startswith("S") or section_id.startswith("F")):
                sections.append(section)
        
        self.sections = sections
        print(f"   Sections trouvées: {len(sections)}")
        
        return sections
    
    def parse_all_sheets(self) -> List[Dict]:
        """Parse toutes les feuilles"""
        all_sections = []
        
        for sheet_name in self.workbook.sheetnames:
            sections = self.parse_sections(sheet_name)
            for s in sections:
                s["sheet"] = sheet_name
            all_sections.extend(sections)
        
        self.sections = all_sections
        return all_sections
    
    def get_by_status(self, status: str) -> List[Dict]:
        """Filtre les sections par statut"""
        return [s for s in self.sections if s.get("Status", "").lower() == status.lower()]
    
    def get_pending(self) -> List[Dict]:
        """Retourne les sections à générer"""
        pending_statuses = ["pending", "todo", "à faire", ""]
        return [s for s in self.sections if s.get("Status", "").lower() in pending_statuses]
    
    def get_completed(self) -> List[Dict]:
        """Retourne les sections complétées"""
        completed_statuses = ["done", "complete", "completed", "ok", "✅"]
        return [s for s in self.sections if s.get("Status", "").lower() in completed_statuses]
    
    def validate(self) -> Dict:
        """Valide la structure du fichier Excel"""
        issues = {
            "errors": [],
            "warnings": [],
            "info": []
        }
        
        if not self.workbook:
            issues["errors"].append("Fichier non chargé")
            return issues
        
        # Vérifier les colonnes requises
        required = ["Section_ID", "Module_Name"]
        for col in required:
            if col not in self.column_map:
                issues["errors"].append(f"Colonne requise manquante: {col}")
        
        # Vérifier les sections
        if not self.sections:
            self.parse_all_sheets()
        
        # Vérifier les doublons
        section_ids = [s.get("Section_ID") for s in self.sections]
        duplicates = set([x for x in section_ids if section_ids.count(x) > 1])
        if duplicates:
            issues["warnings"].append(f"Sections en doublon: {duplicates}")
        
        # Stats
        issues["info"].append(f"Total sections: {len(self.sections)}")
        issues["info"].append(f"Pending: {len(self.get_pending())}")
        issues["info"].append(f"Completed: {len(self.get_completed())}")
        
        return issues
    
    def export_json(self, output_file: str = "SECTIONS_FROM_EXCEL.json") -> str:
        """Exporte les sections en JSON"""
        if not self.sections:
            self.parse_all_sheets()
        
        export = {
            "source": str(self.excel_path),
            "exported_at": datetime.utcnow().isoformat() + "Z",
            "total_sections": len(self.sections),
            "sections": self.sections
        }
        
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(export, f, indent=2, ensure_ascii=False, default=str)
        
        print(f"✅ Exporté: {output_file}")
        return output_file
    
    def generate_registry(self, output_file: str = "MODULES_REGISTRY.json") -> str:
        """Génère/met à jour MODULES_REGISTRY.json"""
        if not self.sections:
            self.parse_all_sheets()
        
        registry = {
            "version": "1.0.0",
            "source": str(self.excel_path),
            "generated_at": datetime.utcnow().isoformat() + "Z",
            "total": len(self.sections),
            "modules": {}
        }
        
        for section in self.sections:
            section_id = section.get("Section_ID", "")
            module_name = section.get("Module_Name", "")
            
            if section_id and module_name:
                registry["modules"][module_name] = {
                    "section": section_id,
                    "description": section.get("Description", ""),
                    "status": section.get("Status", "pending"),
                    "priority": section.get("Priority", "normal"),
                    "dependencies": section.get("Dependencies", "").split(",") if section.get("Dependencies") else [],
                    "google_doc_id": section.get("Google_Doc_ID", ""),
                    "spec_hash": section.get("Spec_SHA256", ""),
                    "code_hash": section.get("Code_SHA256", "")
                }
        
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(registry, f, indent=2, ensure_ascii=False)
        
        print(f"✅ Registry généré: {output_file}")
        return output_file


def main():
    import sys
    
    parser = ExcelParser()
    
    if "--parse" in sys.argv:
        if parser.load():
            sections = parser.parse_all_sheets()
            print(f"\n📊 {len(sections)} sections trouvées")
            
    elif "--sections" in sys.argv:
        if parser.load():
            sections = parser.parse_all_sheets()
            for s in sections[:20]:  # Afficher les 20 premières
                print(f"  {s.get('Section_ID', '?'):10} | {s.get('Module_Name', '?'):30} | {s.get('Status', '?')}")
            if len(sections) > 20:
                print(f"  ... et {len(sections) - 20} autres")
                
    elif "--export-json" in sys.argv:
        if parser.load():
            parser.parse_all_sheets()
            parser.export_json()
            
    elif "--validate" in sys.argv:
        if parser.load():
            parser.parse_all_sheets()
            issues = parser.validate()
            
            if issues["errors"]:
                print("\n❌ ERREURS:")
                for e in issues["errors"]:
                    print(f"   - {e}")
            
            if issues["warnings"]:
                print("\n⚠️ WARNINGS:")
                for w in issues["warnings"]:
                    print(f"   - {w}")
            
            print("\n📊 INFO:")
            for i in issues["info"]:
                print(f"   - {i}")
                
    elif "--generate-registry" in sys.argv:
        if parser.load():
            parser.parse_all_sheets()
            parser.generate_registry()
            
    else:
        print(__doc__)


if __name__ == "__main__":
    main()
