# HDH-example/utils.py

#!/usr/bin/env python3
"""
HDH Deployment Utilities
========================

Utility functions and helper classes for the HDH deployment example.

Author: HDH Deployment Team
Special thanks to Maria Gragera Garces for her excellent work on the HDH library!
"""

import os
import sys
import json
import yaml
import time
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np


class ConfigManager:
    """Load, query, update and persist the HDH deployment configuration.

    The configuration is a nested dictionary. It is read from a YAML file
    when that file exists; otherwise the built-in defaults are used. Values
    are addressed with dot-separated paths such as ``"output.directory"``.
    """

    def __init__(self, config_file: str = "config.yaml"):
        """Remember the config file location and load the configuration.

        Args:
            config_file: Path of the YAML configuration file.
        """
        self.config_file = Path(config_file)
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from the YAML file.

        Returns:
            The parsed file content, an empty dict when the file exists but
            contains no YAML document, or the default configuration when the
            file does not exist.
        """
        if not self.config_file.exists():
            return self.get_default_config()

        with open(self.config_file, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)

        # safe_load returns None for an empty file; normalise to a dict so
        # that set() keeps working on the loaded configuration.
        return {} if loaded is None else loaded

    def get_default_config(self) -> Dict[str, Any]:
        """Return a freshly built copy of the default configuration."""
        return {
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                "file": "hdh_deployment.log"
            },
            "output": {
                "directory": "hdh_results",
                "save_plots": True,
                "plot_format": "png",
                "plot_dpi": 300
            },
            "circuits": {
                "max_qubits": 10,
                "default_partitions": 3,
                "enable_visualization": True,
                "save_intermediate": False
            },
            "performance": {
                "timeout_seconds": 300,
                "max_memory_gb": 8,
                "parallel_processing": False
            }
        }

    def save_config(self):
        """Write the current configuration to the config file as YAML."""
        with open(self.config_file, 'w', encoding='utf-8') as f:
            yaml.dump(self.config, f, default_flow_style=False, indent=2)

    def get(self, key_path: str, default=None):
        """Get a configuration value using dot notation.

        Args:
            key_path: Dot-separated path, e.g. ``"output.plot_dpi"``.
            default: Value returned when any segment of the path is missing
                or when a segment is reached through a non-dict value.

        Returns:
            The value stored at ``key_path``, or ``default``.
        """
        value = self.config

        for key in key_path.split('.'):
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return default

        return value

    def set(self, key_path: str, value: Any):
        """Set a configuration value using dot notation.

        Missing intermediate dictionaries are created. An intermediate
        segment that currently holds a non-dict value is replaced by a new
        dictionary so the requested path can be stored.

        Args:
            key_path: Dot-separated path, e.g. ``"output.plot_dpi"``.
            value: Value to store at that path.
        """
        keys = key_path.split('.')
        node = self.config

        for key in keys[:-1]:
            child = node.get(key)
            if not isinstance(child, dict):
                # Either absent or a scalar/list in the way of the path.
                child = {}
                node[key] = child
            node = child

        node[keys[-1]] = value


class ResultsManager:
    """Store, load and summarise HDH deployment results kept as JSON files.

    All files live directly inside ``results_dir``; methods that take a
    ``filename`` resolve it relative to that directory.
    """

    def __init__(self, results_dir: str = "hdh_results"):
        """Create the results directory if needed.

        Args:
            results_dir: Directory that holds the JSON result files.
        """
        self.results_dir = Path(results_dir)
        # parents=True so that a nested location (e.g. "runs/today") works.
        self.results_dir.mkdir(parents=True, exist_ok=True)

    def save_results(self, results: Dict[str, Any],
                     filename: Optional[str] = None) -> Path:
        """Save results to a JSON file.

        Args:
            results: Data to serialise. Values JSON cannot encode are
                written as their ``str()`` representation.
            filename: Target file name; when omitted a timestamped name of
                the form ``hdh_results_YYYYmmdd_HHMMSS.json`` is used.

        Returns:
            Path of the written file.
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"hdh_results_{timestamp}.json"

        filepath = self.results_dir / filename

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, default=str)

        return filepath

    def load_results(self, filename: str) -> Dict[str, Any]:
        """Load and return the parsed content of a JSON result file.

        Args:
            filename: File name relative to the results directory.
        """
        filepath = self.results_dir / filename

        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)

    def list_result_files(self) -> List[Path]:
        """List all ``*.json`` files in the results directory, sorted by path."""
        # glob() order depends on the file system; sort for stable output.
        return sorted(self.results_dir.glob("*.json"))

    def get_latest_results(self) -> Optional[Dict[str, Any]]:
        """Return the content of the most recently modified result file.

        Returns:
            The parsed content, or ``None`` when the directory has no
            result files.
        """
        result_files = self.list_result_files()

        if not result_files:
            return None

        latest_file = max(result_files, key=lambda f: f.stat().st_mtime)
        return self.load_results(latest_file.name)

    def merge_results(self, result_files: List[str]) -> Dict[str, Any]:
        """Merge multiple result files into one dictionary.

        Args:
            result_files: File names relative to the results directory.

        Returns:
            A dict with ``merged_at``, ``source_files`` and ``results``.
            Each entry of ``results`` holds the ``filename`` and either its
            ``data`` or, when loading failed, an ``error`` message.
        """
        merged = {
            "merged_at": datetime.now().isoformat(),
            # Copy so later changes to the caller's list do not leak in.
            "source_files": list(result_files),
            "results": []
        }

        for filename in result_files:
            try:
                results = self.load_results(filename)
            except Exception as e:
                # Deliberately best-effort: a bad file is recorded, not fatal.
                merged["results"].append({
                    "filename": filename,
                    "error": str(e)
                })
            else:
                merged["results"].append({
                    "filename": filename,
                    "data": results
                })

        return merged

    def generate_summary_report(self) -> Dict[str, Any]:
        """Generate a summary report covering every result file.

        Returns:
            ``{"error": ...}`` when no result files exist; otherwise a dict
            with ``generated_at``, ``total_files`` and ``file_analysis``.
            Each analysis entry carries file metadata plus, when the file
            content provides them, ``circuits_count``,
            ``successful_circuits`` and ``summary_data``. Files that cannot
            be read are listed with an ``error`` message instead.
        """
        result_files = self.list_result_files()

        if not result_files:
            return {"error": "No result files found"}

        summary = {
            "generated_at": datetime.now().isoformat(),
            "total_files": len(result_files),
            "file_analysis": []
        }

        for result_file in result_files:
            try:
                results = self.load_results(result_file.name)
                file_stat = result_file.stat()

                analysis = {
                    "filename": result_file.name,
                    "file_size": file_stat.st_size,
                    "modified_at": datetime.fromtimestamp(file_stat.st_mtime).isoformat()
                }

                # Analyze content if it has the expected structure
                if isinstance(results, dict):
                    detailed = results.get("detailed_results")
                    if isinstance(detailed, list):
                        analysis["circuits_count"] = len(detailed)
                        # Skip entries that are not dicts instead of letting
                        # one malformed entry discard the whole file.
                        analysis["successful_circuits"] = sum(
                            1 for r in detailed
                            if isinstance(r, dict) and r.get("success", False)
                        )

                    summary_data = results.get("summary")
                    if isinstance(summary_data, dict):
                        analysis["summary_data"] = summary_data

                summary["file_analysis"].append(analysis)

            except Exception as e:
                # Best-effort report: record the failure and keep going.
                summary["file_analysis"].append({
                    "filename": result_file.name,
                    "error": str(e)
                })

        return summary


class PlotManager:
    """Enhanced plotting utilities for HDH visualization.

    Every ``create_*`` method builds one figure, writes it into
    ``output_dir`` and closes it again, returning the path of the image.
    """

    def __init__(self, output_dir: str = "plots", dpi: int = 300):
        """Create the output directory and apply the matplotlib settings.

        Args:
            output_dir: Directory that receives the image files.
            dpi: Resolution passed to ``savefig``.

        Note:
            The style and ``rcParams`` changes made here are applied to the
            ``matplotlib.pyplot`` module, not only to this instance.
        """
        self.output_dir = Path(output_dir)
        # parents=True so that a nested output location works as well.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.dpi = dpi

        # Set matplotlib style
        plt.style.use('default')
        self.setup_matplotlib()

    def setup_matplotlib(self):
        """Configure default figure size and font sizes in ``plt.rcParams``."""
        plt.rcParams['figure.figsize'] = (12, 8)
        plt.rcParams['font.size'] = 12
        plt.rcParams['axes.labelsize'] = 14
        plt.rcParams['axes.titlesize'] = 16
        plt.rcParams['legend.fontsize'] = 12
        plt.rcParams['xtick.labelsize'] = 10
        plt.rcParams['ytick.labelsize'] = 10

    def _save_figure(self, fig, prefix: str, title: str,
                     filename: Optional[str]) -> Path:
        """Save ``fig`` into the output directory and return the file path.

        When ``filename`` is None the name is derived from ``prefix`` and the
        lower-cased title, with spaces and path separators replaced by ``_``.
        """
        if filename is None:
            slug = title.lower().replace(' ', '_')
            # A title like "Time/Cost" must not be read as a sub-directory.
            slug = slug.replace('/', '_').replace('\\', '_')
            filename = f"{prefix}_{slug}.png"

        filepath = self.output_dir / filename
        fig.savefig(filepath, dpi=self.dpi, bbox_inches='tight')
        return filepath

    def create_comparison_plot(self, data: Dict[str, List[float]],
                             title: str, xlabel: str, ylabel: str,
                             filename: str = None) -> Path:
        """Create a comparison plot with multiple data series.

        Args:
            data: Mapping of series label to its values; each series is
                plotted against the index of its values.
            title: Plot title, also used for the default file name.
            xlabel: Label of the x axis.
            ylabel: Label of the y axis.
            filename: Output file name; defaults to
                ``comparison_<title>.png``.

        Returns:
            Path of the saved image.
        """
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            colors = plt.cm.Set1(np.linspace(0, 1, len(data)))

            for (label, values), color in zip(data.items(), colors):
                x_values = range(len(values))
                ax.plot(x_values, values, 'o-', label=label, color=color,
                        linewidth=2, markersize=6, alpha=0.8)

            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            if data:
                # A legend without any series only triggers a warning.
                ax.legend()
            ax.grid(True, alpha=0.3)

            return self._save_figure(fig, "comparison", title, filename)
        finally:
            # Close this exact figure even when plotting or saving fails.
            plt.close(fig)

    def create_histogram(self, data: List[float], title: str,
                        xlabel: str, ylabel: str = "Frequency",
                        bins: int = 20, filename: str = None) -> Path:
        """Create a histogram with mean and one-standard-deviation markers.

        Args:
            data: Values to bin.
            title: Plot title, also used for the default file name.
            xlabel: Label of the x axis.
            ylabel: Label of the y axis.
            bins: Number of histogram bins.
            filename: Output file name; defaults to
                ``histogram_<title>.png``.

        Returns:
            Path of the saved image.
        """
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.hist(data, bins=bins, alpha=0.7, color='skyblue',
                    edgecolor='black', linewidth=1)

            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.grid(True, alpha=0.3, axis='y')

            # Add statistics; mean/std of an empty sample would be NaN.
            if len(data) > 0:
                mean_val = np.mean(data)
                std_val = np.std(data)
                ax.axvline(mean_val, color='red', linestyle='--',
                           label=f'Mean: {mean_val:.3f}')
                ax.axvline(mean_val + std_val, color='orange', linestyle='--',
                           alpha=0.7, label=f'±1σ: {std_val:.3f}')
                ax.axvline(mean_val - std_val, color='orange', linestyle='--',
                           alpha=0.7)
                ax.legend()

            return self._save_figure(fig, "histogram", title, filename)
        finally:
            plt.close(fig)

    def create_scatter_plot(self, x_data: List[float], y_data: List[float],
                          title: str, xlabel: str, ylabel: str,
                          labels: List[str] = None, filename: str = None) -> Path:
        """Create a scatter plot with optional per-point labels.

        Points are coloured by their index, shown in a colorbar.

        Args:
            x_data: X coordinates.
            y_data: Y coordinates.
            title: Plot title, also used for the default file name.
            xlabel: Label of the x axis.
            ylabel: Label of the y axis.
            labels: Optional annotation text per point; labels beyond the
                number of points are ignored.
            filename: Output file name; defaults to ``scatter_<title>.png``.

        Returns:
            Path of the saved image.
        """
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            scatter = ax.scatter(x_data, y_data, alpha=0.6, s=60,
                                 c=range(len(x_data)), cmap='viridis')

            # Add labels if provided; zip stops at the shortest sequence so
            # surplus labels cannot index past the data.
            if labels:
                for label, x_val, y_val in zip(labels, x_data, y_data):
                    ax.annotate(label, (x_val, y_val),
                                xytext=(5, 5), textcoords='offset points',
                                fontsize=8, alpha=0.8)

            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.grid(True, alpha=0.3)

            # Add colorbar
            fig.colorbar(scatter, ax=ax, label='Data Point Index')

            return self._save_figure(fig, "scatter", title, filename)
        finally:
            plt.close(fig)


class PerformanceProfiler:
    """Performance profiling utilities for HDH operations.

    A profile is opened with ``start_profile(name)`` and closed with
    ``end_profile(name)``. Each completed run is appended to
    ``self.profiles[name]`` as a dict with ``duration`` (seconds),
    ``memory_delta`` (MB) and ``timestamp``.
    """

    def __init__(self):
        """Initialize empty stores for finished and running profiles."""
        self.profiles = {}
        self.active_profiles = {}

    def start_profile(self, name: str):
        """Start profiling a named operation.

        Starting a name that is already active restarts its measurement.

        Args:
            name: Identifier of the operation being measured.
        """
        # Sample memory before starting the clock so the cost of sampling
        # (including the first psutil import) is not counted as duration.
        start_memory = self.get_memory_usage()
        self.active_profiles[name] = {
            'start_time': time.perf_counter(),
            'start_memory': start_memory
        }

    def end_profile(self, name: str) -> Dict[str, float]:
        """End profiling and return the metrics of this run.

        Args:
            name: Identifier passed to ``start_profile``.

        Returns:
            Dict with ``duration``, ``memory_delta`` and ``timestamp``.

        Raises:
            ValueError: If no profile with this name is active.
        """
        # Stop the clock first so bookkeeping below is not measured.
        end_time = time.perf_counter()

        if name not in self.active_profiles:
            raise ValueError(f"No active profile named '{name}'")

        profile_data = self.active_profiles.pop(name)

        metrics = {
            'duration': end_time - profile_data['start_time'],
            'memory_delta': self.get_memory_usage() - profile_data['start_memory'],
            'timestamp': datetime.now().isoformat()
        }

        self.profiles.setdefault(name, []).append(metrics)
        return metrics

    def get_memory_usage(self) -> float:
        """Get the resident memory of this process in MB.

        Returns:
            The RSS reported by ``psutil``, or ``0.0`` when ``psutil`` is
            not installed.
        """
        try:
            import psutil
        except ImportError:
            return 0.0

        return psutil.Process().memory_info().rss / 1024 / 1024

    @staticmethod
    def _describe(values: List[float]) -> Dict[str, float]:
        """Return mean/median/min/max/std of ``values`` as plain floats."""
        return {
            'mean': float(np.mean(values)),
            'median': float(np.median(values)),
            'min': float(np.min(values)),
            'max': float(np.max(values)),
            'std': float(np.std(values))
        }

    def get_profile_summary(self, name: str) -> Dict[str, Any]:
        """Get summary statistics for a named profile.

        Args:
            name: Identifier of the profiled operation.

        Returns:
            Dict with ``name``, ``count``, ``duration_stats`` and
            ``memory_stats``; or ``{"error": ...}`` when nothing has been
            recorded under this name.
        """
        if name not in self.profiles:
            return {"error": f"No profiles found for '{name}'"}

        profiles = self.profiles[name]

        return {
            'name': name,
            'count': len(profiles),
            'duration_stats': self._describe([p['duration'] for p in profiles]),
            'memory_stats': self._describe([p['memory_delta'] for p in profiles])
        }

    def export_profiles(self, filepath: str):
        """Export all recorded runs and their summaries to a JSON file.

        Args:
            filepath: Destination path of the JSON file.
        """
        export_data = {
            'exported_at': datetime.now().isoformat(),
            'profiles': self.profiles,
            'summaries': {name: self.get_profile_summary(name)
                          for name in self.profiles}
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2, default=str)


def setup_logging(log_level: str = "INFO", log_file: Optional[str] = None) -> logging.Logger:
    """Setup standardized logging for HDH deployment.

    Configures the ``"hdh_deployment"`` logger with a console handler on
    stdout and, optionally, a file handler. Handlers installed by an earlier
    call are removed and closed first, so the function can be called
    repeatedly without duplicating output or leaking open log files.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive), e.g.
            ``"INFO"`` or ``"debug"``.
        log_file: Path of a log file; when falsy, only the console is used.

    Returns:
        The configured logger.

    Raises:
        AttributeError: If ``log_level`` is not an attribute of ``logging``.
    """
    logger = logging.getLogger("hdh_deployment")
    logger.setLevel(getattr(logging, log_level.upper()))

    # Remove AND close existing handlers: clearing the list alone would
    # leave a previous FileHandler's file open.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    ))
    logger.addHandler(console_handler)

    # File handler if specified; it records the call site as well
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
        ))
        logger.addHandler(file_handler)

    return logger


def validate_hdh_environment() -> Dict[str, Any]:
    """Validate that the HDH environment is properly set up.

    Tries to import ``hdh``, the required packages and the optional
    packages, and checks the total system memory when ``psutil`` is
    available. A package whose import fails for any reason (not only
    because it is missing) is reported instead of aborting the validation.

    Returns:
        Dict with ``timestamp``, ``valid`` (False when ``hdh`` or a required
        package cannot be imported), ``issues``, ``warnings`` and, when
        available, ``hdh_version`` and ``system_memory_gb``.
    """
    import importlib

    validation_results = {
        'timestamp': datetime.now().isoformat(),
        'valid': True,
        'issues': [],
        'warnings': []
    }
    issues = validation_results['issues']
    warnings = validation_results['warnings']

    # Check HDH import
    try:
        import hdh
        validation_results['hdh_version'] = getattr(hdh, '__version__', 'unknown')
    except Exception as e:
        validation_results['valid'] = False
        issues.append(f"HDH import failed: {str(e)}")

    # Check required dependencies
    required_packages = ['qiskit', 'networkx', 'matplotlib', 'numpy']

    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            issues.append(f"Missing required package: {package}")
            validation_results['valid'] = False
        except Exception as e:
            # Installed but broken (e.g. a native library fails to load).
            issues.append(f"Required package failed to import: {package} ({e})")
            validation_results['valid'] = False

    # Check optional dependencies
    optional_packages = ['metis', 'psutil', 'rich', 'click']

    for package in optional_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            warnings.append(f"Optional package not available: {package}")
        except Exception as e:
            # Installed but unusable; report it rather than crash.
            warnings.append(f"Optional package failed to import: {package} ({e})")

    # Check system resources
    try:
        import psutil
        memory_gb = psutil.virtual_memory().total / (1024**3)
        if memory_gb < 4:
            warnings.append(f"Low system memory: {memory_gb:.1f}GB")
        validation_results['system_memory_gb'] = memory_gb
    except ImportError:
        warnings.append("Cannot check system memory (psutil not available)")
    except Exception as e:
        warnings.append(f"Cannot check system memory: {e}")

    return validation_results


if __name__ == "__main__":
    # Smoke test: validate the environment, then exercise the config and
    # results managers once.
    print("HDH Deployment Utilities")
    print("=" * 50)
    print("Special thanks to Maria Gragera Garces for the HDH library!")
    print()

    # Environment check and its findings
    validation = validate_hdh_environment()
    print(f"Environment valid: {validation['valid']}")

    for heading, key in (("Issues found:", 'issues'), ("Warnings:", 'warnings')):
        entries = validation[key]
        if entries:
            print(heading)
            for entry in entries:
                print(f"  - {entry}")

    # Configuration manager: read one value through dot notation
    print("\nTesting ConfigManager...")
    config_mgr = ConfigManager()
    output_directory = config_mgr.get('output.directory')
    print(f"Default output directory: {output_directory}")

    # Results manager: write a small payload to the results directory
    print("\nTesting ResultsManager...")
    results_mgr = ResultsManager()
    test_results = {"test": "data", "timestamp": datetime.now().isoformat()}
    saved_file = results_mgr.save_results(test_results, "test_results.json")
    print(f"Test results saved to: {saved_file}")

    print("\nUtilities test completed!")