<?php
// PERFECT HYBRID CONVERTER - Quote Stripping Version
// Removes quotes from questions and variations during conversion
// This ensures clean matching in the JavaScript chatbot

// Declare the response as UTF-8 HTML; this has to run before anything is echoed.
header('Content-Type: text/html; charset=utf-8');

/**
 * Strip every quote character from a piece of text and normalise its whitespace.
 *
 * Removed, in this order:
 *  - backslash-escaped quotes (backslash + double quote, backslash + apostrophe),
 *    including the backslash itself;
 *  - straight double quotes and apostrophes;
 *  - typographic quotes U+201C, U+201D, U+2018 and U+2019.
 * Runs of whitespace are then collapsed to a single space and the result is trimmed.
 *
 * @param string $text Raw text, e.g. a question heading.
 * @return string The text without quotes, single-spaced and trimmed.
 */
function cleanQuotes($text) {
    // Escaped quotes must go first: once the bare quote has been removed the
    // escape sequence no longer matches and a stray backslash would remain.
    $text = str_replace(['\\"', "\\'"], '', $text);

    // Curly quotes are spelled as code-point escapes so they survive any
    // re-encoding of this source file.
    $text = str_replace(
        ['"', "'", "\u{201C}", "\u{201D}", "\u{2018}", "\u{2019}"],
        '',
        $text
    );

    // Removing quotes can leave doubled spaces behind; collapse them.
    $text = preg_replace('/\s+/', ' ', $text);
    return trim($text);
}

// Page shell: document head with the inline stylesheet, the banner, and the
// opening tag of the content area that all later output is written into.
echo <<<'HTML'
<!DOCTYPE html>
<html>
<head>
    <title>Perfect Hybrid TXT to JSON Converter - Quote Cleaned</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body { 
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; 
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh; 
            padding: 20px;
        }
        .container { 
            max-width: 1000px; 
            margin: 0 auto;
            background: white; 
            border-radius: 12px; 
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            overflow: hidden;
        }
        .header {
            background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
            color: white;
            padding: 30px;
            text-align: center;
        }
        .header h1 { font-size: 2.2em; margin-bottom: 10px; }
        .header p { opacity: 0.9; font-size: 1.1em; }
        .content { padding: 30px; }
        .status { 
            padding: 15px 20px; 
            margin: 15px 0; 
            border-radius: 8px; 
            border-left: 5px solid;
            font-size: 1.05em;
        }
        .success { 
            background: #d4edda; 
            color: #155724; 
            border-left-color: #28a745; 
        }
        .error { 
            background: #f8d7da; 
            color: #721c24; 
            border-left-color: #dc3545; 
        }
        .info { 
            background: #d1ecf1; 
            color: #0c5460; 
            border-left-color: #17a2b8; 
        }
        .warning { 
            background: #fff3cd; 
            color: #856404; 
            border-left-color: #ffc107; 
        }
        .processing {
            background: #e2e3e5;
            color: #383d41;
            border-left-color: #6c757d;
        }
        .stats { 
            display: grid; 
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); 
            gap: 20px; 
            margin: 25px 0; 
        }
        .stat-box { 
            background: #f8f9fa; 
            padding: 20px; 
            border-radius: 8px; 
            border-left: 4px solid #007bff;
            transition: transform 0.2s;
        }
        .stat-box:hover { transform: translateY(-2px); }
        .stat-box h3 { 
            color: #495057; 
            margin-bottom: 15px; 
            font-size: 1.2em;
        }
        .stat-item { 
            display: flex; 
            justify-content: space-between; 
            margin: 8px 0; 
            padding: 5px 0;
            border-bottom: 1px solid #dee2e6;
        }
        .stat-item:last-child { border-bottom: none; }
        .stat-value { 
            font-weight: bold; 
            color: #007bff; 
        }
        .emoji { font-size: 1.3em; margin-right: 10px; }
        .sample-box {
            background: #f8f9fa;
            border: 2px solid #e9ecef;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
        }
        .sample-box h3 { 
            color: #495057; 
            margin-bottom: 10px; 
            font-size: 1.1em;
        }
        .sample-content { 
            background: white; 
            padding: 12px; 
            border-radius: 4px; 
            border-left: 3px solid #28a745;
            margin: 10px 0;
        }
        .feature-highlight {
            background: linear-gradient(135deg, #84fab0 0%, #8fd3f4 100%);
            color: #333;
            padding: 20px;
            border-radius: 8px;
            margin: 20px 0;
        }
        .version-badge {
            background: #28a745;
            color: white;
            padding: 4px 12px;
            border-radius: 20px;
            font-size: 0.85em;
            font-weight: bold;
        }
    </style>
</head>
<body>
<div class="container">
    <div class="header">
        <h1><span class="emoji">🚀</span>Perfect Hybrid Converter</h1>
        <p>Quote-Cleaned Version - Auto-Detects Quoted Phrases!</p>
        <span class="version-badge">v2.2 QUOTE-AUTO</span>
    </div>
    
    <div class="content">
HTML;

// Input and output both live next to this script.
$txtFile  = __DIR__ . '/data.txt';
$jsonFile = __DIR__ . '/data.json';

// Without the input file there is nothing to convert: report the path that
// was looked up, close the page markup, and stop.
if (file_exists($txtFile) === false) {
    $missingPath = htmlspecialchars($txtFile);
    echo '<div class="status error">
        <span class="emoji">❌</span>
        <strong>Input File Missing:</strong> ' . $missingPath . '
    </div>'
        . '</div></div></body></html>';
    exit;
}

// Human-readable input size: megabytes above 1024*1024 bytes, kilobytes otherwise.
$fileSize = filesize($txtFile);
if ($fileSize > 1024 * 1024) {
    $fileSizeFormatted = round($fileSize / (1024 * 1024), 2) . 'MB';
} else {
    $fileSizeFormatted = round($fileSize / 1024, 1) . 'KB';
}

$inputLabel = basename($txtFile) . ' (' . $fileSizeFormatted . ')';
echo '<div class="status info">
    <span class="emoji">📄</span>
    <strong>Input File Detected:</strong> ' . $inputLabel . '
</div>';

// Static banner listing what the converter produces for each entry.
echo <<<'HTML'
<div class="feature-highlight">
    <h3><span class="emoji">✨</span>Enhanced Features:</h3>
    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 10px; margin-top: 10px;">
        <div>🎯 <strong>10 Keywords</strong> per entry</div>
        <div>🔄 <strong>12 Variations</strong> per entry</div>
        <div>✂️ <strong>Automatic Quote Removal</strong></div>
        <div>💡 <strong>Auto-Detect Quoted Phrases</strong></div>
        <div>🌐 <strong>Browser Compatible</strong></div>
        <div>📊 <strong>Enhanced Analytics</strong></div>
    </div>
</div>
HTML;

// Load the whole input into memory; file_get_contents() yields false on failure,
// in which case the error is shown, the page markup is closed and the run ends.
$content = file_get_contents($txtFile);
if (false === $content) {
    echo '<div class="status error">
        <span class="emoji">❌</span>
        <strong>Read Error:</strong> Cannot read the input file
    </div>'
        . '</div></div></body></html>';
    exit;
}

// Normalise the raw text before splitting it into lines.
//
// The UTF-8 byte-order mark is removed by comparing raw bytes instead of using
// a /u regex: preg_replace() returns null when the subject is not valid UTF-8,
// which would silently throw away the entire file.
if (strncmp($content, "\xEF\xBB\xBF", 3) === 0) {
    // The cast covers older PHP versions where substr() can return false.
    $content = (string) substr($content, 3);
}

// Treat Windows (CRLF) and classic Mac (CR) line endings as plain LF.
$content = str_replace(["\r\n", "\r"], "\n", $content);
$lines = explode("\n", $content);

echo '<div class="status processing">
    <span class="emoji">⚙️</span>
    <strong>Processing:</strong> ' . count($lines) . ' lines detected (quotes will be cleaned, quoted phrases will be extracted)
</div>';

/**
 * Decide whether a line of the input starts a new entry (a heading/question).
 *
 * After trimming, a line counts as a heading when it
 *  - ends with ":", "?" or "!", or
 *  - begins with list numbering such as "1." or "2)" followed by whitespace, or
 *  - is written largely in capitals: at least 60% of its A-Z/a-z letters are
 *    upper-case, or at least 40% when the line is shorter than 100 bytes.
 *
 * @param string $line One line of text.
 * @return bool True when the line should be treated as a heading.
 */
function is_heading($line) {
    $candidate = trim($line);
    if ($candidate === '') {
        return false;
    }

    // Trailing punctuation or leading numbering settles it immediately.
    foreach (['/[:?!]\s*$/', '/^\d+[\.\)]\s+/'] as $headingPattern) {
        if (preg_match($headingPattern, $candidate)) {
            return true;
        }
    }

    // Otherwise look at the share of capital letters among the letters.
    $letters = preg_replace('/[^A-Za-z]/', '', $candidate);
    if ($letters === '') {
        return false;
    }

    // preg_match_all() may return false on error; the cast turns that into 0.
    $capitalCount = (int) preg_match_all('/[A-Z]/', $letters, $unused);
    $capitalShare = $capitalCount / strlen($letters);

    if ($capitalShare >= 0.6) {
        return true;
    }
    return strlen($candidate) < 100 && $capitalShare >= 0.4;
}

/**
 * Pick up to 10 search keywords for a question/answer pair.
 *
 * Candidates are gathered from the lower-cased "question answer" text in this order:
 *  1. astrology vocabulary and life-event terms (main term and any synonym
 *     found as a substring),
 *  2. themes (the theme name plus each of its trigger terms that is present),
 *  3. every remaining word of 3+ bytes that is not a stop word or a number,
 *  4. house references such as "7th house", added as "7th house" and "house 7".
 * Each candidate is then scored and the ten best are returned.
 *
 * @param string $question Question text.
 * @param string $answer   Answer text.
 * @return array Up to 10 keywords, highest score first.
 */
function generateKeywords($question, $answer) {
    $haystack      = strtolower($question . ' ' . $answer);
    $questionLower = strtolower($question);

    $kasTerms = [
        'kas'       => ['krushna ashtakavarga system', 'ashtakavarga', 'krushna', 'k.a.s', 'kas system'],
        'upachaya'  => ['growth houses', 'improvement houses', 'progressive houses', 'upachaya bhava', 'upachaya houses'],
        'bindhu'    => ['beneficial points', 'positive points', 'benefic points', 'good points', 'favourable points'],
        'rekha'     => ['malefic points', 'negative points', 'harmful points', 'bad points', 'unfavourable points'],
        'antradasa' => ['antardasha', 'sub period', 'sub-period', 'minor period', 'antradasha period'],
        'mahadasa'  => ['mahadasha', 'main period', 'major period', 'primary period', 'mahadasha period'],
        'planet'    => ['graha', 'planetary', 'celestial body', 'planets'],
        'house'     => ['bhava', 'bhavas', 'houses', 'bhav'],
        'saturn'    => ['shani', 'sani', 'saturn planet'],
        'jupiter'   => ['guru', 'brihaspati', 'jupitor', 'jupiter planet'],
        'mars'      => ['mangal', 'kuja', 'angaraka', 'mars planet'],
        'venus'     => ['shukra', 'sukra', 'venus planet'],
        'mercury'   => ['budh', 'budha', 'mercury planet'],
        'sun'       => ['surya', 'ravi', 'solar', 'sun planet'],
        'moon'      => ['chandra', 'soma', 'lunar', 'moon planet'],
        'rahu'      => ['north node', 'ascending node', 'dragon head'],
        'ketu'      => ['south node', 'descending node', 'dragon tail'],
        'ascendant' => ['lagna', 'rising sign', '1st house', 'ascendant sign'],
        'dasha'     => ['planetary period', 'time period', 'dasa'],
        'transit'   => ['gochara', 'planetary movement', 'transits']
    ];

    $lifeEventTerms = [
        'adoption'     => ['adopted', 'adopting', 'adoptive', 'foster'],
        'divorce'      => ['separation', 'split', 'breakup'],
        'pregnancy'    => ['conception', 'childbirth', 'delivery'],
        'death'        => ['mortality', 'demise', 'passing'],
        'accident'     => ['injury', 'mishap', 'incident'],
        'surgery'      => ['operation', 'medical procedure'],
        'promotion'    => ['career advancement', 'job elevation'],
        'relocation'   => ['moving', 'migration', 'shifting'],
        'relationship' => ['love', 'romance', 'attraction'],
        'bankruptcy'   => ['financial loss', 'insolvency', 'debt crisis'],
        'inheritance'  => ['legacy', 'ancestral property', 'will'],
        'lawsuit'      => ['litigation', 'legal case', 'court matter'],
        'theft'        => ['robbery', 'burglary', 'stolen'],
        'betrayal'     => ['cheating', 'infidelity', 'affair']
    ];

    $themeKeywords = [
        'career'        => ['job', 'employment', 'work', 'profession', 'business', 'occupation'],
        'relationships' => ['marriage', 'spouse', 'partner', 'love', 'relationship'],
        'health'        => ['disease', 'illness', 'medical', 'wellness', 'sickness'],
        'finance'       => ['money', 'wealth', 'income', 'financial', 'prosperity'],
        'education'     => ['study', 'learning', 'knowledge', 'academic'],
        'family'        => ['children', 'parents', 'siblings', 'relatives'],
        'travel'        => ['journey', 'foreign', 'abroad', 'pilgrimage'],
        'property'      => ['real estate', 'land', 'home', 'house property']
    ];

    $found = [];

    // 1. Vocabulary dictionaries, handled identically: main term first, then
    //    every synonym present in the text that has not been recorded yet.
    foreach ([$kasTerms, $lifeEventTerms] as $dictionary) {
        foreach ($dictionary as $mainTerm => $synonyms) {
            if (strpos($haystack, $mainTerm) !== false) {
                $found[] = $mainTerm;
            }
            foreach ($synonyms as $synonym) {
                if (strpos($haystack, $synonym) === false) {
                    continue;
                }
                if (in_array($synonym, $found)) {
                    continue;
                }
                $found[] = $synonym;
            }
        }
    }

    // 2. Themes: the theme name is added once, on its first matching trigger term.
    foreach ($themeKeywords as $theme => $triggers) {
        $themeAdded = false;
        foreach ($triggers as $trigger) {
            if (strpos($haystack, $trigger) === false) {
                continue;
            }
            if (!$themeAdded) {
                $found[] = $theme;
                $themeAdded = true;
            }
            if (!in_array($trigger, $found)) {
                $found[] = $trigger;
            }
        }
    }

    // 3. Plain words from the text.
    $stopWords = ['the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'man', 'end', 'too', 'any'];

    foreach (preg_split('/[^\w]+/', $haystack, -1, PREG_SPLIT_NO_EMPTY) as $token) {
        $token = trim(strtolower($token));
        $rejected = strlen($token) < 3
            || in_array($token, $stopWords)
            || is_numeric($token)
            || in_array($token, $found);
        if ($rejected) {
            continue;
        }
        $found[] = $token;
    }

    // 4. Numbered house references (1 through 12 only).
    $ordinals = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th'];
    if (preg_match_all('/(\d+)(?:st|nd|rd|th)?\s*(?:house|bhava|lord)/', $haystack, $houseMatches)) {
        foreach ($houseMatches[1] as $houseNum) {
            if ($houseNum < 1 || $houseNum > 12) {
                continue;
            }
            $found[] = $ordinals[$houseNum - 1] . ' house';
            $found[] = 'house ' . $houseNum;
        }
    }

    $found = array_unique($found);

    // Scoring: dictionary membership, astrology stems, presence in the
    // question itself, and a small bonus for longer keywords (capped at 6).
    $astroTerms = ['house', 'planet', 'lord', 'dasha', 'yoga', 'dosha'];
    $scores = [];
    foreach ($found as $keyword) {
        $score = 0;
        if (array_key_exists($keyword, $kasTerms)) {
            $score += 15;
        }
        if (array_key_exists($keyword, $lifeEventTerms)) {
            $score += 12;
        }
        foreach ($astroTerms as $stem) {
            if (strpos($keyword, $stem) !== false) {
                $score += 8;
            }
        }
        if (array_key_exists($keyword, $themeKeywords)) {
            $score += 5;
        }
        if (strpos($questionLower, $keyword) !== false) {
            $score += 4;
        }
        $score += min(strlen($keyword) / 2, 6);

        $scores[$keyword] = $score;
    }

    arsort($scores);
    return array_slice(array_keys($scores), 0, 10);
}

/**
 * Build up to 12 alternative phrasings of a question for matching.
 *
 * Steps:
 *  1. Leading list numbering ("1. ", "2) ") is dropped so it does not leak
 *     into the variations.
 *  2. Phrases wrapped in double quotes (straight, or curly U+201C/U+201D) are
 *     collected, lower-cased, and expanded into "what is X", "causes of X",
 *     "factors for X", "reasons for X" and "X meaning".
 *  3. Quotes are removed via cleanQuotes() and a leading question starter
 *     ("what is", "which are the", ...) is stripped to obtain the core content,
 *     which is expanded with generic prefixes and suffixes.
 *  4. Known terms are swapped for their synonyms.
 *  5. Candidates are de-duplicated, filtered and scored; quoted-phrase
 *     variations get a large bonus so they rank first.
 *
 * @param string $question The heading as read from the input, quotes still in place.
 * @return array Up to 12 lower-case variations, best score first.
 */
function generateVariations($question) {
    $variations = [];
    $originalQuestion = trim($question);

    // Strip list numbering; otherwise "1. what is kas" would seed every variation.
    $originalQuestion = preg_replace('/^\d+[\.\)]\s+/', '', $originalQuestion);

    // Extract quoted phrases BEFORE the quotes are cleaned away. Curly quotes
    // are written as code-point escapes so the pattern does not depend on the
    // encoding of this source file.
    $quotedPhrases = [];
    $phrasePattern = '/["\x{201C}\x{201D}]([^"\x{201C}\x{201D}]+)["\x{201C}\x{201D}]/u';
    $matchCount = preg_match_all($phrasePattern, $originalQuestion, $matches);
    if ($matchCount === false) {
        // The /u pattern fails on text that is not valid UTF-8; fall back to
        // a byte-level match on straight quotes only.
        $matchCount = preg_match_all('/"([^"]+)"/', $originalQuestion, $matches);
    }
    if ($matchCount) {
        foreach ($matches[1] as $phrase) {
            $phrase = strtolower(trim($phrase));
            if ($phrase !== '') {
                $quotedPhrases[] = $phrase;
            }
        }
    }
    $quotedPhrases = array_unique($quotedPhrases);

    // Variations derived from each quoted phrase.
    $addedCustomVariations = [];
    foreach ($quotedPhrases as $phrase) {
        $addedCustomVariations[] = $phrase;
        $addedCustomVariations[] = "what is " . $phrase;
        $addedCustomVariations[] = "causes of " . $phrase;
        $addedCustomVariations[] = "factors for " . $phrase;
        $addedCustomVariations[] = "reasons for " . $phrase;
        $addedCustomVariations[] = $phrase . " meaning";
    }

    // From here on work with the quote-free, lower-cased question.
    $originalQuestion = cleanQuotes($originalQuestion);
    $lowerQuestion = strtolower($originalQuestion);
    $cleanQuestion = rtrim($lowerQuestion, '?:!.');

    // Longer starters come before their shorter prefixes so that
    // "what is the" wins over "what is".
    $questionStarters = [
        'which are the' => 'the',
        'what are the' => 'the',
        'which are' => '',
        'what are' => '',
        'what is the' => 'the',
        'which is the' => 'the',
        'what is' => '',
        'which is' => '',
        'how are the' => 'the',
        'how is the' => 'the',
        'how are' => '',
        'how is' => '',
        'tell me about' => '',
        'explain about' => '',
        'which factors' => '',
        'what factors' => ''
    ];

    $coreContent = $cleanQuestion;
    foreach ($questionStarters as $starter => $replacement) {
        $starterLength = strlen($starter);
        if (strncmp($coreContent, $starter, $starterLength) !== 0) {
            continue;
        }
        $remainder = (string) substr($coreContent, $starterLength);
        // Require a word boundary so "what is" does not eat into "what island".
        if ($remainder !== '' && !preg_match('/^\s/', $remainder)) {
            continue;
        }
        // Replace the leading starter only; later occurrences stay untouched.
        $coreContent = trim($replacement . $remainder);
        break;
    }
    $coreContent = trim($coreContent);

    if ($coreContent !== '' && $coreContent !== '0') {
        $variations[] = $coreContent;

        // Only prepend question words when the core does not already start with one.
        if (!preg_match('/^(what|which|how|when|where|why)/', $coreContent)) {
            $variations[] = "what is " . $coreContent;
            $variations[] = "what are " . $coreContent;
            $variations[] = "which is " . $coreContent;
            $variations[] = "which are " . $coreContent;
            $variations[] = "how is " . $coreContent;
        }

        $variations[] = $coreContent . " meaning";
        $variations[] = $coreContent . " definition";
        $variations[] = $coreContent . " in kas";
        $variations[] = "explain " . $coreContent;
        $variations[] = "tell me about " . $coreContent;
    }

    // Synonym substitution on the full question.
    $termVariations = [
        'upachaya house' => ['upachaya houses', 'growth houses'],
        'bindhu' => ['beneficial points', 'positive points'],
        'rekha' => ['malefic points', 'negative points'],
        'kas' => ['krushna ashtakavarga', 'ashtakavarga system'],
        'divisional chart' => ['divisional charts', 'varga chart']
    ];

    foreach ($termVariations as $original => $replacements) {
        if (strpos($lowerQuestion, $original) === false) {
            continue;
        }
        foreach ($replacements as $replacement) {
            $newVariation = str_replace($original, $replacement, $lowerQuestion);
            if ($newVariation !== $lowerQuestion) {
                $variations[] = rtrim($newVariation, '?:!.');
            }
        }
    }

    $variations = array_merge($variations, $addedCustomVariations);

    // Drop anything too short, identical to the question itself, or starting
    // with a bare article/preposition.
    $variations = array_map('trim', $variations);
    $variations = array_filter($variations, function ($v) use ($lowerQuestion) {
        if (strlen($v) <= 2 || $v === $lowerQuestion) {
            return false;
        }
        if (preg_match('/^(the|and|of|in|to|for)\s/', $v)) {
            return false;
        }
        return true;
    });

    $variations = array_unique($variations);

    // Score: short variations and common question forms rank higher, the core
    // content gets a bonus, and quoted-phrase variations get the largest bonus.
    $scoredVariations = [];
    foreach ($variations as $variation) {
        $score = 0;
        $length = strlen($variation);
        if ($length <= 20) {
            $score += 4;
        } elseif ($length <= 30) {
            $score += 3;
        }

        if (preg_match('/^(what is|what are|which is)/', $variation)) {
            $score += 3;
        }
        if ($variation === $coreContent) {
            $score += 5;
        }
        if (in_array($variation, $addedCustomVariations, true)) {
            $score += 10;
        }

        $scoredVariations[$variation] = $score;
    }

    arsort($scoredVariations);
    return array_slice(array_keys($scoredVariations), 0, 12);
}

/**
 * Assign an entry to the best-matching category.
 *
 * Every category keyword found (as a substring) in the lower-cased
 * "question answer" text earns 2 points when it also occurs in the question
 * and 1 point otherwise. The category with the highest total wins.
 *
 * @param string $question Question text.
 * @param string $answer   Answer text.
 * @return string Category name, or 'general' when no keyword matched.
 */
function determineCategory($question, $answer) {
    $questionLower = strtolower($question);
    $combined      = strtolower($question . ' ' . $answer);

    $categories = [
        'fundamentals'  => ['kas', 'krushna', 'ashtakavarga', 'system', 'basic'],
        'houses'        => ['house', 'upachaya', 'bhava'],
        'planets'       => ['sun', 'moon', 'mars', 'mercury', 'jupiter', 'venus', 'saturn'],
        'timing'        => ['dasa', 'antradasa', 'period', 'timing'],
        'career'        => ['job', 'career', 'employment'],
        'relationships' => ['marriage', 'spouse', 'relationship']
    ];

    $categoryScores = [];
    foreach ($categories as $category => $keywords) {
        $points = 0;
        foreach ($keywords as $keyword) {
            if (strpos($combined, $keyword) === false) {
                continue;
            }
            $inQuestion = strpos($questionLower, $keyword) !== false;
            $points += $inQuestion ? 2 : 1;
        }
        if ($points > 0) {
            $categoryScores[$category] = $points;
        }
    }

    if (empty($categoryScores)) {
        return 'general';
    }

    // Highest score first; the leading key is the winner.
    arsort($categoryScores);
    reset($categoryScores);
    return key($categoryScores);
}

/**
 * Grade an entry by the length of its answer.
 *
 * @param string $question Question text (not used in the grading).
 * @param string $answer   Answer text; its word count decides the level.
 * @return string 'advanced' above 150 words, 'intermediate' above 50, else 'beginner'.
 */
function determineDifficulty($question, $answer) {
    $wordTotal = str_word_count($answer);

    if ($wordTotal > 150) {
        return 'advanced';
    }
    if ($wordTotal > 50) {
        return 'intermediate';
    }
    return 'beginner';
}

/**
 * Flag questions about core topics as high priority.
 *
 * @param string $question Question text; matched case-insensitively by substring.
 * @param string $answer   Answer text (not used in the decision).
 * @return string 'high' when the question contains one of the core terms, else 'medium'.
 */
function determinePriority($question, $answer) {
    $haystack = strtolower($question);

    foreach (['what is kas', 'kas', 'upachaya', 'basic'] as $coreTerm) {
        if (strpos($haystack, $coreTerm) === false) {
            continue;
        }
        return 'high';
    }

    return 'medium';
}

// MAIN PROCESSING
// Walk the lines once. A heading starts a new entry, an "[image: ...]" line
// attaches an image to the entry being collected, and every other non-blank
// line is appended to the current answer.
$data = [];
$currentQuestion = null;
$currentAnswer = [];
$currentImage = null;
$processedCount = 0;
$quotesRemovedCount = 0;

// Turns the question/answer collected so far into an entry and appends it to
// $data. Does nothing when the answer is empty. Shared by the loop (when the
// next heading is reached) and by the final flush after the loop.
$storeCurrentEntry = function () use (
    &$data,
    &$processedCount,
    &$quotesRemovedCount,
    &$currentQuestion,
    &$currentAnswer,
    &$currentImage
) {
    $answerText = preg_replace('/\s+/', ' ', trim(implode(' ', $currentAnswer)));
    // Strict comparison so that an answer consisting of "0" is still kept.
    if ($answerText === null || $answerText === '') {
        return;
    }

    // Stored question: quotes removed, then list numbering removed.
    $cleanedQuestion = cleanQuotes($currentQuestion);
    $cleanedQuestion = preg_replace('/^\d+[\.\)]\s+/', '', $cleanedQuestion);

    // Count the question only when cleanQuotes() actually removed characters:
    // compare against the original with the same whitespace normalisation,
    // so numbering or spacing differences alone are not counted.
    $normalisedOriginal = trim(preg_replace('/\s+/', ' ', $currentQuestion));
    if (cleanQuotes($currentQuestion) !== $normalisedOriginal) {
        $quotesRemovedCount++;
    }

    // Variations need the quotes (to detect quoted phrases) but not the numbering.
    $questionForVariations = preg_replace('/^\d+[\.\)]\s+/', '', $currentQuestion);

    $entry = [
        "id" => $processedCount + 1,
        "question" => $cleanedQuestion,
        "answer" => $answerText,
        "keywords" => generateKeywords($cleanedQuestion, $answerText),
        "variations" => generateVariations($questionForVariations),
        "category" => determineCategory($cleanedQuestion, $answerText),
        "difficulty" => determineDifficulty($cleanedQuestion, $answerText),
        "priority" => determinePriority($cleanedQuestion, $answerText),
        "word_count" => str_word_count($answerText),
        "char_count" => strlen($answerText)
    ];

    if ($currentImage) {
        $entry["image"] = $currentImage;
    }

    $data[] = $entry;
    $processedCount++;
};

foreach ($lines as $line) {
    $line = trim($line);
    if ($line === '') continue;

    if (preg_match('/^\[image:\s*(.+?)\s*\]$/i', $line, $m)) {
        $currentImage = trim($m[1]);
        continue;
    }

    if (is_heading($line)) {
        // A new heading closes the entry that was being collected.
        if ($currentQuestion !== null) {
            $storeCurrentEntry();
        }

        $currentQuestion = rtrim($line, ":?!.");
        $currentAnswer = [];
        $currentImage = null;
        continue;
    }

    // Text that appears before the first heading is filed under a placeholder question.
    if ($currentQuestion === null) {
        $currentQuestion = "GENERAL_INFO";
        $currentAnswer = [];
    }
    $currentAnswer[] = $line;
}

// Save last entry
if ($currentQuestion !== null && !empty($currentAnswer)) {
    $storeCurrentEntry();
}

// Summary of the parsing pass: entries produced and questions that had quotes removed.
$generatedTotal = count($data);
$cleanedTotal   = $quotesRemovedCount;

echo '<div class="status success">
    <span class="emoji">🎉</span>
    <strong>Processing Complete!</strong><br>
    Generated ' . $generatedTotal . ' entries<br>
    Cleaned quotes from ' . $cleanedTotal . ' questions
</div>';

// Wrap the entries with generation metadata and write the JSON file.
$jsonData = [
    "metadata" => [
        "generated_at" => date('Y-m-d H:i:s'),
        "generator_version" => "Perfect Hybrid v2.2 - Quote Cleaned + Auto-Detect",
        "total_entries" => count($data),
        "quotes_cleaned" => $quotesRemovedCount
    ],
    "data" => $data
];

// json_encode() returns false on failure (for example when the input text is
// not valid UTF-8) and file_put_contents() returns false when the file cannot
// be written. Either failure must be reported instead of announcing success.
$jsonOutput = json_encode($jsonData, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
$bytes = ($jsonOutput === false) ? false : file_put_contents($jsonFile, $jsonOutput);

if ($bytes === false) {
    $failureReason = ($jsonOutput === false)
        ? 'JSON encoding failed: ' . json_last_error_msg()
        : 'Cannot write ' . basename($jsonFile);

    echo '<div class="status error">
        <span class="emoji">❌</span>
        <strong>Write Error:</strong> ' . htmlspecialchars($failureReason) . '
    </div>';
    echo '</div></div></body></html>';
    exit;
}

$outputSizeFormatted = $bytes > 1024*1024 ? round($bytes/(1024*1024), 2).'MB' : round($bytes/1024, 1).'KB';

echo '<div class="status success">
    <span class="emoji">💾</span>
    <strong>Output File Created:</strong> ' . basename($jsonFile) . ' (' . $outputSizeFormatted . ')<br>
    ✅ All quotes removed - Clean matching guaranteed!<br>
    💡 Quoted phrases auto-detected and added to variations!
</div>';

// ENHANCED STATISTICS
// Tally how many entries fall into each category / difficulty / priority
// (keys appear in order of first occurrence) and add up the content totals.
$categories   = array_count_values(array_column($data, 'category'));
$difficulties = array_count_values(array_column($data, 'difficulty'));
$priorities   = array_count_values(array_column($data, 'priority'));

$totalKeywords   = array_sum(array_map('count', array_column($data, 'keywords')));
$totalVariations = array_sum(array_map('count', array_column($data, 'variations')));
$totalWords      = array_sum(array_column($data, 'word_count'));

// Analytics section: one box per breakdown plus a box of content totals.
// The averages are guarded because an input without any entries would
// otherwise divide by zero.
$entryCount    = count($data);
$avgKeywords   = $entryCount > 0 ? round($totalKeywords / $entryCount, 1) : 0;
$avgVariations = $entryCount > 0 ? round($totalVariations / $entryCount, 1) : 0;

echo '<h2><span class="emoji">📊</span>Detailed Analytics</h2>

<div class="stats">';

$breakdowns = [
    '📂 Categories' => $categories,
    '🎯 Difficulty Levels' => $difficulties,
    '⭐ Priority Levels' => $priorities,
];

foreach ($breakdowns as $boxTitle => $counts) {
    echo '
    <div class="stat-box">
        <h3>' . $boxTitle . '</h3>';

    // A non-empty breakdown implies at least one entry, so the division is safe.
    foreach ($counts as $label => $count) {
        $percentage = round(($count / $entryCount) * 100, 1);
        echo '<div class="stat-item">
        <span>' . ucfirst($label) . '</span>
        <span class="stat-value">' . $count . ' (' . $percentage . '%)</span>
    </div>';
    }

    echo '</div>
    ';
}

echo '
    <div class="stat-box">
        <h3>🔢 Content Statistics</h3>
        <div class="stat-item">
            <span>Total Keywords</span>
            <span class="stat-value">' . number_format($totalKeywords) . '</span>
        </div>
        <div class="stat-item">
            <span>Total Variations</span>
            <span class="stat-value">' . number_format($totalVariations) . '</span>
        </div>
        <div class="stat-item">
            <span>Total Words</span>
            <span class="stat-value">' . number_format($totalWords) . '</span>
        </div>
        <div class="stat-item">
            <span>Avg Keywords/Entry</span>
            <span class="stat-value">' . $avgKeywords . '</span>
        </div>
        <div class="stat-item">
            <span>Avg Variations/Entry</span>
            <span class="stat-value">' . $avgVariations . '</span>
        </div>
    </div>
    
</div>';

// ENHANCED SAMPLE OUTPUT
// Shows the first entry as a worked example, followed by a short quality
// report when more than one entry exists.
if (!empty($data)) {
    // Everything below originates from the input file, so it is escaped
    // before being placed into the page.
    $escapeHtml = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    echo '<h2><span class="emoji">📋</span>Sample Output Analysis</h2>';

    $sample = $data[0];
    $sampleKeywords = array_map($escapeHtml, $sample['keywords']);
    $shownVariations = array_map($escapeHtml, array_slice($sample['variations'], 0, 6));
    // Every shown variation gets a bullet, including the first one.
    $variationList = empty($shownVariations) ? '' : ('• ' . implode('<br>• ', $shownVariations));

    echo '<div class="sample-box">
        <h3>📝 Question:</h3>
        <div class="sample-content">' . $escapeHtml($sample['question']) . '</div>
        
        <h3>🔑 Keywords (' . count($sample['keywords']) . '):</h3>
        <div class="sample-content">' . implode(', ', $sampleKeywords) . '</div>
        
        <h3>🔄 Variations (' . count($sample['variations']) . '):</h3>
        <div class="sample-content">' . $variationList;

    if (count($sample['variations']) > 6) {
        echo '<br><em>... and ' . (count($sample['variations']) - 6) . ' more variations</em>';
    }

    echo '</div>
        
        <h3>📊 Metadata:</h3>
        <div class="sample-content">
            <strong>Category:</strong> ' . $escapeHtml(ucfirst($sample['category'])) . '<br>
            <strong>Difficulty:</strong> ' . $escapeHtml(ucfirst($sample['difficulty'])) . '<br>
            <strong>Priority:</strong> ' . $escapeHtml(ucfirst($sample['priority'])) . '<br>
            <strong>Word Count:</strong> ' . $sample['word_count'] . '<br>
            <strong>Character Count:</strong> ' . $sample['char_count'] . '
        </div>
    </div>';

    // Quality report across all entries.
    if (count($data) > 1) {
        $entryCount = count($data);
        $highPriorityCount = $priorities['high'] ?? 0;
        $advancedCount = $difficulties['advanced'] ?? 0;

        echo '<div class="feature-highlight">
            <h3><span class="emoji">🎯</span>Quality Assurance Report</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin-top: 15px;">
                <div>✅ <strong>Search Coverage:</strong> ' . number_format($totalVariations) . ' total variations</div>
                <div>🎯 <strong>Keyword Density:</strong> ' . round($totalKeywords / $entryCount, 1) . ' avg per entry</div>
                <div>📈 <strong>High Priority:</strong> ' . $highPriorityCount . ' entries (' . round(($highPriorityCount / $entryCount) * 100, 1) . '%)</div>
                <div>🔬 <strong>Advanced Content:</strong> ' . $advancedCount . ' entries (' . round(($advancedCount / $entryCount) * 100, 1) . '%)</div>
            </div>
        </div>';
    }
}

// Closing banner repeating the headline totals of this run.
$keywordTotalLabel   = number_format($totalKeywords);
$variationTotalLabel = number_format($totalVariations);

echo '<div class="status success" style="margin-top: 30px;">
    <span class="emoji">🚀</span>
    <strong>Perfect Hybrid Conversion Complete!</strong><br><br>
    <div style="background: rgba(255,255,255,0.2); padding: 15px; border-radius: 6px; margin-top: 15px;">
        <strong>✨ Enhanced Features Delivered:</strong><br>
        🔹 <strong>' . $keywordTotalLabel . '</strong> optimized keywords<br>
        🔹 <strong>' . $variationTotalLabel . '</strong> search variations<br>
        🔹 Advanced scoring & filtering<br>
        🔹 Domain-specific intelligence<br>
        🔹 Browser-friendly interface<br>
        🔹 Comprehensive analytics
    </div>
</div>';

// Static guidance on how to use the generated file.
echo <<<'HTML'
<div class="status info">
    <span class="emoji">💡</span>
    <strong>Next Steps:</strong><br>
    1. Use the generated <code>data.json</code> file in your chatbot<br>
    2. Implement fuzzy search matching for variations<br>
    3. Use keywords for search indexing<br>
    4. Filter content by category, difficulty, or priority as needed
</div>
HTML;

// Close the content area, the container and the document.
echo '</div></div></body></html>';
?>