<?php

namespace App\Services;

/**
 * Converts Bangla place names to English.
 *
 * Resolution order used by translateBanglaToEnglish():
 *   1. dictionary match on the whole (trimmed) text,
 *   2. dictionary match after removing one known administrative suffix,
 *   3. word-by-word dictionary match with transliteration for unknown words,
 *   4. rule-based transliteration of the whole text.
 *
 * Dictionary lookups tolerate both Unicode encodings of the nukta letters
 * (precomposed U+09DC / U+09DD / U+09DF versus base letter + U+09BC).
 */
class DictionaryTranslationService
{
    /**
     * Bangla => English dictionary of known names and common words.
     * Subclasses may override it; addTranslation() extends it at runtime.
     *
     * @var array<string, string>
     */
    protected $translations = [
        // Divisions
        'ঢাকা' => 'Dhaka',
        'চট্টগ্রাম' => 'Chattogram',
        'রাজশাহী' => 'Rajshahi',
        'খুলনা' => 'Khulna',
        'বরিশাল' => 'Barishal',
        'রংপুর' => 'Rangpur',
        'সিলেট' => 'Sylhet',
        'ময়মনসিংহ' => 'Mymensingh',

        // Districts
        'ফরিদপুর' => 'Faridpur',
        'গাজীপুর' => 'Gazipur',
        'গোপালগঞ্জ' => 'Gopalganj',
        'কিশোরগঞ্জ' => 'Kishoreganj',
        'মাদারীপুর' => 'Madaripur',
        'মানিকগঞ্জ' => 'Manikganj',
        'মুন্সীগঞ্জ' => 'Munshiganj',
        'নারায়ণগঞ্জ' => 'Narayanganj',
        'নরসিংদী' => 'Narsingdi',
        'রাজবাড়ী' => 'Rajbari',
        'শরীয়তপুর' => 'Shariatpur',
        'টাঙ্গাইল' => 'Tangail',
        'জামালপুর' => 'Jamalpur',
        'নেত্রকোনা' => 'Netrokona',
        'শেরপুর' => 'Sherpur',

        // Upazilas
        'সদর' => 'Sadar',
        'বেলাবো' => 'Belabo',
        'মনোহরদী' => 'Monohardi',
        'নরসিংদী সদর' => 'Narsingdi Sadar',
        'পলাশ' => 'Palash',
        'রায়পুরা' => 'Raipura',
        'শিবপুর' => 'Shibpur',
        'শিরাজগঞ্জ' => 'Sirajganj',
        'চরউজিলাব' => 'Char Ujilab',
        'দেয়ারা' => 'Deara',
        'নবাবগঞ্জ' => 'Nawabganj',
        'কালিয়াকৈর' => 'Kaliakoir',
        'কাপাসিয়া' => 'Kapasia',
        'নড়িয়া' => 'Naria',
        'শ্রীপুর' => 'Sreepur',

        // Unions
        'আমলাব' => 'Amlab',
        'বাজনাব' => 'Bajanab',
        'বেলাব' => 'Belab',
        'বিন্নাবাইদ' => 'Binnabaid',
        'দৈয়ারা' => 'Daiyara',
        'পাটুলী' => 'Patuli',
        'সল্লাবাদ' => 'Sollabad',

        // Common words
        'উপজেলা' => 'Upazila',
        'জেলা' => 'District',
        'বিভাগ' => 'Division',
        'ইউনিয়ন' => 'Union',
        'বাজার' => 'Bazar',
        'গঞ্জ' => 'Ganj',
        'পুর' => 'Pur',
        'বাদ' => 'Bad',
        'চর' => 'Char',
        'হাট' => 'Hat',
    ];

    /**
     * $translations re-keyed by nukta-normalised Bangla text.
     * Built lazily by lookup(); null until first needed.
     *
     * @var array<string, string>|null
     */
    private $normalizedIndex = null;

    /**
     * The $translations array that $normalizedIndex was built from, kept so a
     * later change to the dictionary (addTranslation() or a subclass writing
     * to the protected property) triggers a rebuild.
     *
     * @var array<string, string>|null
     */
    private $normalizedSource = null;

    /**
     * Translate a Bangla place name (or phrase) to English.
     *
     * @param mixed $text Bangla text; null, arrays and blank strings yield ''.
     *
     * @return string Dictionary translation when available, otherwise a
     *                transliteration. Text that is not valid UTF-8 is returned
     *                trimmed but otherwise unchanged.
     */
    public function translateBanglaToEnglish($text)
    {
        if ($text === null || is_array($text)) {
            return '';
        }

        // Not empty(): that would also discard the legitimate input '0'.
        $cleanText = trim((string) $text);
        if ($cleanText === '') {
            return '';
        }

        // 1. Whole-text dictionary match.
        $whole = $this->lookup($cleanText);
        if ($whole !== null) {
            return $whole;
        }

        // 2. Remove one administrative suffix and retry. Order matters:
        //    'উপজেলা' contains 'জেলা', so the longer word is tried first.
        $suffixes = ['উপজেলা', 'সদর', 'বাজার', 'পৌরসভা', 'জেলা'];
        foreach ($suffixes as $suffix) {
            if (strpos($cleanText, $suffix) === false) {
                continue; // Nothing to strip; the whole text already missed.
            }

            // str_replace removes every occurrence, wherever it appears.
            $stem = trim(str_replace($suffix, '', $cleanText));
            $stemEnglish = $this->lookup($stem);
            if ($stemEnglish === null) {
                continue;
            }

            // The suffix is re-appended only when it has a dictionary entry
            // itself; otherwise (e.g. 'পৌরসভা') it is dropped.
            $suffixEnglish = $this->lookup($suffix);

            return $suffixEnglish === null
                ? $stemEnglish
                : $stemEnglish . ' ' . $suffixEnglish;
        }

        // 3. Word-by-word for compound names.
        $words = preg_split('/\s+/u', $cleanText);
        if ($words !== false && count($words) > 1) {
            $translatedWords = [];
            foreach ($words as $word) {
                $word = trim($word);
                $known = $this->lookup($word);
                $translatedWords[] = $known !== null
                    ? $known
                    : $this->transliterateBanglaToEnglish($word);
            }

            return implode(' ', $translatedWords);
        }

        // 4. Single word, or text preg_split could not decode as UTF-8
        //    (the transliterator returns such text unchanged).
        return $this->transliterateBanglaToEnglish($cleanText);
    }

    /**
     * Find a dictionary entry for $phrase.
     *
     * A byte-exact key always wins. Failing that, the phrase and the
     * dictionary keys are compared after nukta normalisation, so the two
     * Unicode spellings of ড়, ঢ় and য় are treated as the same text.
     *
     * @param string $phrase
     *
     * @return mixed|null The stored translation, or null when there is none.
     */
    private function lookup($phrase)
    {
        if (isset($this->translations[$phrase])) {
            return $this->translations[$phrase];
        }

        // Rebuild the normalised index whenever the dictionary has changed.
        if ($this->normalizedSource !== $this->translations) {
            $index = [];
            foreach ($this->translations as $bangla => $english) {
                $key = $this->normalizeNukta((string) $bangla);
                if (!isset($index[$key])) {
                    $index[$key] = $english; // First entry wins on collision.
                }
            }
            $this->normalizedIndex = $index;
            $this->normalizedSource = $this->translations;
        }

        $key = $this->normalizeNukta($phrase);

        return isset($this->normalizedIndex[$key]) ? $this->normalizedIndex[$key] : null;
    }

    /**
     * Rewrite "base letter + nukta (U+09BC)" sequences as the single
     * precomposed letters:
     *   U+09AF U+09BC -> U+09DF (YYA)
     *   U+09A1 U+09BC -> U+09DC (RRA)
     *   U+09A2 U+09BC -> U+09DD (RHA)
     *
     * @param string $text
     *
     * @return string
     */
    private function normalizeNukta($text)
    {
        return str_replace(
            ["\u{09AF}\u{09BC}", "\u{09A1}\u{09BC}", "\u{09A2}\u{09BC}"],
            ["\u{09DF}", "\u{09DC}", "\u{09DD}"],
            $text
        );
    }

    /**
     * Rule-based Bangla to Latin transliteration.
     *
     * The text is processed one Unicode code point at a time. A consonant is
     * followed by an inherent "a" unless the next code point is a hasant, a
     * vowel sign, or one of the marks in $specialChars. Characters with no
     * mapping are copied through. Finally, repeated identical vowels are
     * collapsed, the result is lower-cased and each word is capitalised.
     *
     * @param string $text
     *
     * @return string Transliteration; the input is returned unchanged (apart
     *                from nukta normalisation) if it is not valid UTF-8.
     */
    private function transliterateBanglaToEnglish($text)
    {
        $text = $this->normalizeNukta($text);

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split with the 'u' modifier fails on invalid UTF-8.
            return $text;
        }

        $hasant = "\u{09CD}"; // Virama: suppresses the inherent vowel.

        // Independent vowels.
        $vowels = [
            'অ' => 'a', 'আ' => 'a', 'ই' => 'i', 'ঈ' => 'i', 'উ' => 'u', 'ঊ' => 'u',
            'ঋ' => 'ri', 'এ' => 'e', 'ঐ' => 'oi', 'ও' => 'o', 'ঔ' => 'ou',
        ];

        // Consonants that carry an inherent "a".
        $consonants = [
            'ক' => 'k', 'খ' => 'kh', 'গ' => 'g', 'ঘ' => 'gh', 'ঙ' => 'ng',
            'চ' => 'ch', 'ছ' => 'chh', 'জ' => 'j', 'ঝ' => 'jh', 'ঞ' => 'n',
            'ট' => 't', 'ঠ' => 'th', 'ড' => 'd', 'ঢ' => 'dh', 'ণ' => 'n',
            'ত' => 't', 'থ' => 'th', 'দ' => 'd', 'ধ' => 'dh', 'ন' => 'n',
            'প' => 'p', 'ফ' => 'ph', 'ব' => 'b', 'ভ' => 'bh', 'ম' => 'm',
            'য' => 'j', 'র' => 'r', 'ল' => 'l', 'শ' => 'sh', 'ষ' => 'sh',
            'স' => 's', 'হ' => 'h', 'ৎ' => 't',
        ];

        // Letters and marks emitted without an inherent "a" of their own.
        $specialChars = [
            "\u{09DC}" => 'r',  // RRA (precomposed dda + nukta)
            "\u{09DD}" => 'rh', // RHA (precomposed ddha + nukta)
            "\u{09DF}" => 'y',  // YYA (precomposed ya + nukta)
            "\u{0982}" => 'ng', // anusvara
            "\u{0983}" => 'h',  // visarga
            "\u{0981}" => '',   // chandrabindu (dropped)
        ];

        // Dependent vowel signs (kar).
        $vowelSigns = [
            'া' => 'a', 'ি' => 'i', 'ী' => 'i', 'ু' => 'u', 'ূ' => 'u',
            'ৃ' => 'ri', 'ে' => 'e', 'ৈ' => 'oi', 'ো' => 'o', 'ৌ' => 'ou',
        ];

        $numbers = [
            '০' => '0', '১' => '1', '২' => '2', '৩' => '3', '৪' => '4',
            '৫' => '5', '৬' => '6', '৭' => '7', '৮' => '8', '৯' => '9',
        ];

        // Everything that maps one-to-one with no look-ahead. The key sets are
        // disjoint, so the order of the union is irrelevant.
        $direct = $numbers + $vowels + $vowelSigns + $specialChars;

        $result = '';
        $total = count($chars);

        for ($i = 0; $i < $total; $i++) {
            $char = $chars[$i];

            if (isset($consonants[$char])) {
                $result .= $consonants[$char];
                $next = ($i + 1 < $total) ? $chars[$i + 1] : null;

                if ($next === $hasant) {
                    // Conjunct: no vowel; the following consonant is handled
                    // by the next iteration.
                    $i++;
                } elseif ($next !== null && isset($vowelSigns[$next])) {
                    // An explicit vowel sign replaces the inherent "a".
                    $result .= $vowelSigns[$next];
                    $i++;
                } elseif ($next === null || !isset($specialChars[$next])) {
                    $result .= 'a';
                }
                // NOTE(review): a consonant directly followed by a
                // $specialChars entry gets no "a" (so "রং" becomes "Rng").
                // Kept as-is to preserve existing output; confirm intent.
                continue;
            }

            if ($char === $hasant) {
                continue; // Stray hasant not attached to a consonant.
            }

            if (isset($direct[$char])) {
                $result .= $direct[$char];
                continue;
            }

            // Spaces, punctuation, Latin text, unmapped characters.
            $result .= $char;
        }

        // Collapse runs of the same vowel ("aa" -> "a").
        $result = preg_replace('/([aeiou])\1+/', '$1', $result);

        // Capitalise the first letter of each word.
        return ucwords(strtolower($result));
    }

    /**
     * Add a translation to the dictionary, replacing any existing entry
     * stored under the same key.
     *
     * @param string $bangla  Bangla key.
     * @param string $english English value.
     */
    public function addTranslation($bangla, $english)
    {
        $this->translations[$bangla] = $english;
    }

    /**
     * Get all dictionary entries.
     *
     * @return array<string, string> Bangla => English.
     */
    public function getTranslations()
    {
        return $this->translations;
    }
}