/**
 * ElevenLabs API key, read from the `VITE_ELEVENLABS_API_KEY` environment
 * variable (for local development, define it in a `.env` file).
 *
 * The key must never be written into this file: any key that has been
 * committed to source control should be treated as leaked and revoked.
 *
 * NOTE(review): if this module ships to the browser, a `VITE_`-prefixed
 * variable is presumably still embedded in the client bundle — consider
 * routing ElevenLabs requests through a backend that holds the key.
 */
const ELEVENLABS_API_KEY: string = import.meta.env.VITE_ELEVENLABS_API_KEY;

// Fail fast at module load so a missing key is reported as a configuration
// problem rather than as an authentication error on the first request.
if (!ELEVENLABS_API_KEY) {
  throw new Error(
    'Missing VITE_ELEVENLABS_API_KEY environment variable. Please add it to your .env file.'
  );
}

/**
 * ElevenLabs voice identifiers used for the two podcast speakers:
 * one male voice ("Adam") and one female voice ("Bella").
 */
const VOICE_IDS: Record<'male' | 'female', string> = {
  male: 'pNInz6obpgDQGcFmaJgB',
  female: 'EXAVITQu4vr4xnSDxMaL',
};

/**
 * Builds one speech preset. Every preset shares the same stability,
 * similarity boost and style values; only the speaking rate differs.
 */
const buildSpeechPreset = (speakingRate: number) => ({
  stability: 0.5,
  similarity_boost: 0.75,
  style: 0.0,
  speaking_rate: speakingRate,
});

/**
 * Speech presets keyed by content length. `speaking_rate` is a relative pace
 * (1.0 being the reference) that is forwarded with each synthesis request.
 */
const SPEECH_SETTINGS = {
  // Faster for very short content (Instagram/YouTube Shorts)
  veryShort: buildSpeechPreset(1.2),
  // Slightly faster for short content (TikTok)
  short: buildSpeechPreset(1.1),
  // Normal pace
  medium: buildSpeechPreset(0.95),
  // Slower, more deliberate for long content
  long: buildSpeechPreset(0.8),
};

/**
 * Picks the speech preset for a duration bucket.
 *
 * '<1' uses the very short preset, '>1' the short one, '>8' and '>15' the
 * long one; any other value falls back to the medium preset.
 */
function pickSpeechSettings(duration: string) {
  switch (duration) {
    case '<1':
      return SPEECH_SETTINGS.veryShort; // Instagram/YouTube Shorts
    case '>1':
      return SPEECH_SETTINGS.short; // TikTok
    case '>8': // YouTube
    case '>15': // Podcast platforms
      return SPEECH_SETTINGS.long;
    default:
      return SPEECH_SETTINGS.medium;
  }
}

/**
 * Generates a two-speaker podcast as a single audio blob.
 *
 * Lines are interleaved by index: for each index, speaker 1's line is
 * synthesized with the male voice, then speaker 2's line with the female
 * voice. Missing or empty lines are skipped, so the two arrays may differ in
 * length. Requests are issued one at a time so the chunks are appended in
 * playback order.
 *
 * No silence is inserted between lines: the returned blob is the plain
 * concatenation of the synthesized chunks. Waiting between requests would
 * only delay generation without changing the audio, so no such delay is
 * applied here.
 *
 * @param speaker1Lines Lines spoken by the first (male-voice) speaker.
 * @param speaker2Lines Lines spoken by the second (female-voice) speaker.
 * @param duration Duration bucket ('<1', '>1', '>8', '>15'); selects the
 *   speech preset. For '>8' and '>15' each line is first passed through
 *   `addNaturalPauses`.
 * @returns A blob of type `audio/mpeg` containing every chunk in order.
 * @throws Rejects with whatever error `generateVoiceOver` rejects with.
 */
export async function generatePodcastAudio(
  speaker1Lines: string[],
  speaker2Lines: string[],
  duration: string = '>8'
): Promise<Blob> {
  const speechSettings = pickSpeechSettings(duration);
  // Only the long-form buckets get the extra text pre-processing.
  const isLongForm = duration === '>8' || duration === '>15';

  const chunks: Blob[] = [];
  const turnCount = Math.max(speaker1Lines.length, speaker2Lines.length);

  for (let i = 0; i < turnCount; i++) {
    const turn = [
      { line: speaker1Lines[i], voiceId: VOICE_IDS.male },
      { line: speaker2Lines[i], voiceId: VOICE_IDS.female },
    ];

    for (const { line, voiceId } of turn) {
      if (!line) {
        continue; // this speaker has nothing to say at this index
      }
      const text = isLongForm ? addNaturalPauses(line) : line;
      chunks.push(await generateVoiceOver(text, voiceId, speechSettings));
    }
  }

  return new Blob(chunks, { type: 'audio/mpeg' });
}

/**
 * Rewrites a line of dialogue so that longer content sounds less rushed.
 *
 * Three passes are applied in order:
 * 1. A comma is added after the word preceding each connector
 *    ("but", "however", "therefore", ...), unless punctuation is already there.
 * 2. Sentences (split on ". ") with more than 20 space-separated words get a
 *    comma attached to the word just before their midpoint.
 * 3. When there are more than two sentences, the middle one is wrapped in
 *    `<emphasis>` tags; with more than four, the one at the 75% position is
 *    wrapped as well.
 *
 * NOTE(review): the `<emphasis>` markup is sent to the TTS service as part of
 * the text — confirm the selected model actually interprets it.
 *
 * @param text The line to process.
 * @returns The processed line.
 */
function addNaturalPauses(text: string): string {
  const connectors = [
    'but',
    'however',
    'therefore',
    'additionally',
    'moreover',
    'furthermore',
    'in fact',
  ];

  // Connectors are applied one after another (not as a single alternation) so
  // that adjacent connectors such as "but however" each receive their comma.
  let result = text;
  for (const connector of connectors) {
    result = result.replace(new RegExp(`(\\w+)(\\s+${connector}\\s+)`, 'gi'), '$1,$2');
  }

  const longSentenceThreshold = 20; // words
  const sentences = result.split('. ').map((sentence) => {
    const words = sentence.split(' ');
    if (words.length <= longSentenceThreshold) {
      return sentence;
    }

    // Attach the comma to the word before the midpoint. Inserting it as a
    // separate word would render as "word , word".
    const midPoint = Math.floor(words.length / 2);
    const wordBeforeMid = words[midPoint - 1];
    if (wordBeforeMid && !/[,;:.!?]$/.test(wordBeforeMid)) {
      words[midPoint - 1] = `${wordBeforeMid},`;
    }
    return words.join(' ');
  });

  if (sentences.length > 2) {
    // Emphasize one of the middle sentences for variety.
    const middleIndex = Math.floor(sentences.length / 2);
    sentences[middleIndex] = `<emphasis>${sentences[middleIndex]}</emphasis>`;

    // For longer content, add a second emphasis point.
    if (sentences.length > 4) {
      const secondEmphasisIndex = Math.floor(sentences.length * 0.75);
      if (secondEmphasisIndex !== middleIndex) {
        sentences[secondEmphasisIndex] = `<emphasis>${sentences[secondEmphasisIndex]}</emphasis>`;
      }
    }
  }

  return sentences.join('. ');
}

/**
 * Synthesizes one piece of text with the given voice.
 *
 * Text of up to 5000 characters is sent in a single request. Longer text is
 * cut into chunks of roughly 4000 characters, each chunk is synthesized in
 * turn, and the resulting audio is concatenated in order.
 *
 * @param text The text to speak.
 * @param voiceId ElevenLabs voice identifier.
 * @param settings Voice settings forwarded unchanged to `callElevenLabsAPI`.
 * @returns The synthesized audio (type `audio/mpeg` when concatenated here).
 */
async function generateVoiceOver(text: string, voiceId: string, settings: any): Promise<Blob> {
  // Common case: short enough for a single request.
  if (!(text.length > 5000)) {
    return callElevenLabsAPI(text, voiceId, settings);
  }

  // Long text: synthesize piece by piece, strictly in order.
  const audioParts: Blob[] = [];
  for (const piece of splitTextIntoChunks(text, 4000)) {
    audioParts.push(await callElevenLabsAPI(piece, voiceId, settings));
  }

  return new Blob(audioParts, { type: 'audio/mpeg' });
}

/**
 * Splits text into chunks no longer than `maxChunkSize` characters.
 *
 * The text is split on ". " and whole sentences are packed greedily, so a
 * chunk normally ends at a sentence boundary. The period removed by the split
 * is restored on every sentence except the last one and counts towards the
 * size limit. A sentence that alone exceeds the limit is broken at spaces,
 * and a single word that exceeds the limit is cut into fixed-size slices, so
 * no returned chunk is ever longer than the limit.
 *
 * @param text The text to split.
 * @param maxChunkSize Maximum chunk length in characters (at least 1;
 *   fractional values are rounded down).
 * @returns The chunks in order; an empty array for empty text.
 * @throws RangeError if `maxChunkSize` is less than 1 or not a number.
 */
function splitTextIntoChunks(text: string, maxChunkSize: number): string[] {
  if (!(maxChunkSize >= 1)) {
    throw new RangeError(`maxChunkSize must be at least 1, got ${maxChunkSize}`);
  }
  const limit = Math.floor(maxChunkSize);

  const chunks: string[] = [];
  let current = '';

  // Appends a unit to the current chunk, starting a new chunk when it no
  // longer fits. Callers only pass units that are themselves within the limit.
  const pack = (unit: string): void => {
    const candidate = current ? `${current} ${unit}` : unit;
    if (candidate.length <= limit) {
      current = candidate;
      return;
    }
    if (current) {
      chunks.push(current);
    }
    current = unit;
  };

  const rawSentences = text.split('. ');
  const lastIndex = rawSentences.length - 1;

  for (const [index, raw] of rawSentences.entries()) {
    // Restore the period consumed by the split (the last piece keeps
    // whatever ending the text originally had).
    const sentence = index < lastIndex ? `${raw}.` : raw;

    if (sentence.length <= limit) {
      pack(sentence);
      continue;
    }

    // Oversized sentence: fall back to word boundaries.
    for (const word of sentence.split(' ')) {
      if (word.length <= limit) {
        pack(word);
        continue;
      }
      // Oversized word: hard-slice it. Every slice except the last is exactly
      // `limit` long, so slices of one word are never joined by a space.
      for (let start = 0; start < word.length; start += limit) {
        pack(word.slice(start, start + limit));
      }
    }
  }

  if (current) {
    chunks.push(current);
  }

  return chunks;
}

/** Voice tuning values read by `callElevenLabsAPI`. */
interface ElevenLabsSpeechSettings {
  stability: number;
  similarity_boost: number;
  style: number;
  /** Relative speaking pace; sent to the API as `voice_settings.speed`. */
  speaking_rate?: number;
}

/**
 * Sends one text-to-speech request to ElevenLabs and returns the audio.
 *
 * @param text The text to synthesize.
 * @param voiceId ElevenLabs voice identifier (inserted into the request URL).
 * @param settings Stability, similarity boost, style and optional speaking
 *   rate for this request.
 * @returns The response body as a blob.
 * @throws Error when the API responds with a non-2xx status; the response
 *   body is logged to the console before throwing.
 */
async function callElevenLabsAPI(
  text: string,
  voiceId: string,
  settings: ElevenLabsSpeechSettings
): Promise<Blob> {
  const response = await fetch(
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'xi-api-key': ELEVENLABS_API_KEY,
      },
      body: JSON.stringify({
        text,
        model_id: 'eleven_monolingual_v1',
        voice_settings: {
          stability: settings.stability,
          similarity_boost: settings.similarity_boost,
          style: settings.style,
          use_speaker_boost: true,
          // The pace belongs inside voice_settings as `speed`; a top-level
          // `speaking_rate` field is not a request parameter. When
          // speaking_rate is undefined, JSON.stringify omits the key.
          // NOTE(review): confirm the selected model honours `speed`.
          speed: settings.speaking_rate,
        },
      }),
    }
  );

  if (!response.ok) {
    // Log the response body for diagnosis; the thrown message only carries
    // the status line.
    const errorText = await response.text();
    console.error('ElevenLabs API error:', errorText);
    throw new Error(`Failed to generate audio: ${response.status} ${response.statusText}`);
  }

  return response.blob();
}