import { useState, useEffect, useCallback, useRef } from 'react';

// Declare the vendor-prefixed Web Speech API constructor on `Window` so it can
// be referenced below without a type error.
declare global {
  interface Window {
    webkitSpeechRecognition: any;
  }
}

// Module-level singleton recognition instance, shared by every consumer of this
// module. It is `null` when the module is evaluated without a `window` global
// (e.g. server-side rendering or Node-based tests) or when the browser does not
// expose `webkitSpeechRecognition`. The `typeof` guard matters: touching an
// undeclared `window` identifier would otherwise throw at import time.
const recognition: any =
  typeof window !== 'undefined' && 'webkitSpeechRecognition' in window
    ? new window.webkitSpeechRecognition()
    : null;

if (recognition) {
  // Configure once: continuous recognition with interim results enabled.
  recognition.continuous = true;
  recognition.interimResults = true;
}

/** Callback invoked for every recognition result that the hook reports. */
type ResultCallback = (speechResult: SpeechResult) => void;

/** Minimal structural view of the fields this hook reads from a result event. */
type RecognitionResultEvent = {
  resultIndex: number;
  results: ArrayLike<ArrayLike<{ transcript: string }> & { isFinal: boolean }>;
};

/** Minimal structural view of the fields this hook reads from an error event. */
type RecognitionErrorEvent = {
  error?: string;
};

/** Delay before trying to restart recognition after a recoverable error. */
const RESTART_DELAY_MS = 1000;

/**
 * Error codes for which restarting with the same settings cannot succeed.
 * Retrying them would loop forever, so the hook stops listening instead.
 */
const FATAL_RECOGNITION_ERRORS: ReadonlySet<string> = new Set([
  'not-allowed',
  'service-not-allowed',
  'language-not-supported',
]);

/**
 * Returns the text appended to `prev` to obtain `current`, or an empty string
 * when `current` does not start with `prev`.
 */
const calculateDelta = (prev: string, current: string): string =>
  current.startsWith(prev) ? current.slice(prev.length) : '';

/**
 * React hook around the module-level speech recognition singleton.
 *
 * All hook instances share the same recognition object and overwrite its
 * event handlers, so only one instance should be listening at a time.
 *
 * @param onResult Optional callback invoked for each reported result. A
 *   callback installed later through `setOnResultCallback` (including `null`)
 *   takes precedence over this argument.
 * @returns The latest transcript, the listening flag, `startListening(lang)`,
 *   `stopListening()` and `setOnResultCallback(cb)`.
 */
const useSpeechRecognition = (onResult: ResultCallback | null = null) => {
  const [transcript, setTranscript] = useState('');
  const [isListening, setIsListening] = useState(false);
  const [lang, setLang] = useState('en-US');

  // Refs mirror state so the stable callbacks below always see current values
  // instead of the values captured when they were created.
  const isListeningRef = useRef(false);
  isListeningRef.current = isListening;
  const langRef = useRef(lang);
  langRef.current = lang;

  // Last transcript reported for the utterance currently in progress.
  const transcriptRef = useRef('');

  // Always points at the most recent `onResult` argument.
  const latestOnResultRef = useRef<ResultCallback | null>(onResult);
  latestOnResultRef.current = onResult;

  // `undefined` means setOnResultCallback was never called, in which case the
  // hook falls back to the `onResult` argument.
  const onResultCallbackRef = useRef<ResultCallback | null | undefined>(undefined);

  // Pending restart scheduled by the error handler, if any.
  const restartTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const setOnResultCallback = useCallback((cb: ResultCallback | null) => {
    onResultCallbackRef.current = cb;
  }, []);

  // Handle a recognition result.
  const handleResult = useCallback((event: RecognitionResultEvent) => {
    const result = event.results[event.resultIndex];
    const alternative = result?.[0];
    if (!result || !alternative) {
      return;
    }
    const transcript = alternative.transcript;
    const isFinal = result.isFinal;
    const prev = transcriptRef.current;

    console.log(`[speech], intermediate result : ${isFinal} ${transcript}`);
    // Ignore interim results that are not longer than the one already reported.
    if (!isFinal && transcript.length <= prev.length) {
      return;
    }

    const delta = calculateDelta(prev, transcript);
    // A final result closes the utterance, so the next one starts from scratch.
    transcriptRef.current = isFinal ? '' : transcript;
    setTranscript(transcript);

    const callback =
      onResultCallbackRef.current !== undefined
        ? onResultCallbackRef.current
        : latestOnResultRef.current;
    callback?.({ transcript, delta, isFinal });
  }, []);

  // Handle a recognition error.
  const handleError = useCallback((event: RecognitionErrorEvent) => {
    console.error('[speech] recognition error', event);
    if (!isListeningRef.current) {
      return;
    }

    if (event.error !== undefined && FATAL_RECOGNITION_ERRORS.has(event.error)) {
      // Retrying cannot help; reflect that recognition is no longer running.
      isListeningRef.current = false;
      setIsListening(false);
      return;
    }

    if (restartTimerRef.current !== null) {
      clearTimeout(restartTimerRef.current);
    }
    restartTimerRef.current = setTimeout(() => {
      restartTimerRef.current = null;
      // Re-check through the ref: the user may have stopped in the meantime.
      if (!isListeningRef.current || !recognition) {
        return;
      }
      try {
        recognition.start();
      } catch (error: unknown) {
        console.error('[speech] start recognition error', error);
      }
    }, RESTART_DELAY_MS);
  }, []);

  useEffect(() => {
    if (!recognition) {
      console.error('Speech recognition not supported');
      return undefined;
    }

    recognition.lang = langRef.current;

    // Bind event listeners.
    recognition.onresult = handleResult;
    recognition.onerror = handleError;
    recognition.onstart = () => {
      // console.log('[speech] recognition started');
    };
    recognition.onend = () => {
      // console.log('[speech] recognition ended');
    };

    recognition.stop();
    if (isListening) {
      // Drop any interim text left over from a session that was stopped
      // mid-utterance; otherwise it would suppress the first new results.
      transcriptRef.current = '';
      console.log(`[speech] start recognition, language: ${langRef.current}, isListening: ${isListening}`);
      try {
        recognition.start();
      } catch (error: unknown) {
        console.error(
          '[speech] start recognition error',
          error instanceof Error ? error.message : error,
        );
      }
    }

    // Cleanup: cancel any pending restart, stop, and detach the handlers.
    return () => {
      if (restartTimerRef.current !== null) {
        clearTimeout(restartTimerRef.current);
        restartTimerRef.current = null;
      }
      recognition.stop();
      recognition.onresult = null;
      recognition.onerror = null;
    };
  }, [isListening, handleResult, handleError]);

  const startListening = useCallback((lang: string) => {
    if (isListeningRef.current) {
      return;
    }
    // Update the refs synchronously so a second call in the same tick is a
    // no-op and the effect reads the new language.
    langRef.current = lang;
    setLang(lang);
    isListeningRef.current = true;
    setIsListening(true);
  }, []);

  const stopListening = useCallback(() => {
    if (isListeningRef.current) {
      setIsListening(false);
      isListeningRef.current = false;
    }
  }, []);

  return { transcript, isListening, startListening, stopListening, setOnResultCallback };
};

/**
 * Payload passed to the result callback of `useSpeechRecognition`.
 */
export type SpeechResult = {
  /** Transcript of the first alternative of the result being reported. */
  transcript: string;
  /**
   * Text appended since the previously reported transcript of the same
   * utterance; empty when the new transcript does not start with the
   * previous one.
   */
  delta: string;
  /** The `isFinal` flag of the underlying recognition result. */
  isFinal: boolean;
}

export default useSpeechRecognition;
