import type { DescribeImagesResponse } from '@/app/api/describe-images/route';
import type { TextExtractorErrorCallback } from '@/components/ExtractTextFromUpload/types';
import config from '@/config/frontend';
import type { Language } from '@magicschool/supabase/types';
import { checkIfFileHasValidMimeType } from '../files/util';
import { imageTextExtractor } from './tesseract';

/** Options accepted by `extractTextFromImage` to select how text is read from the image. */
export interface ExtractTextFromImageOptions {
  /**
   * Which extractor to run: `'tesseract'` uses the `imageTextExtractor` imported from `./tesseract`,
   * `'openai'` posts the image to the `/api/describe-images` endpoint.
   */
  extractionMethod: 'tesseract' | 'openai';
}

/**
 * Extracts text from an image file using the selected extraction method.
 *
 * Failures are never thrown to the caller: they are reported through `onError`
 * and the returned promise resolves with an empty string instead.
 *
 * @param file - Image to read; its MIME type must be listed in `config.textExtract.image.validMimeTypes`.
 * @param locale - Language passed through to the chosen extractor.
 * @param onError - Invoked with `'image_extract.error.invalid_file'` when the MIME type is rejected,
 *   or with `'image_extract.error.unknown_error'` when the extractor throws or rejects.
 * @param options - Extraction options; defaults to the `'tesseract'` method.
 * @returns The extracted text, or `''` after an error has been reported.
 */
export async function extractTextFromImage(
  file: File,
  locale: Language,
  onError: TextExtractorErrorCallback,
  { extractionMethod }: ExtractTextFromImageOptions = { extractionMethod: 'tesseract' },
): Promise<string> {
  // Reject unsupported file types before doing any extraction work.
  if (!checkIfFileHasValidMimeType(file, config.textExtract.image.validMimeTypes)) {
    onError('image_extract.error.invalid_file');
    return '';
  }

  try {
    // `return await` keeps extractor rejections inside this try block.
    switch (extractionMethod) {
      case 'openai':
        return await extractTextUsingOpenAI(file, locale);
      case 'tesseract':
        return await extractTextUsingTesseract(file, locale);
    }
  } catch {
    // Every extractor failure is collapsed into one generic error key.
    onError('image_extract.error.unknown_error');
    return '';
  }
}

/**
 * Reads a file with `FileReader` and resolves with its contents encoded as a data URL.
 *
 * The promise rejects when the read fails, is aborted, or does not produce a string,
 * so callers are never left awaiting a promise that cannot settle.
 *
 * @param file - File to read.
 * @returns A promise for the data URL string.
 */
function convertImageFileToDataURL(file: File) {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const { result } = reader;
      // Guard instead of casting: only a string result is a usable data URL.
      if (typeof result === 'string') {
        resolve(result);
      } else {
        reject(new Error('Failed to read image file as a data URL'));
      }
    };
    // Without these handlers a failed or aborted read would leave the promise pending forever.
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read image file'));
    reader.onabort = () => reject(new Error('Reading image file was aborted'));
    reader.readAsDataURL(file);
  });
}

/**
 * Posts one batch of images to `/api/describe-images` and returns the `description`
 * field of the JSON response.
 *
 * @param images - Images in the batch, sent as-is in the JSON body.
 * @param locale - Locale sent alongside the images.
 * @param startPage - Optional page number of the first image in the batch, forwarded to the API.
 * @param endPage - Optional page number of the last image in the batch, forwarded to the API.
 * @returns The description returned by the API.
 */
async function describeImageBatch(images: string[], locale: Language, startPage?: number, endPage?: number): Promise<string> {
  const payload = JSON.stringify({ images, locale, startPage, endPage });

  const response = await fetch<DescribeImagesResponse>('/api/describe-images', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  const { description } = await response.json();
  return description;
}

const MAX_BATCH_SIZE = 1.5 * 1024 * 1024; // 1.5MB, expressed in bytes

/**
 * Describes a list of images by sending them to the describe-images API in size-limited batches.
 *
 * Images are grouped in order so that each batch's combined size stays within MAX_BATCH_SIZE bytes.
 * This is done to avoid hitting Vercel request limits and to avoid timeouts. An image that exceeds
 * the limit on its own is still sent, alone in its batch. All batches are requested concurrently
 * and each one is given the 1-based, inclusive range of positions in `images` that it covers.
 *
 * @param images - Images as strings; each string's encoded byte size counts toward the batch limit.
 * @param locale - Locale forwarded with every batch request.
 * @returns The batch descriptions joined with `\n` in input order, or `''` when `images` is empty.
 */
export async function describeImages(images: string[], locale: Language): Promise<string> {
  const requests: Promise<string>[] = [];
  let batch: string[] = [];
  let batchSize = 0;
  // 1-based page number of the first image in the batch currently being filled.
  let startPage = 1;

  for (const [index, image] of images.entries()) {
    const imageSize = new Blob([image]).size;

    // Flush before adding an image that would overflow the batch. An empty batch is never
    // flushed, so a single oversized image does not trigger a request with no images in it.
    if (batch.length > 0 && batchSize + imageSize > MAX_BATCH_SIZE) {
      // `index` images precede this one, so the flushed batch ends on 1-based page `index`
      // and the next batch starts on page `index + 1`.
      requests.push(describeImageBatch(batch, locale, startPage, index));
      batch = [];
      batchSize = 0;
      startPage = index + 1;
    }

    batch.push(image);
    batchSize += imageSize;
  }

  // Send whatever is left over after the last flush.
  if (batch.length > 0) {
    requests.push(describeImageBatch(batch, locale, startPage, images.length));
  }

  return (await Promise.all(requests)).join('\n');
}

/**
 * Extracts text from an image file with the Tesseract-based `imageTextExtractor`.
 *
 * The file is converted to a data URL and loaded into an `Image` element; once the image has
 * loaded, the extractor is initialized for `locale` and run against it.
 *
 * @param file - Image file to read.
 * @param locale - Language used to initialize the extractor.
 * @returns A promise for the extracted text. It rejects if the file cannot be read, the image
 *   fails to load, or the extractor throws.
 */
function extractTextUsingTesseract(file: File, locale: Language) {
  return new Promise<string>((resolve, reject) => {
    const image = new Image();
    image.onload = async () => {
      try {
        await imageTextExtractor.initialize(locale);
        resolve(await imageTextExtractor.extract(image));
      } catch (e) {
        reject(e);
      }
    };
    // Without this handler an undecodable image would leave the promise pending forever.
    image.onerror = () => reject(new Error('Failed to load image for text extraction'));
    convertImageFileToDataURL(file)
      .then((dataUrl) => {
        // Assigning `src` starts the load that eventually fires `onload` or `onerror`.
        image.src = dataUrl;
      })
      .catch(reject);
  });
}

/**
 * Extracts text from an image file by converting it to a data URL and passing it,
 * as a single-image list, to `describeImages`.
 *
 * @param file - Image file to describe.
 * @param locale - Locale forwarded to `describeImages`.
 * @returns The description returned for the image.
 */
async function extractTextUsingOpenAI(file: File, locale: Language) {
  const images = [await convertImageFileToDataURL(file)];
  const description = await describeImages(images, locale);
  return description;
}
