import { useEffect, useState } from 'react';
import { SegmentType } from '../../generated/api';
import { parseCSV } from '../../lib/fileparse';
import { AnalyzerTaskletEnum } from '../../queries/constants';
import {
  DataOrientation,
  SegmentWiseFileUrls,
  SegmentWiseFiles,
} from './context/DatasetRegistrationContext';

/** Segment types treated as parts of a split dataset: train, test and validate. */
const SPLIT_SEGMENT_TYPES = [SegmentType.Train, SegmentType.Test, SegmentType.Validate];

/**
 * Tells whether the given segment type is one of the split segment types
 * (train, test or validate).
 *
 * @param segmentType - Segment type to check.
 * @returns `true` when `segmentType` is listed in `SPLIT_SEGMENT_TYPES`.
 */
export const isSplitSegmentType = (segmentType: SegmentType) =>
  SPLIT_SEGMENT_TYPES.some(splitType => splitType === segmentType);

/**
 * Picks the segment files that apply to the given data orientation.
 *
 * For `DataOrientation.UNSPLIT_DATA` only the entries whose segment type is
 * `SegmentType.Unknown` are kept (in a new array). For any other orientation
 * the input array itself is returned unchanged.
 *
 * @param dataOrientation - Orientation selected for the dataset.
 * @param files - Segment-wise files or file urls to choose from.
 * @returns The applicable subset of `files`.
 */
export const getSegmentFilesForDataOrientation = (
  dataOrientation: DataOrientation,
  files: (SegmentWiseFileUrls | SegmentWiseFiles)[],
) =>
  dataOrientation === DataOrientation.UNSPLIT_DATA
    ? files.filter(({ segmentType }) => segmentType === SegmentType.Unknown)
    : files;

/**
 * React hook that parses every segment file with `parseCSV` and exposes the
 * parsed rows keyed by segment type.
 *
 * The hook returns `null` until the first parse has completed. Parsing is
 * re-run whenever `segmentFiles` or `delimiter` changes; results of a parse
 * that was started for older inputs (or that finishes after the component
 * unmounted) are discarded so they cannot overwrite newer state. If several
 * entries share a segment type, the last one in `segmentFiles` wins.
 *
 * Because the effect depends on the identity of `segmentFiles`, callers
 * should pass a referentially stable array to avoid re-parsing on every render.
 *
 * @param segmentFiles - Files to parse; each entry's `file` is handed to
 *   `parseCSV` and its `segmentType` becomes the key of the result.
 * @param delimiter - Delimiter forwarded to `parseCSV`; an empty string is
 *   passed when it is omitted.
 * @returns Parsed rows per segment type, or `null` while nothing has been
 *   parsed yet.
 */
export const useSegmentFileParser = (segmentFiles: SegmentWiseFiles[], delimiter?: string) => {
  const [segmentwiseData, setSegmentwiseData] = useState<Record<SegmentType, string[][]> | null>(
    null,
  );

  useEffect(() => {
    // Flipped by the cleanup below so that a late-resolving parse for
    // outdated inputs never writes into state.
    let isStale = false;

    const filePromises = segmentFiles.map(segmentFile =>
      parseCSV(segmentFile.file, delimiter ?? ''),
    );

    Promise.all(filePromises)
      .then((results: string[][][]) => {
        if (isStale) {
          return;
        }

        // `results` is index-aligned with `segmentFiles`.
        const dataBySegment = segmentFiles.reduce((acc, val, idx) => {
          acc[val.segmentType] = results[idx];
          return acc;
        }, {} as Record<SegmentType, string[][]>);

        setSegmentwiseData(dataBySegment);
      })
      .catch((error: unknown) => {
        // Handle the rejection explicitly instead of leaving it unhandled;
        // the previously parsed data (if any) is left in place.
        if (!isStale) {
          console.error('Failed to parse segment files', error);
        }
      });

    return () => {
      isStale = true;
    };
  }, [segmentFiles, delimiter]);

  return segmentwiseData;
};
/**
 * Derives a dataset name from a file url: the last path segment, cut off at
 * its first dot.
 *
 * @example
 * // returns 'hate_speech'
 * getProcessedDatasetName('s3://mskcustomer1/data/hatespeech/en-us/hate_speech.csv');
 *
 * @param url - Url (or path) of the dataset file.
 * @returns The part of the last path segment that precedes its first `.`;
 *   an empty string when the url ends with `/` or the segment starts with `.`.
 */
export const getProcessedDatasetName = (url: string) => {
  // Everything after the final '/' (the whole string when there is none).
  const baseName = url.slice(url.lastIndexOf('/') + 1);
  const firstDotIndex = baseName.indexOf('.');

  return firstDotIndex === -1 ? baseName : baseName.slice(0, firstDotIndex);
};

// Legend for the tasklet ids used in the map below:
// taWH1: NGram analysis
// taWH7: TFIDF Keyword Analysis
// taTopM1: NGram-LDA TopicModel
// BSML01: Baseline Model
// taQU01: Data quality score

/**
 * Tasklet ids grouped under onboarding category keys.
 *
 * TEXT_ANALYSIS and DATASET_ANALYSIS currently list the same three ids, but
 * as separate array instances.
 *
 * NOTE(review): the origin of the keys is not visible in this file —
 * presumably they mirror values collected during user onboarding; confirm
 * against the caller.
 */
export const taskletIdsFromOnboardingInfo = {
  TEXT_ANALYSIS: ['taWH1', 'taWH7', 'taTopM1'],
  MODEL_APPS: ['BSML01'],
  DATA_RELABELING: ['taQU01'],
  DATASET_ANALYSIS: ['taWH1', 'taWH7', 'taTopM1'],
};

/**
 * Intents a user can pick during dataset registration, as a string enum with
 * kebab-case values.
 *
 * BASIC_TEXT is intentionally left commented out (together with its entry in
 * the tasklet lookup further down in this file).
 */
export enum RegistrationUserIntent {
  BASIC_UNDERSTANDING = 'basic-understanding',
  LABEL_QUALITY = 'label-quality',
  CLUSTERING = 'clustering',
  TOPIC_MODELING = 'topic-modeling',
  // BASIC_TEXT = 'basic-text',
  KEYWORD_EXTRACTION = 'keyword-extraction',
  SENTIMENT_ANALYSIS = 'sentiment-analysis',
  PROFANITY_ANALYSIS = 'profanity-analysis',
  NER = 'ner',
  MODEL_APP = 'model-app',
}

/**
 * Analyzer tasklet ids associated with each registration intent.
 *
 * Note: these lists contain all possible tasklet ids across the different
 * data types. Readers of this map need to resolve which of them actually
 * apply to a given dataset.
 *
 * The `Record<RegistrationUserIntent, string[]>` annotation makes the
 * compiler require an entry for every enum member.
 */
export const taskletIdsFromIntent: Record<RegistrationUserIntent, string[]> = {
  // Intentionally empty for now.
  [RegistrationUserIntent.BASIC_UNDERSTANDING]: [], // Add once defaults are removed
  [RegistrationUserIntent.LABEL_QUALITY]: [AnalyzerTaskletEnum.DATA_QUALITY_SCORE],
  [RegistrationUserIntent.CLUSTERING]: [
    AnalyzerTaskletEnum.VECTOR_ANALYSIS_TEXT,
    AnalyzerTaskletEnum.VECTOR_ANALYSIS_MIXED_CAT,
  ],
  [RegistrationUserIntent.TOPIC_MODELING]: [
    AnalyzerTaskletEnum.NGRAM_LDA_TOPIC_MODEL,
    AnalyzerTaskletEnum.LDA_TOPIC_MODEL,
    AnalyzerTaskletEnum.LSI_TOPIC_MODEL,
    AnalyzerTaskletEnum.BERT_TOPIC_MODEL,
    AnalyzerTaskletEnum.TOPIC_MODEL_LDA_ALL,
  ],
  // Disabled together with the BASIC_TEXT enum member.
  // [RegistrationUserIntent.BASIC_TEXT]: [
  //   AnalyzerTaskletEnum.NGRAM,
  //   AnalyzerTaskletEnum.NOUN_VERB,
  //   AnalyzerTaskletEnum.SKIP_GRAM,
  // ],
  [RegistrationUserIntent.KEYWORD_EXTRACTION]: [
    AnalyzerTaskletEnum.TFIDF_KEYWORD,
    AnalyzerTaskletEnum.RAKE_KEYWORD,
    AnalyzerTaskletEnum.YAKE_KEYWORD,
    AnalyzerTaskletEnum.TEXT_RANK_KEYWORD,
    AnalyzerTaskletEnum.TOPIC_RANK_KEYWORD,
    AnalyzerTaskletEnum.PATTERN_RANK_KEYWORD,
  ],
  [RegistrationUserIntent.SENTIMENT_ANALYSIS]: [
    AnalyzerTaskletEnum.SENTIMENT,
    AnalyzerTaskletEnum.SENTIMENT_COMPOUND_VALENCE,
    AnalyzerTaskletEnum.ASPECT_SENTIMENT,
  ],
  [RegistrationUserIntent.PROFANITY_ANALYSIS]: [AnalyzerTaskletEnum.PROFANITY],
  [RegistrationUserIntent.NER]: [AnalyzerTaskletEnum.NER],
  [RegistrationUserIntent.MODEL_APP]: [AnalyzerTaskletEnum.BASELINE_MODEL],
};

/** Every registration intent except `RegistrationUserIntent.MODEL_APP`. */
export type InsightIntent = Exclude<RegistrationUserIntent, RegistrationUserIntent.MODEL_APP>;

/**
 * Type guard that narrows a registration intent to `InsightIntent`.
 *
 * @param intent - Intent to check.
 * @returns `true` for every intent other than `RegistrationUserIntent.MODEL_APP`.
 */
export const isInsightIntent = (intent: RegistrationUserIntent): intent is InsightIntent => {
  const isModelApp = intent === RegistrationUserIntent.MODEL_APP;
  return !isModelApp;
};

/** Descriptive metadata attached to an intent. */
export interface IntentDetails {
  /** Short heading for the intent. */
  title: string;
  /** Longer explanation of what the intent provides. */
  description: string;
  /** Source of the illustration image; the entries defined in this file use hosted SVG urls. */
  illustrationSrc: string;
}

/**
 * Title, description and illustration url for each insight intent.
 *
 * Keyed by `InsightIntent`, so `RegistrationUserIntent.MODEL_APP` has no entry
 * here, and the `Record` annotation makes the compiler require an entry for
 * every other intent.
 */
export const intentDetailsMap: Record<InsightIntent, IntentDetails> = {
  [RegistrationUserIntent.BASIC_UNDERSTANDING]: {
    title: 'Understand your data',
    description: "Get an overview of your data's class distribution, column profiling and more",
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/understand_data__4HPN5N5H.svg?updatedAt=1718036927416',
  },
  [RegistrationUserIntent.TOPIC_MODELING]: {
    title: 'Identify topics and themes',
    description: 'Discover underlying topics or themes through topic modeling',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/themes_topics_3MjMkL3cT.svg?updatedAt=1718037256303',
  },
  [RegistrationUserIntent.CLUSTERING]: {
    title: 'Visualize your data',
    description:
      'Identify complex relationships by plotting data points in 2D or 3D space based on their properties',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/visualize_data_kadBU_qMl.svg?updatedAt=1718037452050',
  },
  [RegistrationUserIntent.SENTIMENT_ANALYSIS]: {
    title: 'Identify sentiment',
    description: 'Determine the sentiment or emotional tone expressed in text',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/identify_sentiment_6j-JMzKLa.svg?updatedAt=1718037706099',
  },
  [RegistrationUserIntent.PROFANITY_ANALYSIS]: {
    title: 'Detect profanity',
    description: 'Detect and categorize offensive language in text data',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/detect_profanity_CXPqJ7cAZ.svg?updatedAt=1718037786223',
  },
  [RegistrationUserIntent.NER]: {
    title: 'Detect named entities',
    description: 'Identify significant named entities and frequent pairs in text',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/ner_FB3eGkhcn.svg?updatedAt=1718037857967',
  },
  [RegistrationUserIntent.LABEL_QUALITY]: {
    title: 'Assess label quality',
    description: 'Assess the quality of labeled data points',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/label_quality_szZIAXDYJ.svg?updatedAt=1718037963763',
  },
  [RegistrationUserIntent.KEYWORD_EXTRACTION]: {
    title: 'Identify keywords and phrases',
    description: 'Extract essential keywords and phrases from your text to understand its content',
    illustrationSrc:
      'https://ik.imagekit.io/markovml/homepage/keywords_phrases_-sA6438eV.svg?updatedAt=1718038032638',
  },
};
