import {
	stripOkurigana,
	tokenize,
	isKanji,
	isKana,
	isHiragana,
	isKatakana,
} from 'wanakana'
import zip from '~/utils/array-zip'

// Type aliases shared by the furigana helpers in this file.

// A `[furigana, text]` pair; furigana is '' when the text carries no annotation.
type FuriPair = [string, string]
// `[[start, end], furigana]`: a character range within the word and the
// furigana placed over it. `end` is exclusive (the range is used with `slice`).
type FuriLoc = [[number, number], string]
// Ordered list of furigana locations for a single word.
type FuriLocs = FuriLoc[]
// Furigana keyed by character index, e.g. { '1': 'せ', '2': 'じ' }.
type FuriObject = Record<string, string>

/**
 * Combines a word with its furigana into an array of `[furigana, text]` pairs.
 *
 * Resolution order:
 * 1. Kana-only words (or `word === reading`) need no furigana: `[['', word]]`.
 * 2. When there is no usable furi location data, when a single location covers
 *    an all-kanji word (義訓/熟字訓), or when the reading mixes hiragana and
 *    katakana, the pairs are derived from `word` and `reading` via `basicFuri`.
 * 3. Otherwise the pairs are built from the furi locations via `generatePairs`.
 *
 * @param word - Written form, e.g. `'お世辞'`.
 * @param reading - Kana reading of the whole word, e.g. `'おせじ'`.
 * @param furi - Furigana placement data: a `'1:せ;2:じ'` style string or an
 *   object keyed by character index (`{ '1': 'せ', '2': 'じ' }`).
 * @returns `[furigana, text]` pairs; furigana is `''` for unannotated text.
 * @example
 * combineFuri('お世辞', 'おせじ', '1:せ;2:じ')
 * // => [['', 'お'], ['せ', '世'], ['じ', '辞']]
 * combineFuri('大人しい', 'おとなしい') // smart fallbacks
 * // => [['おとな', '大人'], ['', 'しい']]
 * combineFuri('使い方', 'つかいかた') // smart fallbacks
 * // => [['つか', '使'], ['', 'い'], ['かた', '方']]
 *
 * // Special compound readings (義訓/熟字訓) are spread across relevant kanji
 * combineFuri('胡座', 'あぐら', '0:あぐら')
 * // => [['あぐら', '胡座']]
 */
export function combineFuri(
	word: string = '',
	reading: string = '',
	furi: string | FuriObject = '',
): FuriPair[] {
	const wordChars = [...word]

	// Nothing to annotate: the word is already its own reading.
	if (word === reading || wordChars.every(isKana)) {
		return [['', word]]
	}

	// Parse only when there is something to parse, so falsy furi values never
	// reach the parsers.
	const furiLocs: FuriLocs = furi ? parseFuri(furi) : []
	// An empty object is truthy but yields no locations; treating it as "furi
	// present" would produce an empty result and silently drop the word.
	const hasNoFuri = furiLocs.length === 0
	// 義訓/熟字訓 words with a single furi loc: 今日 "0:きょう"
	const isSpecialReading = furiLocs.length === 1 && wordChars.every(isKanji)
	// Readings mixing hiragana and katakana don't line up with the location
	// data, so they take the reading-based fallback as well.
	const readingChars = [...reading]
	const isWanikaniMadness =
		readingChars.some(isHiragana) && readingChars.some(isKatakana)

	if (hasNoFuri || isSpecialReading || isWanikaniMadness) {
		return basicFuri(word, reading)
	}

	return generatePairs(word, furiLocs)
}

/**
 * Displays simple furigana by removing redundant kana.
 *
 * Works from the word and its full reading only (no location data): kana that
 * lead or trail the word are split off as unannotated text, and the reading
 * that remains is distributed over the remaining word tokens.
 *
 * @param word - Written form, possibly mixing kanji and kana.
 * @param reading - Reading of the whole word.
 * @returns `[furigana, text]` pairs; furigana is `''` for text that needs none.
 * @example
 * basicFuri('お見舞い', 'おみまい')
 * // => [['', 'お'], ['みま', '見舞'], ['', 'い']]
 */
export function basicFuri(word: string = '', reading: string = ''): FuriPair[] {
	// Early return + guard against words like １日 which are tokenized unfavourably:
	// a word without a single kana char gets the whole reading as its furigana.
	if ([...word].every((c) => !isKana(c))) {
		return [[reading, word]]
	}

	// bikago: the start of the reading, as long as the prefix that
	//   stripOkurigana (leading: true) removes from the word.
	// okurigana: the end of the reading, i.e. whatever follows the part that
	//   stripOkurigana (matched against the word) keeps.
	const [bikago, okurigana] = [
		reading.slice(
			0,
			word.length -
				stripOkurigana(word, { leading: true, matchKanji: undefined }).length,
		),
		reading.slice(
			stripOkurigana(reading, { matchKanji: word, leading: false }).length,
		),
	]

	// Inner part of the word (bikago/okurigana removed), split into tokens.
	const innerWordTokens = tokenize(
		removeExtraneousKana(word, bikago, okurigana),
	)
	// Inner part of the reading; starts as a string and is replaced below by the
	// list of per-token readings.
	let innerReadingChars: string | string[] = removeExtraneousKana(
		reading,
		bikago,
		okurigana,
	)

	// One capture group per word token: kanji tokens capture any run of the
	// reading, every other token must appear literally in the reading.
	// NOTE(review): tokens are interpolated unescaped, so a token containing
	// regex metacharacters would change the pattern. Confirm that tokenize
	// cannot produce such tokens for the expected inputs.
	const kanjiOddKanaEvenRegex = new RegExp(
		innerWordTokens
			.map((char) => (isKanji(char as string) ? '(.*)' : `(${char})`))
			.join(''),
	)

	// Drop the full match (index 0) and keep only the captured groups; when the
	// pattern does not match, this leaves an empty list.
	const match = innerReadingChars.match(kanjiOddKanaEvenRegex) || []
	;[, ...innerReadingChars] = match

	// Pair each captured reading with its word token, presumably by index
	// (verify against the zip helper), then blank out readings that merely
	// repeat the token text.
	// @ts-expect-error (not sure)
	const ret = zip(innerReadingChars, innerWordTokens).map(
		// @ts-expect-error (not sure)
		skipRedundantReadings,
	)

	// Re-attach the leading kana as unannotated text.
	if (bikago) {
		ret.unshift(['', bikago])
	}

	// Re-attach the trailing kana as unannotated text.
	if (okurigana) {
		ret.push(['', okurigana])
	}

	return ret
}

/**
 * Strips a known prefix and a known suffix from a string.
 *
 * Both are matched literally. Building a `RegExp` from them would treat
 * characters such as `?`, `.` or `(` as pattern syntax, which either throws or
 * strips the wrong text.
 *
 * @param str - String to trim.
 * @param leading - Prefix to remove when `str` starts with it.
 * @param trailing - Suffix to remove when the remaining string ends with it.
 * @returns `str` without the matching prefix/suffix; unchanged where they do
 *   not match.
 */
function removeExtraneousKana(
	str: string = '',
	leading: string = '',
	trailing: string = '',
): string {
	let result = str

	if (leading && result.startsWith(leading)) {
		result = result.slice(leading.length)
	}

	// The suffix is checked against what is left after removing the prefix.
	if (trailing && result.endsWith(trailing)) {
		result = result.slice(0, result.length - trailing.length)
	}

	return result
}

/**
 * Blanks out a reading that adds nothing to its text.
 *
 * A pair keeps its furigana only when the furigana is non-empty and differs
 * from the text; otherwise the furigana slot becomes `''`. A missing text
 * falls back to `''`.
 */
function skipRedundantReadings(pair: FuriPair): FuriPair {
	const [furigana, text = ''] = pair

	if (furigana && furigana !== text) {
		return [furigana, text]
	}

	return ['', text]
}

/**
 * Parses furigana placement data into a list of `[[start, end], furigana]`
 * locations, dispatching on the input form: strings go to the string parser,
 * everything else to the object parser.
 */
export function parseFuri(data: string | FuriObject): FuriLocs {
	if (typeof data === 'string') {
		return parseFuriString(data)
	}

	return parseFuriObject(data)
}

/**
 * Converts a furigana placement object into location tuples.
 *
 * Each key is read as the index of a single character, so every location
 * spans exactly one character: `[key, key + 1]`.
 * @example
 * parseFuriObject({ '1': 'せ', '2': 'じ' })
 * // => [ [[1, 2], 'せ'], [[2, 3], 'じ'] ]
 */
function parseFuriObject(locations: FuriObject = {}): FuriLocs {
	const locs: FuriLocs = []

	for (const [key, furiText] of Object.entries(locations)) {
		const from = Number(key)
		locs.push([[from, from + 1], furiText])
	}

	return locs
}

/**
 * Parses a furigana placement string into location tuples.
 *
 * The string is a `;`-separated list of `index:furigana` or
 * `start-end:furigana` entries. Blank entries (an empty input, a trailing `;`)
 * are skipped instead of producing a bogus location at index 0.
 *
 * @param locations - Placement string, e.g. `'1:せ;2:じ'` or `'0-1:おとな'`.
 * @returns Locations as `[[start, end], furigana]` with `end` exclusive.
 * @example
 * parseFuriString('1:せ;2:じ')
 * // => [ [[1, 2], 'せ'], [[2, 3], 'じ'] ]
 * parseFuriString('0-1:おとな')
 * // => [ [[0, 2], 'おとな'] ]
 * parseFuriString('')
 * // => []
 */
function parseFuriString(locations: string = ''): FuriLocs {
	return locations
		.split(';')
		.filter((entry) => entry.trim() !== '')
		.map((entry): FuriLoc => {
			const [indexes = '', content = ''] = entry.split(':')
			const [startStr, endStr] = indexes.split('-')
			const start = Number(startStr)
			// NOTE: In the JMDict furistring data, the end index is either missing
			// or it is listed as the *start* index of the final char ¯\_(ツ)_/¯
			// So we need to bump it either way to encompass that char
			const end = endStr ? Number(endStr) + 1 : start + 1
			return [[start, end], content]
		})
}

/**
 * Generates `[furigana, text]` pairs from furigana location data.
 *
 * Walks the locations in order. Text between two locations, and any text after
 * the last one, is emitted with blank furigana. When no locations are given,
 * the whole word is returned as a single unannotated pair rather than being
 * dropped.
 *
 * @param word - Written form the locations index into.
 * @param furiLocs - Locations as `[[start, end], furigana]`, `end` exclusive,
 *   expected in ascending order.
 * @returns Pairs covering the word from left to right.
 * @example
 * generatePairs('お世辞', [ [[1, 2], 'せ'], [[2, 3], 'じ'] ])
 * // => [['', 'お'], ['せ', '世'], ['じ', '辞']]
 * generatePairs('食べる', [])
 * // => [['', '食べる']]
 */
export function generatePairs(
	word: string = '',
	furiLocs: FuriLocs = [],
): FuriPair[] {
	const pairs: FuriPair[] = []
	let prevCharEnd = 0

	for (const [[start, end], furiText] of furiLocs) {
		// Chars skipped since the previous location carry no furigana. Only a
		// real gap is emitted, so overlapping locations add no empty pair.
		if (start > prevCharEnd) {
			pairs.push(['', word.slice(prevCharEnd, start)])
		}

		// Add furigana and associated chars
		pairs.push([furiText, word.slice(start, end)])
		prevCharEnd = end
	}

	// Remaining chars/okurigana after the last location (or the entire word
	// when there were no locations) get blank furigana.
	if (prevCharEnd < word.length) {
		pairs.push(['', word.slice(prevCharEnd)])
	}

	return pairs
}
