import Papa from 'papaparse';
import { indexOf, slice, isEmpty, split, trim, map, sortBy, prop } from 'ramda';

// Column names applied to parsed rows when no header row is detected in the input.
const DEFAULT_DATA_KEYS = ['start_state', 'utterance', 'target_state'];
// Field separator: handed to Papa as the delimiter and used to split the header row.
const DELIMITER = '\t';
// Line separator used to find where the first row of the input ends.
const NEW_LINE_SYMBOL = '\n';

/**
 * Extracts the column names from the first row of a delimited text block.
 *
 * @param {string} str - Raw text whose first line holds the column names.
 * @returns {string[]} The first line's fields, each with surrounding whitespace removed.
 */
const parseDataKeys = (str) => {
	// Only the text before the first line break is treated as the header.
	const [headerRow] = split(NEW_LINE_SYMBOL, str);
	const rawKeys = split(DELIMITER, headerRow);
	// Trimming also drops a trailing '\r' left behind by CRLF line endings.
	return map(trim, rawKeys);
};

/**
 * Separates an optional header row from the first chunk of input.
 *
 * A header is recognised only when the FIRST row mentions `start_state`, so a
 * data row further down that happens to contain that text is not mistaken for
 * a header.
 *
 * @param {string} firstChunk - First chunk of raw text handed over by Papa.
 * @returns {{ dataKeys: string[], body: string }} Column names to use and the
 *   text that should actually be parsed as data rows.
 */
const stripHeaderRow = (firstChunk) => {
	const newLineIndex = indexOf(NEW_LINE_SYMBOL, firstChunk);
	const hasLineBreak = newLineIndex !== -1;
	const firstRow = hasLineBreak ? slice(0, newLineIndex, firstChunk) : firstChunk;

	if (!firstRow.includes('start_state')) {
		return { dataKeys: DEFAULT_DATA_KEYS, body: firstChunk };
	}

	return {
		dataKeys: parseDataKeys(firstRow),
		// A header with no line break after it means there are no data rows at all.
		body: hasLineBreak ? slice(newLineIndex + 1, Infinity, firstChunk) : '',
	};
};

/**
 * Parses delimited input with Papa after stripping an optional header row.
 *
 * @param {*} data - Input forwarded unchanged to `Papa.parse`.
 * @returns {Promise<{ errors: Array, data: Array, dataKeys: string[] }>} Papa's
 *   `errors` and `data`, plus the column names that apply to this input.
 *   Rejects if Papa invokes its `error` callback.
 */
const parseStrippedHeaders = (data) =>
	new Promise((resolve, reject) => {
		// Keys live in this closure (not at module level) so that overlapping
		// parse calls cannot overwrite each other's column names.
		let dataKeys = DEFAULT_DATA_KEYS;

		Papa.parse(data, {
			header: false,
			skipEmptyLines: true,
			delimiter: DELIMITER,
			complete: (results) =>
				resolve({ errors: results.errors, data: results.data, dataKeys }),
			// Without this a failed read would leave the promise pending forever.
			error: reject,
			// stripping headers
			beforeFirstChunk: (firstChunk) => {
				const stripped = stripHeaderRow(firstChunk);
				dataKeys = stripped.dataKeys;
				return stripped.body;
			},
		});
	});

/**
 * Converts one parsed row (array of fields) into an object keyed by column name.
 *
 * @param {Array} data - Field values of a single row, in column order.
 * @param {string[]} dataKeys - Column names, in the same order.
 * @returns {Object} One property per key; missing or empty fields become ''.
 */
const normalizeData = (data, dataKeys) => {
	const result = {};
	dataKeys.forEach((key, index) => {
		result[key] = data[index] || '';
	});
	return result;
};

/**
 * Parses tab-separated utterance-tester input into an array of row objects.
 *
 * @param {*} rawData - Raw input forwarded to `Papa.parse`.
 * @returns {Promise<Object[]>} One object per data row, keyed by the header row
 *   if one is present, otherwise by the default keys.
 * @throws {Error} When Papa reports parse errors; the message lists them
 *   sorted by row, one per line.
 */
const parseUtteranceTesterInputData = async (rawData) => {
	const { errors, data, dataKeys } = await parseStrippedHeaders(rawData);

	if (!isEmpty(errors)) {
		const errorMessage = sortBy(prop('row'), errors)
			.map((e) => `Row ${e.row + 1}: ${e.message}`)
			.join('\n');
		throw new Error(errorMessage);
	}

	return data.map((dataRow) => normalizeData(dataRow, dataKeys));
};

export default parseUtteranceTesterInputData;
