import * as React from 'react';
import styled from 'styled-components';
import escapeRegExp from 'lodash.escaperegexp';
import {useGlobalConfig} from '../context';
import {TextProps} from '@volkswagen-onehub/components-core';

// CSS declarations applied to every safe word. Safe words already have their
// spaces and hyphens swapped for non-breaking unicode variants, so automatic
// hyphenation and word breaking are turned off here. Soft hyphens contained in
// a safe word may still be used for explicit hyphenation.
const nonBreakingStyle = [
    '-webkit-hyphens: manual;',
    '-moz-hyphens: manual;',
    '-ms-hyphens: manual;',
    'hyphens: manual;',
    'word-break: normal;'
].join(' ');

// Inline <span> wrapper used for matched safe words when the text is rendered
// as React elements (not as an HTML string). Its only styling is the set of
// declarations in `nonBreakingStyle`.
export const StyledSafeWord = styled.span<TextProps>`
    ${nonBreakingStyle}
`;

// Wraps `content` in an `htmlTag` element that carries the non-breaking styles
// inline, and returns the result as an HTML string.
//
// This string variant is needed for HTML rendering: splitting the HTML string
// apart to place React elements in between would break the markup in some
// cases, e.g.
//   <span html="text<p>">, <span>safeword</span>, <span html="</p>" />
// cannot work, because the start and end tag of <p> would end up as children
// of different elements.
//
// `content` is inserted verbatim, so it has to be escaped by the caller.
const getHtmlString = (content: string, htmlTag: string) => {
    const openingTag = `<${htmlTag} style="${nonBreakingStyle}">`;
    const closingTag = `</${htmlTag}>`;

    return openingTag + content + closingTag;
};

// Regex source fragment matching unicode punctuation,
// generated via https://apps.timwhitlock.info/js/regex#
//
// The fragment intentionally has NO opening `[` and NO closing `]`: getRegExp
// embeds it as `[\\s>${UNICODE_PUNCTUATION_RANGE}]`, which supplies both.
// The `]|` inside the fragment therefore closes that first character class and
// continues with alternatives consisting of a high surrogate followed by a
// class of low surrogates; the last of those classes is closed by the `]` of
// the embedding pattern. (Presumably these alternatives cover punctuation
// outside the Basic Multilingual Plane -- verify against the generator.)
//
// Because this is a template literal, `\\]` produces the regex escape `\]`.
// Do not add brackets or reformat this string.
const UNICODE_PUNCTUATION_RANGE = `!-#%-*,-/:-;?-@[-\\]_{}¡«·»¿;·՚-՟։-֊־׀׃׆׳-״؉-؊،-؍؛؞-؟٪-٭۔܀-܍߷-߹।-॥॰෴๏๚-๛༄-༒༺-༽྅࿐-࿔၊-၏჻፡-፨᙭-᙮᚛-᚜᛫-᛭᜵-᜶។-៖៘-៚᠀-᠊᥄-᥅᧞-᧟᨞-᨟᭚-᭠᰻-᰿᱾-᱿‐-‧‰-⁃⁅-⁑⁓-⁞⁽-⁾₍-₎〈-〉❨-❵⟅-⟆⟦-⟯⦃-⦘⧘-⧛⧼-⧽⳹-⳼⳾-⳿⸀-⸮⸰、-〃〈-】〔-〟〰〽゠・꘍-꘏꙳꙾꡴-꡷꣎-꣏꤮-꤯꥟꩜-꩟﴾-﴿︐-︙︰-﹒﹔-﹡﹣﹨﹪-﹫！-＃％-＊，-／：-；？-＠［-］＿｛｝｟-･]|\ud800[\udd00-\udd01\udf9f\udfd0]|\ud802[\udd1f\udd3f\ude50-\ude58]|\ud809[\udc70-\udc73`;

// Builds the global regular expression used to find safewords in a text.
//
// wordsToReplace: the safewords exactly as they should be detected in text.
//
// Returns a RegExp with two capturing groups:
//   group 1: the delimiter in front of the safeword (whitespace, '>',
//            punctuation) or the empty string at the start of the text. It is
//            captured because lookbehind cannot be relied upon.
//   group 2: the matched safeword.
// The delimiter behind the safeword is only asserted by a lookahead, so it is
// not consumed and can serve as the leading delimiter of the next match.
//
// If no non-empty word is given, the returned expression never matches.
export const getRegExp = (wordsToReplace: string[]) => {
    const words = wordsToReplace
        // An empty word would add an empty alternative that matches at every
        // delimiter, so it is dropped. `filter` also yields a copy, which keeps
        // the sort below from mutating the caller's array.
        .filter(word => word.length > 0)
        // Regex alternation picks the first alternative that matches. Without
        // this ordering, a safeword that is a prefix of a longer safeword
        // (e.g. "Golf" vs. "Golf GTI") would always win and the longer one
        // could never be matched.
        .sort((a, b) => b.length - a.length);

    if (!words.length) {
        // An empty negative lookahead always fails: matches nothing.
        return /(?!)/g;
    }

    // important! escape regex, to not parse safewords as actual regex as that
    // could lead to unwanted behaviour, or just the regex being invalid and
    // throwing an exception
    const wordUnion = words.map(escapeRegExp).join('|');

    // We can't use regex word boundaries '\b' because they only work for
    // standard latin characters.
    //
    // '>' and '<' are added because in some cases the string can contain HTML.
    // This is not a clean solution: unknown HTML is not regular, so matching it
    // with a regular expression is inherently unreliable.
    const pattern = `([\\s>${UNICODE_PUNCTUATION_RANGE}]|^)(${wordUnion})(?=[\\s<${UNICODE_PUNCTUATION_RANGE}]|$)`;
    return new RegExp(pattern, 'g');
};

// Props of TextWithNonBreakingSafewords.
type Props = {
    // The text to render. It is split at the configured safewords.
    children: string;
    // When truthy, `children` is treated as an HTML string and injected through
    // `dangerouslySetInnerHTML`; the caller vouches that it is safe. When falsy
    // or omitted, `children` is rendered as plain text.
    __content_is_html_and_i_know_for_sure_it_is_safe_and_wont_cause_xss_vulnerability?: boolean;
};

// Builds an HTML string in which every safeword found in text content is
// replaced by its non-breaking variant, wrapped in an inline-styled element.
//
// text:         the original HTML string; returned untouched when no safewords
//               are configured.
// safewords:    maps a safeword as detected in the text to the word it should
//               be replaced with.
// contentArray: the text split into parts, where a part that is a key of
//               `safewords` is treated as a matched safeword. Concatenating all
//               parts is expected to reproduce the text.
// tagOverride:  element name of the wrapper, defaults to 'span'.
//
// Safewords located inside an HTML tag (for example in an attribute value such
// as title="Golf") are left untouched: injecting a wrapper element there would
// corrupt the surrounding markup.
export function getHtmlStringWithNonBreakingSafewords(
    text: string,
    safewords: Record<string, string>,
    contentArray: string[],
    tagOverride?: string
): string {
    if (!Object.keys(safewords).length) {
        return text;
    }

    const htmlTag = tagOverride || 'span';
    // Characters that turn a '<' into the start of markup (start tag, end tag,
    // comment/doctype, processing instruction). A '<' followed by anything else
    // is plain text.
    const markupStart = /[a-zA-Z\/!?]/;
    // Used to look at the character following a '<' even when it belongs to the
    // next part.
    const source = contentArray.join('');

    let result = '';
    let offset = 0;
    let insideTag = false;

    for (const part of contentArray) {
        const isSafeword = Object.prototype.hasOwnProperty.call(
            safewords,
            part
        );

        if (isSafeword && !insideTag) {
            result += getHtmlString(
                // making sure the safeword isn't parsed as html
                safewords[part]
                    .replace(/&/g, '&amp;')
                    .replace(/</g, '&lt;')
                    .replace(/>/g, '&gt;'),
                htmlTag
            );
        } else {
            result += part;

            // Only parts that are emitted verbatim can open or close a tag.
            for (let i = 0; i < part.length; i++) {
                const char = part[i];
                if (char === '>') {
                    insideTag = false;
                } else if (
                    char === '<' &&
                    markupStart.test(source.charAt(offset + i + 1))
                ) {
                    insideTag = true;
                }
            }
        }

        offset += part.length;
    }

    return result;
}

// Renders `children` with every configured safeword replaced by its
// non-breaking variant.
//
// - HTML content: the replacement happens on the string level and the result
//   is injected through `dangerouslySetInnerHTML`.
// - Plain text without configured safewords: the text is rendered unchanged.
// - Plain text otherwise: each safeword is rendered as a StyledSafeWord.
export function TextWithNonBreakingSafewords({
    children: text,
    __content_is_html_and_i_know_for_sure_it_is_safe_and_wont_cause_xss_vulnerability: isHTML
}: Props): JSX.Element {
    const globalConfig = useGlobalConfig();

    // Keys are the safewords as they should be detected in the text, values are
    // the words the matches are replaced with. The values use non-breaking
    // unicode variants and may include unicode-encoded soft hyphens for
    // explicit hyphenation.
    const safewords = React.useMemo(() => globalConfig.safeWords || {}, [
        globalConfig
    ]);

    const regex = React.useMemo(() => getRegExp(Object.keys(safewords)), [
        safewords
    ]);

    // Splitting with a regex that has capturing groups also puts the captured
    // groups into the result, e.g.
    // [
    //     'The <b',            (not matched)
    //     '>',                 (group 1, the delimiter in front of the safeword)
    //     'Volkswagen',    <== (group 2, the matched safeword)
    //     '</b> website.'      (not matched)
    // ];
    // The array does not tell which entries are safewords, but that is easy to
    // re-check by looking each entry up in `safewords`.
    const contentArray = React.useMemo(() => text.split(regex), [regex, text]);

    if (isHTML) {
        const html = getHtmlStringWithNonBreakingSafewords(
            text,
            safewords,
            contentArray
        );
        return <span dangerouslySetInnerHTML={{__html: html}} />;
    }

    if (Object.keys(safewords).length === 0) {
        return <>{text}</>;
    }

    const renderPart = (part: string, index: number) => {
        if (!Object.prototype.hasOwnProperty.call(safewords, part)) {
            return part;
        }
        return <StyledSafeWord key={index}>{safewords[part]}</StyledSafeWord>;
    };

    return <>{contentArray.map(renderPart)}</>;
}
