/*
 * Copyright (c) 2025-2026, Shannon Booth <shannon@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Utf16String.h>
#include <LibRegex/ECMAScriptRegex.h>
#include <LibURL/Pattern/Component.h>
#include <LibURL/Pattern/PatternParser.h>
#include <LibURL/Pattern/String.h>
#include <LibURL/URL.h>

namespace URL::Pattern {

// https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme
bool protocol_component_matches_a_special_scheme(Component const& protocol_component)
{
    // 1. Let special scheme list be a list populated with all of the special schemes.
    // 2. For each scheme of special scheme list:
    for (StringView scheme : special_schemes()) {
        // 1. Let test result be RegExpBuiltinExec(protocol component’s regular expression, scheme).
        auto test_result = protocol_component.matches(scheme);

        // 2. If test result is not null, then return true.
        if (test_result)
            return true;
    }

    // 3. Return false.
    return false;
}

// https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list
struct RegularExpressionAndNameList {
    String regular_expression;
    Vector<String> name_list;
};

static RegularExpressionAndNameList generate_a_regular_expression_and_name_list(Vector<Part> const& part_list, Options const& options)
{
    // 1. Let result be "^".
    StringBuilder result;
    result.append('^');

    // 2. Let name list be a new list.
    Vector<String> name_list;

    // 3. For each part of part list:
    for (auto const& part : part_list) {
        // 1. If part’s type is "fixed-text":
        if (part.type == Part::Type::FixedText) {
            // 1. If part’s modifier is "none", then append the result of running escape a regexp string given part’s
            //    value to the end of result.
            if (part.modifier == Part::Modifier::None) {
                result.append(escape_a_regexp_string(part.value));
            }
            // 2. Otherwise:
            else {
                // 1. Append "(?:" to the end of result.
                result.append("(?:"sv);

                // 2. Append the result of running escape a regexp string given part’s value to the end of result.
                result.append(escape_a_regexp_string(part.value));

                // 3. Append ")" to the end of result.
                result.append(')');

                // 4. Append the result of running convert a modifier to a string given part’s modifier to the end of result.
                result.append(Part::convert_modifier_to_string(part.modifier));
            }

            // 3. Continue.
            continue;
        }

        // 2. Assert: part’s name is not the empty string.
        VERIFY(!part.name.is_empty());

        // 3. Append part’s name to name list.
        name_list.append(part.name);

        // 4. Let regexp value be part’s value.
        auto regexp_value = part.value;

        // 5. If part’s type is "segment-wildcard", then set regexp value to the result of running generate a segment wildcard regexp given options.
        if (part.type == Part::Type::SegmentWildcard) {
            regexp_value = generate_a_segment_wildcard_regexp(options);
        }
        // 6. Otherwise if part’s type is "full-wildcard", then set regexp value to full wildcard regexp value.
        else if (part.type == Part::Type::FullWildcard) {
            regexp_value = MUST(String::from_utf8(full_wildcard_regexp_value));
        }

        // 7. If part’s prefix is the empty string and part’s suffix is the empty string:
        if (part.prefix.is_empty() && part.suffix.is_empty()) {
            // 1. If part’s modifier is "none" or "optional", then:
            if (part.modifier == Part::Modifier::None || part.modifier == Part::Modifier::Optional) {
                // 1. Append "(" to the end of result.
                result.append('(');

                // 2. Append regexp value to the end of result.
                result.append(regexp_value);

                // 3. Append ")" to the end of result.
                result.append(')');

                // 4. Append the result of running convert a modifier to a string given part’s modifier to the end of result.
                result.append(Part::convert_modifier_to_string(part.modifier));
            }
            // 2. Otherwise:
            else {
                // 1. Append "((?:" to the end of result.
                result.append("((?:"sv);

                // 2. Append regexp value to the end of result.
                result.append(regexp_value);

                // 3. Append ")" to the end of result.
                result.append(')');

                // 4. Append the result of running convert a modifier to a string given part’s modifier to the end of result.
                result.append(Part::convert_modifier_to_string(part.modifier));

                // 5. Append ")" to the end of result.
                result.append(')');
            }

            // 3. Continue.
            continue;
        }

        // 8. If part’s modifier is "none" or "optional":
        if (part.modifier == Part::Modifier::None || part.modifier == Part::Modifier::Optional) {
            // 1. Append "(?:" to the end of result.
            result.append("(?:"sv);

            // 2. Append the result of running escape a regexp string given part’s prefix to the end of result.
            result.append(escape_a_regexp_string(part.prefix));

            // 3. Append "(" to the end of result.
            result.append('(');

            // 4. Append regexp value to the end of result.
            result.append(regexp_value);

            // 5. Append ")" to the end of result.
            result.append(')');

            // 6. Append the result of running escape a regexp string given part’s suffix to the end of result.
            result.append(escape_a_regexp_string(part.suffix));

            // 7. Append ")" to the end of result.
            result.append(')');

            // 8. Append the result of running convert a modifier to a string given part’s modifier to the end of result.
            result.append(Part::convert_modifier_to_string(part.modifier));

            // 9. Continue.
            continue;
        }

        // 9. Assert: part’s modifier is "zero-or-more" or "one-or-more".
        VERIFY(part.modifier == Part::Modifier::ZeroOrMore || part.modifier == Part::Modifier::OneOrMore);

        // 10. Assert: part’s prefix is not the empty string or part’s suffix is not the empty string.
        VERIFY(!part.prefix.is_empty() || !part.suffix.is_empty());

        // 11. Append "(?:" to the end of result.
        result.append("(?:"sv);

        // 12. Append the result of running escape a regexp string given part’s prefix to the end of result.
        result.append(escape_a_regexp_string(part.prefix));

        // 13. Append "((?:" to the end of result.
        result.append("((?:"sv);

        // 14. Append regexp value to the end of result.
        result.append(regexp_value);

        // 15. Append ")(?:" to the end of result.
        result.append(")(?:"sv);

        // 16. Append the result of running escape a regexp string given part’s suffix to the end of result.
        result.append(escape_a_regexp_string(part.suffix));

        // 17. Append the result of running escape a regexp string given part’s prefix to the end of result.
        result.append(escape_a_regexp_string(part.prefix));

        // 18. Append "(?:" to the end of result.
        result.append("(?:"sv);

        // 19. Append regexp value to the end of result.
        result.append(regexp_value);

        // 20. Append "))*)" to the end of result.
        result.append("))*)"sv);

        // 21. Append the result of running escape a regexp string given part’s suffix to the end of result.
        result.append(escape_a_regexp_string(part.suffix));

        // 22. Append ")" to the end of result.
        result.append(')');

        // 23. If part’s modifier is "zero-or-more" then append "?" to the end of result.
        if (part.modifier == Part::Modifier::ZeroOrMore)
            result.append('?');
    }

    // 4. Append "$" to the end of result.
    result.append('$');

    // 5. Return (result, name list).
    return { result.to_string_without_validation(), move(name_list) };
}

// https://urlpattern.spec.whatwg.org/#compile-a-component
PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParser::EncodingCallback encoding_callback, Options const& options)
{
    // 1. Let part list be the result of running parse a pattern string given input, options, and encoding callback.
    auto part_list = TRY(PatternParser::parse(input, options, move(encoding_callback)));

    // 2. Let (regular expression string, name list) be the result of running generate a regular expression and name
    //    list given part list and options.
    auto [regular_expression_string, name_list] = generate_a_regular_expression_and_name_list(part_list, options);

    // 3. Let flags be an empty string.
    // NOTE: These flags match the flags for the empty string of the LibJS RegExp implementation.
    regex::ECMAScriptCompileFlags flags {};

    // 4. If options’s ignore case is true then set flags to "vi".
    if (options.ignore_case) {
        flags.unicode_sets = true;
        flags.ignore_case = true;
    }
    // 5. Otherwise set flags to "v"
    else {
        flags.unicode_sets = true;
    }

    // 6. Let regular expression be RegExpCreate(regular expression string, flags). If this throws an exception, catch
    //    it, and throw a TypeError.
    auto regex = regex::ECMAScriptRegex::compile(regular_expression_string.bytes_as_string_view(), flags);
    if (regex.is_error())
        return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex.release_error())) };

    // 7. Let pattern string be the result of running generate a pattern string given part list and options.
    auto pattern_string = generate_a_pattern_string(part_list, options);

    // 8. Let has regexp groups be false.
    bool has_regexp_groups = false;

    // 9. For each part of part list:
    for (auto const& part : part_list) {
        // 1. If part’s type is "regexp", then set has regexp groups to true.
        if (part.type == Part::Type::Regexp) {
            has_regexp_groups = true;
            break;
        }
    }

    // 10. Return a new component whose pattern string is pattern string, regular expression is regular expression,
    //     group name list is name list, and has regexp groups is has regexp groups.
    return Component {
        .pattern_string = move(pattern_string),
        .regular_expression = adopt_own(*new regex::ECMAScriptRegex(regex.release_value())),
        .group_name_list = move(name_list),
        .has_regexp_groups = has_regexp_groups,
    };
}

Component::ExecutionResult Component::execute(String const& input) const
{
    auto utf16_input = Utf16String::from_utf8(input);
    auto match_result = regular_expression->exec(utf16_input.utf16_view(), 0);
    if (match_result != regex::MatchResult::Match)
        return {};

    ExecutionResult result;
    result.success = true;
    result.captures.ensure_capacity(group_name_list.size());
    for (size_t index = 1; index <= group_name_list.size(); ++index) {
        auto start = regular_expression->capture_slot(index * 2);
        auto end = regular_expression->capture_slot(index * 2 + 1);
        if (start < 0 || end < 0) {
            result.captures.append({});
            continue;
        }

        auto capture = utf16_input.substring_view(static_cast<size_t>(start), static_cast<size_t>(end - start));
        result.captures.append(MUST(capture.to_utf8()));
    }

    return result;
}

bool Component::matches(StringView input) const
{
    auto utf16_input = Utf16String::from_utf8(input);
    return regular_expression->test(utf16_input.utf16_view(), 0) == regex::MatchResult::Match;
}

// https://urlpattern.spec.whatwg.org/#create-a-component-match-result
Component::Result Component::create_match_result(String const& input, ExecutionResult const& exec_result) const
{
    // 1. Let result be a new URLPatternComponentResult.
    Component::Result result;

    // 2. Set result["input"] to input.
    result.input = input;

    // 3. Let groups be a record<USVString, (USVString or undefined)>.
    OrderedHashMap<String, Variant<String, Empty>> groups;

    // 4. Let index be 1.
    // 5. While index is less than or equal to component’s group name list’s size:
    VERIFY(exec_result.captures.size() == group_name_list.size());
    for (size_t index = 1; index <= group_name_list.size(); ++index) {
        // 1. Let name be component’s group name list[index − 1].
        auto name = group_name_list[index - 1];

        // 2. Let value be Get(execResult, ToString(index)).
        // 3. Set groups[name] to value.
        auto const& capture = exec_result.captures[index - 1];
        if (!capture.has_value())
            groups.set(name, Empty {});
        else
            groups.set(name, *capture);

        // 4. Increment index by 1.
    }

    // 6. Set result["groups"] to groups.
    result.groups = move(groups);

    // 7. Return result.
    return result;
}

}
