// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html

// utfiteratortest.cpp
// created: 2024aug12 Markus W. Scherer

#include <algorithm>
#include <array>
#include <iterator>
#include <forward_list>
#include <list>
#if defined(__cpp_lib_ranges)
#include <ranges>
#endif
#include <streambuf>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>

// Test header-only ICU C++ APIs. Do not use other ICU C++ APIs.
// Non-default configuration:
#define U_SHOW_CPLUSPLUS_API 0
// Default configuration:
// #define U_SHOW_CPLUSPLUS_HEADER_API 1

#include "unicode/utypes.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "unicode/utfiterator.h"
#include "intltest.h"

// Makes u"literal"sv std::u16string_view literals possible.
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;

using icu::header::UTFIterator;
using icu::header::utfIterator;
using icu::header::UTFStringCodePoints;
using icu::header::utfStringCodePoints;
using icu::header::UnsafeUTFIterator;
using icu::header::unsafeUTFIterator;
using icu::header::UnsafeUTFStringCodePoints;
using icu::header::unsafeUTFStringCodePoints;

namespace {

// Sentinel for NUL-terminated strings.
// The test iterators in this file compare equal to a Nul
// when the code unit they currently point at is 0.
class Nul {};

template<typename Unit>
class SinglePassIter;

// State shared by every SinglePassIter that reads from the same input.
// Modeled after https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
// except that the iterators are plain LegacyIterators (* and pre-increment ++)
// which do not offer post-increment.
template<typename Unit>
class SinglePassSource {
public:
    // Input with a known length: the limit is the end of the string view.
    explicit SinglePassSource(std::basic_string_view<Unit> text) :
            p{text.data()}, limit{text.data() + text.length()} {}
    // NUL-terminated input: there is no limit pointer.
    SinglePassSource(const Unit *text, const Nul &) :
            p{text}, limit{nullptr} {}

    // Iterator attached to this source; it shares the source's read position.
    SinglePassIter<Unit> begin() { return SinglePassIter<Unit>{*this}; }
    // Default-constructed limit iterator.
    SinglePassIter<Unit> end() { return SinglePassIter<Unit>{}; }

private:
    template<typename U>
    friend class SinglePassIter;

    const Unit *p;  // current read position, advanced by the iterators
    const Unit *limit;  // end of the input, or nullptr for NUL-terminated input
};

template<typename Unit>
class SinglePassIter {
public:
    typedef Unit value_type;
    typedef Unit &reference;
    typedef Unit *pointer;
    typedef std::ptrdiff_t difference_type;
    // This is a LegacyIterator but there is no specific category for that,
    // so we claim it to be a LegacyInputIterator. It *is* single-pass.
    typedef std::input_iterator_tag iterator_category;

    explicit SinglePassIter(SinglePassSource<Unit> &src) : src(&src) {}
    // limit sentinel
    SinglePassIter() : src(nullptr) {}

    // movable
    SinglePassIter(SinglePassIter &&src) noexcept = default;
    SinglePassIter &operator=(SinglePassIter &&src) noexcept = default;

    // not copyable
    SinglePassIter(const SinglePassIter &other) = delete;
    SinglePassIter &operator=(const SinglePassIter &other) = delete;

    bool operator==(const SinglePassIter &other) const {
        bool done = isDone();
        bool otherDone = other.isDone();
        return done ? otherDone : (!otherDone && src->p == other.src->p);
    }
    bool operator!=(const SinglePassIter &other) const { return !operator==(other); }

    // Asymmetric equality & nonequality with a sentinel type.
    friend bool operator==(const SinglePassIter &iter, const Nul &) {
        return iter.atNul();
    }
#if U_CPLUSPLUS_VERSION < 20
    // C++17: Need to define all four combinations of == / != vs. parameter order.
    // Once we require C++20, we could remove all but the first == because
    // the compiler would generate the rest.
    friend bool operator==(const Nul &, const SinglePassIter &iter) {
        return iter.atNul();
    }
    friend bool operator!=(const SinglePassIter &iter, const Nul &nul) { return !(iter == nul); }
    friend bool operator!=(const Nul &nul, const SinglePassIter &iter) { return !(iter == nul); }
#endif  // C++17

    Unit operator*() const { return *(src->p); }
    SinglePassIter &operator++() {  // pre-increment
        ++(src->p);
        return *this;
    }
    // *no* post-increment

private:
    bool isDone() const { return src == nullptr || src->p == src->limit; }
    bool atNul() const { return *src->p == 0; }

    SinglePassSource<Unit> *src;
};

// Minimal multi-pass forward iterator over an array of code units.
// Copies are independent: each one carries its own pointer.
template<typename Unit>
class FwdIter {
public:
    typedef Unit value_type;
    typedef Unit &reference;
    typedef Unit *pointer;
    typedef std::ptrdiff_t difference_type;
    // https://en.cppreference.com/w/cpp/named_req/ForwardIterator#Multi-pass_guarantee
    typedef std::forward_iterator_tag iterator_category;

    explicit FwdIter(const Unit *data) : p(data) {}
    // Default-constructed iterators hold a null pointer (see the member initializer),
    // so they can be compared with each other without reading an indeterminate value.
    FwdIter() = default;

    bool operator==(const FwdIter &other) const { return p == other.p; }
    bool operator!=(const FwdIter &other) const { return !operator==(other); }

    // Asymmetric equality & nonequality with a sentinel type:
    // the iterator equals Nul when it points at a zero code unit.
    friend bool operator==(const FwdIter &iter, const Nul &) {
        return *iter.p == 0;
    }
#if U_CPLUSPLUS_VERSION < 20
    // C++17: Need to define all four combinations of == / != vs. parameter order.
    // Once we require C++20, we could remove all but the first == because
    // the compiler would generate the rest.
    friend bool operator==(const Nul &, const FwdIter &iter) {
        return *iter.p == 0;
    }
    friend bool operator!=(const FwdIter &iter, const Nul &nul) { return !(iter == nul); }
    friend bool operator!=(const Nul &nul, const FwdIter &iter) { return !(iter == nul); }
#endif  // C++17

    // Returns the current code unit by value.
    Unit operator*() const { return *p; }
    FwdIter &operator++() {  // pre-increment
        ++p;
        return *this;
    }
    FwdIter operator++(int) {  // post-increment
        FwdIter result(*this);
        ++p;
        return result;
    }

private:
    // Initialized so that `FwdIter it;` (default-initialization) is well-defined too,
    // not only value-initialization.
    const Unit *p = nullptr;
};

// Splits a '|'-delimited string view into its parts, in order.
// The returned views refer to the same storage as `s`.
// Empty parts between two adjacent delimiters are kept,
// but an empty remainder is not: an empty input yields no parts,
// and a trailing '|' does not add an empty last part.
template<typename StringView>
std::vector<StringView> split(StringView s) {
    using Unit = typename StringView::value_type;
    std::vector<StringView> result;
    while (!s.empty()) {
        auto pos = s.find(static_cast<Unit>(u'|'));
        if (pos == StringView::npos) { break; }
        result.push_back(s.substr(0, pos));
        s.remove_prefix(pos + 1);
    }
    // Whatever follows the last delimiter is the final part, unless nothing is left.
    if (!s.empty()) { result.push_back(s); }
    return result;
}

// Concatenates the parts, in order and without any separator, into one owned string.
template<typename Unit>
std::basic_string<Unit> join(std::vector<std::basic_string_view<Unit>> parts) {
    std::basic_string<Unit> joined;
    for (auto it = parts.cbegin(); it != parts.cend(); ++it) {
        joined += *it;
    }
    return joined;
}

// Builds a byte string from int values, so that test data can be written
// without casting each byte value to char or uint8_t or similar.
std::string string8FromBytes(const int bytes[], size_t length) {
    std::string result;
    const int *const limit = bytes + length;
    for (const int *b = bytes; b != limit; ++b) {
        result.push_back(static_cast<char>(*b));
    }
    return result;
}

// Returns a copy of x with its elements in reverse order.
// x is taken by value, so the caller's object is left untouched.
template<typename T>
T reverseCopy(T x) {
    std::reverse(x.begin(), x.end());
    return x;
}

// Test mode passed as a template argument to the test drivers.
// Use SAFE when we don't care about ILL_FORMED vs. WELL_FORMED.
enum TestMode { SAFE, ILL_FORMED, WELL_FORMED, UNSAFE };
// Kind of code unit iterator under test.
// NOTE(review): presumably single-pass input, forward, and contiguous; not used in the visible code.
enum IterType { INPUT, FWD, CONTIG };

// Use this don't-care behavior value for unsafe iterators that do not use the behavior tparam.
constexpr auto ANY_B = UTF_BEHAVIOR_FFFD;

// Test data for one code unit type.
// NOTE(review): presumably str is the whole string, parts are its subsequences
// (one per code point), and codePoints are the expected results; confirm against the users.
template<typename Unit>
struct ImplTest {
    std::basic_string<Unit> str;
    std::vector<std::basic_string<Unit>> parts;
    std::u32string codePoints;

    // Returns test data for iterating backward:
    // the parts and the code points are in reverse order, and str is left empty.
    ImplTest reverseParts() const {
        // Reversing the string itself would not work:
        // the code units of each subsequence have to stay in their original order.
        // The reversed string could be built by joining the reversed parts,
        // but so far nothing needs it, hence the empty first member.
        return ImplTest{{}, reverseCopy(parts), reverseCopy(codePoints)};
    }
};

}  // namespace

class UTFIteratorTest : public IntlTest {
public:
    // IntlTest entry point: registers every test method of this class via TESTCASE_AUTO.
    // The order of the TESTCASE_AUTO lines is the order of registration.
    // The C++20 ranges tests are registered only when the standard library
    // reports a sufficiently recent __cpp_lib_ranges (see the comment further down).
    void runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) override {
        if (exec) { logln("TestSuite UTFIteratorTest: "); }
        TESTCASE_AUTO_BEGIN;

        TESTCASE_AUTO(testSafe16Good);
        TESTCASE_AUTO(testSafe16Negative);
        TESTCASE_AUTO(testSafe16FFFD);
        TESTCASE_AUTO(testSafe16Surrogate);
        TESTCASE_AUTO(testUnsafe16);

        TESTCASE_AUTO(testSafe16SinglePassIterGood);
        TESTCASE_AUTO(testSafe16SinglePassIterNegative);
        TESTCASE_AUTO(testUnsafe16SinglePassIter);
        TESTCASE_AUTO(testSafe16SinglePassIterNulGood);

        TESTCASE_AUTO(testSafe16FwdIter);
        TESTCASE_AUTO(testUnsafe16FwdIter);

        TESTCASE_AUTO(testSafe8Good);
        TESTCASE_AUTO(testSafe8Negative);
        TESTCASE_AUTO(testSafe8FFFD);
        TESTCASE_AUTO(testUnsafe8);

        TESTCASE_AUTO(testSafe8SinglePassIterGood);
        TESTCASE_AUTO(testSafe8SinglePassIterFFFD);
        TESTCASE_AUTO(testUnsafe8SinglePassIter);
        TESTCASE_AUTO(testUnsafe8SinglePassIterNul);

        TESTCASE_AUTO(testSafe8FwdIter);
        TESTCASE_AUTO(testUnsafe8FwdIter);
        TESTCASE_AUTO(testSafe8FwdIterNul);

        TESTCASE_AUTO(testSafe32Good);
        TESTCASE_AUTO(testSafe32Negative);
        TESTCASE_AUTO(testSafe32FFFD);
        TESTCASE_AUTO(testSafe32Surrogate);
        TESTCASE_AUTO(testUnsafe32);

        TESTCASE_AUTO(testSafe32SinglePassIterGood);
        TESTCASE_AUTO(testSafe32SinglePassIterSurrogate);
        TESTCASE_AUTO(testUnsafe32SinglePassIter);

        TESTCASE_AUTO(testSafe32FwdIter);
        TESTCASE_AUTO(testUnsafe32FwdIter);
        TESTCASE_AUTO(testUnsafe32FwdIterNul);

        TESTCASE_AUTO(testSafe16LongLinearContig);
        TESTCASE_AUTO(testSafe8LongLinearContig);
        TESTCASE_AUTO(testSafe32LongLinearContig);

        TESTCASE_AUTO(testUnsafe16LongLinearContig);
        TESTCASE_AUTO(testUnsafe8LongLinearContig);
        TESTCASE_AUTO(testUnsafe32LongLinearContig);

        TESTCASE_AUTO(testSafe16LongLinearInput);
        TESTCASE_AUTO(testSafe8LongLinearInput);
        TESTCASE_AUTO(testSafe32LongLinearInput);

        TESTCASE_AUTO(testUnsafe16LongLinearInput);
        TESTCASE_AUTO(testUnsafe8LongLinearInput);
        TESTCASE_AUTO(testUnsafe32LongLinearInput);

        TESTCASE_AUTO(testSafe16LongLinearFwd);
        TESTCASE_AUTO(testSafe8LongLinearFwd);
        TESTCASE_AUTO(testSafe32LongLinearFwd);

        TESTCASE_AUTO(testUnsafe16LongLinearFwd);
        TESTCASE_AUTO(testUnsafe8LongLinearFwd);
        TESTCASE_AUTO(testUnsafe32LongLinearFwd);

        TESTCASE_AUTO(testSafe16LongBackward);
        TESTCASE_AUTO(testSafe8LongBackward);
        TESTCASE_AUTO(testSafe32LongBackward);

        TESTCASE_AUTO(testUnsafe16LongBackward);
        TESTCASE_AUTO(testUnsafe8LongBackward);
        TESTCASE_AUTO(testUnsafe32LongBackward);

        TESTCASE_AUTO(testSafe16LongReverse);
        TESTCASE_AUTO(testSafe8LongReverse);
        TESTCASE_AUTO(testSafe32LongReverse);

        TESTCASE_AUTO(testUnsafe16LongReverse);
        TESTCASE_AUTO(testUnsafe8LongReverse);
        TESTCASE_AUTO(testUnsafe32LongReverse);

        TESTCASE_AUTO(testSafe16Zigzag);
        TESTCASE_AUTO(testSafe8Zigzag);
        TESTCASE_AUTO(testSafe32Zigzag);

        TESTCASE_AUTO(testUnsafe16Zigzag);
        TESTCASE_AUTO(testUnsafe8Zigzag);
        TESTCASE_AUTO(testUnsafe32Zigzag);

        TESTCASE_AUTO(testSafe16ZigzagReverse);
        TESTCASE_AUTO(testSafe8ZigzagReverse);
        TESTCASE_AUTO(testSafe32ZigzagReverse);

        TESTCASE_AUTO(testUnsafe16ZigzagReverse);
        TESTCASE_AUTO(testUnsafe8ZigzagReverse);
        TESTCASE_AUTO(testUnsafe32ZigzagReverse);

        TESTCASE_AUTO(testOwnership);
        TESTCASE_AUTO(testCPDefaultConstructors);

        TESTCASE_AUTO(testIteratorCompatibility);

        // C++20 ranges with all 2021 defect reports.  There is no separate
        // feature test macro value for https://wg21.link/P2210R2, but 2021'10
        // gets us https://wg21.link/P2415R2 as well as
        // https://wg21.link/P2325R3, and in practice implementations that have
        // both also have P2210R2, see
        // https://en.cppreference.com/w/cpp/compiler_support.html.
        // 2021'06, which guarantees P2325R3, is not good enough: GCC 11.3 and
        // MSVC 19.30 have P2325R3 but not P2210R2 (we need GCC 12 and MSVC
        // 19.31).
#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
        TESTCASE_AUTO(testUncommonInputRange);
        TESTCASE_AUTO(testUncommonForwardRange);
        TESTCASE_AUTO(testUncommonBidirectionalRange);
        TESTCASE_AUTO(testCommonForwardRange);
        TESTCASE_AUTO(testCommonBidirectionalRange);
        TESTCASE_AUTO(testCommonContiguousRange);
#endif

        TESTCASE_AUTO(testAllCodePoints);
        TESTCASE_AUTO(testAllScalarValues);

        TESTCASE_AUTO_END;
    }

    // Returns the substitute code point value for an ill-formed subsequence `part`,
    // selected by the compile-time `behavior`:
    // - UTF_BEHAVIOR_NEGATIVE: U_SENTINEL
    // - UTF_BEHAVIOR_FFFD: U+FFFD
    // - UTF_BEHAVIOR_SURROGATE: the first code unit of `part` if it is a surrogate,
    //   otherwise U+FFFD
    // The result is converted to the code point type CP32.
    template<typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    CP32 sub(StringView part) {
        switch (behavior) {
            case UTF_BEHAVIOR_NEGATIVE: return U_SENTINEL;
            case UTF_BEHAVIOR_FFFD: return 0xfffd;
            case UTF_BEHAVIOR_SURROGATE: {
                auto c = part[0];
                return U_IS_SURROGATE(c) ? static_cast<char32_t>(c) : 0xfffd;
            }
        }
        // Not reached for the behaviors handled above.
        // Without this, a value outside those cases would flow off the end of a
        // non-void function (undefined behavior, and a -Wreturn-type warning).
        return 0xfffd;
    }

    // Declarations of the templated test drivers; they are defined elsewhere in this file.
    // Common template parameters: `mode` is the TestMode, CP32 is the code point type,
    // and `behavior` selects the handling of ill-formed input (see sub()).
    // NOTE(review): `piped` is presumably a string whose '|' characters separate
    // the parts (see split()); confirm in the definitions.

    // Bidirectional iteration, starting from a piped string.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    void testBidiIter(StringView piped);

    // Bidirectional iteration over `range`, given the string and its parts.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
             typename StringView, typename CodePoints>
    void testBidiIter(StringView sv, const std::vector<StringView> &parts, CodePoints range);

    // Single-pass iteration, starting from a piped string.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    void testSinglePassIter(StringView piped);

    // Single-pass iteration with a Nul sentinel, judging by the name.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    void testSinglePassIterNul(StringView piped);

    // Single-pass iteration from `iter` up to `rangeLimit`, which may be a sentinel type.
    // The iterator is taken by reference.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
             typename StringView, typename Iter, typename Sentinel>
    void testSinglePassIter(const std::vector<StringView> &parts,
                            Iter &iter, const Sentinel &rangeLimit);

    // Forward iteration, starting from a piped string.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    void testFwdIter(StringView piped);

    // Forward iteration with a Nul sentinel, judging by the name.
    template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
    void testFwdIterNul(StringView piped);

    // Forward iteration from `iter` up to `rangeLimit`, which may be a sentinel type.
    template<TestMode mode, typename StringView,
             typename LimitIter, typename Iter, typename Sentinel>
    void testFwdIter(const std::vector<StringView> &parts, LimitIter goodLimit,
                     Iter iter, Sentinel rangeLimit);

    // Well-formed test strings; the '|' characters separate the parts.
    // good8Chars is only declared here; its definition is outside this class body.
    static constexpr std::u16string_view good16{u"a|b|ç|カ|🚴"};
    static const char *good8Chars;
    static constexpr std::u32string_view good32{U"a|b|ç|カ|🚴"};

    // UTF-16 with ill-formed parts: an unpaired lead surrogate (0xd900)
    // and an unpaired trail surrogate (0xdc05).
    static constexpr char16_t badChars16[] = {
        u'a', u'|', 0xd900, u'|', u'ç', u'|', 0xdc05, u'|', u"🚴"[0], u"🚴"[1]
    };
    static constexpr std::u16string_view bad16{badChars16, std::size(badChars16)};

    // UTF-8 with ill-formed parts: the truncated sequences E0 A0 and F4 8F BF.
    // Stored as int values and converted with string8FromBytes().
    static constexpr int badChars8[] = {
        u8'a', u8'|', 0xe0, 0xa0, u8'|', u8"ç"[0], u8"ç"[1], u8'|',
        0xf4, 0x8f, 0xbf, u8'|', u8"🚴"[0], u8"🚴"[1], u8"🚴"[2], u8"🚴"[3]
    };

    // UTF-32 with ill-formed parts: a surrogate code point (0xd900)
    // and a value above U+10FFFF (0x110000).
    static constexpr char32_t badChars32[] = {
        U'a', U'|', 0xd900, U'|', U'ç', U'|', 0x110000, U'|', U'🚴'
    };
    static constexpr std::u32string_view bad32{badChars32, std::size(badChars32)};

    // UTF-16 tests: each one instantiates a test driver with a test mode,
    // a code point type, an ill-formed behavior, and the good16 or bad16 test string.

    // Bidirectional iteration.
    void testSafe16Good() {
        testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
    }
    void testSafe16Negative() {
        testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad16);
    }
    void testSafe16FFFD() {
        testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(bad16);
    }
    void testSafe16Surrogate() {
        testBidiIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad16);
    }
    void testUnsafe16() {
        testBidiIter<UNSAFE, UChar32, ANY_B>(good16);
    }

    // Single-pass iteration.
    void testSafe16SinglePassIterGood() {
        testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
    }
    void testSafe16SinglePassIterNegative() {
        testSinglePassIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad16);
    }
    void testUnsafe16SinglePassIter() {
        testSinglePassIter<UNSAFE, UChar32, ANY_B>(good16);
    }
    void testSafe16SinglePassIterNulGood() {
        testSinglePassIterNul<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
    }

    // Forward iteration.
    void testSafe16FwdIter() {
        testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
    }
    void testUnsafe16FwdIter() {
        testFwdIter<UNSAFE, UChar32, ANY_B>(good16);
    }

    // UTF-8 tests: same pattern as the UTF-16 tests.
    // Well-formed input comes from good8Chars; ill-formed input is built from badChars8.
    // The std::string temporary returned by string8FromBytes() lives until the end of
    // the full expression, so the string_view passed to the driver is valid during the call.

    // Bidirectional iteration.
    void testSafe8Good() {
        testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(std::string_view{good8Chars});
    }
    void testSafe8Negative() {
        testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(
                std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
    }
    void testSafe8FFFD() {
        testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(
                std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
    }
    void testUnsafe8() {
        testBidiIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
    }

    // Single-pass iteration.
    void testSafe8SinglePassIterGood() {
        testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(
            std::string_view{good8Chars});
    }
    void testSafe8SinglePassIterFFFD() {
        testSinglePassIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(
            std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
    }
    void testUnsafe8SinglePassIter() {
        testSinglePassIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
    }
    void testUnsafe8SinglePassIterNul() {
        testSinglePassIterNul<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
    }

    // Forward iteration.
    void testSafe8FwdIter() {
        testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(std::string_view{good8Chars});
    }
    void testUnsafe8FwdIter() {
        testFwdIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
    }
    void testSafe8FwdIterNul() {
        testFwdIterNul<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(std::string_view{good8Chars});
    }

    // UTF-32 tests: same pattern as the UTF-16 tests, with the good32 or bad32 test string.

    // Bidirectional iteration.
    void testSafe32Good() {
        testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
    }
    void testSafe32Negative() {
        testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad32);
    }
    void testSafe32FFFD() {
        testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(bad32);
    }
    void testSafe32Surrogate() {
        testBidiIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad32);
    }
    void testUnsafe32() {
        testBidiIter<UNSAFE, UChar32, ANY_B>(good32);
    }

    // Single-pass iteration.
    void testSafe32SinglePassIterGood() {
        testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
    }
    void testSafe32SinglePassIterSurrogate() {
        testSinglePassIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad32);
    }
    void testUnsafe32SinglePassIter() {
        testSinglePassIter<UNSAFE, UChar32, ANY_B>(good32);
    }

    // Forward iteration.
    void testSafe32FwdIter() {
        testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
    }
    void testUnsafe32FwdIter() {
        testFwdIter<UNSAFE, UChar32, ANY_B>(good32);
    }
    void testUnsafe32FwdIterNul() {
        testFwdIterNul<UNSAFE, UChar32, ANY_B>(good32);
    }

#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10 // See above for the value.
    // Tests code point iteration over an input-only range with a sentinel:
    // UTF-16 code units parsed as hex numbers from a string stream.
    // Checks the range concepts of the resulting code point ranges at compile time,
    // and at run time the decoded code points and that find_if() locates U+122A9.
    // Covers both the safe (UTF_BEHAVIOR_FFFD) and the unsafe API.
    void testUncommonInputRange() {
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        constexpr char source[] = "D808 DC2D D808 DEBA D808 DE40 200B D808 DF60 D808 DEA9";
        // Reads a sequence of space-separated hex code units from a stream.
        // The returned view refers to the stream, which must outlive it;
        // the callers below pass a temporary that lives until the end of the full expression.
        auto streamCodeUnits = [](std::stringstream&& s) {
            return std::views::istream<uint16_t>(s >> std::hex);
        };
        using CodeUnitRange = decltype(streamCodeUnits(std::stringstream(source)));
        // This range has a sentinel.
        static_assert(!std::ranges::common_range<CodeUnitRange>);
        static_assert(std::ranges::input_range<CodeUnitRange>);
        static_assert(!std::ranges::forward_range<CodeUnitRange>);
        {
            // Safe iteration.
            using CodePoints =
                decltype(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(streamCodeUnits({})));
            static_assert(!std::ranges::common_range<CodePoints>);
            static_assert(std::ranges::input_range<CodePoints>);
            static_assert(!std::ranges::forward_range<CodePoints>);
            static_assert(!std::ranges::borrowed_range<CodePoints>);
            assertTrue("uncommon input streamed range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(
                                              streamCodeUnits(std::stringstream(source))) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒀭𒊺𒉀\u200B𒍠𒊩")));
            // The searched code point is in the input, so the result must not be the sentinel.
            assertFalse("uncommon input streamed range: find",
                        std::ranges::find_if(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(
                                                 streamCodeUnits(std::stringstream(source)))
                                                 .begin(),
                                             std::default_sentinel, [](auto u) {
                                                 return u.codePoint() == U'𒊩';
                                             }) == std::default_sentinel);
        }
        {
            // Unsafe iteration: same checks.
            using UnsafeCodePoints = decltype(unsafeUTFStringCodePoints<char32_t>(streamCodeUnits({})));
            static_assert(!std::ranges::common_range<UnsafeCodePoints>);
            static_assert(std::ranges::input_range<UnsafeCodePoints>);
            static_assert(!std::ranges::forward_range<UnsafeCodePoints>);
            static_assert(!std::ranges::borrowed_range<UnsafeCodePoints>);
            assertTrue("unsafe uncommon input streamed range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(
                                              streamCodeUnits(std::stringstream(source))) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒀭𒊺𒉀\u200B𒍠𒊩")));
            assertFalse("unsafe uncommon input streamed range: find",
                        std::ranges::find_if(unsafeUTFStringCodePoints<char32_t>(
                                                 streamCodeUnits(std::stringstream(source)))
                                                 .begin(),
                                             std::default_sentinel, [](auto u) {
                                                 return u.codePoint() == U'𒊩';
                                             }) == std::default_sentinel);
        }
    }

    // Tests code point iteration over a forward-only range with a sentinel:
    // the third line of a UTF-16 text, obtained via lazy_split and drop.
    // Checks the range concepts at compile time and the decoded code points at run time,
    // for both the safe (UTF_BEHAVIOR_FFFD) and the unsafe API.
    void testUncommonForwardRange() {
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        const std::u16string text = u"𒌉 𒂍𒁾𒁀𒀀 𒌓 𒌌𒆷𒀀𒀭 𒈨𒂠 𒉌𒁺𒉈𒂗\n"
                                        u"𒂍𒁾𒁀𒀀𒂠 𒉌𒁺𒉈𒂗\n"
                                        u"𒂍𒁾𒁀𒀀 𒀀𒈾𒀀𒀭 𒉌𒀝\n"
                                        u"𒁾𒈬 𒉌𒋃 𒃻𒅗𒁺𒈬 𒉌𒅥\n";
        // The lines of `text` from the third one onward.
        auto lines3sqq = text | std::ranges::views::lazy_split(u'\n') | std::views::drop(2);
        // Code units from the third line in `text`.
        auto codeUnits = *lines3sqq.begin();
        using CodeUnitRange = decltype(codeUnits);
        // This range has a sentinel.
        static_assert(!std::ranges::common_range<CodeUnitRange>);
        // Even though `text` is contiguous, the lazy split makes this forward-only.
        static_assert(std::ranges::forward_range<CodeUnitRange>);
        static_assert(!std::ranges::bidirectional_range<CodeUnitRange>);
        {
            // Safe iteration.
            using CodePoints = decltype(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits));
            static_assert(!std::ranges::common_range<CodePoints>);
            static_assert(std::ranges::forward_range<CodePoints>);
            static_assert(!std::ranges::bidirectional_range<CodePoints>);
            static_assert(!std::ranges::borrowed_range<CodePoints>);
            assertTrue("uncommon forward lazily split range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒂍𒁾𒁀𒀀 𒀀𒈾𒀀𒀭 𒉌𒀝")));
        }
        {
            // Unsafe iteration: same checks.
            using UnsafeCodePoints = decltype(unsafeUTFStringCodePoints<char32_t>(codeUnits));
            static_assert(!std::ranges::common_range<UnsafeCodePoints>);
            static_assert(std::ranges::forward_range<UnsafeCodePoints>);
            static_assert(!std::ranges::bidirectional_range<UnsafeCodePoints>);
            static_assert(!std::ranges::borrowed_range<UnsafeCodePoints>);
            assertTrue("unsafe uncommon forward lazily split range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒂍𒁾𒁀𒀀 𒀀𒈾𒀀𒀭 𒉌𒀝")));
        }
    }

    // Tests code point iteration over a contiguous range with a sentinel:
    // the UTF-8 code units of a string up to (excluding) the first '#', via take_while.
    // Checks that the code point ranges are bidirectional but not random-access,
    // and compares the decoded code points in forward and in reverse order,
    // for both the safe (UTF_BEHAVIOR_FFFD) and the unsafe API.
    void testUncommonBidirectionalRange() {
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        const std::u8string text = u8"𒀭𒊺𒉀 𒍠𒊩  # ᵈnisaba za₃-mim";
        // Code units before the #.
        auto codeUnits = text | std::ranges::views::take_while([](char8_t c) { return c != u8'#'; });
        using CodeUnitRange = decltype(codeUnits);
        // This range has a sentinel.
        static_assert(!std::ranges::common_range<CodeUnitRange>);
        static_assert(std::ranges::contiguous_range<CodeUnitRange>);
        {
            // Safe iteration.
            using CodePoints = decltype(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits));
            static_assert(!std::ranges::common_range<CodePoints>);
            static_assert(std::ranges::bidirectional_range<CodePoints>);
            static_assert(!std::ranges::random_access_range<CodePoints>);
            static_assert(!std::ranges::borrowed_range<CodePoints>);
            assertTrue("uncommon bidirectional prefix range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒀭𒊺𒉀 𒍠𒊩  ")));
            assertTrue("reversed uncommon bidirectional prefix range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::reverse |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"  𒊩𒍠 𒉀𒊺𒀭")));
        }
        {
            // Unsafe iteration: same checks.
            using UnsafeCodePoints = decltype(unsafeUTFStringCodePoints<char32_t>(codeUnits));
            static_assert(!std::ranges::common_range<UnsafeCodePoints>);
            static_assert(std::ranges::bidirectional_range<UnsafeCodePoints>);
            static_assert(!std::ranges::random_access_range<UnsafeCodePoints>);
            static_assert(!std::ranges::borrowed_range<UnsafeCodePoints>);
            assertTrue("unsafe uncommon bidirectional prefix range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒀭𒊺𒉀 𒍠𒊩  ")));
            assertTrue("reversed unsafe uncommon bidirectional prefix range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::reverse |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"  𒊩𒍠 𒉀𒊺𒀭")));
        }
    }

    // Tests code point iteration over a forward-only common range (no sentinel):
    // the UTF-16 code units of the words in a forward_list, concatenated via join.
    // Checks the range concepts at compile time and the decoded code points at run time,
    // for both the safe (UTF_BEHAVIOR_FFFD) and the unsafe API.
    void testCommonForwardRange() {
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        const std::forward_list<std::u16string_view> words{u"𒌉",
                                                           u"𒂍𒁾𒁀𒀀",
                                                           u"𒌓",
                                                           u"𒌌𒆷𒀀𒀭",
                                                           u"𒈨𒂠",
                                                           u"𒉌𒁺𒉈𒂗"};
        // Code units from the concatenated words.
        auto codeUnits = words | std::ranges::views::join;
        using CodeUnitRange = decltype(codeUnits);
        // This range does not have a sentinel.
        static_assert(std::ranges::common_range<CodeUnitRange>);
        // Forward-only because we started from a forward list.
        static_assert(std::ranges::forward_range<CodeUnitRange>);
        static_assert(!std::ranges::bidirectional_range<CodeUnitRange>);
        {
            // Safe iteration.
            using CodePoints = decltype(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits));
            static_assert(std::ranges::common_range<CodePoints>);
            static_assert(std::ranges::forward_range<CodePoints>);
            static_assert(!std::ranges::bidirectional_range<CodePoints>);
            static_assert(!std::ranges::borrowed_range<CodePoints>);
            assertTrue("common forward concatenated range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒌉𒂍𒁾𒁀𒀀𒌓𒌌𒆷𒀀𒀭𒈨𒂠𒉌𒁺𒉈𒂗")));
        }
        {
            // Unsafe iteration: same checks.
            using UnsafeCodePoints = decltype(unsafeUTFStringCodePoints<char32_t>(codeUnits));
            static_assert(std::ranges::common_range<UnsafeCodePoints>);
            static_assert(std::ranges::forward_range<UnsafeCodePoints>);
            static_assert(!std::ranges::bidirectional_range<UnsafeCodePoints>);
            static_assert(!std::ranges::borrowed_range<UnsafeCodePoints>);
            assertTrue("unsafe common forward concatenated range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒌉𒂍𒁾𒁀𒀀𒌓𒌌𒆷𒀀𒀭𒈨𒂠𒉌𒁺𒉈𒂗")));
        }
    }

    // Verifies code point ranges built on top of a common, bidirectional, non-contiguous
    // code unit range (a filter view over a std::u8string): the resulting ranges must
    // themselves be common and bidirectional, must not be borrowed ranges, and must yield
    // the expected code point both forward and reversed.
    // Covers the validating (UTF_BEHAVIOR_FFFD) and the unsafe range factories.
    void testCommonBidirectionalRange() {
        // Projection from a code unit sequence object to its decoded code point.
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        const std::u8string card{0xF0, 0x92, 0xFF, 0x89, 0xFF, 0xFF, 0xAD};
        // Read code units from `card`, skipping any bytes set to FF.
        // What remains is the byte sequence F0 92 89 AD.
        auto codeUnits = card | std::ranges::views::filter([](char8_t c) { return c != 0xFF; });
        using CodeUnitRange = decltype(codeUnits);
        // This range does not have a sentinel.
        static_assert(std::ranges::common_range<CodeUnitRange>);
        // Bidirectional but not contiguous (there are holes where the bytes are FF).
        static_assert(std::ranges::bidirectional_range<CodeUnitRange>);
        static_assert(!std::ranges::contiguous_range<CodeUnitRange>);
        {
            // Validating code point range over the filtered code units.
            using CodePoints = decltype(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits));
            static_assert(std::ranges::common_range<CodePoints>);
            static_assert(std::ranges::forward_range<CodePoints>);
            static_assert(std::ranges::bidirectional_range<CodePoints>);
            static_assert(!std::ranges::borrowed_range<CodePoints>);
            assertTrue("common bidirectional filtered range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒉭")));
            // Reversing requires the code point range to be bidirectional.
            assertTrue("reversed common bidirectional filtered range",
                       std::ranges::equal(utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits) |
                                              std::ranges::views::reverse |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒉭")));
            // The factory is used directly as a pipeline stage here;
            // only compiled when the feature-test conditions below are satisfied.
#if U_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 && __cpp_lib_bind_back >= 2022'02
            assertTrue("reversed common bidirectional filtered range: one big pipeline",
                       std::ranges::equal(
                           card
                           | std::ranges::views::filter([](char8_t c) { return c != 0xFF; })
                           | utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>
                           | std::ranges::views::reverse
                           | std::ranges::views::transform(codePoint),
                           std::u32string_view(U"𒉭")));
#endif
        }
        {
            // Same checks with the non-validating (unsafe) code point range.
            using UnsafeCodePoints = decltype(unsafeUTFStringCodePoints<char32_t>(codeUnits));
            static_assert(std::ranges::common_range<UnsafeCodePoints>);
            static_assert(std::ranges::forward_range<UnsafeCodePoints>);
            static_assert(std::ranges::bidirectional_range<UnsafeCodePoints>);
            static_assert(!std::ranges::borrowed_range<UnsafeCodePoints>);
            assertTrue("unsafe common bidirectional filtered range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒉭")));
            assertTrue("reversed unsafe common bidirectional filtered range",
                       std::ranges::equal(unsafeUTFStringCodePoints<char32_t>(codeUnits) |
                                              std::ranges::views::reverse |
                                              std::ranges::views::transform(codePoint),
                                          std::u32string_view(U"𒉭")));
            // Pipeline form of the unsafe factory, under the same feature-test conditions.
#if U_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 && __cpp_lib_bind_back >= 2022'02
            assertTrue("reversed unsafe common bidirectional filtered range: one big pipeline",
                       std::ranges::equal(
                           card
                           | std::ranges::views::filter([](char8_t c) { return c != 0xFF; })
                           | unsafeUTFStringCodePoints<char32_t>
                           | std::ranges::views::reverse
                           | std::ranges::views::transform(codePoint),
                           std::u32string_view(U"𒉭")));
#endif
        }
    }

    // Verifies code point ranges over a contiguous std::u16string, both when the string
    // is passed as an lvalue (wrapped in a ref_view, so the range is a borrowed range)
    // and when it is passed as a temporary (wrapped in an owning_view, not borrowed).
    // Each variant is checked for the validating and the unsafe factory.
    void testCommonContiguousRange() {
        // Projection from a code unit sequence object to its decoded code point.
        auto codePoint = [](const auto &codeUnits) { return codeUnits.codePoint(); };
        const std::u16string codeUnits = u"𒀭𒊺𒉀​𒍠𒊩";
        static_assert(std::ranges::contiguous_range<decltype(codeUnits)>);
        // Lvalue string, validating: the range refers to `codeUnits` via ref_view.
        auto codePoints = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(codeUnits);
        static_assert(std::is_same_v<decltype(codePoints),
                                     UTFStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD,
                                                         std::ranges::ref_view<const std::u16string>>>);
        static_assert(std::ranges::common_range<decltype(codePoints)>);
        static_assert(std::ranges::bidirectional_range<decltype(codePoints)>);
        // Code points are variable-length in UTF-16, so no random access.
        static_assert(!std::ranges::random_access_range<decltype(codePoints)>);
        static_assert(std::ranges::borrowed_range<decltype(codePoints)>);
        // Lvalue string, unsafe: same expectations.
        auto unsafeCodePoints = unsafeUTFStringCodePoints<char32_t>(codeUnits);
        static_assert(std::is_same_v<
                      decltype(unsafeCodePoints),
                      UnsafeUTFStringCodePoints<char32_t, std::ranges::ref_view<const std::u16string>>>);
        static_assert(std::ranges::common_range<decltype(unsafeCodePoints)>);
        static_assert(std::ranges::bidirectional_range<decltype(unsafeCodePoints)>);
        static_assert(!std::ranges::random_access_range<decltype(unsafeCodePoints)>);
        static_assert(std::ranges::borrowed_range<decltype(unsafeCodePoints)>);
        assertTrue("safe contiguous range by reference",
                   std::ranges::equal(codePoints | std::ranges::views::transform(codePoint),
                                      std::u32string_view(U"𒀭𒊺𒉀​𒍠𒊩")));
        assertTrue("unsafe contiguous range by reference",
                   std::ranges::equal(unsafeCodePoints | std::ranges::views::transform(codePoint),
                                      std::u32string_view(U"𒀭𒊺𒉀​𒍠𒊩")));
        // Temporary string, validating: the range takes ownership via owning_view.
        auto ownedCodePoints =
            utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(std::u16string(u"𒀭𒊺𒉀​𒍠𒊩"));
        static_assert(std::is_same_v<
                      decltype(ownedCodePoints),
                           UTFStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD,
                                                         std::ranges::owning_view<std::u16string>>>);
        static_assert(!std::ranges::borrowed_range<decltype(ownedCodePoints)>);
        // Temporary string, unsafe.
        auto unsafeOwnedCodePoints =
            unsafeUTFStringCodePoints<char32_t>(std::u16string(u"𒀭𒊺𒉀​𒍠𒊩"));
        static_assert(std::is_same_v<
                      decltype(unsafeOwnedCodePoints),
                      UnsafeUTFStringCodePoints<char32_t, std::ranges::owning_view<std::u16string>>>);
        static_assert(!std::ranges::borrowed_range<decltype(unsafeOwnedCodePoints)>);
        assertTrue("safe owned contiguous range",
                   std::ranges::equal(ownedCodePoints | std::ranges::views::transform(codePoint),
                                      std::u32string_view(U"𒀭𒊺𒉀​𒍠𒊩")));
        assertTrue("unsafe owned contiguous range",
                   std::ranges::equal(unsafeOwnedCodePoints | std::ranges::views::transform(codePoint),
                                      std::u32string_view(U"𒀭𒊺𒉀​𒍠𒊩")));
    }
#endif

    // implementation code coverage ---------------------------------------- ***

    // Called at the start of every long-string test below before the test data is used.
    // NOTE(review): defined outside this view; presumably populates the
    // longGood*/longBad* members -- confirm against the definition.
    void initLong();

    // Checks one code unit sequence object (`units`, as obtained by dereferencing a
    // code point iterator) against the expected code units `part` and the expected
    // code point `expectedCP`.
    // NOTE(review): defined outside this view; which properties are compared for each
    // mode/behavior/type combination must be confirmed against the definition.
    template<TestMode mode, UTFIllFormedBehavior behavior,
             IterType type, typename Unit, typename Units>
    void checkUnits(const Units &units, std::basic_string_view<Unit> part, UChar32 expectedCP);

    // Walks [begin, end) with pre-increment and checks the i-th code unit sequence
    // against test.parts[i] and test.codePoints[i].
    template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Iter>
    void testLongLinear(const ImplTest<Unit> &test, Iter begin, Iter end) {
        size_t index = 0;
        while (begin != end) {
            checkUnits<mode, behavior, type, Unit>(
                *begin, test.parts[index], test.codePoints[index]);
            ++index;
            ++begin;
        }
    }

    // Linear forward iteration over a code point range created directly from test.str,
    // from begin() to end().
    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testLongLinearContig(const ImplTest<Unit> &test) {
        initLong();
        if constexpr (mode != UNSAFE) {
            // Validating range with the requested ill-formed behavior.
            auto codePoints = utfStringCodePoints<UChar32, behavior>(test.str);
            testLongLinear<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        } else {
            // Non-validating range; `behavior` is only forwarded to the checks.
            auto codePoints = unsafeUTFStringCodePoints<UChar32>(test.str);
            testLongLinear<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        }
    }

    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testLongLinearInput(const ImplTest<Unit> &test) {
        initLong();
        SinglePassSource<Unit> src(test.str);
        if constexpr (mode == UNSAFE) {
            testLongLinear<mode, behavior, INPUT, Unit>(
                test,
                unsafeUTFIterator<UChar32>(src.begin()),
                unsafeUTFIterator<UChar32>(src.end()));
        } else {
            testLongLinear<mode, behavior, INPUT, Unit>(
                test,
                utfIterator<UChar32, behavior>(src.begin(), src.end()),
                utfIterator<UChar32, behavior>(src.end(), src.end()));
        }
    }

    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testLongLinearFwd(const ImplTest<Unit> &test) {
        initLong();
        FwdIter<Unit> srcBegin(test.str.data());
        FwdIter<Unit> srcLimit(test.str.data() + test.str.length());
        if constexpr (mode == UNSAFE) {
            testLongLinear<mode, behavior, FWD, Unit>(
                test,
                unsafeUTFIterator<UChar32>(srcBegin),
                unsafeUTFIterator<UChar32>(srcLimit));
        } else {
            testLongLinear<mode, behavior, FWD, Unit>(
                test,
                utfIterator<UChar32, behavior>(srcBegin, srcLimit),
                utfIterator<UChar32, behavior>(srcLimit));
        }
    }

    // Backward iteration: starts at `end` and pre-decrements down to `begin`,
    // checking the expected data from the last index to the first.
    template<TestMode mode, UTFIllFormedBehavior behavior,
             IterType type, typename Unit, typename Iter>
    void testLongBackward(const ImplTest<Unit> &test, Iter begin, Iter end) {
        size_t index = test.codePoints.length();
        while (begin != end) {
            --index;
            --end;
            checkUnits<mode, behavior, type, Unit>(
                *end, test.parts[index], test.codePoints[index]);
        }
    }

    // Backward iteration over a code point range created directly from test.str.
    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testLongBackward(const ImplTest<Unit> &test) {
        initLong();
        if constexpr (mode != UNSAFE) {
            auto codePoints = utfStringCodePoints<UChar32, behavior>(test.str);
            testLongBackward<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        } else {
            auto codePoints = unsafeUTFStringCodePoints<UChar32>(test.str);
            testLongBackward<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        }
    }

    // Reverse iteration via the range's reverse iterators: walks rbegin() to rend()
    // with the linear checker, against the expectations from test.reverseParts().
    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testLongReverse(const ImplTest<Unit> &test) {
        initLong();
        auto reversed = test.reverseParts();
        if constexpr (mode != UNSAFE) {
            auto codePoints = utfStringCodePoints<UChar32, behavior>(test.str);
            testLongLinear<mode, behavior, CONTIG, Unit>(
                reversed, codePoints.rbegin(), codePoints.rend());
        } else {
            auto codePoints = unsafeUTFStringCodePoints<UChar32>(test.str);
            testLongLinear<mode, behavior, CONTIG, Unit>(
                reversed, codePoints.rbegin(), codePoints.rend());
        }
    }

    // Test state keeping in a bidirectional_iterator:
    // Change directions, increment/decrement without reading, etc.
    // Called once per position: `i` is the index of the code point that `iter`
    // (taken by value) points at, and `begin`/`end` delimit the whole range.
    // NOTE(review): defined outside this view; the exact movement pattern must be
    // confirmed against the definition.
    template<TestMode mode, UTFIllFormedBehavior behavior,
             IterType type, typename Unit, typename Iter>
    void zigzag(const ImplTest<Unit> &test, size_t i,
                const Iter &begin, Iter iter, const Iter &end);

    // Runs zigzag() once for every position in [begin, end), passing a copy of the
    // iterator at that position together with its index.
    template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Iter>
    void testZigzag(const ImplTest<Unit> &test, Iter begin, Iter end) {
        size_t index = 0;
        Iter current = begin;
        while (current != end) {
            zigzag<mode, behavior, type, Unit>(test, index, begin, current, end);
            ++index;
            ++current;
        }
    }

    // Zigzag test over a code point range created directly from test.str.
    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testZigzag(const ImplTest<Unit> &test) {
        initLong();
        if constexpr (mode != UNSAFE) {
            auto codePoints = utfStringCodePoints<UChar32, behavior>(test.str);
            testZigzag<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        } else {
            auto codePoints = unsafeUTFStringCodePoints<UChar32>(test.str);
            testZigzag<mode, behavior, CONTIG, Unit>(
                test, codePoints.begin(), codePoints.end());
        }
    }

    // Zigzag test through the range's reverse iterators (rbegin() to rend()),
    // against the expectations from test.reverseParts().
    template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
    void testZigzagReverse(const ImplTest<Unit> &test) {
        initLong();
        auto reversed = test.reverseParts();
        if constexpr (mode != UNSAFE) {
            auto codePoints = utfStringCodePoints<UChar32, behavior>(test.str);
            testZigzag<mode, behavior, CONTIG, Unit>(
                reversed, codePoints.rbegin(), codePoints.rend());
        } else {
            auto codePoints = unsafeUTFStringCodePoints<UChar32>(test.str);
            testZigzag<mode, behavior, CONTIG, Unit>(
                reversed, codePoints.rbegin(), codePoints.rend());
        }
    }

    // Test entry points: linear forward iteration over the string itself.
    // Safe (validating) variants use the longBad* data, with UTF_BEHAVIOR_SURROGATE
    // for 16/32-bit units and UTF_BEHAVIOR_NEGATIVE for 8-bit units.
    void testSafe16LongLinearContig() {
        testLongLinearContig<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8LongLinearContig() {
        testLongLinearContig<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32LongLinearContig() {
        testLongLinearContig<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data; ANY_B is passed as the
    // behavior argument, which is not used to construct the unsafe range.
    void testUnsafe16LongLinearContig() {
        testLongLinearContig<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8LongLinearContig() {
        testLongLinearContig<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32LongLinearContig() {
        testLongLinearContig<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: linear forward iteration over single-pass code unit iterators.
    // Safe (validating) variants use the longBad* data.
    void testSafe16LongLinearInput() {
        testLongLinearInput<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8LongLinearInput() {
        testLongLinearInput<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32LongLinearInput() {
        testLongLinearInput<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16LongLinearInput() {
        testLongLinearInput<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8LongLinearInput() {
        testLongLinearInput<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32LongLinearInput() {
        testLongLinearInput<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: linear forward iteration over FwdIter code unit iterators.
    // Safe (validating) variants use the longBad* data.
    void testSafe16LongLinearFwd() {
        testLongLinearFwd<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8LongLinearFwd() {
        testLongLinearFwd<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32LongLinearFwd() {
        testLongLinearFwd<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16LongLinearFwd() {
        testLongLinearFwd<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8LongLinearFwd() {
        testLongLinearFwd<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32LongLinearFwd() {
        testLongLinearFwd<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: backward iteration (from end() down to begin()).
    // Safe (validating) variants use the longBad* data.
    void testSafe16LongBackward() {
        testLongBackward<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8LongBackward() {
        testLongBackward<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32LongBackward() {
        testLongBackward<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16LongBackward() {
        testLongBackward<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8LongBackward() {
        testLongBackward<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32LongBackward() {
        testLongBackward<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: iteration through reverse iterators (rbegin() to rend()).
    // Safe (validating) variants use the longBad* data.
    void testSafe16LongReverse() {
        testLongReverse<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8LongReverse() {
        testLongReverse<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32LongReverse() {
        testLongReverse<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16LongReverse() {
        testLongReverse<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8LongReverse() {
        testLongReverse<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32LongReverse() {
        testLongReverse<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: zigzag (direction-changing) iteration from begin() to end().
    // Safe (validating) variants use the longBad* data.
    void testSafe16Zigzag() {
        testZigzag<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8Zigzag() {
        testZigzag<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32Zigzag() {
        testZigzag<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16Zigzag() {
        testZigzag<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8Zigzag() {
        testZigzag<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32Zigzag() {
        testZigzag<UNSAFE, ANY_B, char32_t>(longGood32);
    }

    // Test entry points: zigzag iteration through reverse iterators.
    // Safe (validating) variants use the longBad* data.
    void testSafe16ZigzagReverse() {
        testZigzagReverse<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
    }
    void testSafe8ZigzagReverse() {
        testZigzagReverse<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
    }
    void testSafe32ZigzagReverse() {
        testZigzagReverse<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
    }

    // Unsafe (non-validating) variants use the longGood* data.
    void testUnsafe16ZigzagReverse() {
        testZigzagReverse<UNSAFE, ANY_B, char16_t>(longGood16);
    }
    void testUnsafe8ZigzagReverse() {
        testZigzagReverse<UNSAFE, ANY_B, char>(longGood8);
    }
    void testUnsafe32ZigzagReverse() {
        testZigzagReverse<UNSAFE, ANY_B, char32_t>(longGood32);
    }
    // Checks that the range factories neither copy an lvalue string (they refer to it)
    // nor copy a temporary string (they take it over by move): the string type used
    // here has its copy operations deleted, so any copy would fail to compile.
    // Iterating the referencing and the owning range side by side must yield
    // the same code points.
    void testOwnership() {
        // A std::u16string that can be moved but not copied.
        class NonCopyableString : public std::u16string {
          public:
            NonCopyableString(std::u16string s) : std::u16string(std::move(s)) {}
            NonCopyableString(const NonCopyableString &) = delete;
            NonCopyableString &operator=(const NonCopyableString &) = delete;
            NonCopyableString(NonCopyableString &&) = default;
            NonCopyableString &operator=(NonCopyableString &&) = default;
        };
        const NonCopyableString referenced(u"𒀭𒊺𒉀 𒍠𒊩");
        {
            // Validating ranges.
            auto referencingRange = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(referenced);
            auto owningRange =
                utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(NonCopyableString(u"𒀭𒊺𒉀 𒍠𒊩"));
#if defined(__cpp_lib_ranges)
            static_assert(std::ranges::borrowed_range<decltype(referencingRange)>);
            static_assert(!std::ranges::borrowed_range<decltype(owningRange)>);
#endif
            auto borrowedIter = referencingRange.begin();
            auto ownedIter = owningRange.begin();
            // Stop as soon as either range is exhausted.
            while (borrowedIter != referencingRange.end() && ownedIter != owningRange.end()) {
                assertEquals("Referenced and owned iteration",
                             borrowedIter->codePoint(), ownedIter->codePoint());
                ++borrowedIter;
                ++ownedIter;
            }
        }
        {
            // Unsafe ranges.
            auto unsafeReferencingRange = unsafeUTFStringCodePoints<char32_t>(referenced);
            auto unsafeOwningRange =
                unsafeUTFStringCodePoints<char32_t>(NonCopyableString(u"𒀭𒊺𒉀 𒍠𒊩"));
#if defined(__cpp_lib_ranges)
            static_assert(std::ranges::borrowed_range<decltype(unsafeReferencingRange)>);
            static_assert(!std::ranges::borrowed_range<decltype(unsafeOwningRange)>);
#endif
            auto borrowedIter = unsafeReferencingRange.begin();
            auto ownedIter = unsafeOwningRange.begin();
            while (borrowedIter != unsafeReferencingRange.end() &&
                   ownedIter != unsafeOwningRange.end()) {
                assertEquals("Referenced and owned unsafe iteration",
                             borrowedIter->codePoint(), ownedIter->codePoint());
                ++borrowedIter;
                ++ownedIter;
            }
        }
    }

    void testCPDefaultConstructors() {
        {
            // validating
            using CodePoints =
                UTFStringCodePoints<UChar32, UTF_BEHAVIOR_NEGATIVE, std::u16string_view>;
            CodePoints cpRange;  // default constructor
            assertTrue("empty safe range", cpRange.begin() == cpRange.end());
            {
                cpRange = utfStringCodePoints<UChar32, UTF_BEHAVIOR_NEGATIVE>(u"abçカ🚴"sv);
                auto iter = cpRange.begin();
                ++++iter;
                assertEquals("safe1[2]", U'ç', iter->codePoint());
                ++++iter;
                assertEquals("safe1[4]", U'🚴', iter->codePoint());
            }
            {
                cpRange = utfStringCodePoints<UChar32, UTF_BEHAVIOR_NEGATIVE>(u"Fuß"sv);
                auto iter = cpRange.begin();
                assertEquals("safe2[0]", U'F', iter->codePoint());
                ++++iter;
                assertEquals("safe2[2]", U'ß', iter->codePoint());
            }
        }
        {
            // unsafe
            using CodePoints =
                UnsafeUTFStringCodePoints<UChar32, std::u16string_view>;
            CodePoints cpRange;  // default constructor
            assertTrue("empty unsafe range", cpRange.begin() == cpRange.end());
            {
                cpRange = unsafeUTFStringCodePoints<UChar32>(u"abçカ🚴"sv);
                auto iter = cpRange.begin();
                ++++iter;
                assertEquals("unsafe1[2]", U'ç', iter->codePoint());
                ++++iter;
                assertEquals("unsafe1[4]", U'🚴', iter->codePoint());
            }
            {
                cpRange = unsafeUTFStringCodePoints<UChar32>(u"Fuß"sv);
                auto iter = cpRange.begin();
                assertEquals("unsafe2[0]", U'F', iter->codePoint());
                ++++iter;
                assertEquals("unsafe2[2]", U'ß', iter->codePoint());
            }
        }
    }
    // Checks that the code unit iterators exposed by a code unit sequence
    // (begin()/end()) are the underlying string's own iterators, by passing them
    // to std::u16string::replace().
    void testIteratorCompatibility() {
        std::u16string zamin = u"𒀭𒎏𒄈𒋢𒍠𒊩";
        {
            auto cpIter = unsafeUTFStringCodePoints<char32_t>(zamin).begin();
            // Start of the second code point.
            ++cpIter;
            auto ningirsuBegin = cpIter->begin();
            // End of the fourth code point.
            ++cpIter;
            ++cpIter;
            auto ningirsuEnd = cpIter->end();
            // In order for this to compile, the ningirsuBegin and ningirsuEnd iterators must be
            // std::u16string iterators.
            zamin.replace(ningirsuBegin, ningirsuEnd, u"𒊺𒉀");
            assertEquals("Replacing ningirsu with nisaba", zamin, u"𒀭𒊺𒉀𒍠𒊩");
        }
        {
            // Same with validating iterators.
            auto cpIter = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(zamin).begin();
            // Start of the second code point.
            ++cpIter;
            auto nisabaBegin = cpIter->begin();
            // End of the third code point.
            ++cpIter;
            auto nisabaEnd = cpIter->end();
            zamin.replace(nisabaBegin, nisabaEnd, u"𒂗𒆤");
            assertEquals("Replacing nisaba with enlil", zamin, u"𒀭𒂗𒆤𒍠𒊩");
        }
    }

    // Defined outside the class body (not in this view).
    void testAllCodePoints();
    void testAllScalarValues();

    // Test data for the long-string tests, one per code unit type.
    // The unsafe (non-validating) tests use the longGood* members and the safe
    // (validating) tests use the longBad* members.
    // NOTE(review): presumably filled in by initLong(), with longBad* containing
    // ill-formed sequences -- confirm against the initLong() definition.
    ImplTest<char> longGood8;
    ImplTest<char16_t> longGood16;
    ImplTest<char32_t> longGood32;
    ImplTest<char> longBad8;
    ImplTest<char16_t> longBad16;
    ImplTest<char32_t> longBad32;
};

// UTF-8 test input with '|' between the pieces. The cast is needed because a u8""
// literal has char8_t elements starting with C++20, while this member is a const char *.
// NOTE(review): presumably consumed via split() by the 8-bit tests -- confirm at the use sites.
const char *UTFIteratorTest::good8Chars = reinterpret_cast<const char *>(u8"a|b|ç|カ|🚴");

// Factory with external linkage: returns a newly allocated UTFIteratorTest as an IntlTest *.
// NOTE(review): the caller presumably takes ownership and deletes it -- confirm at the call site.
extern IntlTest *createUTFIteratorTest() {
    return new UTFIteratorTest();
}

// Bidirectional iteration test driver.
// Splits the piped test string into its parts, joins them into one string without the
// separators, builds a code point range over a StringView of the joined string
// (unsafe or validating, depending on `mode`), and runs the detailed checks on it.
//
// mode: UNSAFE selects the non-validating range; any other mode selects the validating one.
// CP32: the code point type produced by the range.
// behavior: ill-formed handling for the validating range.
// piped: the test string; must be a std::basic_string_view specialization.
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
void UTFIteratorTest::testBidiIter(StringView piped) {
    using Unit = typename StringView::value_type;
    static_assert(icu::header::prv::is_basic_string_view_v<StringView>);
    auto parts = split(piped);
    auto joined = join<Unit>(parts);
    StringView sv(joined);
    // "abçカ🚴"
    // or
    // "a?ç?🚴" where the ? sequences are ill-formed
    if constexpr (mode == UNSAFE) {
        auto range = unsafeUTFStringCodePoints<CP32>(sv);
        // Check that we take the StringView by copy, not by reference, even in C++17.
        // The expected type must use CP32, the code point type the range was created with.
        static_assert(std::is_same_v<decltype(range), UnsafeUTFStringCodePoints<CP32, StringView>>);
        testBidiIter<mode, CP32, behavior>(sv, parts, range);
    } else {
        auto range = utfStringCodePoints<CP32, behavior>(sv);
        // Same check for the validating range: the StringView is held by value.
        static_assert(std::is_same_v<decltype(range), UTFStringCodePoints<CP32, behavior, StringView>>);
        testBidiIter<mode, CP32, behavior>(sv, parts, range);
    }
}

// Detailed bidirectional iteration checks.
// Walks `range` forward from begin() to end() and then backward to begin() again, mixing
// pre- and post-increment/decrement, and checks code points, lengths, well-formedness
// (validating modes only), string views and code unit iterators of the sequences.
//
// sv: the joined code units that `range` iterates over.
// parts: the five pieces of the test string; in ILL_FORMED mode, parts[1] and parts[3]
//     are expected to be reported as not well-formed and to yield sub<CP32, behavior>(part).
// range: the code point range under test (taken by value).
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
         typename StringView, typename CodePoints>
void UTFIteratorTest::testBidiIter(
        StringView sv, const std::vector<StringView> &parts, CodePoints range) {
    constexpr bool isWellFormed = mode != ILL_FORMED;
    auto last = parts[4];
    auto iter = range.begin();
    assertTrue(
        "bidirectional_iterator_tag",
        std::is_same_v<
            typename std::iterator_traits<decltype(iter)>::iterator_category,
            std::bidirectional_iterator_tag>);
    // Index 0: always 'a'; read it through both operator* and operator->.
    assertEquals("iter[0] * codePoint", U'a', (*iter).codePoint());
    assertEquals("iter[0] -> codePoint", U'a', iter->codePoint());
    ++iter;  // pre-increment
    // Index 1: 'b', or the substitute for the ill-formed parts[1].
    auto units = *iter;
    CP32 expectedCP = isWellFormed ? U'b' : sub<CP32, behavior>(parts[1]);
    assertEquals("iter[1] * codePoint", expectedCP, units.codePoint());
    assertEquals("iter[1] * length", parts[1].length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertEquals("iter[1] * wellFormed", isWellFormed, units.wellFormed());
    }
    assertTrue("iter[1] * stringView()", units.stringView() == parts[1]);
    auto unitsIter = units.begin();
    for (auto c : parts[1]) {
        assertEquals("iter[1] * begin()[i]",
                     static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
    }
    // The end of this sequence is the start of the next part.
    assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]);
    ++iter;
    // Index 2: always 'ç'.
    assertEquals("iter[2] * codePoint", U'ç', (*iter++).codePoint());  // post-increment
    // Index 3: 'カ', or the substitute for the ill-formed parts[3].
    units = *iter++;  // post-increment
    expectedCP = isWellFormed ? U'カ' : sub<CP32, behavior>(parts[3]);
    assertEquals("iter[3] * codePoint", expectedCP, units.codePoint());
    if constexpr (mode != UNSAFE) {
        assertEquals("iter[3] * wellFormed", isWellFormed, units.wellFormed());
    }
    // Index 4: always '🚴', the last part.
    // Fetch the current code point twice.
    assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
    units = *iter++;  // post-increment
    assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
    assertEquals("iter[4] * length", last.length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[4] * wellFormed", units.wellFormed());
    }
    assertTrue("iter[4] * stringView()", units.stringView() == last);
    unitsIter = units.begin();
    for (auto c : last) {
        // Forward pass: labeled "iter[4]" so that a failure here is distinguishable
        // from the same check in the backward pass ("iter[back 4]").
        assertEquals("iter[4] * begin()[i]",
                     static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
    }
    assertTrue("iter[4] * end() == endIter", units.end() == sv.end());
    assertTrue("iter == endIter", iter == range.end());
    // backwards
    units = *--iter;  // pre-decrement
    assertEquals("iter[back 4] * codePoint", U'🚴', units.codePoint());
    assertEquals("iter[back 4] * length", last.length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[back 4] * wellFormed", units.wellFormed());
    }
    assertTrue("iter[back 4] * stringView()", units.stringView() == last);
    unitsIter = units.begin();
    for (auto c : last) {
        assertEquals("iter[back 4] * begin()[i]",
                     static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
    }
    assertTrue("iter[back 4] * end() == endIter", units.end() == sv.end());
    --iter;
    // Back at index 3; expectedCP still holds the value computed for parts[3] above.
    if constexpr (mode != UNSAFE) {
        assertEquals("iter[back 3] -> wellFormed", isWellFormed, iter->wellFormed());
    }
    assertEquals("iter[back 3] * codePoint", expectedCP, (*iter--).codePoint());  // post-decrement
    assertEquals("iter[back 2] * codePoint", U'ç', (*iter).codePoint());
    assertEquals("iter[back 2] -> length", parts[2].length(), iter->length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[back 2] -> wellFormed", iter->wellFormed());
    }
    units = *--iter;
    expectedCP = isWellFormed ? U'b' : sub<CP32, behavior>(parts[1]);
    assertEquals("iter[back 1] * codePoint", expectedCP, units.codePoint());
    if constexpr (mode != UNSAFE) {
        assertEquals("iter[back 1] * wellFormed", isWellFormed, units.wellFormed());
    }
    assertTrue("iter[back 1] * stringView()", units.stringView() == parts[1]);
    --iter;
    // Back at index 0, which must coincide with the start of the string and of the range.
    assertEquals("iter[back 0] -> codePoint", U'a', iter->codePoint());
    assertTrue("iter[back 0] -> begin() == beginIter", iter->begin() == sv.begin());
    assertTrue("iter == beginIter", iter == range.begin());
}

template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
void UTFIteratorTest::testSinglePassIter(StringView piped) {
    using Unit = typename StringView::value_type;
    auto parts = split(piped);
    auto joined = join<Unit>(parts);
    SinglePassSource<Unit> good(joined);
    // "abçカ🚴"
    // or
    // "a?ç?🚴" where the ? sequences are ill-formed
    if constexpr (mode == UNSAFE) {
        auto iter = unsafeUTFIterator<CP32>(good.begin());
        auto rangeLimit = unsafeUTFIterator<CP32>(good.end());
        testSinglePassIter<mode, CP32, behavior>(parts, iter, rangeLimit);
    } else {
        auto iter = utfIterator<CP32, behavior>(good.begin(), good.end());
        auto rangeLimit = utfIterator<CP32, behavior>(good.end(), good.end());
        testSinglePassIter<mode, CP32, behavior>(parts, iter, rangeLimit);
    }
}

template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
void UTFIteratorTest::testSinglePassIterNul(StringView piped) {
    using Unit = typename StringView::value_type;
    auto parts = split(piped);
    auto joined = join<Unit>(parts);
    Nul sentinel;
    SinglePassSource<Unit> good(joined.c_str(), sentinel);
    // "abçカ🚴"
    // or
    // "a?ç?🚴" where the ? sequences are ill-formed
    if constexpr (mode == UNSAFE) {
        auto iter = unsafeUTFIterator<CP32>(good.begin());
        testSinglePassIter<mode, CP32, behavior>(parts, iter, sentinel);
    } else {
        auto iter = utfIterator<CP32, behavior>(good.begin(), sentinel);
        testSinglePassIter<mode, CP32, behavior>(parts, iter, sentinel);
    }
}

// Detailed single-pass (input iterator) checks.
// Advances `iter` once through the five-part test string, mixing pre- and post-increment,
// and checks code points, lengths and well-formedness (validating modes only).
//
// parts: the five pieces of the test string; in ILL_FORMED mode, parts[1] and parts[3]
//     are expected to yield sub<CP32, behavior>(part).
// iter: the code point iterator under test; taken by reference and advanced in place.
// rangeLimit: what `iter` must compare equal to once all five code points are consumed
//     (an end iterator or a sentinel).
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
         typename StringView, typename Iter, typename Sentinel>
void UTFIteratorTest::testSinglePassIter(
        const std::vector<StringView> &parts, Iter &iter, const Sentinel &rangeLimit) {
    constexpr bool isWellFormed = mode != ILL_FORMED;
    // decltype(iter) is a reference type here, hence the remove_reference_t.
    assertTrue(
        "input_iterator_tag",
        std::is_same_v<
            typename std::iterator_traits<std::remove_reference_t<decltype(iter)>>::
                iterator_category,
            std::input_iterator_tag>);
    // Index 0: always 'a'; read it through both operator* and operator->.
    assertEquals("iter[0] * codePoint", U'a', (*iter).codePoint());
    assertEquals("iter[0] -> codePoint", U'a', iter->codePoint());
    ++iter;  // pre-increment
    // Index 1: 'b', or the substitute for the ill-formed parts[1].
    auto units = *iter;
    CP32 expectedCP = isWellFormed ? U'b' : sub<CP32, behavior>(parts[1]);
    assertEquals("iter[1] * codePoint", expectedCP, units.codePoint());
    assertEquals("iter[1] * length", parts[1].length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertEquals("iter[1] * wellFormed", isWellFormed, units.wellFormed());
    }
    // No units.stringView() when the unit iterator is not a pointer.
    // No begin() for a single-pass unit iterator.
    ++iter;
    // Index 2: always 'ç'.
    assertEquals("iter[2] * codePoint", U'ç', (*iter++).codePoint());  // post-increment
    // Index 3: 'カ', or the substitute for the ill-formed parts[3].
    expectedCP = isWellFormed ? U'カ' : sub<CP32, behavior>(parts[3]);
    assertEquals("iter[3] -> codePoint", expectedCP, iter->codePoint());
    ++iter;
    // Index 4: always '🚴'; the iterator must not yet compare equal to the limit.
    // Fetch the current code point twice.
    assertFalse("iter != endIter", iter == rangeLimit);
    assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
    units = *iter++;
    assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
    assertEquals("iter[4] * length", parts[4].length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[4] * wellFormed", units.wellFormed());
    }
    // All input consumed.
    assertTrue("iter == endIter", iter == rangeLimit);
}

template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
void UTFIteratorTest::testFwdIter(StringView piped) {
    using Unit = typename StringView::value_type;
    auto parts = split(piped);
    auto joined = join<Unit>(parts);
    // "abçカ🚴"
    FwdIter<Unit> goodBegin(joined.data());
    FwdIter<Unit> goodLimit(joined.data() + joined.length());
    if constexpr (mode == UNSAFE) {
        auto iter = unsafeUTFIterator<CP32>(goodBegin);
        auto rangeLimit = unsafeUTFIterator<CP32>(goodLimit);
        testFwdIter<mode, StringView>(parts, goodLimit, iter, rangeLimit);
    } else {
        auto iter = utfIterator<CP32, behavior>(goodBegin, goodLimit);
        auto rangeLimit = utfIterator<CP32, behavior>(goodLimit);
        testFwdIter<mode, StringView>(parts, goodLimit, iter, rangeLimit);
    }
}

template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
void UTFIteratorTest::testFwdIterNul(StringView piped) {
    using Unit = typename StringView::value_type;
    auto parts = split(piped);
    auto joined = join<Unit>(parts);
    // "abçカ🚴"
    FwdIter<Unit> goodBegin(joined.data());
    Nul sentinel;
    if constexpr (mode == UNSAFE) {
        auto iter = unsafeUTFIterator<CP32>(goodBegin);
        testFwdIter<mode, StringView>(parts, sentinel, iter, sentinel);
    } else {
        auto iter = utfIterator<CP32, behavior>(goodBegin, sentinel);
        testFwdIter<mode, StringView>(parts, sentinel, iter, sentinel);
    }
}

// Shared checks for a forward-only code point iterator over the well-formed
// string "abçカ🚴".
//
// parts:      the expected code unit sequence of each code point, in order.
// goodLimit:  code unit limit (an iterator or a sentinel) that the end() of
//             the last code point's units must compare equal to.
// iter:       code point iterator positioned at the start of the string.
// rangeLimit: code point iterator or sentinel that iter must equal once all
//             five code points have been consumed.
template<TestMode mode, typename StringView,
         typename LimitIter, typename Iter, typename Sentinel>
void UTFIteratorTest::testFwdIter(const std::vector<StringView> &parts, LimitIter goodLimit,
                                  Iter iter, Sentinel rangeLimit) {
    assertTrue(
        "forward_iterator_tag",
        std::is_same_v<
            typename std::iterator_traits<decltype(iter)>::iterator_category,
            std::forward_iterator_tag>);
    assertEquals("iter[0] * codePoint", U'a', (*iter).codePoint());
    assertEquals("iter[0] -> codePoint", U'a', iter->codePoint());
    ++iter;  // pre-increment
    auto units = *iter;
    assertEquals("iter[1] * codePoint", U'b', units.codePoint());
    assertEquals("iter[1] * length", parts[1].length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[1] * wellFormed", units.wellFormed());
    }
    // No units.stringView() when the unit iterator is not a pointer.
    auto unitsIter = units.begin();
    for (auto c : parts[1]) {
        assertEquals("iter[1] * begin()[i]",
                     static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
    }
    // end() of one code point's units is the start of the next code point.
    assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]);
    ++iter;
    assertEquals("iter[2] * codePoint", U'ç', (*iter++).codePoint());  // post-increment
    assertEquals("iter[3] -> codePoint", U'カ', iter->codePoint());
    // Inspect the units of the current code point (iter[3]), not the stale
    // `units` from iter[1]: one more code point follows, so its end() must
    // not be the limit yet.
    assertFalse("iter[3] * end() != endIter", (*iter).end() == goodLimit);
    ++iter;
    // Fetch the current code point twice.
    assertFalse("iter != endIter", iter == rangeLimit);
    assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
    units = *iter++;
    assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
    assertEquals("iter[4] * length", parts[4].length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertTrue("iter[4] * wellFormed", units.wellFormed());
    }
    unitsIter = units.begin();
    for (auto c : parts[4]) {
        assertEquals("iter[4] * begin()[i]",
                     static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
    }
    assertTrue("iter[4] * end() == endIter", units.end() == goodLimit);
    assertTrue("iter == endIter", iter == rangeLimit);
}

namespace {

// Kind of a test part:
// GOOD is a code point that initLong() encodes in UTF-8, UTF-16 and UTF-32.
// BAD8 is an ill-formed byte sequence that initLong() adds only to the UTF-8 "bad" string.
// BAD16 is a single unit that initLong() adds to both the UTF-16 and UTF-32 "bad" strings.
// BAD32 is a single unit that initLong() adds only to the UTF-32 "bad" string.
enum PartType { GOOD, BAD8, BAD16, BAD32 };

// One entry of the long test string: either a single code point (GOOD)
// or one to three raw code units that form an ill-formed sequence.
struct Part {
    // Intentionally implicit so that the table below can list plain code points.
    constexpr Part(char32_t c) : type_(GOOD), len_(0), c_(c) {}
    constexpr Part(PartType t, int32_t u0) : type_(t), len_(1), u0_(u0) {}
    constexpr Part(PartType t, int32_t u0, int32_t u1) : type_(t), len_(2), u0_(u0), u1_(u1) {}
    constexpr Part(PartType t, int32_t u0, int32_t u1, int32_t u2) :
            type_(t), len_(3), u0_(u0), u1_(u1), u2_(u2) {}

    PartType type_;
    // Number of raw units in u0_..u2_; 0 for a GOOD part.
    uint8_t len_;
    // Code point of a GOOD part; stays U'?' for ill-formed parts.
    char32_t c_ = U'?';
    // Raw code units of an ill-formed part; only the first len_ are meaningful.
    int32_t u0_ = 0;
    int32_t u1_ = 0;
    int32_t u2_ = 0;
};

// Careful: We test with the reverse order of parts as well.
// For that to yield self-contained results, parts must not
// continue sequences across part boundaries in either order.
constexpr Part testParts[] = {
    // Well-formed code points interleaved with ill-formed unit sequences.
    U'a',
    0x7f,
    0x80,
    Part(BAD8, 0xc0),
    Part(BAD8, 0x80),
    Part(BAD8, 0xc1),
    0,
    Part(BAD8, 0xe0),
    Part(BAD8, 0xe0, 0xa0),
    Part(BAD8, 0xe0, 0xbf),
    Part(BAD8, 0xed, 0x9f),
    // ED A0 xx .. ED BF xx would be surrogate code points
    Part(BAD8, 0xed),
    Part(BAD8, 0xa0),
    Part(BAD8, 0xed),
    Part(BAD8, 0xbf),
    U'ç',
    Part(BAD8, 0xbf),  // extra trail byte
    U'カ',
    Part(BAD8, 0xee, 0x80),
    Part(BAD8, 0xef, 0xbf),
    Part(BAD8, 0xf0),
    Part(BAD8, 0x8f),
    U'b',
    Part(BAD8, 0xf0),
    Part(BAD8, 0xf0, 0x90),
    Part(BAD8, 0xf0, 0x90, 0x80),
    Part(BAD8, 0xf4),
    Part(BAD8, 0xf4, 0x8f),
    Part(BAD8, 0xf4, 0x8f, 0xbf),
    Part(BAD8, 0xf5),
    Part(BAD8, 0xbf),
    U'🚴',
    Part(BAD8, 0x80),  // extra trail byte
    0x7ff,
    0x800,
    0xfff,
    0x1000,
    0xd7ff,
    // Unpaired surrogates, separated by a BMP code point so that no lead+trail pair forms.
    Part(BAD16, 0xd800),
    Part(BAD16, 0xdbff),
    U'カ',
    Part(BAD16, 0xdc00),
    Part(BAD16, 0xdfff),
    0xe000,
    0xfffd,
    0xffff,
    0x10000,
    0x10ffff,
    // Values outside the code point range U+0000..U+10FFFF.
    Part(BAD32, 0x110000),
    Part(BAD32, -1)
};

}  // namespace

void UTFIteratorTest::initLong() {
    if (!longGood32.str.empty()) { return; }
    for (auto part : testParts) {
        switch (part.type_) {
        case GOOD: {
            char u8[4];
            size_t len8 = 0;
            U8_APPEND_UNSAFE(u8, len8, part.c_);
            longGood8.str.append(u8, len8);
            longGood8.parts.push_back({u8, len8});
            longBad8.str.append(u8, len8);
            longBad8.parts.push_back({u8, len8});
            longBad8.codePoints.push_back(part.c_);

            char16_t u16[2];
            size_t len16 = 0;
            U16_APPEND_UNSAFE(u16, len16, part.c_);
            longGood16.str.append(u16, len16);
            longGood16.parts.push_back({u16, len16});
            longBad16.str.append(u16, len16);
            longBad16.parts.push_back({u16, len16});
            longBad16.codePoints.push_back(part.c_);

            longGood32.str.push_back(part.c_);
            longGood32.parts.push_back({&part.c_, 1});
            longBad32.str.push_back(part.c_);
            longBad32.parts.push_back({&part.c_, 1});
            longBad32.codePoints.push_back(part.c_);
            break;
        }
        case BAD8: {
            char u8[3] = {
                static_cast<char>(part.u0_),
                static_cast<char>(part.u1_),
                static_cast<char>(part.u2_)
            };
            longBad8.str.append(u8, part.len_);
            longBad8.parts.push_back({u8, part.len_});
            longBad8.codePoints.push_back(U'?');
            break;
        }
        case BAD16: {  // surrogate code unit / code point
            char16_t u16 = part.u0_;
            longBad16.str.push_back(u16);
            longBad16.parts.push_back({&u16, 1});
            longBad16.codePoints.push_back(U'?');
            char32_t u32 = part.u0_;
            longBad32.str.push_back(u32);
            longBad32.parts.push_back({&u32, 1});
            longBad32.codePoints.push_back(U'?');
            break;
        }
        case BAD32: {
            char32_t u32 = part.u0_;
            longBad32.str.push_back(u32);
            longBad32.parts.push_back({&u32, 1});
            longBad32.codePoints.push_back(U'?');
            break;
        }
        }
    }
    longGood8.codePoints = longGood16.codePoints = longGood32.codePoints = longGood32.str;
}

// Verifies one decoded code point ("units") against its expected code unit
// sequence and code point.
//
// units:      the object returned by dereferencing the code point iterator.
// part:       the expected code units of this code point or ill-formed sequence.
// expectedCP: the expected code point; U'?' marks an ill-formed part, in which
//             case the expected value is the substitute from sub<UChar32, behavior>(part)
//             and wellFormed() is expected to be false.
//
// For type >= FWD the unit iterators begin()..end() are checked as well;
// for type >= CONTIG also stringView().
template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Units>
void UTFIteratorTest::checkUnits(
        const Units &units, std::basic_string_view<Unit> part, UChar32 expectedCP) {
    bool expectedWellFormed = true;
    if (expectedCP == U'?') {
        expectedCP = sub<UChar32, behavior>(part);
        expectedWellFormed = false;
    }
    assertEquals("cp[i]", expectedCP, units.codePoint());
    assertEquals("length[i]", part.length(), units.length());
    if constexpr (mode != UNSAFE) {
        assertEquals("wellFormed[i]", expectedWellFormed, units.wellFormed());
    }
    if constexpr (type >= FWD) {
        int32_t j = 0;
        for (Unit unit : units) {  // begin()..end()
            // Guard the index: if the code under test yields too many units,
            // part[j] would read out of bounds (undefined behavior).
            // The length assertion below reports the mismatch instead.
            if (static_cast<size_t>(j) < part.length()) {
                assertEquals("units.iter[i][j]",
                             static_cast<UChar32>(part[j]), static_cast<UChar32>(unit));
            }
            ++j;
        }
        assertEquals("units.iter.length[i]", part.length(), j);
    }
    if constexpr (type >= CONTIG) {
        assertTrue("stringView[i]", part == units.stringView());
    }
}

// Moves a bidirectional code point iterator back and forth around position i
// following a fixed script, checking the decoded units along the way.
//
// Script characters:
//   '*' check the current code point (if not at the limit)
//   '+' pre-increment     '-' pre-decrement
//   'P' check, then post-increment
//   'p' check, then post-decrement
// Steps that would move outside [begin, end] are skipped; for the
// pre-increment/pre-decrement steps the iterator is then asserted to be
// at the limit/start.
template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Iter>
void UTFIteratorTest::zigzag(const ImplTest<Unit> &test, size_t i,
                             const Iter &begin, Iter iter, const Iter &end) {
    static constexpr const char *path = "**+*+--*PPp++*p--+P+pP-*-*";
    const size_t count = test.codePoints.length();
    for (const char *cursor = path; *cursor != 0; ++cursor) {
        const char op = *cursor;
        if (op == '*') {
            if (i < count) {
                checkUnits<mode, behavior, type, Unit>(
                    *iter, test.parts[i], test.codePoints[i]);
            }
        } else if (op == '+') {  // pre-increment
            if (i >= count) {
                assertTrue("at limit", iter == end);
                continue;
            }
            ++i;
            ++iter;
        } else if (op == '-') {  // pre-decrement
            if (i == 0) {
                assertTrue("at start", iter == begin);
                continue;
            }
            --i;
            --iter;
        } else if (op == 'P') {  // post-increment
            if (i < count) {
                checkUnits<mode, behavior, type, Unit>(
                    *iter++, test.parts[i], test.codePoints[i]);
                ++i;
            }
        } else if (op == 'p') {  // post-decrement
            // Needs a dereferenceable position that is not the very first one.
            if (i > 0 && i < count) {
                checkUnits<mode, behavior, type, Unit>(
                    *iter--, test.parts[i], test.codePoints[i]);
                --i;
            }
        }
        // Any other script character is ignored.
    }
}

// Compile-time checks only: nothing in this section runs at test time.
#if defined(__cpp_lib_concepts) && __cpp_lib_concepts >= 2020'02 // Test against C++20 concepts.
namespace {
// Validating code point iterator (char32_t, U+FFFD behavior) over the given code unit iterator.
template <typename Iterator>
using CodePointIterator = UTFIterator<char32_t, UTF_BEHAVIOR_FFFD, Iterator>;
// Non-validating code point iterator over the given code unit iterator.
template <typename Iterator>
using UnsafeCodePointIterator = UnsafeUTFIterator<char32_t, Iterator>;

// Check that the iterators satisfy the right concepts.
// Each nested namespace picks a code unit iterator of one category and asserts
// that both code point iterators model the expected concept and not the next stronger one.
namespace input {
// Single-pass code unit iterator.
using CodeUnitIterator = std::istreambuf_iterator<char16_t>;
static_assert(std::input_iterator<CodeUnitIterator>);
static_assert(!std::forward_iterator<CodeUnitIterator>);
static_assert(std::input_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(!std::forward_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(std::input_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
static_assert(!std::forward_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
} // namespace input
namespace forward {
// Multi-pass, forward-only code unit iterator.
using CodeUnitIterator = std::forward_list<char16_t>::iterator;
static_assert(std::forward_iterator<CodeUnitIterator>);
static_assert(!std::bidirectional_iterator<CodeUnitIterator>);
static_assert(std::forward_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(!std::bidirectional_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(std::forward_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
static_assert(!std::bidirectional_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
} // namespace forward
namespace bidirectional {
// Bidirectional but not random-access code unit iterator.
using CodeUnitIterator = std::list<char16_t>::iterator;
static_assert(std::bidirectional_iterator<CodeUnitIterator>);
static_assert(!std::random_access_iterator<CodeUnitIterator>);
static_assert(std::bidirectional_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(!std::random_access_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(std::bidirectional_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
static_assert(!std::random_access_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
} // namespace bidirectional
namespace contiguous {
// Even over a contiguous code unit iterator, the code point iterators are
// asserted to be bidirectional and not random-access.
using CodeUnitIterator = std::u16string::iterator;
static_assert(std::contiguous_iterator<CodeUnitIterator>);
static_assert(std::bidirectional_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(!std::random_access_iterator<CodePointIterator<CodeUnitIterator>>);
static_assert(std::bidirectional_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
static_assert(!std::random_access_iterator<UnsafeCodePointIterator<CodeUnitIterator>>);
} // namespace contiguous

} // namespace
#endif

void UTFIteratorTest::testAllCodePoints() {
    int32_t count = 0;
    UChar32 previous = -1;
    for (UChar32 c : icu::header::AllCodePoints<UChar32>()) {
        // Not assertTrue() / assertEquals() because they are slow for this many code points.
        if (!U_IS_CODE_POINT(c)) {
            errln("!U_IS_CODE_POINT(U+%04lx)", static_cast<long>(c));
        }
        if (c != (previous + 1)) {
            errln("expected U+%04lx = U+%04lx + 1",
                  static_cast<long>(c), static_cast<long>(previous));
        }
        previous = c;
        ++count;
    }
    assertEquals("count", 0x110000, count);
}

void UTFIteratorTest::testAllScalarValues() {
    int32_t count = 0;
    UChar32 previous = -1;
    for (UChar32 c : icu::header::AllScalarValues<UChar32>()) {
        // Not assertTrue() / assertEquals() because they are slow for this many code points.
        if (!U_IS_SCALAR_VALUE(c)) {
            errln("!U_IS_SCALAR_VALUE(U+%04lx)", static_cast<long>(c));
        }
        if (previous == 0xd7ff) {
            previous = 0xdfff;
        }
        if (c != (previous + 1)) {
            errln("expected U+%04lx = U+%04lx + 1",
                  static_cast<long>(c), static_cast<long>(previous));
        }
        previous = c;
        ++count;
    }
    assertEquals("count", 0x110000 - 0x800, count);
}
