Description: use the system ICU UTF functions We already depend on ICU, so it is useless to copy these functions here. I checked the history of that directory, and other than the renames I am undoing, there were no modifications at all. Author: Kevin, Kofler Origin: Fedora, https://src.fedoraproject.org/rpms/qt5-qtwebengine/blob/master/f/qtwebengine-everywhere-src-5.10.0-system-icu-utf.patch Forwarded: not-needed Reviewed-by: Sandro Knauß Last-Update: 2021-03-08 --- This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ --- a/src/3rdparty/chromium/base/BUILD.gn +++ b/src/3rdparty/chromium/base/BUILD.gn @@ -105,6 +105,7 @@ config("base_flags") { } ldflags = [ "-lnspr4", + "-licuuc", ] } @@ -729,8 +730,6 @@ jumbo_component("base") { "third_party/cityhash/city.h", "third_party/cityhash_v103/src/city_v103.cc", "third_party/cityhash_v103/src/city_v103.h", - "third_party/icu/icu_utf.cc", - "third_party/icu/icu_utf.h", "third_party/superfasthash/superfasthash.c", "thread_annotations.h", "threading/hang_watcher.cc", @@ -1281,6 +1280,7 @@ jumbo_component("base") { "//build:branding_buildflags", "//build:chromeos_buildflags", "//third_party/modp_b64", + "//third_party/icu:icuuc", ] # native_unwinder_android is intended for use solely via a dynamic feature --- a/src/3rdparty/chromium/base/files/file_path.cc +++ b/src/3rdparty/chromium/base/files/file_path.cc @@ -19,7 +19,7 @@ #if defined(OS_APPLE) #include "base/mac/scoped_cftyperef.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #endif #if defined(OS_WIN) @@ -1167,9 +1167,9 @@ inline int HFSReadNextNonIgnorableCodepo int* index) { int codepoint = 0; while (*index < length && codepoint == 0) { - // CBU8_NEXT returns a value < 0 in error cases. For purposes of string + // U8_NEXT returns a value < 0 in error cases. For purposes of string // comparison, we just use that value and flag it with DCHECK. 
- CBU8_NEXT(string, *index, length, codepoint); + U8_NEXT(string, *index, length, codepoint); DCHECK_GT(codepoint, 0); if (codepoint > 0) { // Check if there is a subtable for this upper byte. --- a/src/3rdparty/chromium/base/json/json_parser.cc +++ b/src/3rdparty/chromium/base/json/json_parser.cc @@ -19,7 +19,7 @@ #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { namespace internal { @@ -639,9 +639,9 @@ bool JSONParser::DecodeUTF16(uint32_t* o // If this is a high surrogate, consume the next code unit to get the // low surrogate. - if (CBU16_IS_SURROGATE(code_unit16_high)) { + if (U16_IS_SURROGATE(code_unit16_high)) { // Make sure this is the high surrogate. - if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) { + if (!U16_IS_SURROGATE_LEAD(code_unit16_high)) { if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) return false; *out_code_point = kUnicodeReplacementPoint; @@ -665,7 +665,7 @@ bool JSONParser::DecodeUTF16(uint32_t* o if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low)) return false; - if (!CBU16_IS_TRAIL(code_unit16_low)) { + if (!U16_IS_TRAIL(code_unit16_low)) { if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) return false; *out_code_point = kUnicodeReplacementPoint; @@ -673,12 +673,12 @@ bool JSONParser::DecodeUTF16(uint32_t* o } uint32_t code_point = - CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); + U16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); *out_code_point = code_point; } else { // Not a surrogate. 
- DCHECK(CBU16_IS_SINGLE(code_unit16_high)); + DCHECK(U16_IS_SINGLE(code_unit16_high)); *out_code_point = code_unit16_high; } --- a/src/3rdparty/chromium/base/json/string_escape.cc +++ b/src/3rdparty/chromium/base/json/string_escape.cc @@ -14,7 +14,7 @@ #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { @@ -92,7 +92,7 @@ bool EscapeJSONStringImpl(const S& str, for (int32_t i = 0; i < length; ++i) { uint32_t code_point; if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) || - code_point == static_cast(CBU_SENTINEL) || + code_point == static_cast(U_SENTINEL) || !IsValidCodepoint(code_point)) { code_point = kReplacementCodePoint; did_replacement = true; --- a/src/3rdparty/chromium/base/strings/escape.cc +++ b/src/3rdparty/chromium/base/strings/escape.cc @@ -6,7 +6,7 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { @@ -83,14 +83,14 @@ bool UnescapeUTF8CharacterAtIndex(String std::string* unescaped_out) { DCHECK(unescaped_out->empty()); - unsigned char bytes[CBU8_MAX_LENGTH]; + unsigned char bytes[U8_MAX_LENGTH]; if (!UnescapeUnsignedByteAtIndex(escaped_text, index, &bytes[0])) return false; size_t num_bytes = 1; // If this is a lead byte, need to collect trail bytes as well. - if (CBU8_IS_LEAD(bytes[0])) { + if (U8_IS_LEAD(bytes[0])) { // Look for the last trail byte of the UTF-8 character. Give up once // reach max character length number of bytes, or hit an unescaped // character. 
No need to check length of escaped_text, as @@ -98,7 +98,7 @@ bool UnescapeUTF8CharacterAtIndex(String while (num_bytes < size(bytes) && UnescapeUnsignedByteAtIndex(escaped_text, index + num_bytes * 3, &bytes[num_bytes]) && - CBU8_IS_TRAIL(bytes[num_bytes])) { + U8_IS_TRAIL(bytes[num_bytes])) { ++num_bytes; } } --- a/src/3rdparty/chromium/base/strings/pattern.cc +++ b/src/3rdparty/chromium/base/strings/pattern.cc @@ -4,13 +4,13 @@ #include "base/strings/pattern.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { namespace { -constexpr bool IsWildcard(base_icu::UChar32 character) { +constexpr bool IsWildcard(UChar32 character) { return character == '*' || character == '?'; } @@ -55,9 +55,9 @@ constexpr bool SearchForChars(const CHAR // Check if the chars match, if so, increment the ptrs. const CHAR* pattern_next = *pattern; const CHAR* string_next = *string; - base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); + UChar32 pattern_char = next(&pattern_next, pattern_end); if (pattern_char == next(&string_next, string_end) && - pattern_char != CBU_SENTINEL) { + pattern_char != U_SENTINEL) { *pattern = pattern_next; *string = string_next; continue; @@ -121,20 +121,20 @@ constexpr bool MatchPatternT(const CHAR* } struct NextCharUTF8 { - base_icu::UChar32 operator()(const char** p, const char* end) { - base_icu::UChar32 c; + UChar32 operator()(const char** p, const char* end) { + UChar32 c; int offset = 0; - CBU8_NEXT(*p, offset, end - *p, c); + U8_NEXT(*p, offset, end - *p, c); *p += offset; return c; } }; struct NextCharUTF16 { - base_icu::UChar32 operator()(const char16** p, const char16* end) { - base_icu::UChar32 c; + UChar32 operator()(const char16** p, const char16* end) { + UChar32 c; int offset = 0; - CBU16_NEXT(*p, offset, end - *p, c); + U16_NEXT(*p, offset, end - *p, c); *p += offset; return c; } --- a/src/3rdparty/chromium/base/strings/string_split.cc +++ b/src/3rdparty/chromium/base/strings/string_split.cc @@ -9,7 +9,7 @@ 
#include "base/logging.h" #include "base/strings/string_split_internal.h" #include "base/strings/string_util.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { --- a/src/3rdparty/chromium/base/strings/string_util.cc +++ b/src/3rdparty/chromium/base/strings/string_util.cc @@ -27,7 +27,7 @@ #include "base/strings/string_util_internal.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "build/build_config.h" namespace base { @@ -173,19 +173,19 @@ void TruncateUTF8ToByteSize(const std::s } DCHECK_LE(byte_size, static_cast(std::numeric_limits::max())); - // Note: This cast is necessary because CBU8_NEXT uses int32_ts. + // Note: This cast is necessary because U8_NEXT uses int32_ts. int32_t truncation_length = static_cast(byte_size); int32_t char_index = truncation_length - 1; const char* data = input.data(); - // Using CBU8, we will move backwards from the truncation point + // Using U8, we will move backwards from the truncation point // to the beginning of the string looking for a valid UTF8 // character. Once a full UTF8 character is found, we will // truncate the string to the end of that character. 
while (char_index >= 0) { int32_t prev = char_index; - base_icu::UChar32 code_point = 0; - CBU8_NEXT(data, char_index, truncation_length, code_point); + UChar32 code_point = 0; + U8_NEXT(data, char_index, truncation_length, code_point); if (!IsValidCharacter(code_point) || !IsValidCodepoint(code_point)) { char_index = prev - 1; --- a/src/3rdparty/chromium/base/strings/string_util_internal.h +++ b/src/3rdparty/chromium/base/strings/string_util_internal.h @@ -10,7 +10,7 @@ #include "base/logging.h" #include "base/notreached.h" #include "base/strings/string_piece.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace base { @@ -234,7 +234,7 @@ inline static bool DoIsStringUTF8(String while (char_index < src_len) { int32_t code_point; - CBU8_NEXT(src, char_index, src_len, code_point); + U8_NEXT(src, char_index, src_len, code_point); if (!Validator(code_point)) return false; } --- a/src/3rdparty/chromium/base/strings/utf_string_conversion_utils.cc +++ b/src/3rdparty/chromium/base/strings/utf_string_conversion_utils.cc @@ -4,7 +4,7 @@ #include "base/strings/utf_string_conversion_utils.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "build/build_config.h" namespace base { @@ -19,7 +19,7 @@ bool ReadUnicodeCharacter(const char* sr // use a signed type for code_point. But this function returns false // on error anyway, so code_point_out is unsigned. 
int32_t code_point; - CBU8_NEXT(src, *char_index, src_len, code_point); + U8_NEXT(src, *char_index, src_len, code_point); *code_point_out = static_cast(code_point); // The ICU macro above moves to the next char, we want to point to the last @@ -34,16 +34,16 @@ bool ReadUnicodeCharacter(const char16* int32_t src_len, int32_t* char_index, uint32_t* code_point) { - if (CBU16_IS_SURROGATE(src[*char_index])) { - if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || + if (U16_IS_SURROGATE(src[*char_index])) { + if (!U16_IS_SURROGATE_LEAD(src[*char_index]) || *char_index + 1 >= src_len || - !CBU16_IS_TRAIL(src[*char_index + 1])) { + !U16_IS_TRAIL(src[*char_index + 1])) { // Invalid surrogate pair. return false; } // Valid surrogate pair. - *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index], + *code_point = U16_GET_SUPPLEMENTARY(src[*char_index], src[*char_index + 1]); (*char_index)++; } else { @@ -77,30 +77,30 @@ size_t WriteUnicodeCharacter(uint32_t co } - // CBU8_APPEND_UNSAFE can append up to 4 bytes. + // U8_APPEND_UNSAFE can append up to 4 bytes. size_t char_offset = output->length(); size_t original_char_offset = char_offset; - output->resize(char_offset + CBU8_MAX_LENGTH); + output->resize(char_offset + U8_MAX_LENGTH); - CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + U8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); - // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so + // U8_APPEND_UNSAFE will advance our pointer past the inserted character, so // it will represent the new length of the string. output->resize(char_offset); return char_offset - original_char_offset; } size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) { - if (CBU16_LENGTH(code_point) == 1) { + if (U16_LENGTH(code_point) == 1) { // Thie code point is in the Basic Multilingual Plane (BMP). output->push_back(static_cast(code_point)); return 1; } // Non-BMP characters use a double-character encoding. 
size_t char_offset = output->length(); - output->resize(char_offset + CBU16_MAX_LENGTH); - CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); - return CBU16_MAX_LENGTH; + output->resize(char_offset + U16_MAX_LENGTH); + U16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + return U16_MAX_LENGTH; } // Generalized Unicode converter ----------------------------------------------- --- a/src/3rdparty/chromium/base/strings/utf_string_conversions.cc +++ b/src/3rdparty/chromium/base/strings/utf_string_conversions.cc @@ -12,7 +12,7 @@ #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "build/build_config.h" #if defined(OS_MAC) @@ -79,12 +79,12 @@ using EnableIfBitsAre = std::enable_if_t template = true> void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) { - CBU8_APPEND_UNSAFE(out, *size, code_point); + U8_APPEND_UNSAFE(out, *size, code_point); } template = true> void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) { - CBU16_APPEND_UNSAFE(out, *size, code_point); + U16_APPEND_UNSAFE(out, *size, code_point); } template = true> @@ -105,7 +105,7 @@ bool DoUTFConversion(const char* src, for (int32_t i = 0; i < src_len;) { int32_t code_point; - CBU8_NEXT(src, i, src_len, code_point); + U8_NEXT(src, i, src_len, code_point); if (!IsValidCodepoint(code_point)) { success = false; @@ -126,7 +126,7 @@ bool DoUTFConversion(const char16* src, bool success = true; auto ConvertSingleChar = [&success](char16 in) -> int32_t { - if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) { + if (!U16_IS_SINGLE(in) || !IsValidCodepoint(in)) { success = false; return kErrorCodePoint; } @@ -140,8 +140,8 @@ bool DoUTFConversion(const char16* src, while (i < src_len - 1) { int32_t code_point; - if (CBU16_IS_LEAD(src[i]) && CBU16_IS_TRAIL(src[i + 1])) { - code_point = CBU16_GET_SUPPLEMENTARY(src[i], src[i + 1]); 
+ if (U16_IS_LEAD(src[i]) && U16_IS_TRAIL(src[i + 1])) { + code_point = U16_GET_SUPPLEMENTARY(src[i], src[i + 1]); if (!IsValidCodepoint(code_point)) { code_point = kErrorCodePoint; success = false; --- a/src/3rdparty/chromium/components/download/internal/common/download_path_reservation_tracker.cc +++ b/src/3rdparty/chromium/components/download/internal/common/download_path_reservation_tracker.cc @@ -23,7 +23,7 @@ #include "base/task/lazy_thread_pool_task_runner.h" #include "base/task/post_task.h" #include "base/task_runner_util.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "base/time/time.h" #include "build/build_config.h" #include "components/download/public/common/download_features.h" --- a/src/3rdparty/chromium/components/filename_generation/filename_generation.cc +++ b/src/3rdparty/chromium/components/filename_generation/filename_generation.cc @@ -11,7 +11,7 @@ #include "base/strings/string_util.h" #include "base/strings/sys_string_conversions.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "build/build_config.h" #include "components/url_formatter/url_formatter.h" #include "net/base/filename_util.h" @@ -167,7 +167,7 @@ bool TruncateFilename(base::FilePath* pa #elif defined(OS_WIN) // UTF-16. DCHECK(name.size() > limit); - truncated = name.substr(0, CBU16_IS_TRAIL(name[limit]) ? limit - 1 : limit); + truncated = name.substr(0, U16_IS_TRAIL(name[limit]) ? limit - 1 : limit); #else // We cannot generally assume that the file name encoding is in UTF-8 (see // the comment for FilePath::AsUTF8Unsafe), hence no safe way to truncate. 
--- a/src/3rdparty/chromium/content/browser/devtools/devtools_stream_file.cc +++ b/src/3rdparty/chromium/content/browser/devtools/devtools_stream_file.cc @@ -10,7 +10,7 @@ #include "base/sequenced_task_runner.h" #include "base/strings/string_util.h" #include "base/task/lazy_thread_pool_task_runner.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "content/public/browser/browser_task_traits.h" #include "content/public/browser/browser_thread.h" #include "storage/browser/file_system/file_system_context.h" @@ -104,7 +104,7 @@ void DevToolsStreamFile::ReadOnFileSeque } else { // Provided client has requested sufficient large block, make their // life easier by not truncating in the middle of a UTF-8 character. - if (size_got > 6 && !CBU8_IS_SINGLE(buffer[size_got - 1])) { + if (size_got > 6 && !U8_IS_SINGLE(buffer[size_got - 1])) { base::TruncateUTF8ToByteSize(buffer, size_got, &buffer); size_got = buffer.size(); } else { --- a/src/3rdparty/chromium/net/base/escape.cc +++ b/src/3rdparty/chromium/net/base/escape.cc @@ -9,7 +9,7 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace net { --- a/src/3rdparty/chromium/net/cert/internal/parse_name.cc +++ b/src/3rdparty/chromium/net/cert/internal/parse_name.cc @@ -11,7 +11,7 @@ #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" #include "base/sys_byteorder.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace net { @@ -36,7 +36,7 @@ bool ConvertBmpStringValue(const der::In // BMPString only supports codepoints in the Basic Multilingual Plane; // surrogates are not allowed. 
- if (CBU_IS_SURROGATE(c)) + if (U_IS_SURROGATE(c)) return false; } return base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), out); @@ -56,7 +56,7 @@ bool ConvertUniversalStringValue(const d for (const uint32_t c : in_32bit) { // UniversalString is UCS-4 in big-endian order. uint32_t codepoint = base::NetToHost32(c); - if (!CBU_IS_UNICODE_CHAR(codepoint)) + if (!U_IS_UNICODE_CHAR(codepoint)) return false; base::WriteUnicodeCharacter(codepoint, out); --- a/src/3rdparty/chromium/net/tools/transport_security_state_generator/BUILD.gn +++ b/src/3rdparty/chromium/net/tools/transport_security_state_generator/BUILD.gn @@ -43,6 +43,9 @@ source_set("transport_security_state_gen "//testing/gtest", "//third_party/boringssl", ] + libs = [ + "icuuc", + ] } executable("transport_security_state_generator") { --- a/src/3rdparty/chromium/third_party/openscreen/src/third_party/chromium_quic/build/base/BUILD.gn +++ b/src/3rdparty/chromium/third_party/openscreen/src/third_party/chromium_quic/build/base/BUILD.gn @@ -427,8 +427,6 @@ source_set("base") { "../../src/base/test/fuzzed_data_provider.cc", "../../src/base/test/fuzzed_data_provider.h", "../../src/base/third_party/dynamic_annotations/dynamic_annotations.h", - "../../src/base/third_party/icu/icu_utf.cc", - "../../src/base/third_party/icu/icu_utf.h", "../../src/base/third_party/nspr/prtime.cc", "../../src/base/third_party/nspr/prtime.h", "../../src/base/third_party/superfasthash/superfasthash.c", --- a/src/3rdparty/chromium/ui/base/ime/character_composer.cc +++ b/src/3rdparty/chromium/ui/base/ime/character_composer.cc @@ -13,7 +13,7 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> #include "ui/events/event.h" #include "ui/events/keycodes/dom/dom_key.h" #include "ui/events/keycodes/dom/keycode_converter.h" @@ -37,12 +37,12 @@ bool CheckCharacterComposeTable( bool 
UTF32CharacterToUTF16(uint32_t character, base::string16* output) { output->clear(); // Reject invalid character. (e.g. codepoint greater than 0x10ffff) - if (!CBU_IS_UNICODE_CHAR(character)) + if (!U_IS_UNICODE_CHAR(character)) return false; if (character) { - output->resize(CBU16_LENGTH(character)); + output->resize(U16_LENGTH(character)); size_t i = 0; - CBU16_APPEND_UNSAFE(&(*output)[0], i, character); + U16_APPEND_UNSAFE(&(*output)[0], i, character); } return true; } --- a/src/3rdparty/chromium/ui/gfx/utf16_indexing.cc +++ b/src/3rdparty/chromium/ui/gfx/utf16_indexing.cc @@ -5,13 +5,13 @@ #include "ui/gfx/utf16_indexing.h" #include "base/check_op.h" -#include "base/third_party/icu/icu_utf.h" +#include <unicode/utf.h> namespace gfx { bool IsValidCodePointIndex(const base::string16& s, size_t index) { return index == 0 || index == s.length() || - !(CBU16_IS_TRAIL(s[index]) && CBU16_IS_LEAD(s[index - 1])); + !(U16_IS_TRAIL(s[index]) && U16_IS_LEAD(s[index - 1])); } ptrdiff_t UTF16IndexToOffset(const base::string16& s, size_t base, size_t pos) {