deps: update icu to 74.1 · nodejs/node@2a2bf57
@@ -21,6 +21,7 @@
2121#include "unicode/uscript.h"
2222#include "unicode/ucharstrie.h"
2323#include "unicode/bytestrie.h"
24+#include "unicode/rbbi.h"
24252526#include "brkeng.h"
2627#include "cmemory.h"
@@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
7071}
71727273UBool
73-UnhandledEngine::handles(UChar32 c) const {
74+UnhandledEngine::handles(UChar32 c, const char* locale) const {
75+ (void)locale; // Unused
7476return fHandled && fHandled->contains(c);
7577}
76787779int32_t
7880UnhandledEngine::findBreaks( UText *text,
79-int32_t /* startPos */,
81+int32_t startPos,
8082int32_t endPos,
8183 UVector32 &/*foundBreaks*/,
8284 UBool /* isPhraseBreaking */,
8385 UErrorCode &status) const {
8486if (U_FAILURE(status)) return 0;
85- UChar32 c = utext_current32(text);
87+utext_setNativeIndex(text, startPos);
88+ UChar32 c = utext_current32(text);
8689while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
8790utext_next32(text); // TODO: recast loop to work with post-increment operations.
8891 c = utext_current32(text);
@@ -120,49 +123,47 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
120123 }
121124}
122125123-U_NAMESPACE_END
124-U_CDECL_BEGIN
125-static void U_CALLCONV _deleteEngine(void *obj) {
126-delete (const icu::LanguageBreakEngine *) obj;
126+void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
127+static UMutex gBreakEngineMutex;
128+ Mutex m(&gBreakEngineMutex);
129+if (fEngines == nullptr) {
130+ LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status);
131+if (U_SUCCESS(status)) {
132+fEngines = engines.orphan();
133+ }
134+ }
127135}
128-U_CDECL_END
129-U_NAMESPACE_BEGIN
130136131137const LanguageBreakEngine *
132-ICULanguageBreakFactory::getEngineFor(UChar32 c) {
138+ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
133139const LanguageBreakEngine *lbe = nullptr;
134140 UErrorCode status = U_ZERO_ERROR;
141+ensureEngines(status);
142+if (U_FAILURE(status) ) {
143+// Note: no way to return error code to caller.
144+return nullptr;
145+ }
135146136147static UMutex gBreakEngineMutex;
137148 Mutex m(&gBreakEngineMutex);
138-139-if (fEngines == nullptr) {
140- LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
141-if (U_FAILURE(status) ) {
142-// Note: no way to return error code to caller.
143-return nullptr;
144- }
145-fEngines = engines.orphan();
146- } else {
147-int32_t i = fEngines->size();
148-while (--i >= 0) {
149- lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
150-if (lbe != nullptr && lbe->handles(c)) {
151-return lbe;
152- }
149+int32_t i = fEngines->size();
150+while (--i >= 0) {
151+ lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
152+if (lbe != nullptr && lbe->handles(c, locale)) {
153+return lbe;
153154 }
154155 }
155-156+156157// We didn't find an engine. Create one.
157- lbe = loadEngineFor(c);
158+ lbe = loadEngineFor(c, locale);
158159if (lbe != nullptr) {
159160fEngines->push((void *)lbe, status);
160161 }
161162return U_SUCCESS(status) ? lbe : nullptr;
162163}
163164164165const LanguageBreakEngine *
165-ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
166+ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
166167 UErrorCode status = U_ZERO_ERROR;
167168 UScriptCode code = uscript_getScript(c, &status);
168169if (U_SUCCESS(status)) {
@@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
299300return nullptr;
300301}
301302303+304+void ICULanguageBreakFactory::addExternalEngine(
305+ ExternalBreakEngine* external, UErrorCode& status) {
306+ LocalPointer<ExternalBreakEngine> engine(external, status);
307+ensureEngines(status);
308+ LocalPointer<BreakEngineWrapper> wrapper(
309+new BreakEngineWrapper(engine.orphan(), status), status);
310+static UMutex gBreakEngineMutex;
311+ Mutex m(&gBreakEngineMutex);
312+fEngines->push(wrapper.getAlias(), status);
313+ wrapper.orphan();
314+}
315+316+BreakEngineWrapper::BreakEngineWrapper(
317+ ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
318+}
319+320+BreakEngineWrapper::~BreakEngineWrapper() {
321+}
322+323+UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
324+return delegate->isFor(c, locale);
325+}
326+327+int32_t BreakEngineWrapper::findBreaks(
328+ UText *text,
329+int32_t startPos,
330+int32_t endPos,
331+ UVector32 &foundBreaks,
332+ UBool /* isPhraseBreaking */,
333+ UErrorCode &status) const {
334+if (U_FAILURE(status)) return 0;
335+int32_t result = 0;
336+337+// Find the span of characters included in the set.
338+// The span to break begins at the current position in the text, and
339+// extends towards the start or end of the text, depending on 'reverse'.
340+341+utext_setNativeIndex(text, startPos);
342+int32_t start = (int32_t)utext_getNativeIndex(text);
343+int32_t current;
344+int32_t rangeStart;
345+int32_t rangeEnd;
346+ UChar32 c = utext_current32(text);
347+while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
348+utext_next32(text); // TODO: recast loop for postincrement
349+ c = utext_current32(text);
350+ }
351+ rangeStart = start;
352+ rangeEnd = current;
353+int32_t beforeSize = foundBreaks.size();
354+int32_t additionalCapacity = rangeEnd - rangeStart + 1;
355+// enlarge to contains (rangeEnd-rangeStart+1) more items
356+ foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status);
357+if (U_FAILURE(status)) return 0;
358+ foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity);
359+ result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize,
360+ additionalCapacity, status);
361+if (U_FAILURE(status)) return 0;
362+ foundBreaks.setSize(beforeSize + result);
363+utext_setNativeIndex(text, current);
364+return result;
365+}
366+302367U_NAMESPACE_END
303368304369#endif /* #if !UCONFIG_NO_BREAK_ITERATION */