スポンサーリンク
#include <iostream> // 自作ラッパー #include "GraphemeText.hpp" #pragma warning(disable:4996) int main() { // 文字列の定義 GText gtext(u"aあغ山👨👧經经"); gtext.insert(0, u"غ"); FILE* fp = fopen("C:\\test\\test.txt", "wb"); for (size_t i = 0; i < gtext.length(); i++) { wchar_t c[100]{ 0 }; int len = gtext[i].size(); // 一文字書き込み memcpy(c, gtext[i].ptr(), len); fwrite(c, 1, len, fp); } fclose(fp); }
#pragma once // 以下のdllを要求される // icudt69.dll // icuuc69.dll//! @brief 書記素クラス class Grapheme { void* _UnicodeString; int _index; int _length; enum UScriptCode _script; bool _emoji; public: Grapheme(void* _UnicodeString_, int _index_, int _length_); ~Grapheme() {} const char16_t* ptr()const; //! @brief この書記素の元の文字列上のindexを取得。 int index()const { return _index; } //! @brief この書記素のchar16_tとしての文字数。 int length()const {return _length;} //! @brief この書記素のバイト数。 int size()const { return _length * 2; } //! @brief この文字の用字の種類 enum UScriptCode script()const { return _script; } //! @brief これが絵文字かどうか bool isEmoji()const { return _emoji; } };
/////////////////////////////////// //! @brief 書記素の配列オブジェクト class GText { struct GText_Impl* _impl; public: GText(const char16_t* text); GText(const GText& src); ~GText(); const Grapheme& operator[](const size_t index)const; //! @brief この文字列の書記素数 size_t length()const; //! @brief 書記素を一つ置き換える //! @attention Graphemeリストを更新するのでこれまでのGraphemeは使えなくなる void replace(const size_t index, const char16_t c); void replace(const size_t index, const char16_t* c); void erase(const size_t index); void insert(const size_t index, const char16_t c); void insert(const size_t index, const char16_t* c); const char16_t* ptr()const; //! @brief この文字列のバイト数 int size()const; };
#include<vector> #include <string> #include <unicode/ucnv.h> #include <unicode/brkiter.h> #include <unicode/utypes.h> #include <unicode/uscript.h> #include "GraphemeText.hpp" // 要リンク #pragma comment(lib, "icuuc.lib") #pragma comment(lib, "icudt.lib") // 書記素クラス Grapheme::Grapheme(void* _UnicodeString_, int _index_, int _length_) : _UnicodeString(_UnicodeString_), _index(_index_), _length(_length_) { icu::UnicodeString* p = (icu::UnicodeString*)_UnicodeString; UErrorCode err; UChar32 c32 = p->char32At(_index); // スクリプト _script = uscript_getScript(c32, &err); _emoji = false; // 絵文字かどうかのフラグをセット _emoji |= (bool)u_getIntPropertyValue(c32, UCHAR_EMOJI); _emoji |= (bool)u_getIntPropertyValue(c32, UCHAR_EMOJI_PRESENTATION); _emoji |= (bool)u_getIntPropertyValue(c32, UCHAR_EMOJI_MODIFIER); _emoji |= (bool)u_getIntPropertyValue(c32, UCHAR_EMOJI_MODIFIER_BASE); _emoji |= (bool)u_getIntPropertyValue(c32, UCHAR_EMOJI_COMPONENT); } const char16_t* Grapheme::ptr()const { return static_cast<icu::UnicodeString*>(_UnicodeString)->getBuffer() + _index; }
// Graphemeの配列を作成
void makeGraphemeList(std::vector< Grapheme >& glist, icu::UnicodeString& text) { UErrorCode err; icu::BreakIterator* bi = icu::BreakIterator::createCharacterInstance( icu::Locale::getDefault(), err); if (bi == nullptr) throw "bi is NULL"; bi->setText(text); int32_t current = bi->first(); while (current != icu::BreakIterator::DONE) { int32_t prev = current; current = bi->next(); if (current == UBRK_DONE) { break; } int32_t count = current - prev;//文字の長さ //書記素保存 glist.emplace_back(&text, prev, count); } delete bi; }
struct GText_Impl { std::vector< Grapheme > glist; icu::UnicodeString utext; }; GText::GText(const char16_t* text) { _impl = new GText_Impl; _impl->utext = text; makeGraphemeList(_impl->glist, _impl->utext); } const char16_t* GText::ptr()const { return _impl->utext.getBuffer(); } GText::GText(const GText& src) { _impl->utext = src._impl->utext; _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); } GText::~GText() { delete _impl; } const Grapheme& GText::operator[](const size_t index)const { return _impl->glist[index]; } size_t GText::length()const { return _impl->glist.size(); } int GText::size()const { return _impl->utext.length() * 2; } void GText::replace(const size_t index, const char16_t c) { if (index >= _impl->glist.size()) throw "GText replace index is too big"; _impl->utext.replace(_impl->glist[index].index(), _impl->glist[index].length(), c); _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); } void GText::replace(const size_t index, const char16_t* c) { if (index >= _impl->glist.size()) throw "GText replace index is too big"; size_t len = std::char_traits<char16_t>::length(c); _impl->utext.replace(_impl->glist[index].index(), _impl->glist[index].length(),c, len); _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); } void GText::erase(const size_t index) { if (index >= _impl->glist.size()) throw "GText erase index is too big"; _impl->utext.remove(_impl->glist[index].index(), _impl->glist[index].length()); _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); } void GText::insert(const size_t index, const char16_t c) { if (index >= _impl->glist.size()) throw "GText insert index is too big"; _impl->utext.insert(_impl->glist[index].index(), c); _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); } void GText::insert(const size_t index, const char16_t* c) { if (index >= _impl->glist.size()) throw "GText insert index is too big"; size_t len = std::char_traits<char16_t>::length(c); _impl->utext.insert(_impl->glist[index].index(), c, len); _impl->glist.clear(); makeGraphemeList(_impl->glist, _impl->utext); }