10 #include "lcf/config.h"
11 #include "lcf/encoder.h"
12 #include "lcf/reader_util.h"
13 #include "lcf/scope_guard.h"
19 # include <unicode/ucsdet.h>
20 # include <unicode/ucnv.h>
23 # error MSVC builds require ICU
// First variant: ask ucnv_compareNames whether `enc` names the same encoding
// as "UTF-8"; a result of 0 is treated as a match.
40 if (ucnv_compareNames(enc.c_str(),
"UTF-8") == 0) {
// Second variant: exact string comparison against three fixed spellings.
// NOTE(review): "65001" is presumably the Windows code page number for UTF-8 - confirm.
45 if (enc ==
"utf-8" || enc ==
"UTF-8" || enc ==
"65001") {
52 Encoder::Encoder(std::string encoding)
// Reports whether the encoder is usable: true when _encoding is empty, or when
// both _conv_storage and _conv_runtime are non-null.
62 bool Encoder::IsOk()
const {
63 return _encoding.empty() || (_conv_storage && _conv_runtime);
// Converts `str` in place with _conv_storage as the conversion source and
// _conv_runtime as the destination (Convert takes destination first, source second).
66 void Encoder::Encode(std::string& str) {
// Guard for an empty _encoding or an empty string; presumably an early return
// (the branch body is not visible here).
67 if (_encoding.empty() || str.empty()) {
70 Convert(str, _conv_runtime, _conv_storage);
// Converts `str` in place with _conv_runtime as the conversion source and
// _conv_storage as the destination, i.e. the opposite direction to Encode.
73 void Encoder::Decode(std::string& str) {
// Guard for an empty _encoding or an empty string; presumably an early return
// (the branch body is not visible here).
74 if (_encoding.empty() || str.empty()) {
77 Convert(str, _conv_storage, _conv_runtime);
// Prepares the two conversion endpoints stored in _conv_runtime and
// _conv_storage. The runtime side is always "UTF-8"; the storage side is
// derived from _encoding.
80 void Encoder::Init() {
// Guard for an empty _encoding; presumably nothing is set up in that case.
81 if (_encoding.empty()) {
// If _encoding parses to a positive integer it is treated as a code page
// number and mapped to an encoding name through ReaderUtil::CodepageToEncoding.
86 auto code_page = atoi(_encoding.c_str());
87 const auto& storage_encoding = code_page > 0
88 ? ReaderUtil::CodepageToEncoding(code_page)
// Open the converter for the runtime ("UTF-8") side.
91 auto status = U_ZERO_ERROR;
92 constexpr
auto runtime_encoding =
"UTF-8";
93 auto conv_runtime = ucnv_open(runtime_encoding, &status);
// A null converter means ucnv_open failed; the error name is logged to stderr.
95 if (conv_runtime ==
nullptr) {
96 fprintf(stderr,
"liblcf: ucnv_open() error for encoding \"%s\": %s\n", runtime_encoding, u_errorName(status));
// Reset the status so the second ucnv_open call starts from a clean error code.
99 status = U_ZERO_ERROR;
// Scope guard whose callback closes conv_runtime.
// NOTE(review): presumably this avoids leaking the runtime converter when
// opening the storage converter fails, and is dismissed on success - confirm.
100 auto sg = makeScopeGuard([&]() { ucnv_close(conv_runtime); });
// Open the converter for the storage side.
102 auto conv_storage = ucnv_open(storage_encoding.c_str(), &status);
104 if (conv_storage ==
nullptr) {
105 fprintf(stderr,
"liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", storage_encoding.c_str(), u_errorName(status));
// Both converters are available: store them in the members.
111 _conv_runtime = conv_runtime;
112 _conv_storage = conv_storage;
// Alternative setup (presumably the build without the converter library):
// the members hold encoding *names* as char pointers instead of converter
// handles. Convert's iconv path reads them back as const char*.
114 _conv_runtime =
const_cast<char*
>(
"UTF-8");
// This pointer refers to _encoding's internal buffer, so it is only valid
// while _encoding is left unmodified.
115 _conv_storage =
const_cast<char*
>(_encoding.c_str());
// Closes the converters referenced by _conv_runtime and _conv_storage.
// Each member is reinterpreted as a UConverter* and closed only if non-null.
// NOTE(review): whether the members are cleared afterwards is not visible here.
119 void Encoder::Reset() {
121 auto* conv =
reinterpret_cast<UConverter*
>(_conv_runtime);
122 if (conv) ucnv_close(conv);
// Same treatment for the storage-side converter, reusing the local pointer.
123 conv =
reinterpret_cast<UConverter*
>(_conv_storage);
124 if (conv) ucnv_close(conv);
// Converts `str` in place from the encoding identified by conv_src_void to the
// one identified by conv_dst_void, using the member _buffer as scratch space.
//
// The opaque pointers are interpreted in two ways below: as UConverter handles
// for ucnv_convertEx, or as encoding-name C strings for iconv_open.
//
// str:           input text; overwritten with the converted bytes.
// conv_dst_void: destination converter / encoding name.
// conv_src_void: source converter / encoding name.
129 void Encoder::Convert(std::string& str,
void* conv_dst_void,
void* conv_src_void) {
// --- Path using UConverter handles ---
131 const auto& src = str;
132 auto* conv_dst =
reinterpret_cast<UConverter*
>(conv_dst_void);
133 auto* conv_src =
reinterpret_cast<UConverter*
>(conv_src_void);
135 auto status = U_ZERO_ERROR;
// Output buffer is sized at four bytes per input byte.
136 _buffer.resize(src.size() * 4);
138 const auto* src_p = src.c_str();
139 auto* dst_p = _buffer.data();
// dst_p and src_p are passed by address so the call can advance them; after
// the call dst_p marks the end of the bytes written into _buffer.
141 ucnv_convertEx(conv_dst, conv_src,
142 &dst_p, dst_p + _buffer.size(),
143 &src_p, src_p + src.size(),
144 nullptr,
nullptr,
nullptr,
nullptr,
// On failure the source text and the error name are logged to stderr.
148 if (U_FAILURE(status)) {
149 fprintf(stderr,
"liblcf: ucnv_convertEx() error when encoding \"%s\": %s\n", src.c_str(), u_errorName(status));
// Replace str with the range [start of _buffer, dst_p).
153 str.assign(_buffer.data(), dst_p);
// --- Path using iconv; the pointers are encoding names here ---
156 auto* conv_dst =
reinterpret_cast<const char*
>(conv_dst_void);
157 auto* conv_src =
reinterpret_cast<const char*
>(conv_src_void);
// A conversion descriptor is opened on every call (destination name first).
158 iconv_t cd = iconv_open(conv_dst, conv_src);
// (iconv_t)-1 is checked as the failure value of iconv_open.
159 if (cd == (iconv_t)-1)
161 char *src = &str.front();
162 size_t src_left = str.size();
// Output buffer is sized at five bytes per input byte plus 10.
163 size_t dst_size = str.size() * 5 + 10;
164 _buffer.resize(dst_size);
165 char *dst = _buffer.data();
166 size_t dst_left = dst_size;
168 char ICONV_CONST *p = src;
// src_left and dst_left are passed by address and read back below.
// NOTE(review): `q` is declared outside the visible lines; presumably it is the
// write cursor into `dst` - confirm.
173 size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
// Checked condition: iconv returned (size_t)-1, or some input was left unconsumed.
175 if (status == (
size_t) -1 || src_left > 0) {
// Bytes produced = total buffer size minus the space still free.
180 str.assign(dst, dst_size - dst_left);
static std::string filterUtf8Compatible(std::string enc)