/*
   Copyright (C) 2003 by David White
   Copyright (C) 2005 by Guillaume Melquiond
   Copyright (C) 2005 - 2015 by Philippe Plantier
   Part of the Battle for Wesnoth Project http://www.wesnoth.org/

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY.

   See the COPYING file for more details.
*/

/**
 * @file
 * Unicode support functions.
 */

#include "global.hpp"

#include "ucs4_convert_impl.hpp"
#include "unicode_cast.hpp"
#include "serialization/unicode.hpp"

#include "log.hpp"
#include "util.hpp"

// NOTE(review): the names of the three system headers were lost when the
// file was extracted. The headers below are the ones this file's code
// actually needs (towlower, std::numeric_limits, std::endl, std::string);
// confirm against the upstream file.
#include <cwctype>
#include <limits>
#include <ostream>
#include <string>

static lg::log_domain log_engine("engine");
#define ERR_GENERAL LOG_STREAM(err, lg::general)

namespace utf8 {

/**
 * Returns the length in bytes of the UTF-8 sequence introduced by @a ch.
 *
 * @param ch  First byte of an encoded character.
 * @return    1 for a US-ASCII byte, otherwise the number of leading one-bits
 *            of @a ch.
 * @throws invalid_utf8_exception  if @a ch has exactly one leading one-bit
 *                                 or more than six of them.
 */
static int byte_size_from_utf8_first(const unsigned char ch)
{
	if (!(ch & 0x80)) {
		return 1; // US-ASCII character, 1 byte
	}
	/* first bit set: character not in US-ASCII, multiple bytes
	 * number of set bits at the beginning = bytes per character
	 * e.g. 11110xxx indicates a 4-byte character */
	int count = count_leading_ones(ch);
	if (count == 1 || count > 6) { // count > 4 after RFC 3629
		throw invalid_utf8_exception(); // Stop on invalid characters
	}
	return count;
}

/**
 * Returns a lowercased copy of @a s.
 *
 * Each codepoint produced by utf8::iterator is passed through towlower()
 * when it fits into a wchar_t, then re-encoded. Whatever the iterator
 * reports as its remaining substring once the loop ends is appended
 * unchanged.
 *
 * @param s  The UTF-8 string to convert.
 * @return   The converted string; an empty input is returned as is.
 */
utf8::string lowercase(const utf8::string& s)
{
	if(!s.empty()) {
		utf8::iterator itor(s);
		utf8::string res;

		for(;itor != utf8::iterator::end(s); ++itor) {
			ucs4::char_t uchar = *itor;
			// If wchar_t is less than 32 bits wide, we cannot apply towlower() to all codepoints
			if(uchar <= static_cast<ucs4::char_t>(std::numeric_limits<wchar_t>::max()))
				uchar = towlower(static_cast<wchar_t>(uchar));
			res += unicode_cast<utf8::string>(uchar);
		}

		res.append(itor.substr().second, s.end());
		return res;
	}
	return s;
}

/**
 * Converts a character index into the corresponding byte offset in @a str.
 *
 * NOTE(review): everything in this function after the `chr` loop variable
 * was lost in extraction and has been reconstructed from the surviving
 * comments, the otherwise unused ERR_GENERAL macro and the throwing helper
 * above; confirm against the upstream file.
 *
 * @param str    The UTF-8 string to scan.
 * @param index  Number of characters to skip from the start of @a str.
 * @return       The byte offset reached after skipping @a index characters,
 *               or after running out of input.
 */
size_t index(const utf8::string& str, const size_t index)
{
	// chr counts characters, i is the codepoint index
	// remark: several functions rely on the fallback to str.length()
	unsigned int i = 0, len = str.size();
	try {
		for (unsigned int chr=0; chr<index && i<len; ++chr) {
			i += byte_size_from_utf8_first(str[i]);
		}
	} catch(invalid_utf8_exception&) {
		ERR_GENERAL << "Invalid UTF-8 string." << std::endl;
	}
	return i;
}

/**
 * Counts the characters (not bytes) in @a str.
 *
 * NOTE(review): this definition is not visible in the extracted text; it is
 * reconstructed because erase() below calls size(str). Confirm against the
 * upstream file.
 *
 * @param str  The UTF-8 string to measure.
 * @return     The number of characters counted before the end of the string
 *             or before the first invalid lead byte.
 */
size_t size(const utf8::string& str)
{
	unsigned int chr = 0, i = 0, len = str.size();
	try {
		for (chr=0; i<len; ++chr) {
			i += byte_size_from_utf8_first(str[i]);
		}
	} catch(invalid_utf8_exception&) {
		ERR_GENERAL << "Invalid UTF-8 string." << std::endl;
	}
	return chr;
}

/**
 * Inserts @a insert into @a str in front of the character at index @a pos.
 *
 * NOTE(review): no trace of this definition survives in the extracted
 * text; it is reconstructed from the upstream file as it sat in the span
 * that was removed. Confirm against the upstream file.
 *
 * @param str     The UTF-8 string to modify in place.
 * @param pos     Character index at which to insert.
 * @param insert  The UTF-8 string to insert.
 * @return        A reference to @a str.
 */
utf8::string& insert(utf8::string& str, const size_t pos, const utf8::string& insert)
{
	return str.insert(index(str, pos), insert);
}

/**
 * Erases @a len characters from @a str, beginning at character @a start.
 *
 * NOTE(review): the signature and the left-hand side of the first
 * comparison were lost in extraction; they are reconstructed from the
 * surviving body and from the erase(str, size) call in truncate().
 *
 * @param str    The UTF-8 string to modify in place.
 * @param start  Character index of the first character to remove.
 * @param len    Number of characters to remove; std::string::npos removes
 *               everything from @a start to the end.
 * @return       A reference to @a str, which is left untouched when
 *               @a start is greater than the character count.
 */
utf8::string& erase(utf8::string& str, const size_t start, const size_t len)
{
	if (start > size(str)) return str;
	unsigned pos = index(str, start);

	if (len == std::string::npos) {
		// without second argument, std::string::erase truncates
		return str.erase(pos);
	} else {
		return str.erase(pos, index(str,start+len) - pos);
	}
}

/**
 * Cuts @a str down to at most @a size characters.
 *
 * @param str   The UTF-8 string to modify in place.
 * @param size  Number of characters to keep.
 * @return      A reference to @a str.
 */
utf8::string& truncate(utf8::string& str, const size_t size)
{
	return erase(str, size);
}

/**
 * Cuts @a str down to at most @a size UCS-4 codepoints.
 *
 * The string is converted to UCS-4, shortened there and converted back;
 * @a str is only rewritten when it was actually longer than @a size.
 *
 * @param str   The UTF-8 string to modify in place.
 * @param size  Number of codepoints to keep.
 */
void truncate_as_ucs4(utf8::string &str, const size_t size)
{
	ucs4::string u4_str = unicode_cast<ucs4::string>(str);
	if(u4_str.size() > size) {
		u4_str.resize(size);
		str = unicode_cast<utf8::string>(u4_str);
	}
}

} // end namespace utf8