|
|
|
@ -28,6 +28,8 @@
|
|
|
|
|
#include <unicode/ucsdet.h>
|
|
|
|
|
#include <unicode/ustring.h>
|
|
|
|
|
|
|
|
|
|
#include <cutils/properties.h>
|
|
|
|
|
|
|
|
|
|
namespace android {
|
|
|
|
|
|
|
|
|
|
CharacterEncodingDetector::CharacterEncodingDetector() {
|
|
|
|
@ -38,6 +40,26 @@ CharacterEncodingDetector::CharacterEncodingDetector() {
|
|
|
|
|
ALOGE("could not create UConverter for UTF-8");
|
|
|
|
|
mUtf8Conv = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
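// Read the system locale from the "persist.sys.locale" property and map it to an
// ICU encoding name. mLocaleEnc stays NULL if the property is unset or the locale
// has no mapping below.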
|
|
|
|
|
mLocaleEnc = NULL;
|
|
|
|
|
char locale_value[PROPERTY_VALUE_MAX] = "";
|
|
|
|
|
if (property_get("persist.sys.locale", locale_value, NULL) > 0) {
|
|
|
|
|
const size_t len = strnlen(locale_value, sizeof(locale_value));
|
|
|
|
|
|
|
|
|
|
if (len == 3 && !strncmp(locale_value, "und", 3)) {
|
|
|
|
|
// "und" means the locale is undetermined: deliberately leave mLocaleEnc as NULL.
|
|
|
|
|
} else if (!strncmp(locale_value, "th", 2)) { // Thai
|
|
|
|
|
mLocaleEnc = "windows-874-2000";
|
|
|
|
|
}
|
|
|
|
|
if (mLocaleEnc != NULL) {
|
|
|
|
|
ALOGV("System locale encoding = %s", mLocaleEnc);
|
|
|
|
|
} else {
|
|
|
|
|
ALOGV("Didn't recognize system locale setting, defaulting to en_US");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
ALOGV("Couldn't read system locale setting, assuming en_US");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CharacterEncodingDetector::~CharacterEncodingDetector() {
|
|
|
|
@ -157,7 +179,11 @@ void CharacterEncodingDetector::detectAndConvert() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (bestCombinedMatch != NULL) {
|
|
|
|
|
if (mLocaleEnc != NULL && !goodmatch && highest < 50) {
|
|
|
|
|
combinedenc = mLocaleEnc;
|
|
|
|
|
ALOGV("confidence is low but we have recognized predefined encoding, "
|
|
|
|
|
"so try this (%s) instead", mLocaleEnc);
|
|
|
|
|
} else if (bestCombinedMatch != NULL) {
|
|
|
|
|
combinedenc = ucsdet_getName(bestCombinedMatch, &status);
|
|
|
|
|
} else {
|
|
|
|
|
combinedenc = "ISO-8859-1";
|
|
|
|
|