Попробуйте привести строки к одной общей кодировке, а затем сравнить. Для этого можно воспользоваться библиотекой libiconv:
main.cpp:
#include <iconv.h>

#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;
/**
 * Convert a byte string from one character encoding to another using iconv.
 *
 * @param from  Name of the source encoding, as accepted by iconv_open() (e.g. "CP1251").
 * @param to    Name of the target encoding, as accepted by iconv_open() (e.g. "UTF-8").
 * @param text  Bytes to convert, interpreted in the `from` encoding.
 * @return The converted bytes (may contain embedded NUL bytes for wide target
 *         encodings), or an empty string if the conversion descriptor cannot be
 *         opened or the input cannot be converted.
 */
std::string iconv_recode(const std::string from, const std::string to, std::string text)
{
    // iconv_open() reports failure as (iconv_t)-1; in that case no descriptor
    // exists, so there is nothing to pass to iconv_close().
    const iconv_t cnv = iconv_open(to.c_str(), from.c_str());
    if (cnv == (iconv_t) -1) {
        return "";
    }

    // iconv() takes a non-const char** for its input even though it only reads it.
    char *in_ptr = const_cast<char *>(text.c_str());
    size_t in_left = text.length();

    std::string converted;
    char chunk[4096];
    bool ok = true;
    bool flushing = false;  // true once all input is consumed and only the shift state remains

    for (;;) {
        char *out_ptr = chunk;
        size_t out_left = sizeof(chunk);

        errno = 0;
        const size_t rc = flushing
            ? iconv(cnv, NULL, NULL, &out_ptr, &out_left)
            : iconv(cnv, &in_ptr, &in_left, &out_ptr, &out_left);
        const int err = errno;  // capture before any other call can overwrite it

        // Append by explicit length so embedded NUL bytes (UTF-16, UCS-4, ...) survive.
        const size_t produced = sizeof(chunk) - out_left;
        converted.append(chunk, produced);

        if (rc != static_cast<size_t>(-1)) {
            if (flushing) {
                break;
            }
            // All input converted; one more call emits any pending reset sequence
            // required by stateful target encodings.
            flushing = true;
            continue;
        }

        // E2BIG only means the chunk filled up; it has been drained above, so retry.
        // The progress check prevents spinning if nothing could be written at all.
        if (err == E2BIG && produced > 0) {
            continue;
        }

        // EILSEQ / EINVAL (or anything else): the input cannot be converted.
        ok = false;
        break;
    }

    iconv_close(cnv);

    if (!ok) {
        converted.clear();
    }
    return converted;
}
/**
 * Print both strings to standard output, then a verdict line saying whether
 * they are byte-for-byte equal, then a "-----" separator line.
 *
 * @param aString1  First string to show and compare.
 * @param aString2  Second string to show and compare.
 */
void compare_strings(const std::string &aString1, const std::string &aString2) {
    // Decide the verdict up front so the printing below is a straight sequence.
    const bool same = (aString1 == aString2);
    const char *verdict = same ? "Identical strings!" : "Different strings!";

    std::cout << "String 1: " << aString1 << std::endl;
    std::cout << "String 2: " << aString2 << std::endl;
    std::cout << verdict << std::endl;
    std::cout << "-----" << std::endl;
}
/**
 * Demo entry point: reads one word from word_1.txt and one from word_2.txt,
 * compares them as raw bytes, then recodes the second word from CP1251 to
 * UTF-8 and compares again.
 *
 * @return 0 on success, EXIT_FAILURE if an input file cannot be opened or
 *         does not contain a word.
 */
int main()
{
    std::ifstream file_1("word_1.txt"); // The "Proverka" Word in UTF-8
    std::ifstream file_2("word_2.txt"); // The "Proverka" Word in CP1251

    // Without this check a missing file yields two empty strings, which would
    // be reported as "Identical strings!".
    if (!file_1 || !file_2) {
        std::cerr << "Cannot open word_1.txt and/or word_2.txt" << std::endl;
        return EXIT_FAILURE;
    }

    std::string word_1, word_2;
    file_1 >> word_1;
    file_2 >> word_2;
    if (file_1.fail() || file_2.fail()) {
        std::cerr << "Cannot read a word from word_1.txt and/or word_2.txt" << std::endl;
        return EXIT_FAILURE;
    }

    // First comparison: raw bytes in two different encodings.
    compare_strings(word_1, word_2);

    // Second comparison: both words brought to UTF-8.
    word_2 = iconv_recode("CP1251", "UTF-8", word_2);
    compare_strings(word_1, word_2);
    return 0;
}
exl@exl-Lenovo-G560e:~/SandBox/text_enc > enca -L russian word_1.txt
Universal transformation format 8 bits; UTF-8
Doubly-encoded to UTF-8 from ISO-8859-5
exl@exl-Lenovo-G560e:~/SandBox/text_enc > enca -L russian word_2.txt
MS-Windows code page 1251
LF line terminators
exl@exl-Lenovo-G560e:~/SandBox/text_enc > cat word_1.txt
Проверка
exl@exl-Lenovo-G560e:~/SandBox/text_enc > cat word_2.txt
��������
exl@exl-Lenovo-G560e:~/SandBox/text_enc > ./text_coding
String 1: Проверка
String 2: ��������
Different strings!
-----
String 1: Проверка
String 2: Проверка
Identical strings!
-----