Updated 21 December 2007.
If you want simple source code that will read a standard, efficiently loadable file format for internationalization string lookups, this may work for you. I wrote this because I was annoyed by trying to compile and use the big and cumbersome code that I found out there.
Here is the code packed into one .cpp file (about 220 lines), along with sample files. The .po file is just a human-readable source file and is not read by the code; you use a tool such as msgfmt or poedit to generate a .mo file from it, and the code reads that .mo file. See the comments for more information.
Pasted below is just the source code, if you just want to look it over:
/*
  The MIT License

  Copyright (c) 2007 Jonathan Blow (jon [at] number-none [dot] com)

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/

/*
  This code is designed to be a very simple drop-in method for reading .mo
  files, which are a standard format for storing text strings to help with
  internationalization.

  For more information, see:
  http://www.gnu.org/software/gettext/manual/gettext.html

  Unfortunately the gettext code is unwieldy. For a simple program like a
  video game, we just want to look up some strings in a pre-baked file, which
  is what this code does.

  We assume that you started with a .po file (which is a human-readable format
  containing all the strings) and then compiled it into a .mo file (which is a
  binary format that can be efficiently read into memory all in one chunk).
  This code then reads straight from the .mo file, so there is no extra string
  allocation and corresponding memory fragmentation.

  You can generate a .mo file from a .po file using programs such as poedit or
  msgfmt.

  This code assumes you compiled the hash table into the .mo file.

  It also doesn't attempt to care about the encoding of your text; it assumes
  you already know what that is. (I use UTF-8 which seems like the only sane
  choice).

  There's no good reason that this is a C++ file, rather than a C file; I just
  wrote it that way originally. You can trivially get rid of the C++-ness if
  you want it to compile as straight C.

  Send questions to jon [at] number-none [dot] com.

  21 December 2007
*/

// In-memory view of one loaded .mo file. The constructor loads the file and
// fills in the header fields; mo_data stays NULL if loading failed.
struct Localization_Text {
    Localization_Text();
    ~Localization_Text();

    // Frees mo_data and resets it to NULL (used when the file is rejected).
    void abort();

    void *mo_data;                // Whole .mo file image, or NULL if nothing is loaded.
    int reversed;                 // Nonzero when the file's byte order is opposite to the host's.
    int num_strings;              // Number of strings in the catalog.
    int original_table_offset;    // Byte offset of the source-string descriptor table.
    int translated_table_offset;  // Byte offset of the translated-string descriptor table.
    int hash_num_entries;         // Number of slots in the hash table.
    int hash_offset;              // Byte offset of the hash table.

private:
    // Not copyable: the destructor frees mo_data, so a copy would free the
    // same buffer twice. Declared but intentionally never defined.
    Localization_Text(const Localization_Text &);
    Localization_Text &operator=(const Localization_Text &);
};

#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <sys/stat.h>
#include <string.h>

// This is just the common hashpjw routine, pasted in:

#define HASHWORDBITS 32

// Hash a NUL-terminated string with hashpjw. The result is always a 32-bit
// value (returned in an unsigned long).
inline unsigned long hashpjw(const char *str_param) {
    unsigned long hval = 0;
    unsigned long g;
    const char *s = str_param;

    while (*s) {
        hval <<= 4;
        hval += (unsigned char) *s++;
        g = hval & ((unsigned long int) 0xf << (HASHWORDBITS - 4));
        if (g != 0) {
            hval ^= g >> (HASHWORDBITS - 8);
            hval ^= g;
        }
    }

    // Where unsigned long is wider than 32 bits, the add above can carry into
    // bit 32 and beyond. Those bits never feed back into the low 32, so
    // dropping them yields the same value a 32-bit build computes.
    return hval & 0xffffffffUL;
}

// Read an entire file into memory. Replace this with any equivalent function.

#ifdef WIN32
#define fileno _fileno
#define fstat _fstat
#define stat _stat
#endif

// Reads all of 'file' into a buffer allocated with new unsigned char[].
// On success stores the buffer in *data_return and returns its length in
// bytes; the caller frees it with delete []. Returns -1 (leaving *data_return
// untouched) if the file cannot be stat'ed, is empty, does not fit in an int,
// or cannot be read in full.
int os_read_entire_file(FILE *file, void **data_return) {
    assert(file);
    assert(data_return);

    int descriptor = fileno(file);

    struct stat file_stats;
    if (fstat(descriptor, &file_stats) == -1) return -1;

    // The length is returned as an int, so refuse sizes that would truncate.
    if (file_stats.st_size <= 0 || file_stats.st_size > INT_MAX) return -1;
    int length = (int)file_stats.st_size;

    if (fseek(file, 0, SEEK_SET) != 0) return -1;

    unsigned char *data = new unsigned char[length];
    if (fread((void *)data, length, 1, file) < 1) {
        delete [] data;
        return -1;
    }

    *data_return = data;
    return length;
}

// Swap the endianness of a 4-byte word.
// On some architectures you can replace my_swap4 with an inlined instruction.
inline unsigned long my_swap4(unsigned long result) { unsigned long c0 = (result >> 0) & 0xff; unsigned long c1 = (result >> 8) & 0xff; unsigned long c2 = (result >> 16) & 0xff; unsigned long c3 = (result >> 24) & 0xff; return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3; } inline int read4_from_offset(Localization_Text *loc, int offset) { unsigned long *ptr = (unsigned long *)(((char *)loc->mo_data) + offset); if (loc->reversed) { return my_swap4(*ptr); } else { return *ptr; } } inline char *get_source_string(Localization_Text *loc, int index) { int addr_offset = loc->original_table_offset + 8 * index + 4; int string_offset = read4_from_offset(loc, addr_offset); char *t = ((char *)loc->mo_data) + string_offset; return t; } inline char *get_translated_string(Localization_Text *loc, int index) { int addr_offset = loc->translated_table_offset + 8 * index + 4; int string_offset = read4_from_offset(loc, addr_offset); char *t = ((char *)loc->mo_data) + string_offset; return t; } static bool label_matches(Localization_Text *loc, char *s, int index) { char *t = get_source_string(loc, index); if (strcmp(s, t) == 0) return true; return false; } inline int get_target_index(Localization_Text *loc, char *s) { unsigned long V = hashpjw(s); int S = loc->hash_num_entries; int hash_cursor = V % S; int orig_hash_cursor = hash_cursor; int increment = 1 + (V % (S - 2)); while (1) { unsigned int index = read4_from_offset(loc, loc->hash_offset + 4 * hash_cursor); if (index == 0) break; index--; // Because entries in the table are stored +1 so that 0 means empty. if (label_matches(loc, s, index)) return index; hash_cursor += increment; hash_cursor %= S; if (hash_cursor == orig_hash_cursor) break; } return -1; } Localization_Text::Localization_Text() { mo_data = NULL; num_strings = 0; original_table_offset = 0; translated_table_offset = 0; hash_num_entries = 0; hash_offset = 0; char *filename = "default.mo"; // Replace this with the name of your .mo file. 
FILE *f = fopen(filename, "rb"); if (!f) return; void *data; int length = os_read_entire_file(f, &data); // Replace this with any function that will read an entire file and return it in a block of newly-allocated memory. fclose(f); if (length < 0) return; // os_read_entire_file returns -1 on failure. if (length < 24) { // There has to be at least this much in the header... abort(); return; } mo_data = data; unsigned long *long_ptr = (unsigned long *)data; const unsigned long TARGET_MAGIC = 0x950412DE; const unsigned long TARGET_MAGIC_REVERSED = 0xDE120495; unsigned long magic = long_ptr[0]; if (magic == TARGET_MAGIC) { reversed = 0; } else if (magic == TARGET_MAGIC_REVERSED) { reversed = 1; } else { abort(); return; } num_strings = read4_from_offset(this, 8); original_table_offset = read4_from_offset(this, 12); translated_table_offset = read4_from_offset(this, 16); hash_num_entries = read4_from_offset(this, 20); hash_offset = read4_from_offset(this, 24); if (hash_num_entries == 0) { // We expect a hash table to be there; if it's not, bail. abort(); return; } } void Localization_Text::abort() { delete [] (char *)mo_data; mo_data = NULL; return; } Localization_Text::~Localization_Text() { if (mo_data) delete [] ((char *)mo_data); } // // Call text_lookup to go from a source string to a translated string. // This is the main entry point that you would use. (The only other // entry point is to construct a Localization_Text() to initialize // things.) // Localization_Text *the_localization_text; char *text_lookup(char *s) { Localization_Text *loc = the_localization_text; if (!loc->mo_data) return s; int target_index = get_target_index(loc, s); if (target_index == -1) return s; // Maybe we want to log an error? return get_translated_string(loc, target_index); } int main() { the_localization_text = new Localization_Text(); printf("The string for 'hello' is: %s\n", text_lookup("hello")); }