/////////////////////////////////////////////////////////////////////////////
//
//	File: UtfTest.cpp
//
//	$Header: /TS/UnitTest/UtfTest.cpp  16  2009/9/14 1:48:03p  Lee $
//
/////////////////////////////////////////////////////////////////////////////


#include "QzCommon.h"
#include "QzTxtReader.h"
#include "QzTxtWriter.h"
#include "UtfData.h"


#ifdef USE_MALLOC_MACRO
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfTables()
//
//	Run some consistency tests between the different tables in UtfData.cpp.
//
bool TestUtfTables(void)
{
	U32 errorCount = 0;

	U32 composeCount = UtfComposeTableSize();
	U32 symbolCount  = UtfSymbolTableSize();
	U32 sortCount    = UtfDefaultSortTableSize();

	// Make certain all composed symbols exist within the main symbol table.
	for (U32 i = 0; i < composeCount; ++i) {
		bool found = false;

		for (U32 j = 0; j < symbolCount; ++j) {
			if (g_UtfSymbolTable[j].Symbol == g_UtfComposeValues[i].Composed) {
				found = true;
				break;
			}
		}

		if (false == found) {
			++errorCount;
		}
	}

	// Make certain all symbols in the main symbol table have entries defined
	// in the sorting table.
	for (U32 i = 0; i < symbolCount; ++i) {
		bool found = false;

		for (U32 j = 0; j < sortCount; ++j) {
			if (g_UtfDefaultSortTable[j].Symbol == g_UtfSymbolTable[i].Symbol) {
				found = true;
				break;
			}
		}

		if (false == found) {
			++errorCount;
		}
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfSize()
//
//	Verify that UtfCharCount() -- and by extension, UtfStringSize() -- will
//	return the correct number of characters when the last character has been
//	truncated.  Truncation should not occur naturally, since all of the copy
//	functions are supposed to detect it and discard any trailing partial
//	UTF-8 symbols.  We'll introduce it here by manually zeroing out bytes at
//	the end of the string until the string is completely empty.
//
bool TestUtfSize(void)
{
	U32 errorCount = 0;

	Utf32_t baseSamples1[6] = { 0x0064, 0x0123, 0x0732, 0x3423, 0x31234, 0 };
	Utf08_t scratch[32];

	U32 scratchLength = UtfConvert32to08(scratch, ArraySize(scratch), baseSamples1);

	if (12 != scratchLength) {
		++errorCount;
	}

	// Raw symbol count.
	if (5 != UtfCharCount(scratch)) { ++errorCount; }

	// The last symbol should take four bytes.  Trimming one byte off at a
	// time should report one less symbol.

	scratch[11] = 0;

	if (4 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[10] = 0;

	if (4 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[9] = 0;

	if (4 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[8] = 0;

	if (4 != UtfCharCount(scratch)) { ++errorCount; }

	// The fourth symbol should require 3 bytes.

	scratch[7] = 0;

	if (3 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[6] = 0;

	if (3 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[5] = 0;

	if (3 != UtfCharCount(scratch)) { ++errorCount; }

	// The third symbol requires 2 bytes.

	scratch[4] = 0;

	if (2 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[3] = 0;

	if (2 != UtfCharCount(scratch)) { ++errorCount; }

	// The second symbol also requires 2 bytes.

	scratch[2] = 0;

	if (1 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[1] = 0;

	// The first symbol only requires 1 byte.

	if (1 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[0] = 0;

	if (0 != UtfCharCount(scratch)) { ++errorCount; }


	//
	// Second validation pass.  This time make certain the truncation
	// detection stops at the start of the buffer for multi-byte symbols.
	//

	Utf32_t baseSamples2[2] = { 0x4321, 0 };

	scratchLength = UtfConvert32to08(scratch, ArraySize(scratch), baseSamples2);

	if (3 != scratchLength) {
		++errorCount;
	}

	// Raw symbol count.
	if (1 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[2] = 0;

	if (0 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[1] = 0;

	if (0 != UtfCharCount(scratch)) { ++errorCount; }

	scratch[0] = 0;

	if (0 != UtfCharCount(scratch)) { ++errorCount; }

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtf32to08()
//
//	Take all valid Unicode symbols (regardless of whether they are used in
//	the current version of the standard) and convert them from UTF-32 to
//	UTF-8, then back to UTF-32 to sanity check both of those conversion
//	functions.
//
bool TestUtf32to08(void)
{
	Utf08_t imm[1024];
	Utf32_t src[256];
	Utf32_t dst[256];

	U32 errorCount = 0;
	U32 offset     = 0;

	// There are too many codes to test all at once.  We'll fill a buffer
	// with some of them, batch convert that string, then refill the buffer
	// with the next batch of symbols.

	for (U32 i = 1; i <= Unicode_MaxValidCode; ++i) {
		if (UtfIsValid(i)) {
			src[offset++] = i;
		}

		if (200 == offset) {
			src[offset] = '\0';
			UtfConvert32to08(imm, ArraySize(imm), src);
			UtfConvert08to32(dst, ArraySize(dst), imm);
			for (U32 j = 0; j < offset; ++j) {
				if (dst[j] != src[j]) {
					++errorCount;
				}
			}
			offset = 0;
		}
	}

	if (offset > 0) {
		src[offset] = '\0';
		UtfConvert32to08(imm, ArraySize(imm), src);
		UtfConvert08to32(dst, ArraySize(dst), imm);
		for (U32 j = 0; j < offset; ++j) {
			if (dst[j] != src[j]) {
				++errorCount;
			}
		}
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtf32to16()
//
//	Take all valid Unicode symbols (regardless of whether they are used in
//	the current version of the standard) and convert them from UTF-32 to
//	UTF-16, then back to UTF-32 to sanity check both of those conversion
//	functions.
//
bool TestUtf32to16(void)
{
	Utf16_t imm[512];
	Utf32_t src[256];
	Utf32_t dst[256];

	U32 errorCount = 0;
	U32 offset     = 0;

	// There are too many codes to test all at once.  We'll fill a buffer
	// with some of them, batch convert that string, then refill the buffer
	// with the next batch of symbols.

	for (U32 i = 1; i <= Unicode_MaxValidCode; ++i) {
		if (UtfIsValid(i)) {
			src[offset++] = i;
		}

		if (200 == offset) {
			src[offset] = '\0';
			UtfConvert32to16(imm, ArraySize(imm), src);
			UtfConvert16to32(dst, ArraySize(dst), imm);
			for (U32 j = 0; j < offset; ++j) {
				if (dst[j] != src[j]) {
					++errorCount;
				}
			}
			offset = 0;
		}
	}

	if (offset > 0) {
		src[offset] = '\0';
		UtfConvert32to16(imm, ArraySize(imm), src);
		UtfConvert16to32(dst, ArraySize(dst), imm);
		for (U32 j = 0; j < offset; ++j) {
			if (dst[j] != src[j]) {
				++errorCount;
			}
		}
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtf16to08()
//
//	Take all valid Unicode symbols (regardless of whether they are used in
//	the current version of the standard) and convert them from UTF-16 to
//	UTF-8, then back to UTF-16 to sanity check both of those conversion
//	functions.
//
//	Since generating all valid surrogates can be a bit clumsy, we'll create
//	all symbols in 32-bit format, convert through 16 to 8, then all of the
//	way back for verification.
//
bool TestUtf16to08(void)
{
	Utf08_t imm[1024];
	Utf16_t immSrc[512];
	Utf16_t immDst[512];
	Utf32_t src[256];
	Utf32_t dst[256];

	U32 errorCount = 0;
	U32 offset     = 0;

	// There are too many codes to test all at once.  We'll fill a buffer
	// with some of them, batch convert that string, then refill the buffer
	// with the next batch of symbols.

	for (U32 i = 1; i <= Unicode_MaxValidCode; ++i) {
		if (UtfIsValid(i)) {
			src[offset++] = i;
		}

		if (200 == offset) {
			src[offset] = '\0';
			UtfConvert32to16(immSrc, ArraySize(immSrc), src);
			UtfConvert16to08(imm, ArraySize(imm), immSrc);
			UtfConvert08to16(immDst, ArraySize(immDst), imm);
			UtfConvert16to32(dst, ArraySize(dst), immDst);
			for (U32 j = 0; j < offset; ++j) {
				if (dst[j] != src[j]) {
					++errorCount;
				}
			}
			offset = 0;
		}
	}

	if (offset > 0) {
		src[offset] = '\0';
		UtfConvert32to16(immSrc, ArraySize(immSrc), src);
		UtfConvert16to08(imm, ArraySize(imm), immSrc);
		UtfConvert08to16(immDst, ArraySize(immDst), imm);
		UtfConvert16to32(dst, ArraySize(dst), immDst);
		for (U32 j = 0; j < offset; ++j) {
			if (dst[j] != src[j]) {
				++errorCount;
			}
		}
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfNormalize()
//
bool TestUtfNormalize(void)
{
	U32 errorCount = 0;

	// Sample raw input.
	Utf32_t denorm32[] = { 0x61, 0x20, 0x301, 0x62, 0 };
	Utf16_t denorm16[64];
	Utf08_t denorm08[64];

	// Input after being composed.
	Utf32_t comp32[64];
	Utf16_t comp16[64];
	Utf08_t comp08[64];

	// Output from normalizing pass.
	Utf32_t renorm32[64];
	Utf16_t renorm16[64];
	Utf08_t renorm08[64];

	UtfConvert32to08(denorm08, ArraySize(denorm08), denorm32);
	UtfConvert32to16(denorm16, ArraySize(denorm16), denorm32);

	// Can only compose the source from 32 to 8, then convert 8 to the other
	// composed formats.
	UtfCompose32to08(comp08, ArraySize(comp08), denorm32);
	UtfConvert08to16(comp16, ArraySize(comp16), comp08);
	UtfConvert08to32(comp32, ArraySize(comp32), comp08);

	UtfNormalize08to08(renorm08, ArraySize(renorm08), comp08);
	UtfNormalize08to16(renorm16, ArraySize(renorm16), comp08);
	UtfNormalize08to32(renorm32, ArraySize(renorm32), comp08);

	for (U32 i = 0; i < 64; ++i) {
		if (denorm32[i] != renorm32[i]) {
			++errorCount;
		}
		if (0 == denorm32[i]) {
			break;
		}
	}

	for (U32 i = 0; i < 64; ++i) {
		if (denorm16[i] != renorm16[i]) {
			++errorCount;
		}
		if (0 == denorm16[i]) {
			break;
		}
	}

	for (U32 i = 0; i < 64; ++i) {
		if (denorm08[i] != renorm08[i]) {
			++errorCount;
		}
		if (0 == denorm08[i]) {
			break;
		}
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfCompare()
//
bool TestUtfCompare(void)
{
	U32 errorCount = 0;

	const Utf08_t *p0 = reinterpret_cast<const Utf08_t*>("");
	const Utf08_t *p1 = reinterpret_cast<const Utf08_t*>("a");
	const Utf08_t *p2 = reinterpret_cast<const Utf08_t*>("ab");
	const Utf08_t *p3 = reinterpret_cast<const Utf08_t*>("b");
	const Utf08_t *p4 = reinterpret_cast<const Utf08_t*>("ba");
	const Utf08_t *p5 = reinterpret_cast<const Utf08_t*>("acb");
	const Utf08_t *p6 = reinterpret_cast<const Utf08_t*>("acb");

	if (UtfCompareBytewise(p0, p1) >= 0) {
		++errorCount;
	}
	if (UtfCompareBytewise(p1, p0) <= 0) {
		++errorCount;
	}

	if (UtfCompareBytewise(p1, p2) >= 0) {
		++errorCount;
	}
	if (UtfCompareBytewise(p2, p1) <= 0) {
		++errorCount;
	}

	if (UtfCompareBytewise(p3, p4) >= 0) {
		++errorCount;
	}
	if (UtfCompareBytewise(p4, p3) <= 0) {
		++errorCount;
	}

	// Compare two identical strings that are stored at different addresses.
	if (UtfCompareBytewise(p5, p6) != 0) {
		++errorCount;
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfNumbers()
//
bool TestUtfNumbers(void)
{
	U32 errorCount = 0;

	Utf08_t buffer[64];


	//////////////////////
	//  UtfFromFloat()  //
	//////////////////////

	float f = 5.4321f;

	UtfFromFloat(buffer, f, 6);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.432100"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 5);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.43210"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 4);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.4321"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 3);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.432"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 2);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.43"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 1);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5.4"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 0);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("5"))) {
		++errorCount;
	}

	f = 2.666666f;

	UtfFromFloat(buffer, f, 6);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.666666"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 5);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.66667"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 4);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.6667"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 3);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.667"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 2);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.67"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 1);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("2.7"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 0);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("3"))) {
		++errorCount;
	}

	f = -8.765432f;

	UtfFromFloat(buffer, f, 6);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.765432"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 5);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.76543"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 4);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.7654"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 3);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.765"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 2);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.77"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 1);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-8.8"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 0);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-9"))) {
		++errorCount;
	}


	f = -0.001234f;

	UtfFromFloat(buffer, f, 6);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-0.001234"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 5);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-0.00123"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 4);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-0.0012"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 3);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-0.001"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 2);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("0.00"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 1);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("0.0"))) {
		++errorCount;
	}
	UtfFromFloat(buffer, f, 0);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("0"))) {
		++errorCount;
	}


	////////////////////////////
	//  UtfLocalizedFrom32()  //
	////////////////////////////

	// Default English format is to use commas for separation (at least on the
	// west side of The Pond).
	UtfLocalizedFrom32(buffer, 1234567890);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("1,234,567,890"))) {
		++errorCount;
	}

	// Reset language to German, which uses periods for separators.
	UtfSetLanguage(LangID_German);

	UtfLocalizedFrom32(buffer, 1234567890);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("1.234.567.890"))) {
		++errorCount;
	}

	// The verify we've correctly reset back to English.
	UtfSetLanguage(LangID_English);

	UtfLocalizedFrom32(buffer, 1234567890);
	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("1,234,567,890"))) {
		++errorCount;
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfCanonical()
//
//	Collect every code below 0x10000 that UtfIsKnown() accepts, convert the
//	collection to UTF-8, run it through canonical decomposition followed by
//	composition, and verify that the result converts back to exactly the
//	same sequence of UTF-32 symbols.
//
//	Failures are reported through printf().  Returns true if the internal
//	decomposition table validates and the round trip changed nothing.
//
bool TestUtfCanonical(void)
{
	U32 errorCount = 0;

	// Make certain that the internal data table is properly formatted.
	if (false == UtfDecomposeValidate()) {
		printf("TestUtfCanonical failed UtfDecomposeValidate\n");
		++errorCount;
	}

	// The source buffer gets one slot beyond 0x10000 so that the terminator
	// written after the fill loop always lands inside the allocation, even
	// if every single code in the range were reported as known.
	Utf32_t *pSrc32  = new Utf32_t[0x10000 + 1];
	Utf08_t *pSrc08  = new Utf08_t[0x40000];
	Utf08_t *pDecomp = new Utf08_t[0x40000];
	Utf08_t *pDst08  = new Utf08_t[0x40000];
	Utf32_t *pDst32  = new Utf32_t[0x10000];

	U32 srcCount32 = 0;

	// Generate an array containing all of the Unicode symbols supported by
	// the current version of UtfData.cpp.
	for (U32 i = 0; i < 0x10000; ++i) {
		if (UtfIsKnown(i)) {
			pSrc32[srcCount32++] = i;
		}
	}

	// Terminate the string.
	pSrc32[srcCount32] = 0;

	// Convert all of those symbols to UTF-8.
	UtfConvert32to08(pSrc08, 0x40000, pSrc32);

	// Apply the canonical decomposition.
	UtfCanonicalDecompose08to08(pDecomp, 0x40000, pSrc08);

	// Convert back to the internal composed format.
	UtfCompose08to08(pDst08, 0x40000, pDecomp);

	// Verify that none of the characters were changed.
	U32 dstCount32 = UtfConvert08to32(pDst32, 0x10000, pDst08);

	if (srcCount32 != dstCount32) {
		++errorCount;
		printf("TestUtfCanonical failed UtfConvert08to32, %u != %u\n",
			static_cast<unsigned int>(srcCount32),
			static_cast<unsigned int>(dstCount32));
	}
	else {
		for (U32 i = 0; i < srcCount32; ++i) {
			if (pDst32[i] != pSrc32[i]) {
				// Print the mismatch with one symbol of context on each
				// side.  The index is unsigned, so "i-1" must not be
				// evaluated for the first symbol, and "i+1" is not read
				// past the last symbol; a missing neighbor prints as 0000.
				const bool hasPrev = (i > 0);
				const bool hasNext = ((i + 1) < srcCount32);

				printf("%04X %04X %04X %04X %04X %04X\n",
					static_cast<unsigned int>(hasPrev ? pDst32[i-1] : 0),
					static_cast<unsigned int>(pDst32[i]),
					static_cast<unsigned int>(hasNext ? pDst32[i+1] : 0),
					static_cast<unsigned int>(hasPrev ? pSrc32[i-1] : 0),
					static_cast<unsigned int>(pSrc32[i]),
					static_cast<unsigned int>(hasNext ? pSrc32[i+1] : 0));
				++errorCount;
			}
		}
	}

	SafeDeleteArray(pSrc32);
	SafeDeleteArray(pSrc08);
	SafeDeleteArray(pDecomp);
	SafeDeleteArray(pDst08);
	SafeDeleteArray(pDst32);

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfFormat()
//
//	This test intentionally generates errors, which are recorded in the log
//	file -- the presence of those logged errors does not indicate that this
//	test failed.  This test only fails if the correct "invalid" string is
//	not generated.
//
bool TestUtfFormat(void)
{
	U32 errorCount = 0;

	Utf08_t buffer[128];

	UtfFormat fmt;

	// Make certain "%%" is converted to "%" instead of being interpreted
	// as a parameter reference.  This should not insert "bad" into the
	// output buffer.
	fmt.Reset();
	fmt.AddString("bad");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("ab%%1;e"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("ab%1;e"))) {
		++errorCount;
	}

	// Test whether FourCCs are being converted to text properly.
	// Note that this operation is byte-order dependent, so it will fail
	// on any Big-Endian machine.
	fmt.Reset();
	fmt.AddFourCC(QzMakeFourCC('a','b','c','d'));
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"0x64636261=abcd\""))) {
		++errorCount;
	}

	fmt.Reset();
	fmt.AddString("");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"\""))) {
		++errorCount;
	}

	fmt.Reset();
	fmt.AddString("");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1w1;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\" \""))) {
		++errorCount;
	}

	// Default right-alignment of strings.
	fmt.Reset();
	fmt.AddString("jinkies");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1w10;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"   jinkies\""))) {
		++errorCount;
	}

	// Force left-alignment of strings.
	fmt.Reset();
	fmt.AddString("jinkies");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1-w10;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"jinkies   \""))) {
		++errorCount;
	}

	// Default right-alignment of ints.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1w3;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"  9\""))) {
		++errorCount;
	}

	// Force left-alignment of ints.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1-w3;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"9  \""))) {
		++errorCount;
	}

	// Truncation of strings: extra chars at end are discarded.
	fmt.Reset();
	fmt.AddString("zeplin");
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1m4;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"zepl\""))) {
		++errorCount;
	}

	// Truncation of numbers: entire value is replaced with asterisks.
	fmt.Reset();
	fmt.AddInt(12345);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("\"%1m3;\""));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("\"***\""))) {
		++errorCount;
	}

	// Truncate a float.  The digits after the decimal should be stripped
	// off to make it fit.
	fmt.Reset();
	fmt.AddFloat(123.456f);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1m5;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("123.4"))) {
		++errorCount;
	}

	// Truncate a float some more.  Now there is no digits left after the
	// decimal point, so that char should also have been stripped off.
	fmt.Reset();
	fmt.AddFloat(123.456f);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1m4;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("123"))) {
		++errorCount;
	}

	// Truncate a float some more.  Now there is no room left for the decimal
	// point either, but that makes the number short enough to fit within the
	// field, so it gets printed correctly.
	fmt.Reset();
	fmt.AddFloat(123.456f);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1m3;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("123"))) {
		++errorCount;
	}

	// Truncate a float some more.  Now the number is too large to fit,
	// resulting in "**" to indicate maxWidth is too small for the value
	// that needs to be printed.
	fmt.Reset();
	fmt.AddFloat(123.456f);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1m2;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("**"))) {
		++errorCount;
	}

	// Reference an invalid parameter.  There is no "%2;".
	fmt.Reset();
	fmt.AddInt(876);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1;%2;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("876<INVALID PARAM NUM>"))) {
		++errorCount;
	}

	// Force a '+' in front of an int.
	fmt.Reset();
	fmt.AddInt(1);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1+;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("+1"))) {
		++errorCount;
	}

	// Force a '+' in front of a float.
	fmt.Reset();
	fmt.AddFloat(1.123f);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1+w3;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("+1.123"))) {
		++errorCount;
	}

	// Trying to force a '+' in front of unsigned numbers also works.
	fmt.Reset();
	fmt.AddInt(1);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1+u;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("+1"))) {
		++errorCount;
	}

	// Trying to force a '+' in front of hex numbers has no effect.
	fmt.Reset();
	fmt.AddInt(0x123);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1+x;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("123"))) {
		++errorCount;
	}

	// Print a signed negative number.
	fmt.Reset();
	fmt.AddInt(-1);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("-1"))) {
		++errorCount;
	}

	// Print an unsigned negative number, which becomes really large.
	fmt.Reset();
	fmt.AddInt(-1);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1u;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("4294967295"))) {
		++errorCount;
	}

	// Truncate a string parameter.
	fmt.Reset();
	fmt.AddString("we go");
	fmt.Generate(buffer, 10, reinterpret_cast<const Utf08_t*>("around %1;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("around we"))) {
		++errorCount;
	}

	// Parameters can be accessed in any order, and used multiple times.
	fmt.Reset();
	fmt.AddChar('a');
	fmt.AddChar('b');
	fmt.AddChar('c');
	fmt.AddChar('d');
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%4;%3;%2;%1;%2;%3;%4;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("dcbabcd"))) {
		++errorCount;
	}

	// Verify mistyped parameter reference.  The '%' will be lost, while the
	// characters after it are printed (even if they're supposed to be part
	// of a parameter field).
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("boo%a;ya"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("booa;ya"))) {
		++errorCount;
	}

	// Verify mistyped parameter reference.  Parameter numbers are one-based,
	// not zero-based, so '0' is not a valid reference.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("boo%0;ya"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("boo0;ya"))) {
		++errorCount;
	}

	// Verify mistyped parameter reference.  The ';' is missing, which will
	// result in an error to the log.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("where %1xw8"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("where <BAD PARAM FORMAT>"))) {
		++errorCount;
	}

	// This is the same, but it should realize that the parameter is badly
	// formatted when it sees the ' ' and resume printing from there.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("where %1xw8 zoogy"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("where <BAD PARAM FORMAT> zoogy"))) {
		++errorCount;
	}

	// Same thing again, but it should print the second parameter correctly.
	fmt.Reset();
	fmt.AddInt(9);
	fmt.AddInt(4);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("where %1xw8%2; zoogy"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("where <BAD PARAM FORMAT>4 zoogy"))) {
		++errorCount;
	}

	// Check that field padding works.  This uses default ' ' for padding.
	fmt.Reset();
	fmt.AddInt(456);
	fmt.AddInt(4123);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1w8; %2-w6;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>("     456 4123  "))) {
		++errorCount;
	}

	// Again check the padding, this time with explicit padding symbols.
	fmt.Reset();
	fmt.AddInt(456);
	fmt.AddInt(4123);
	fmt.Generate(buffer, ArraySize(buffer), reinterpret_cast<const Utf08_t*>("%1w8p.; %2-p_w6;"));

	if (0 != UtfCompareBytewise(buffer, reinterpret_cast<const Utf08_t*>(".....456 4123__"))) {
		++errorCount;
	}

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfCasing()
//
//	Verify all case mappings.  Convert upper to lower, then back again (or
//	vice versa), and verify that the result is the original symbol.
//
//	Note that a few characters are lower- or upper-case, but do not have a
//	corresponding upper- or lower-case letter.  These safely map back to
//	themselves, so doing this test will still work on all of those symbols.
//
bool TestUtfCasing(void)
{
	U32 errorCount = 0;

	for (U32 i = 0; i < 0xFFFF; ++i) {
		// Only test characters that are in the symbol table.
		// Technically, this is unnecessary, since UtfToLower/Upper will
		// return the given character unchanged if the case mapping cannot
		// be performed.
		if (UtfIsKnown(i)) {
			if (UtfIsLower(i)) {
				Utf32_t upperCase = UtfToUpper(i);
				Utf32_t lowerCase = UtfToLower(upperCase);

				if (lowerCase != i) {
					++errorCount;
				}
			}
			else if (UtfIsUpper(i)) {
				Utf32_t lowerCase = UtfToLower(i);
				Utf32_t upperCase = UtfToUpper(lowerCase);

				if (upperCase != i) {
					++errorCount;
				}
			}
		}
	}

	return (0 == errorCount);
}

/*
/////////////////////////////////////////////////////////////////////////////
//
//	Czech
//
//	"ch" is a compression after 'h'
//	"c + caron" is unique letter, between 'c' and 'd'
//	"d + caron"
//	"e + caron"
//	"n + caron"
//	"r + caron"
//	"s + caron" is unique letter, between 's' and 't'
//	"t + caron"
//	"u + ring above"
//	"z + caron" is unique letter, after 'z'
//
static Utf16_t g_SortCzech[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'y', 'e' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'z', 'y', 's', 'k' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' }			// Zeneva
};


/////////////////////////////////////////////////////////////////////////////
//
//	Danish and Norwegian
//
//	"u + diaeresis" is equivalent to 'y'
//	"a + diaeresis" is unique letter, after 'z' and before "o + diaeresis"
//	"o + diaeresis" is unique letter, after "a + diaeresis"
//
static Utf16_t g_SortDanishNorwegian[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'y', 'e' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'z', 'y', 's', 'k' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' }				// andere
};
*/

/////////////////////////////////////////////////////////////////////////////
//
//	Default Table
//
//	This represents the default Unicode sorting order.  Most languages are
//	based on this -- some languages require a few custom exceptions for
//	specific accented characters.
//
//	Each row holds one test word as UTF-16 code units.  Every word is
//	shorter than the 16-element row, and the elements left out of the
//	initializer are zero-filled, which is what terminates each string.
//
//	The rows are listed in the order the words are expected to sort:
//	TestUtfSorting() compares every adjacent pair and requires the earlier
//	row to compare less than the later one.  TestUtfFile() also uses these
//	rows as the lines it writes to, and reads back from, its scratch files.
//
//	The trailing comment on a row shows the word with its accents removed.
//
static Utf16_t g_SortDefault[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'y', 'e' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'z', 'y', 's', 'k' }
};


/*
/////////////////////////////////////////////////////////////////////////////
//
//	Finnish and Swedish
//
//	"u + diaeresis" is equivalent to 'y'
//	'w' is equivalent to 'v'
//	"a + diaeresis" is unique letter, after both 'z' and "a + ring above"
//	"o + diaeresis" is unique letter, after "a + diaeresis"
//	"s + caron"
//	"z + caron"
//
static Utf16_t g_SortFinnishSwedish[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'y', 'e' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'w', 'o', 'o', 'd' },
	{ 'v', 'o', 'x' },
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'z', 'y', 's', 'k' },
	// there should be a test for "a + ring above", which comes between 'z' and "a + diaeresis"
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' }				// andere
};
*/


/////////////////////////////////////////////////////////////////////////////
//
//	French
//
//	diacritics sort right-to-left
//	"oe"
//	"Y + diaeresis"
//
//	Same words and row format as g_SortDefault, listed in the order they
//	are expected to sort when the language is French.  TestUtfSorting()
//	compares every adjacent pair of rows.
//
//	The only difference from the default table is the order of the four
//	"cote" variants: the middle two rows are swapped, so the word whose
//	accent is on the 'o' comes before the word whose accent is on the
//	final 'e'.  No row in this table contains "oe" or "Y + diaeresis", so
//	those two rules are not exercised here.
//
static Utf16_t g_SortFrench[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 0x00F4, 't', 'e' },						// cote (accent on the 'o' only)
	{ 'c', 'o', 't', 0x00E9 },						// cote (accent on the final 'e' only)
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote (both accents)
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'y', 'e' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'z', 'y', 's', 'k' }
};

/*
/////////////////////////////////////////////////////////////////////////////
//
//	Lithuanian
//
//	'y' is equivalent to 'i'
//	"c + caron" is unique letter, between 'c' and 'd'
//	"s + caron" is unique letter, between 's' and 't'
//	"z + caron" is unique letter, after 'z'
//	"a + ogonek"
//	"e + ogonek"
//	"e + dot above"
//	"i + ogonek"
//	"u + macron"
//	"u + ogonek"
//
static Utf16_t g_SortLithuanian[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'y', 'e', 'n' },
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'y', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'u', 0x010D },							// luc
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'z', 'y', 's', 'k' },
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' }			// Zeneva
};
*/

/////////////////////////////////////////////////////////////////////////////
//
//	Polish
//
//	"s + acute" is unique letter, between 's' and 't'
//	"a + ogonek"
//	"c + acute"
//	"e + ogonek"
//	"l + stroke"
//	"n + acute"
//	"z + acute"
//	"z + dot above"
//
//	Same words and row format as g_SortDefault, listed in the order they
//	are expected to sort when the language is Polish.  TestUtfSorting()
//	compares every adjacent pair of rows.
//
//	The only difference from the default table is the row that starts
//	with "s + acute" (0x015B): it moves from before "subtle" to after
//	"symbol", i.e. after every word that starts with a plain 's'.  None
//	of the other letters listed above appears in any row, so they are not
//	exercised by this table.
//
static Utf16_t g_SortPolish[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'y', 'e' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 0x015B, 'l', 'u', 'b' },						// slub ("s + acute" sorts after plain 's')
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'z', 'y', 's', 'k' }
};


/////////////////////////////////////////////////////////////////////////////
//
//	Spanish
//
//	"ch" should come between 'c' and 'd', but is being phased out
//	"ll" should come between 'l' and 'm', but is being phased out
//	"n + tilde" is unique letter, after 'n'
//
//	Same words and row format as g_SortDefault, listed in the order they
//	are expected to sort when the language is Spanish.  TestUtfSorting()
//	compares every adjacent pair of rows.
//
//	The only difference from the default table is that "pint" now comes
//	before the word spelled with "n + tilde" (0x00F1).  The "ch" and "ll"
//	words sit in the same positions as in the default table, so this
//	table expects no special treatment for those letter pairs.
//
static Utf16_t g_SortSpanish[][16] =
{
	{ 'a', 'n', 'd', 'e', 'r', 'e' },
	{ 0x00E4, 'n', 'd', 'e', 'r', 'e' },			// andere
	{ 'c', 'h', 'a', 'q', 'u', 'e' },
	{ 'c', 'h', 'e', 'm', 'i', 'n' },
	{ 'c', 'o', 't', 'e' },
	{ 'c', 'o', 't', 0x00E9 },						// cote
	{ 'c', 0x00F4, 't', 'e' },						// cote
	{ 'c', 0x00F4, 't', 0x00E9 },					// cote
	{ 0x010D, 'u', 0x010D, 0x0113, 't' },			// cucet
	{ 'C', 'z', 'e', 'c', 'h' },
	{ 'h', 'i', 0x0161, 'a' },						// hisa
	{ 'i', 'r', 'd', 'i', 's', 'c', 'h' },
	{ 'l', 0x00E4, 'v', 'i' },						// lavi
	{ 'l', 'i', 'e' },
	{ 'l', 'i', 'r', 'e' },
	{ 'l', 'l', 'a', 'm', 'a' },
	{ 'l', 0x00F5, 'u', 'g' },						// loug
	{ 'L', 0x00F6, 'w', 'e', 'n' },					// Lowen
	{ 'l', 0x00F2, 'z', 'a' },						// loza
	{ 'L', 0x00FC, 'b', 'e', 'c', 'k' },			// Lubeck
	{ 'l', 'u', 0x010D },							// luc
	{ 'l', 'u', 'c', 'k' },
	{ 'l', 'y', 'e' },
	{ 'M', 0x00E4, 'n', 'n', 'e', 'r' },			// Manner
	{ 'm', 0x00E0, 0x0161, 't', 'a' },				// masta
	{ 'm', 0x00EE, 'r' },							// mir
	{ 'm', 0x00F6, 'c', 'h', 't', 'e', 'n' },		// mochten
	{ 'm', 'y', 'n', 'd', 'i', 'g' },
	{ 'p', 'i', 'n', 't' },
	{ 'p', 'i', 0x00F1, 'a' },						// pina ("n + tilde" sorts after plain 'n')
	{ 'p', 'y', 'l', 'o', 'n' },
	{ 's', 0x00E4, 'm', 't', 'l', 'i', 'c', 'h' },	// samtlich
	{ 0x0161, 0x00E0, 'r', 'a', 'n' },				// saran
	{ 's', 'a', 'v', 'o', 'i', 'r' },
	{ 0x0160, 'e', 'r', 'b', 0x016B, 'r', 'a' },	// Serbura
	{ 'S', 'i', 'e', 't', 'l', 'a' },
	{ 0x015B, 'l', 'u', 'b' },						// slub
	{ 's', 'u', 'b', 't', 'l', 'e' },
	{ 's', 'y', 'm', 'b', 'o', 'l' },
	{ 'v', 0x00E4, 'g', 'a' },						// vaga
	{ 'v', 'e', 'r', 'k', 'e', 'h', 'r', 't' },
	{ 'v', 'o', 'x' },
	{ 'w', 'a', 'f', 'f', 'l', 'e' },
	{ 'w', 'o', 'o', 'd' },
	{ 'y', 'e', 'n' },
	{ 'y', 'u', 'a', 'n' },
	{ 'y', 'u', 'c', 'c', 'a' },
	{ 0x017E, 'a', 'l' },							// zal
	{ 0x017E, 'e', 'n', 'a' },						// zena
	{ 0x017D, 'e', 'n', 0x0113, 'v', 'a' },			// Zeneva
	{ 'z', 'o', 'o' },
	{ 'Z', 0x00FC, 'r', 'i', 'c', 'h' },			// Zurich
	{ 'Z', 'v', 'i', 'e', 'd', 'r', 'i', 'j', 'a' },
	{ 'z', 'y', 's', 'k' }
};


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfSorting()
//
//	Some languages have different sorting rules.  Switch between some of the
//	languages and spot test that certain cannonical strings get sorted into
//	the correct order.
//
//	These tables are old and not exhaustive, so they're not testing all of
//	the exceptions that should appear within each specific language.
//
bool TestUtfSorting(void)
{
	U32 errorCount = 0;

	Utf08_t scratch1[64];
	Utf08_t scratch2[64];

	// The first test will use English, which maps to the default sorting
	// tables used by most languages.
	UtfSetLanguage(LangID_English);

	for (U32 i = 0; i < ArraySize(g_SortDefault) - 1; ++i) {
		UtfConvert16to08(scratch1, ArraySize(scratch1), g_SortDefault[i]);
		UtfConvert16to08(scratch2, ArraySize(scratch2), g_SortDefault[i+1]);

		S32 result = UtfCompareLexical(scratch1, scratch2);

		if (result >= 0) {
			++errorCount;
		}

		result = UtfCompareLexical(scratch2, scratch1);

		if (result <= 0) {
			++errorCount;
		}
	}

	// French is special since diacritics need to be compared right-to-left.
	UtfSetLanguage(LangID_French);

	Utf08_t emptyA[2];
	Utf08_t emptyB[2];
	emptyA[0] = '\0';
	emptyB[0] = '\0';

	// Throw in one extra special comparison for French.  Since it needs to
	// compare the strings in reverse order for one pass, send in a pair of
	// empty strings to make certain the code doesn't fall off the start of
	// the empty buffers when traversing in reverse order.
	if (0 != UtfCompareLexical(emptyA, emptyB)) {
		++errorCount;
	}

	for (U32 i = 0; i < ArraySize(g_SortFrench) - 1; ++i) {
		UtfConvert16to08(scratch1, ArraySize(scratch1), g_SortFrench[i]);
		UtfConvert16to08(scratch2, ArraySize(scratch2), g_SortFrench[i+1]);

		S32 result = UtfCompareLexical(scratch1, scratch2);

		if (result >= 0) {
			++errorCount;
		}

		result = UtfCompareLexical(scratch2, scratch1);

		if (result <= 0) {
			++errorCount;
		}
	}

	// Polish has different symbol ordering for "s + acute".
	UtfSetLanguage(LangID_Polish);

	for (U32 i = 0; i < ArraySize(g_SortPolish) - 1; ++i) {
		UtfConvert16to08(scratch1, ArraySize(scratch1), g_SortPolish[i]);
		UtfConvert16to08(scratch2, ArraySize(scratch2), g_SortPolish[i+1]);

		S32 result = UtfCompareLexical(scratch1, scratch2);

		if (result >= 0) {
			++errorCount;
		}

		result = UtfCompareLexical(scratch2, scratch1);

		if (result <= 0) {
			++errorCount;
		}
	}

	// Spanish has different symbol ordering for "n + tilde".
	UtfSetLanguage(LangID_Spanish);

	for (U32 i = 0; i < ArraySize(g_SortSpanish) - 1; ++i) {
		UtfConvert16to08(scratch1, ArraySize(scratch1), g_SortSpanish[i]);
		UtfConvert16to08(scratch2, ArraySize(scratch2), g_SortSpanish[i+1]);

		S32 result = UtfCompareLexical(scratch1, scratch2);

		if (result >= 0) {
			++errorCount;
		}

		result = UtfCompareLexical(scratch2, scratch1);

		if (result <= 0) {
			++errorCount;
		}
	}

	UtfSetLanguage(LangID_English);

	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfFile()
//
bool TestUtfFile(void)
{
	U32 errorCount = 0;

	QzTxtReader reader;
	QzTxtWriter writer;

	Utf08_t buffer08[64];
	Utf08_t converted[64];

	const Utf08_t *pFileName08 = reinterpret_cast<const Utf08_t*>("Scratch/test08.txt");
	const Utf08_t *pFileName16 = reinterpret_cast<const Utf08_t*>("Scratch/test16.txt");
	const Utf08_t *pFileName32 = reinterpret_cast<const Utf08_t*>("Scratch/test32.txt");

	//
	// Write UTF-8 file.
	//

	if (false == writer.CreateNew(pFileName08, UtfEncode_UTF_8)) {
		return false;
	}

	for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
		UtfConvert16to08(buffer08, ArraySize(buffer08), g_SortDefault[i]);

		writer.WriteLine(buffer08);
	}

	writer.Close();


	//
	// Write UTF-16 file.
	//

	if (false == writer.CreateNew(pFileName16, UtfEncode_UTF_16)) {
		return false;
	}

	for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
		UtfConvert16to08(buffer08, ArraySize(buffer08), g_SortDefault[i]);

		writer.WriteLine(buffer08);
	}

	writer.Close();


	//
	// Write UTF-32 file.
	//

	if (false == writer.CreateNew(pFileName32, UtfEncode_UTF_32)) {
		return false;
	}

	for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
		UtfConvert16to08(buffer08, ArraySize(buffer08), g_SortDefault[i]);

		writer.WriteLine(buffer08);
	}

	writer.Close();


	//
	// Read UTF-8 file.
	//

	if (reader.LoadFile(pFileName08)) {
		for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
			if (reader.ReadLine(buffer08, ArraySize(buffer08)) > 0) {
				UtfConvert16to08(converted, ArraySize(converted), g_SortDefault[i]);
				if (0 != UtfCompareBytewise(buffer08, converted)) {
					++errorCount;
				}
			}
			else {
				++errorCount;
			}
		}
	}
	else {
		++errorCount;
	}


	//
	// Read UTF-16 file.
	//

	if (reader.LoadFile(pFileName16)) {
		for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
			if (reader.ReadLine(buffer08, ArraySize(buffer08)) > 0) {
				UtfConvert16to08(converted, ArraySize(converted), g_SortDefault[i]);
				if (0 != UtfCompareBytewise(buffer08, converted)) {
					++errorCount;
				}
			}
			else {
				++errorCount;
			}
		}
	}
	else {
		++errorCount;
	}


	//
	// Read UTF-32 file.
	//

	if (reader.LoadFile(pFileName32)) {
		for (U32 i = 0; i < ArraySize(g_SortDefault); ++i) {
			if (reader.ReadLine(buffer08, ArraySize(buffer08)) > 0) {
			}
			else {
				++errorCount;
			}
		}
	}
	else {
		++errorCount;
	}


	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfHtmlConvert()
//
bool TestUtfHtmlConvert(void)
{
	U32 errorCount = 0;

	if (false == UtfVerifyHtmlConvert()) {
		return false;
	}

	Utf08_t scratch[256];

	// Insert a numerical code.
	UtfHtmlConvert(scratch, 64, reinterpret_cast<const Utf08_t*>("abc&#65;BC"));

	if (0 != UtfCompareBytewise(scratch, reinterpret_cast<const Utf08_t*>("abcABC"))) {
		++errorCount;
	}


	// Insert a symbolic code.
	UtfHtmlConvert(scratch, 64, reinterpret_cast<const Utf08_t*>("abc&amp;BC"));

	if (0 != UtfCompareBytewise(scratch, reinterpret_cast<const Utf08_t*>("abc&BC"))) {
		++errorCount;
	}


	// Stray ampersands will be copied as-is.
	UtfHtmlConvert(scratch, 64, reinterpret_cast<const Utf08_t*>("Donjons & Drakanoids"));

	if (0 != UtfCompareBytewise(scratch, reinterpret_cast<const Utf08_t*>("Donjons & Drakanoids"))) {
		++errorCount;
	}


	// Try to insert an unknown code, which will fail and copy the original
	// string without any changes.
	UtfHtmlConvert(scratch, 64, reinterpret_cast<const Utf08_t*>("abc&shmup;BC"));

	if (0 != UtfCompareBytewise(scratch, reinterpret_cast<const Utf08_t*>("abc&shmup;BC"))) {
		++errorCount;
	}


	// Try to insert a numerical code that is not a valid Unicode symbol.
	// This should fail and copy the original string without changes.
	UtfHtmlConvert(scratch, 64, reinterpret_cast<const Utf08_t*>("abc&#65535;BC"));

	if (0 != UtfCompareBytewise(scratch, reinterpret_cast<const Utf08_t*>("abc&#65535;BC"))) {
		++errorCount;
	}


	// Test copying data to short buffers.  Since the e-acute symbol takes
	// two bytes to store, the entire e-acute symbol will be discarded if
	// the buffer is less than 6 bytes long.  (The e-acute symbol, 0xE9,
	// is converted to the byte sequence 0xC3 0xA9 when stored as UTF-8.)
	Utf08_t shorty8[12] = { 'a', 'b', 'c', Utf08_t(0xC3), Utf08_t(0xA9), 'B', 'C', '\0' };
	Utf08_t shorty7[12] = { 'a', 'b', 'c', Utf08_t(0xC3), Utf08_t(0xA9), 'B', '\0' };
	Utf08_t shorty6[12] = { 'a', 'b', 'c', Utf08_t(0xC3), Utf08_t(0xA9), '\0' };
	Utf08_t shorty5[12] = { 'a', 'b', 'c', '\0' };
	Utf08_t shorty4[12] = { 'a', 'b', 'c', '\0' };
	Utf08_t shorty3[12] = { 'a', 'b', '\0' };

	UtfHtmlConvert(scratch, 8, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty8)) {
		++errorCount;
	}

	UtfHtmlConvert(scratch, 7, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty7)) {
		++errorCount;
	}

	UtfHtmlConvert(scratch, 6, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty6)) {
		++errorCount;
	}

	UtfHtmlConvert(scratch, 5, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty5)) {
		++errorCount;
	}

	UtfHtmlConvert(scratch, 4, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty4)) {
		++errorCount;
	}

	UtfHtmlConvert(scratch, 3, reinterpret_cast<const Utf08_t*>("abc&eacute;BC"));
	if (0 != UtfCompareBytewise(scratch, shorty3)) {
		++errorCount;
	}


	return (0 == errorCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	TestUtfFile()
//
bool TestUtfWildcard(void)
{
	U32 errorCount = 0;

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("abc"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("Abc"), (Utf08_t*)("abc"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("aBc"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("abd"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("?bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("a?c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("ab?"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("abc?"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("*abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("a*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("ab*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("abc*"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("a*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("a*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("**abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("a**bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("ab**c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("abc**"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("*"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abc"), (Utf08_t*)("**"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abcdef"), (Utf08_t*)("a?*def"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abcdef"), (Utf08_t*)("a?*df"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abcde"), (Utf08_t*)("A*c*e"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abcde"), (Utf08_t*)("a*C*e"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("abcde"), (Utf08_t*)("a*c*E"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("ABCDE"), (Utf08_t*)("A*c*e"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("ABCDE"), (Utf08_t*)("a*C*e"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcard((Utf08_t*)("ABCDE"), (Utf08_t*)("a*c*E"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abbcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcard((Utf08_t*)("abcbcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	////////////////

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("Abc"), (Utf08_t*)("abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("aBc"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("abd"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("?bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("a?c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("ab?"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("abc?"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("*abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("a*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("ab*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("abc*"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("a*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("*bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("a*c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("**abc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("a**bc"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("ab**c"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("abc**"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("*"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abc"), (Utf08_t*)("**"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcdef"), (Utf08_t*)("a?*def"))) {
		++errorCount;
	}

	if (false != UtfCompareWildcardNocase((Utf08_t*)("abcdef"), (Utf08_t*)("a?*df"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcde"), (Utf08_t*)("A*c*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcde"), (Utf08_t*)("a*C*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcde"), (Utf08_t*)("a*c*E"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("ABCDE"), (Utf08_t*)("A*c*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("ABCDE"), (Utf08_t*)("a*C*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("ABCDE"), (Utf08_t*)("a*c*E"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abbcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	if (false == UtfCompareWildcardNocase((Utf08_t*)("abcbcde"), (Utf08_t*)("a*c*e"))) {
		++errorCount;
	}

	return (0 == errorCount);
}




