/////////////////////////////////////////////////////////////////////////////
//
//	File: QzTxtWriter.cpp
//
//	$Header: /TS/TsFile/QzTxtWriter.cpp  7  2009/9/7 3:18:38p  Lee $
//
//
//	Handles writing text files.  This can take the internal UTF-8 format and
//	write it to a suitably formatted file.  The default is that the file is
//	also UTF-8, so no extra processing is required.
//
//	This can write ASCII files, but must filter out any symbol that requires
//	more than 8 bits to store.  No attempt is made to map Unicode symbols to
//	ASCII, since most of them have no ASCII equivalent.  At best, it would
//	have to strip off any accents and print only the base characters.
//
//	It can also write files in UTF-16 and UTF-32 format, but that requires
//	expanding the UTF-8 codes, which generally results in a much larger file.
//
//	The assumption is that everything written is a line of text, so
//	WriteLine() and WriteFormat() automatically append a CR-LF marker to
//	whatever they write.  Write() itself outputs the text as given and does
//	not add a line terminator.
//
/////////////////////////////////////////////////////////////////////////////


#include "QzCommon.h"
#include "QzTxtWriter.h"

#include <vector>


#ifdef USE_MALLOC_MACRO
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif


/////////////////////////////////////////////////////////////////////////////
//
//	constructor
//
//	Starts out with no file attached, UTF-8 selected as the output
//	encoding, and a byte count of zero.  Nothing is opened until
//	CreateNew() is called.
//
QzTxtWriter::QzTxtWriter(void)
	: m_pFile(NULL)
	, m_UtfFormat(UtfEncode_UTF_8)
	, m_FileSize(0)
{
	// All state is set up by the initializer list.
}


/////////////////////////////////////////////////////////////////////////////
//
//	destructor
//
//	Releases the file, if one is still open, by way of Close().
//
QzTxtWriter::~QzTxtWriter(void)
{
	// Close() is a no-op when no file is attached, so no check is needed.
	this->Close();
}


/////////////////////////////////////////////////////////////////////////////
//
//	CreateNew()
//
//	This will create a new file, always overwriting any existing file of the
//	same name.
//
bool QzTxtWriter::CreateNew(const Utf08_t filename[], U32 utfFormat)
{
	Close();

	m_UtfFormat = utfFormat;
	m_FileSize  = 0;
	m_pFile     = QzFileOpen(filename, QzFileOpen_Write);

	if (NULL == m_pFile) {
		return false;
	}

	// Need to place a marker at the start of the file to indicate which UTF
	// encoding is being used.
	Utf08_t marker[8];
	U32 byteCount = UtfWriteByteOrderMark(marker, utfFormat);

	if (byteCount > 0) {
		fwrite(marker, 1, byteCount, m_pFile);

		m_FileSize += byteCount;
	}

	return true;
}


/////////////////////////////////////////////////////////////////////////////
//
//	Close()
//
//	Closes the file if one is open and forgets the handle.  Safe to call
//	when nothing is open, and safe to call more than once.
//
void QzTxtWriter::Close(void)
{
	// Nothing to release.
	if (m_pFile == NULL) {
		return;
	}

	fclose(m_pFile);

	// Clear the handle so later calls (including the destructor) do not
	// try to close it a second time.
	m_pFile = NULL;
}


/////////////////////////////////////////////////////////////////////////////
//
//	Write()
//
void QzTxtWriter::Write(const Utf08_t line[], S32 byteCount)
{
	if (NULL == m_pFile) {
		return;
	}

	if (byteCount < 0) {
		byteCount = UtfByteCount(line);
	}

	if (0 == byteCount) {
		// do nothing;
	}

	else if (UtfEncode_UTF_8 == m_UtfFormat) {
		// Over-allocate in case normalization needs to increase the number
		// of symbols in the byte stream.
		U32 scratchLength = 2 * byteCount;

		Utf08_t *pScratch = reinterpret_cast<Utf08_t*>(alloca(scratchLength));

		// To be safe, we need to normalize the output.  This really only
		// matters for stand-alone diacritics.
		U32 outCount = UtfNormalize08to08(pScratch, scratchLength, line);

		if (outCount > 0) {
			fwrite(pScratch, 1, outCount, m_pFile);

			m_FileSize += outCount;
		}
	}

	else if (UtfEncode_UTF_16 == m_UtfFormat) {
		// Over-allocate in case normalization needs to increase the number
		// of symbols in the byte stream.
		U32 scratchLength = 2 * byteCount;

		Utf16_t *pScratch = reinterpret_cast<Utf16_t*>(alloca(scratchLength * sizeof(Utf16_t)));

		// To be safe, we need to normalize the output.  This really only
		// matters for stand-alone diacritics.
		U32 outCount = UtfNormalize08to16(pScratch, scratchLength, line);

		if (outCount > 0) {
			fwrite(pScratch, 2, outCount, m_pFile);

			m_FileSize += 2 * outCount;
		}
	}

	else if (UtfEncode_UTF_32 == m_UtfFormat) {
		// Over-allocate in case normalization needs to increase the number
		// of symbols in the byte stream.
		U32 scratchLength = 2 * byteCount;

		Utf32_t *pScratch = reinterpret_cast<Utf32_t*>(alloca(scratchLength * sizeof(Utf32_t)));

		// To be safe, we need to normalize the output.  This really only
		// matters for stand-alone diacritics.
		U32 outCount = UtfNormalize08to32(pScratch, scratchLength, line);

		if (outCount > 0) {
			fwrite(pScratch, 4, outCount, m_pFile);

			m_FileSize += 4 * outCount;
		}
	}

	// Assume anything else is ASCII.
	else {
		// Allocate a buffer that is the same size as the input.  We will
		// need to discard symbols when converting to ASCII, so the total
		// number of bytes may decrease, but it can never increase.
		Utf08_t *pScratch = reinterpret_cast<Utf08_t*>(alloca(byteCount));

		U32 srcOffset = 0;
		U32 dstOffset = 0;

		while (srcOffset < U32(byteCount)) {
			Utf32_t symbol = UtfNextChar(line, srcOffset);

			// Ignore any character that requires more than 8 bits.  All of
			// the 8-bit codes map to their ASCII equivalents (well, except
			// for the ones betwee 0x80 and 0xA0, but those are removed on
			// input during composition).
			if (symbol <= 0xFF) {
				pScratch[dstOffset++] = Utf08_t(symbol);
			}
		}

		if (dstOffset > 0) {
			fwrite(pScratch, 1, dstOffset, m_pFile);

			m_FileSize += dstOffset;
		}
	}
}


/////////////////////////////////////////////////////////////////////////////
//
//	WriteFormat()
//
void QzTxtWriter::WriteFormat(char pattern[], UtfFormat fmt)
{
	Utf08_t buffer[1024];

	// Reserve 2 bytes at the end of the buffer to hold the '\r\n' sequence.
	U32 byteCount = fmt.Generate(buffer, ArraySize(buffer) - 2, reinterpret_cast<Utf08_t*>(pattern));

	buffer[byteCount++] = '\r';
	buffer[byteCount++] = '\n';
	buffer[byteCount  ] = '\0';

	Write(buffer, byteCount);
}


/////////////////////////////////////////////////////////////////////////////
//
//	WriteLine()
//
//	Writes the given text followed by a CR-LF pair.  The text and the line
//	terminator go out as two separate Write() calls.
//
//	line - UTF-8 text.  No length is passed for it, so Write() is called
//	       with its default byteCount.
//
void QzTxtWriter::WriteLine(Utf08_t line[])
{
	const char *pEndOfLine = "\r\n";

	Write(line);

	// Exactly two bytes: the carriage return and the line feed.
	Write(reinterpret_cast<const Utf08_t*>(pEndOfLine), 2);
}



