/* 
 * E-XML Library:  For XML, XML-RPC, HTTP, and related.
 * Copyright (C) 2002-2008  Elias Ross
 * 
 * genman@noderunner.net
 * http://noderunner.net/~genman
 * 
 * 1025 NE 73RD ST
 * SEATTLE WA 98115
 * USA
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * $Id$
 */

package net.noderunner.exml;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;

/** 
 * This is an implementation of an XML input scanner.  This is not
 * synchronized.  It is fully buffered, so using a
 * <code>java.io.BufferedReader</code> around the underlying stream is not
 * necessary and can only hurt performance.
 */
public class XmlScanner
{
	private Reader reader;
	/** position in array, pointing at the next character to read */
	private int pos;
	/** buffer itself */
	private char cbuf[];
	/** amount to reserve, used for fill() */
	private int reserve;
	/** string pool for names */
	private StringPool stringPool;
	/** URI map, used for getting the URI from Name prefixes */
	private UriMap uriMap;

	/**
	 * Constructs an <code>XmlScanner</code> with specific buffer size and reserve
	 * size of at most 4.
	 */
	public XmlScanner(Reader reader, int size) {
		this(reader, size, Math.min(4, size));
	}

	/**
	 * Constructs an <code>XmlScanner</code> out of a string.
	 * @param xml an XML document
	 */
	public XmlScanner(String xml) {
		setReadString(xml);
		this.stringPool = new StringPool(8);
		this.uriMap = new UriMap();
	}

	/**
	 * Constructs an <code>XmlScanner</code> with specific buffer size and
	 * unread reserve size.
	 */
	public XmlScanner(Reader reader, int size, int reserve) {
		this.reader = reader;
		cbuf = new char[size];
		pos = cbuf.length;
		this.reserve = reserve;
		this.stringPool = new StringPool();
		this.uriMap = new UriMap();
	}

	/**
	 * Sets the underlying reader to a different reader.
	 */
	public void setReader(Reader reader) {
		this.reader = reader;
		pos = cbuf.length;
	}

	/**
	 * Sets the scanner to read a character String.
	 */
	public void setReadString(String xml) {
		this.reader = NullReader.getInstance();
		this.reserve = 0;
		this.cbuf = xml.toCharArray();
		this.pos = 0;
	}

	/**
	 * Closes the underlying reader.
	 */
	public void close() 
		throws IOException
	{
		reader.close();
	}

	/**
	 * Returns the next character.
	 * @return a character, or -1 if EOF
	 */
	public int read()
		throws IOException
	{
		if (pos < cbuf.length)
			return cbuf[pos++];
		fill();
		if (pos < cbuf.length)
			return cbuf[pos++];
		return -1;
	}

	/** 
	 * Returns the number of characters that can be unread.
	 */
	public int canUnread() {
		return pos;
	}

	/**
	 * Unreads a character.
	 * @param c character, cannot be less than zero
	 */
	public void unread(int c)
		throws IOException
	{
		if (c < 0)
			throw new IllegalArgumentException("Cannot unread " + c);
		if (pos == 0)
			throw new IOException("Out of space unreading " + c);
		cbuf[--pos] = (char)c;
	}
	
	/**
	 * Fills in the buffer if it's empty.
	 * TODO:  Should check for invalid XML characters
	 */
	void fill()
		throws IOException
	{
		if (pos != cbuf.length)
			return;
		int want = pos - reserve;
		int take = reader.read(cbuf, reserve, want);
		if (take <= 0)
			return;
		pos -= take;
		if (take < want) {
			System.arraycopy(cbuf, reserve, cbuf, pos, take);
		}
	}

	/**
	 * Determines an event from a buffer.
	 */
	static int determineEvent(final char buf[], final int off)
	{
		char c = buf[off + 1];
		if (c == '/') {
			return XmlEvent.ETAG;
		} else if (c == '!') {
			switch (buf[off + 2]) {
				case '-': return XmlEvent.COMMENT;
				case '[': 
					if (buf[off + 3] == 'C')
						return XmlEvent.CDSECT;
					else
						return XmlEvent.CONDITIONAL_SECT;
				case 'D': return XmlEvent.DOCTYPE_DECL;
				case 'E': 
					if (buf[off + 3] == 'L') return XmlEvent.ELEMENT_DECL;
					else return XmlEvent.ENTITY_DECL;
				case 'A': return XmlEvent.ATTLIST_DECL;
				case 'N': return XmlEvent.NOTATATION_DECL;
			}
		} else if (c == '?') {
			return XmlEvent.PI;
		} else if (XmlReader.FirstNameChar(c)) {
			return XmlEvent.STAG;
		}
		return XmlEvent.NONE;
	}

	/** 
	 * Translates a character reference, starting from the &amp;
	 * character.  Rewrites the stream with an appropriate character. 
	 *
	 * @return true if a character reference was found
	 * @throws XmlException if bad number, invalid character,
	 * excessive number, or EOF
	 */
	public boolean charRef()
		throws IOException, XmlException
	{
		int amp = read();
		if (amp == -1)
			throw new XmlException("EOF scanning for CharRef");
		if (amp != '&' || peek() != '#') {
			unread(amp);
			return false;
		}
		read(); // #

		int c = read(); // x or digit
		int radix = 10;
		StringBuffer sb = new StringBuffer(16);
		if (c == 'x') {
			radix = 16;
		} else {
			sb.append((char)c);
		}
		while (true) {
			c = read();
			if (c == ';') {
				try { 
					int result = Integer.parseInt(sb.toString(), radix);
					if (!XmlReader.Char(result))
						throw new XmlException("CharRef not valid Xml char " + result);
					unread((char)result);
					return true;
				} catch (NumberFormatException e) {
					throw new XmlException("Bad CharRef " + sb + " " + e);
				}
			}
			if (c == -1)
				throw new XmlException("CharRef expected ; after " + sb);
			if (sb.length() > XmlReaderPrefs.MAX_NAME_LEN)
				throw new XmlException("CharRef exceeded MAX_NAME_LEN, expected ; after " + sb);
			sb.append((char)c);
		}
	}

	/** 
	 * Translates a reference.  If a built-reference,
	 * rewrites the stream with an appropriate character, returns true.
	 * Must have at least 6 reserve characters.
	 * @throws XmlException if EOF reached
	 */
	public boolean translateReference()
		throws XmlException, IOException
	{
		if (pos >= cbuf.length - 6) {
			char rbuf[] = new char[6];
			int count = read(rbuf, 0, 6);
			if (count < 3)
				throw new XmlException("Not enough data in reference");
			unread(rbuf, 0, count);
		}
		// 0123 012345
		// &lt; &apos;
		char a = cbuf[pos+1];
		char b = cbuf[pos+2];
		if (a == '#') {
			charRef();	
			return true;
		}
		if (b == 't' && cbuf[pos+3] == ';') {
			if (a == 'l') { pos += 3; cbuf[pos] = '<'; return true; }
			if (a == 'g') { pos += 3; cbuf[pos] = '>'; return true; }
		}
		if (a == 'a') {
			if (b == 'm' && cbuf[pos+3] == 'p' && cbuf[pos+4] == ';')
				{ pos += 4; cbuf[pos] = '&'; return true; }
			if (b == 'p' && cbuf[pos+3] == 'o' && cbuf[pos+4] == 's' 
				&& cbuf[pos+5] == ';') { pos += 5; cbuf[pos] = '\''; return true; }
		}
		if (a == 'q' && b == 'u' && cbuf[pos+3] == 'o' && 
			cbuf[pos+4] == 't' && cbuf[pos+5] == ';')
				{ pos += 5; cbuf[pos] = '"'; return true; }
		return false;
	}

	/**
	 * Returns the next event (as defined in {@link XmlEvent}) or returns
	 * {@link XmlEvent#NONE} if there is no (valid?) event at this parse
	 * location.  {@link XmlEvent#EOD} is returned if end of file.
	 *
	 * @see XmlEvent
	 */
	public int peekEvent()
		throws IOException
	{
		int c = peek();
		if (c != '<') {
			if (c == -1)
				return XmlEvent.EOD;
			else if (c == '&') {
				return XmlEvent.REFERENCE;
			} else {
				return XmlEvent.CHARDATA;
			}
		}
		// If we don't have 4 characters available:  this is a rare case
		// (which is not so fast)
		if (pos >= cbuf.length - 4) {
			char rbuf[] = new char[4];
			int count = peek(rbuf, 0, 4);
			if (count < 4)
				return XmlEvent.EOD;
			return determineEvent(rbuf, 0);
		}
		return determineEvent(cbuf, pos);
	}

	/**
	 * Places in the fields information about the
	 * name found at the parse location.
	 * Re-uses an existing structure.
	 */
	public void readNamespace(NamespaceImpl ns)
		throws IOException, XmlException
	{
		String s = getName(true);
		if (s == null) {
			ns.clear();
			return;
		}
		if (s.length() == 0)
			throw new XmlException("Local name without prefix");
		if (peek() == ':') {
			read();
			ns.setPrefix(s);
			ns.setNamespaceURI(uriMap.get(s));
			s = getName(true);
			if (s == null)
				throw new XmlException("Prefix without local name");
			ns.setLocalName(s);
			ns.setName(null);
		} else {
			ns.setNamespaceURI(null);
			ns.setPrefix(null);
			ns.setLocalName(null);
			ns.setName(s);
		}
	}

	/**
	 * Creates a 'Name' until a non-name character is found.
	 * The name returned is the fully-qualified name.
	 * This String is guaranteed to be Object equal (with the <code>==</code>
	 * operator) to any created before it.
	 */
	public String getName()
		throws IOException, XmlException
	{
		return getName(false);
	}

	/**
	 * Creates a 'Name' until a non-name character is found.
	 * @param part true if we should stop parsing at the colon 
	 */
	private String getName(boolean part)
		throws IOException, XmlException
	{
		int c = peek(); 
		if (c == -1)
			return null;
		if (!XmlReader.FirstNameChar((char)c)) {
			return null;
		}
		String result = null;
		while (true) {
			fill();
			if (pos == cbuf.length)
				return stringPool.intern(result);
			for (int i = pos; i < cbuf.length; i++) {
				c = cbuf[i];
				if (!XmlReader.NameChar((char)c) || (c == ':' && part)) { // match
					if (result == null) {
						result = stringPool.intern(cbuf, pos, i - pos);
					} else {
						result += new String(cbuf, pos, i - pos);
						result = stringPool.intern(result);
					}
					pos = i;
					return result;
				}
			}
			if (result == null)
				result = new String(cbuf, pos, cbuf.length - pos);
			else
				result += new String(cbuf, pos, cbuf.length - pos);
			if (result.length() > XmlReaderPrefs.MAX_NAME_LEN)
				throw new XmlException("Exceeded MAX_NAME_LEN in Name");
			pos = cbuf.length;
		}
	}

	/**
	 * Returns a text string, which, once found,
	 * is internalized using the underlying string pool.
	 * This routine was written along the same lines as {@link #readNamespace}.
	 * @see XmlSParser#getCanonicalText
	 */
	public String getCanonicalText()
		throws IOException, XmlException
	{
		int c = peek(); 
		if (c == -1 || c == '<')
			return "";
		StringBuffer result = null;
		while (true) {
			fill();
			if (pos == cbuf.length) {
				return (result == null) ? "" :
					stringPool.intern(result.toString());
			}
			if (peek() == '&') {
				if (!translateReference())
					return (result == null) ? "" :
						stringPool.intern(result.toString());
				else
					if (result == null)
						result = new StringBuffer().append((char)read());
					else
						result.append((char)read());
			}
			int i;
			for (i = pos; i < cbuf.length; i++) {
				c = cbuf[i];
				if (c == '&')
					break;
				if (c == '<') { // match
					String s;
					if (result == null) {
						s = stringPool.intern(cbuf, pos, i - pos);
					} else {
						result.append(cbuf, pos, i - pos);
						s = stringPool.intern(result.toString());
					}
					pos = i;
					return s;
				}
			}
			if (result == null)
				result = new StringBuffer().append(cbuf, pos, i - pos);
			else
				result.append(cbuf, pos, i - pos);
			pos = i;
		}
	}

	/**
	 * Returns the next character without removing it from the pushback stream.
	 * If there are no more characters to be read from the underlying stream, a
	 * character is read from it and put back.
	 */
	public int peek()
		throws IOException
	{
		if (pos < cbuf.length) {
			return cbuf[pos];
		}
		fill();
		int c = read();
		if (c != -1)
			unread(c);
		return c;
	}

	/**
	 * Peeks into the stream, this is faster than reading and unreading for
	 * scanning.  Returns the number of characters copied into
	 * <code>buf</code>.
	 */
	public int peek(char buf[], int off, int len)
		throws IOException
	{
		if (pos < cbuf.length - len) {
			System.arraycopy(cbuf, pos, buf, off, len);
			return len;
		} else {
			int count = read(buf, off, len);
			if (count == -1)
				return -1;
			unread(buf, off, count);
			return count;
		}
	}

	/**
	 * Skips until <code>a</code> is found.
	 * Returns the character found or -1 if EOF.
	 */
	public int skipUntil(char a) 
		throws IOException, XmlException
	{
		return skipUntil(a, a);
	}

	/**
	 * Skips until <code>a</code> or <code>b</code> is found.
	 * Returns the character found or -1 if EOF.
	 * @throws XmlException if match could not be found
	 */
	public int skipUntil(char a, char b) 
		throws IOException, XmlException
	{
		while (true) {
			fill();
			if (pos == cbuf.length)
				throw new XmlException("Premature EOF looking for match");
			for (int i = pos; i < cbuf.length; i++) {
				if (cbuf[i] == a || cbuf[i] == b) { // match
					pos = i;
					return cbuf[i];
				}
			}
			pos = cbuf.length;
		}
	}

	/**
	 * Copies until <code>a</code> is found.
	 * Returns the character found or -1 if EOF.
	 */
	public int copyUntil(Writer w, char a) 
		throws IOException, XmlException
	{
		return copyUntil(w, a, a);
	}

	/**
	 * Copies until <code>a</code> or <code>b</code> is found.
	 * Returns the character found or -1 if EOF.
	 */
	public int copyUntil(Writer w, char a, char b) 
		throws IOException, XmlException
	{
		while (true) {
			fill();
			if (pos == cbuf.length)
				return -1;
			for (int i = pos; i < cbuf.length; i++) {
				if (cbuf[i] == a || cbuf[i] == b) { // match
					w.write(cbuf, pos, i - pos);
					pos = i;
					return cbuf[i];
				}
			}
			w.write(cbuf, pos, cbuf.length - pos);
			pos = cbuf.length;
		}
	}

	/**
	 * Skips characters in the stream.  If attempting to read beyond EOF, does
	 * nothing.
	 */
	public void skip(int len)
		throws IOException
	{
		if (pos < cbuf.length) {
			int got = Math.min(len, cbuf.length - pos);
			len -= got;
			pos += got;
		}
		while (len-- > 0)
			read();
	}

	/** 
	 * Unreads an entire string.
	 */
	public void unread(String s)
		throws IOException
	{
		unread(s.toCharArray(), 0, s.length());
	}

	/**
	 * Unreads characters.
	 */
	public void unread(char buf[], int off, int len)
		throws IOException
	{
		if (pos < len)
			throw new IOException("Attempt to unread too many characters: " + len);
		pos -= len;
		System.arraycopy(buf, off, cbuf, pos, len);
	}

	/**
	 * Reads characters.
	 * @return -1 if EOF and no characters were pushed back
	 */
	public int read(char buf[], int off, int len)
		throws IOException
	{
		int got = 0;
		// read from our array
		if (pos < cbuf.length) {
			got = Math.min(len, cbuf.length - pos);
			System.arraycopy(cbuf, pos, buf, off, got);
			len -= got;
			pos += got;
			off += got;
		}
		// read from our array again
		if (len > 0) {
			fill();
			if (pos == cbuf.length) {
				if (got == 0)
					return -1;
			} else { 
				int min = Math.min(len, cbuf.length - pos);
				System.arraycopy(cbuf, pos, buf, off, min);
				pos += min;
				got += min;
			}
		}
		return got;
	}

	/**
	 * Returns a string representation of this XmlScanner.
	 */
	@Override
	public String toString() {
		String s = "XmlScanner avail=" + (cbuf.length - pos) + " [";
		for (int i = pos; i < cbuf.length ; i++) {
			s += cbuf[i];
		}
		s += "]";
		return s;
	}

	/**
	 * Returns the underlying StringPool instance.
	 */
	public StringPool getStringPool() {
		return stringPool;
	}

	/**
	 * Sets the underlying StringPool instance.
	 */
	public void setStringPool(StringPool stringPool) {
		this.stringPool = stringPool;
	}

	/**
	 * Returns the underlying UriMap instance.
	 */
	public UriMap getUriMap() {
		return uriMap;
	}

	/**
	 * Sets the underlying UriMap instance.
	 */
	public void setUriMap(UriMap uriMap) {
		this.uriMap = uriMap;
	}

}
