/*
 * Copyright 2005 Tridium, Inc. All Rights Reserved.
 */

/**
 * Converts HTML to XML.
 *
 * @author    Andy Frank
 * @creation  7 Oct 05
 * @version   $Revision$ $Date$
 * @since     Baja 1.0
 */

function HtmlToXml()
{
////////////////////////////////////////////////////////////////
// Attributes
////////////////////////////////////////////////////////////////
  
  // DOM Node.nodeType values handled by this converter.
  var ELEMENT_NODE = 1;
  var TEXT_NODE    = 3;
  
  // Output accumulator shared by the private walkers below.
  // It is reset at the start of every toXml() call.
  var xml = "";

////////////////////////////////////////////////////////////////
// Methods
////////////////////////////////////////////////////////////////
  
  /**
   * Convert the given html dom node to an xml string.
   * Returns the xml string.  Include root determines
   * if the root node object is included in the xml, or
   * if it starts with its children.
   *
   * Each call starts from an empty buffer, so the same
   * converter instance can be reused for several nodes.
   */
  this.toXml = function(node, includeRoot)
  {
    // Reset so output from a previous call does not leak
    // into this one.
    xml = "";
    
    if (includeRoot) 
    {
      // Start from root
      walk(node);
    }
    else
    {
      // Skip root, start with root's children
      var kids = node.childNodes;
      for (var i=0; i<kids.length; i++)
        walk(kids[i]);        
    }
    
    return xml;
  };
  
////////////////////////////////////////////////////////////////
// Private
////////////////////////////////////////////////////////////////
  
  /**
   * Walk node, converting to xml.  Only element and text
   * nodes produce output; every other node type (comments,
   * etc) is silently dropped.
   */
  function walk(node)
  {
    switch (node.nodeType)
    {
      case ELEMENT_NODE: elem(node); break;
      case TEXT_NODE:    text(node); break;
    }
  }
  
  /**
   * Escape an attribute value so it can be safely written
   * between single quotes.  The quote is written as a numeric
   * character reference rather than a named entity.
   */
  function escapeAttr(value)
  {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/'/g, "&#39;");
  }
  
  /**
   * Convert an ELEMENT node.  The element's own markup is
   * omitted (but its children are still walked) when it has
   * a name attribute equal to "ignore", or when it is a span
   * with no attributes at all.
   */
  function elem(node)
  {      
    var name  = node.nodeName.toLowerCase();
    var attrs = node.attributes;
    var kids  = node.childNodes;
    var i;
    
    // Check if we should ignore the markup for this node
    var skip = (attrs["name"] && attrs["name"].value === "ignore");
    if (name === "span" && attrs.length === 0) skip = true;
        
    // Start element
    if (!skip) xml += "<" + name;
    
    // Write attributes
    for (i=0; !skip && i<attrs.length; i++)
    {
      var a = attrs[i];
      
      // If value is empty or null, we know we can skip it
      if (a.value == null || a.value === "null" || a.value.length === 0)
        continue;
      
      // This is the complete list of all DOM attributes
      // in Internet Explorer, and in Mozilla includes
      // mozilla specific attributes - so selectively choose
      // when attributes get used.
      switch (a.name)
      {
        case "class": // fall thru
        case "src":
        case "href":
        case "style": 
          // Add attribute, escaping the value so embedded
          // quotes or markup chars can't break the output
          xml += " " + a.name + "='" + escapeAttr(a.value) + "'"; 
          break;
      }
    }
    
    // If element has no children, inline
    if (!skip && kids.length === 0)
    {
      xml += " />";
      return;
    }
    
    // Else close start bracket
    if (!skip) xml += ">";
    
    // Walk children
    for (i=0; i<kids.length; i++)
      walk(kids[i]);
      
    // Close element
    if (!skip) xml += "</" + name + ">";   
  }
  
  /**
   * Convert a TEXT node.  Markup characters are escaped and
   * line breaks are flattened to single spaces.
   */
  function text(node)
  {
    // TODO - make sure we don't corrupt text inside a PRE tag!

    var value = node.nodeValue;
    
    // Escape chars - ampersand must go first so the entities
    // written by the later replacements are not re-escaped
    value = value.replace(/&/g, "&amp;");
    value = value.replace(/"/g, "&quot;");
    value = value.replace(/</g, "&lt;");
    value = value.replace(/>/g, "&gt;");
    
    // NOTE(review): &nbsp; is not one of the predefined XML
    // entities - confirm the consumer declares or tolerates it
    value = value.replace(/\u00A0/g, "&nbsp;");

    // TODO - do we need to escape unicode chars as well?    

    // TODO - flattening newlines breaks PRE content; need to
    // decide how line breaks should be carried in the output
    
    // Replace carriage returns and line feeds
    value = value.replace(/\r\n/g, " ");
    value = value.replace(/\n/g, " ");
    value = value.replace(/\r/g, " ");

    xml += value;
  }  
}

      