42 files changed, 8646 insertions, 0 deletions
diff --git a/components/htmlparser/moz.build b/components/htmlparser/moz.build
new file mode 100644
index 000000000..ddcad7b1a
--- /dev/null
+++ b/components/htmlparser/moz.build
@@ -0,0 +1,49 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+XPIDL_SOURCES += [  # XPIDL interfaces; grouped under XPIDL_MODULE below
+  'public/nsIExpatSink.idl',
+  'public/nsIExtendedExpatSink.idl',
+]
+
+EXPORTS += [  # headers exported for use outside this directory
+  'src/nsElementTable.h',
+  'src/nsHTMLTagList.h',
+  'src/nsHTMLTags.h',
+  'src/nsIContentSink.h',
+  'src/nsIDTD.h',
+  'src/nsIFragmentContentSink.h',
+  'src/nsIHTMLContentSink.h',
+  'src/nsIParser.h',
+  'src/nsIParserService.h',
+  'src/nsITokenizer.h',
+  'src/nsParserBase.h',
+  'src/nsParserCIID.h',
+  'src/nsParserConstants.h',
+  'src/nsScannerString.h',
+  'src/nsToken.h',
+]
+
+SOURCES += [  # C++ files compiled for this component
+  'src/CNavDTD.cpp',
+  'src/CParserContext.cpp',
+  'src/nsElementTable.cpp',
+  'src/nsExpatDriver.cpp',
+  'src/nsHTMLEntities.cpp',
+  'src/nsHTMLTags.cpp',
+  'src/nsHTMLTokenizer.cpp',
+  'src/nsParser.cpp',
+  'src/nsParserModule.cpp',
+  'src/nsParserMsgUtils.cpp',
+  'src/nsParserService.cpp',
+  'src/nsScanner.cpp',
+  'src/nsScannerString.cpp',
+]
+
+if CONFIG['GNU_CXX']:  # only when the build config reports GNU_CXX
+  CXXFLAGS += ['-Wno-error=shadow']  # -Wshadow stays a warning, never an error
+
+XPIDL_MODULE = 'htmlparser'  # module name for the XPIDL_SOURCES above
+FINAL_LIBRARY = 'xul'  # library this directory's objects are linked into
diff --git a/components/htmlparser/public/nsIExpatSink.idl b/components/htmlparser/public/nsIExpatSink.idl
new file mode 100644
index 000000000..df0b2d869
--- /dev/null
+++ b/components/htmlparser/public/nsIExpatSink.idl
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+interface nsIScriptError;
+
+/**
+ * This interface should be implemented by any content sink that wants
+ * to get output from expat and do something with it; in other words,
+ * by any sink that handles some sort of XML dialect.
+ */
+
+[scriptable, uuid(01f681af-0f22-4725-a914-0d396114daf0)]
+interface nsIExpatSink : nsISupports 
+{
+  /**
+   * Called to handle the opening tag of an element.
+   * @param aName the fully qualified tagname of the element
+   * @param aAtts the array of attribute names and values.  There are
+   *        aAttsCount/2 names and aAttsCount/2 values, so the total number of
+   *        elements in the array is aAttsCount.  The names and values
+   *        alternate.  Thus, if we number attributes starting with 0,
+   *        aAtts[2*k] is the name of the k-th attribute and aAtts[2*k+1] is
+   *        the value of that attribute.  Both explicitly specified attributes
+   *        and attributes that are defined to have default values in a DTD are
+   *        present in aAtts.
+   * @param aAttsCount the number of elements in aAtts.
+   * @param aLineNumber the line number of the start tag in the data stream.
+   */
+  void HandleStartElement(in wstring aName,
+                          [array, size_is(aAttsCount)] in wstring aAtts,
+                          in unsigned long aAttsCount,
+                          in unsigned long aLineNumber);
+
+  /**
+   * Called to handle the closing tag of an element.
+   * @param aName the fully qualified tagname of the element
+   */
+  void HandleEndElement(in wstring aName);
+
+  /**
+   * Called to handle a comment.
+   * @param aCommentText the text of the comment (not including the
+   *        "<!--" and "-->")
+   */ 
+  void HandleComment(in wstring aCommentText);
+
+  /**
+   * Called to handle a CDATA section.
+   * @param aData the text in the CDATA section.  This is null-terminated.
+   * @param aLength the length of the aData string
+   */
+  void HandleCDataSection([size_is(aLength)] in wstring aData, 
+                          in unsigned long aLength);
+
+  /**
+   * Called to handle the doctype declaration.
+   */
+  void HandleDoctypeDecl(in AString aSubset,
+                         in AString aName,
+                         in AString aSystemId,
+                         in AString aPublicId,
+                         in nsISupports aCatalogData);
+
+  /**
+   * Called to handle character data.  Note that this does NOT get
+   * called for the contents of CDATA sections.
+   * @param aData the data to handle.  aData is NOT NULL-TERMINATED.
+   * @param aLength the length of the aData string
+   */
+  void HandleCharacterData([size_is(aLength)] in wstring aData, 
+                           in unsigned long aLength);
+
+  /**
+   * Called to handle a processing instruction.
+   * @param aTarget the PI target (e.g. xml-stylesheet)
+   * @param aData all the rest of the data in the PI
+   */
+  void HandleProcessingInstruction(in wstring aTarget, 
+                                   in wstring aData);
+
+  /**
+   * Handle the XML Declaration.
+   *
+   * @param aVersion    The version string, can be null if not specified.
+   * @param aEncoding   The encoding string, can be null if not specified.
+   * @param aStandalone -1, 0, or 1 indicating respectively that there was no
+   *                    standalone parameter in the declaration, that it was
+   *                    given as no, or that it was given as yes.
+   */
+  void HandleXMLDeclaration(in wstring aVersion,
+                            in wstring aEncoding,
+                            in long aStandalone);
+
+  /**
+   * Ask the content sink if the expat driver should log an error to the console.
+   *
+   * @param aErrorText  Error message to pass to content sink.
+   * @param aSourceText Source text of the document we're parsing.
+   * @param aError      Script error object with line number & column number
+   *
+   * @retval True if the expat driver should report the error.
+   */
+  boolean ReportError(in wstring aErrorText,
+                      in wstring aSourceText,
+                      in nsIScriptError aError);
+}; 
diff --git a/components/htmlparser/public/nsIExtendedExpatSink.idl b/components/htmlparser/public/nsIExtendedExpatSink.idl
new file mode 100644
index 000000000..d88f0d974
--- /dev/null
+++ b/components/htmlparser/public/nsIExtendedExpatSink.idl
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIExpatSink.idl"
+
+/**
+ * This interface provides notification of syntax-level events.
+ */
+[scriptable, uuid(5e3e4f0c-7b77-47ca-a7c5-a3d87f2a9c82)]
+interface nsIExtendedExpatSink : nsIExpatSink
+{
+  /**
+   * Called at the beginning of the DTD, before any entity or notation
+   * events.
+   * @param aDoctypeName The document type name.
+   * @param aSysid The declared system identifier for the external DTD subset,
+   *               or null if none was declared.
+   * @param aPubid The declared public identifier for the external DTD subset,
+   *               or null if none was declared.
+   */
+  void handleStartDTD(in wstring aDoctypeName,
+                      in wstring aSysid,
+                      in wstring aPubid);
+
+  /**
+   * Called when a prefix mapping starts to be in-scope, before any
+   * startElement events.
+   * @param aPrefix The Namespace prefix being declared. An empty string
+   *                is used for the default element namespace, which has
+   *                no prefix.
+   * @param aUri The Namespace URI the prefix is mapped to.
+   */
+  void handleStartNamespaceDecl(in wstring aPrefix,
+                                in wstring aUri);
+                              
+  /**
+   * Called when a prefix mapping is no longer in-scope, after any
+   * endElement events.
+   * @param aPrefix The prefix that was being mapped. This is the empty string
+   *                when a default mapping scope ends.
+   */
+  void handleEndNamespaceDecl(in wstring aPrefix);
+
+  /**
+   * This is called for a declaration of notation.  Expat's base argument
+   * (whatever was set by XML_SetBase) is not passed on.  aNotationName will
+   * never be null. The other arguments can be.
+   * @param aNotationName The notation name.
+   * @param aSysid The notation's system identifier, or null if none was given.
+   * @param aPubid The notation's public identifier, or null if none was given.
+   */
+  void handleNotationDecl(in wstring aNotationName,
+                          in wstring aSysid,
+                          in wstring aPubid);
+                              
+  /**
+   * This is called for a declaration of an unparsed (NDATA) entity.
+   * aName, aSysid and aNotationName arguments will never be
+   * null. The other arguments may be.
+   * @param aName  The unparsed entity's name.
+   * @param aSysid The unparsed entity's system identifier.
+   * @param aPubid The unparsed entity's public identifier, or null if none was given.
+   * @param aNotationName The name of the associated notation.
+   */
+  void handleUnparsedEntityDecl(in wstring aName,
+                                in wstring aSysid,
+                                in wstring aPubid,
+                                in wstring aNotationName);
+
+};
diff --git a/components/htmlparser/src/CNavDTD.cpp b/components/htmlparser/src/CNavDTD.cpp
new file mode 100644
index 000000000..decc6a963
--- /dev/null
+++ b/components/htmlparser/src/CNavDTD.cpp
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsISupportsImpl.h"
+#include "nsIParser.h"
+#include "CNavDTD.h"
+#include "nsIHTMLContentSink.h"
+
+NS_IMPL_ISUPPORTS(CNavDTD, nsIDTD);  // nsISupports methods; exposes nsIDTD
+
+CNavDTD::CNavDTD()
+{  // Empty body: nothing is set up here.
+}
+
+CNavDTD::~CNavDTD()
+{  // Empty body: nothing is released here.
+}
+
+// Performs no preparation and ignores all arguments; always returns NS_OK.
+NS_IMETHODIMP
+CNavDTD::WillBuildModel(const CParserContext& aParserContext,
+                        nsITokenizer* aTokenizer, nsIContentSink* aSink)
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+CNavDTD::BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink)
+{
+  // Emits an empty html/body skeleton into the sink; aTokenizer is unused.
+  // NB: It is important to throw STOPPARSING if the sink is the wrong type in
+  // order to make sure nsParser cleans up properly after itself.
+  nsCOMPtr<nsIHTMLContentSink> htmlSink(do_QueryInterface(aSink));
+  if (!htmlSink) {
+    return NS_ERROR_HTMLPARSER_STOPPARSING;
+  }
+  // A failure to open either container is handed back to the caller.
+  nsresult rv = htmlSink->OpenContainer(nsIHTMLContentSink::eHTML);
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = htmlSink->OpenContainer(nsIHTMLContentSink::eBody);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Close failures are only asserted on; NS_OK is returned regardless.
+  rv = htmlSink->CloseContainer(nsIHTMLContentSink::eBody);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  rv = htmlSink->CloseContainer(nsIHTMLContentSink::eHTML);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+CNavDTD::DidBuildModel(nsresult anErrorCode)
+{
+  return NS_OK;  // anErrorCode is ignored; there is nothing to tear down here.
+}
+
+NS_IMETHODIMP_(void)
+CNavDTD::Terminate()
+{  // Empty body: no action is taken on termination.
+}
+
+
+NS_IMETHODIMP_(int32_t) 
+CNavDTD::GetType() 
+{ 
+  return NS_IPARSER_FLAG_HTML;  // this DTD always identifies itself as HTML
+}
+
+NS_IMETHODIMP_(nsDTDMode)
+CNavDTD::GetMode() const
+{
+  return eDTDMode_quirks;  // the reported mode is always quirks
+}
+
+NS_IMETHODIMP_(bool)
+CNavDTD::CanContain(int32_t aParent,int32_t aChild) const
+{
+  MOZ_CRASH("nobody calls this");  // aborts if this is ever invoked
+  return false;  // follows the crash; not expected to execute
+}
+
+NS_IMETHODIMP_(bool)
+CNavDTD::IsContainer(int32_t aTag) const
+{
+  MOZ_CRASH("nobody calls this");  // aborts if this is ever invoked
+  return false;  // follows the crash; not expected to execute
+}
diff --git a/components/htmlparser/src/CNavDTD.h b/components/htmlparser/src/CNavDTD.h
new file mode 100644
index 000000000..b3c557e81
--- /dev/null
+++ b/components/htmlparser/src/CNavDTD.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_NAVHTMLDTD__
+#define NS_NAVHTMLDTD__
+
+#include "nsIDTD.h"
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+
+#ifdef _MSC_VER  // MSVC only: silence warning 4275 around the base clause
+#pragma warning( disable : 4275 )
+#endif
+
+class CNavDTD : public nsIDTD
+{
+#ifdef _MSC_VER  // MSVC only: put warning 4275 back to its default state
+#pragma warning( default : 4275 )
+#endif
+
+    virtual ~CNavDTD();  // private: declared ahead of the public: label
+
+public:
+    CNavDTD();
+
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIDTD
+};
+
+#endif
+
+
+
diff --git a/components/htmlparser/src/CParserContext.cpp b/components/htmlparser/src/CParserContext.cpp
new file mode 100644
index 000000000..3b764d7e4
--- /dev/null
+++ b/components/htmlparser/src/CParserContext.cpp
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#include "nsIAtom.h"
+#include "CParserContext.h"
+#include "nsToken.h"
+#include "prenv.h"  
+#include "nsIHTMLContentSink.h"
+#include "nsHTMLTokenizer.h"
+#include "nsMimeTypes.h"
+#include "nsHTMLTokenizer.h"
+
+// Takes ownership of aScanner (mScanner is an nsAutoPtr).  mDocType starts as
+// ePlainText, the SetMimeType() default, instead of being left uninitialized.
+CParserContext::CParserContext(
+    CParserContext* aPrevContext, nsScanner* aScanner, void* aKey,
+    eParserCommands aCommand, nsIRequestObserver* aListener,
+    eAutoDetectResult aStatus, bool aCopyUnused)
+  : mListener(aListener),
+    mKey(aKey),
+    mPrevContext(aPrevContext),
+    mScanner(aScanner),
+    mDTDMode(eDTDMode_unknown),
+    mDocType(ePlainText),
+    mStreamListenerState(eNone),
+    mContextType(eCTNone),
+    mAutoDetectStatus(aStatus),
+    mParserCommand(aCommand),
+    mMultipart(true),
+    mCopyUnused(aCopyUnused)
+{
+  MOZ_COUNT_CTOR(CParserContext);
+}
+
+CParserContext::~CParserContext()
+{
+  // It's ok to simply ignore mPrevContext: it is not deleted here.
+  MOZ_COUNT_DTOR(CParserContext);
+}
+
+void
+CParserContext::SetMimeType(const nsACString& aMimeType)
+{
+  // Record the type, then classify it: text/html selects eHTML_Strict, the
+  // XML family below selects eXML, and everything else is plain text.
+  mMimeType.Assign(aMimeType);
+
+  const bool isHTML = mMimeType.EqualsLiteral(TEXT_HTML);
+  const bool isXML = !isHTML &&
+    (mMimeType.EqualsLiteral(TEXT_XML)                 ||
+     mMimeType.EqualsLiteral(APPLICATION_XML)          ||
+     mMimeType.EqualsLiteral(APPLICATION_XHTML_XML)    ||
+     mMimeType.EqualsLiteral(TEXT_XUL)                 ||
+     mMimeType.EqualsLiteral(IMAGE_SVG_XML)            ||
+     mMimeType.EqualsLiteral(APPLICATION_MATHML_XML)   ||
+     mMimeType.EqualsLiteral(APPLICATION_RDF_XML)      ||
+     mMimeType.EqualsLiteral(APPLICATION_WAPXHTML_XML) ||
+     mMimeType.EqualsLiteral(TEXT_RDF));
+  mDocType = isHTML ? eHTML_Strict : (isXML ? eXML : ePlainText);
+}
+
+nsresult
+CParserContext::GetTokenizer(nsIDTD* aDTD,
+                             nsIContentSink* aSink,
+                             nsITokenizer*& aTokenizer)
+{
+  // A null aDTD is treated as HTML.  The tokenizer is created on first use
+  // and cached in mTokenizer; aSink is not consulted.
+  const int32_t dtdType = aDTD ? aDTD->GetType() : NS_IPARSER_FLAG_HTML;
+  nsresult rv = NS_OK;
+  if (!mTokenizer) {
+    const bool wantsHTML =
+      dtdType == NS_IPARSER_FLAG_HTML || mParserCommand == eViewSource;
+    if (wantsHTML) {
+      mTokenizer = new nsHTMLTokenizer;
+    } else if (dtdType == NS_IPARSER_FLAG_XML) {
+      mTokenizer = do_QueryInterface(aDTD, &rv);
+    }
+  }
+  aTokenizer = mTokenizer;
+  return rv;
+}
diff --git a/components/htmlparser/src/CParserContext.h b/components/htmlparser/src/CParserContext.h
new file mode 100644
index 000000000..8850b83d5
--- /dev/null
+++ b/components/htmlparser/src/CParserContext.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/1/98
+ * 
+ */
+
+#ifndef __CParserContext
+#define __CParserContext
+
+#include "nsIParser.h"
+#include "nsIURL.h"
+#include "nsIDTD.h"
+#include "nsIStreamListener.h"
+#include "nsIRequest.h"
+#include "nsScanner.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+#include "nsAutoPtr.h"
+
+/**
+ * Note that the parser is given FULL access to all
+ * data in a parser context. Hey, that's what it's for!
+ */
+
+class CParserContext {
+public:
+   enum eContextType {eCTNone,eCTURL,eCTString,eCTStream};
+
+   CParserContext(CParserContext* aPrevContext,
+                  nsScanner* aScanner,
+                  void* aKey = 0,
+                  eParserCommands aCommand = eViewNormal,
+                  nsIRequestObserver* aListener = 0,
+                  eAutoDetectResult aStatus = eUnknownDetect,
+                  bool aCopyUnused = false);
+
+    ~CParserContext();
+
+    nsresult GetTokenizer(nsIDTD* aDTD,
+                          nsIContentSink* aSink,
+                          nsITokenizer*& aTokenizer);
+    void  SetMimeType(const nsACString& aMimeType);
+
+    nsCOMPtr<nsIRequest> mRequest; // provided by necko to differentiate between input streams
+                                   // why is mRequest strongly referenced? see bug 102376.
+    nsCOMPtr<nsIRequestObserver> mListener;
+    void* const          mKey;
+    nsCOMPtr<nsITokenizer> mTokenizer;   // created on demand by GetTokenizer()
+    CParserContext* const mPrevContext;  // the destructor does not delete this
+    nsAutoPtr<nsScanner> mScanner;       // owned; the constructor's aScanner
+
+    nsCString            mMimeType;      // last value given to SetMimeType()
+    nsDTDMode            mDTDMode;
+
+    eParserDocType       mDocType;       // derived from the MIME type in SetMimeType()
+    eStreamState         mStreamListenerState;
+    eContextType         mContextType;
+    eAutoDetectResult    mAutoDetectStatus;
+    eParserCommands      mParserCommand;
+
+    bool                 mMultipart;
+    bool                 mCopyUnused;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsElementTable.cpp b/components/htmlparser/src/nsElementTable.cpp
new file mode 100644
index 000000000..7ab4c48b1
--- /dev/null
+++ b/components/htmlparser/src/nsElementTable.cpp
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsElementTable.h"
+
+struct HTMLElement
+{
+#ifdef DEBUG
+  nsHTMLTag mTagID;   // debug only: compared against the row index
+#endif
+  bool mIsBlock;      // answer given by nsHTMLElement::IsBlock()
+  bool mIsContainer;  // answer given by nsHTMLElement::IsContainer()
+};
+
+#ifdef DEBUG  // debug rows also record their tag id (HTMLElement::mTagID)
+#define ELEM(tag, block, container) { eHTMLTag_##tag, block, container },
+#else  // non-debug rows hold only the two flags
+#define ELEM(tag, block, container) { block, container },
+#endif
+
+#define ____ false    // This makes the table easier to read.
+
+// Note that the mIsBlock field disagrees with
+// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements for
+// the following elements: center, details, dialog, dir, dt, figcaption,
+// listing, menu, multicol, noscript, output, summary, tfoot, video.
+//
+// mrbkap thinks that the field values were pulled from the old HTML4 DTD and
+// then got modified in mostly random ways to make the old parser's behavior
+// compatible with the web. So it might make sense to change the mIsBlock
+// values for the abovementioned tags at some point.
+//
+
+static const HTMLElement gHTMLElements[] = {  // indexed by nsHTMLTag value
+  ELEM(unknown,     ____, ____)
+  ELEM(a,           ____, true)
+  ELEM(abbr,        ____, true)
+  ELEM(acronym,     ____, true)
+  ELEM(address,     true, true)
+  ELEM(applet,      ____, true)
+  ELEM(area,        ____, ____)
+  ELEM(article,     true, true)
+  ELEM(aside,       true, true)
+  ELEM(audio,       ____, true)
+  ELEM(b,           ____, true)
+  ELEM(base,        ____, ____)
+  ELEM(basefont,    ____, ____)
+  ELEM(bdo,         ____, true)
+  ELEM(bgsound,     ____, ____)
+  ELEM(big,         ____, true)
+  ELEM(blockquote,  true, true)
+  ELEM(body,        ____, true)
+  ELEM(br,          ____, ____)
+  ELEM(button,      ____, true)
+  ELEM(canvas,      ____, true)
+  ELEM(caption,     ____, true)
+  ELEM(center,      true, true)
+  ELEM(cite,        ____, true)
+  ELEM(code,        ____, true)
+  ELEM(col,         ____, ____)
+  ELEM(colgroup,    ____, true)
+  ELEM(data,        ____, true)
+  ELEM(datalist,    ____, true)
+  ELEM(dd,          ____, true)
+  ELEM(del,         ____, true)
+  ELEM(details,     true, true)
+  ELEM(dfn,         ____, true)
+  ELEM(dialog,      true, true)
+  ELEM(dir,         true, true)
+  ELEM(div,         true, true)
+  ELEM(dl,          true, true)
+  ELEM(dt,          ____, true)
+  ELEM(em,          ____, true)
+  ELEM(embed,       ____, ____)
+  ELEM(fieldset,    true, true)
+  ELEM(figcaption,  ____, true)
+  ELEM(figure,      true, true)
+  ELEM(font,        ____, true)
+  ELEM(footer,      true, true)
+  ELEM(form,        true, true)
+  ELEM(frame,       ____, ____)
+  ELEM(frameset,    ____, true)
+  ELEM(h1,          true, true)
+  ELEM(h2,          true, true)
+  ELEM(h3,          true, true)
+  ELEM(h4,          true, true)
+  ELEM(h5,          true, true)
+  ELEM(h6,          true, true)
+  ELEM(head,        ____, true)
+  ELEM(header,      true, true)
+  ELEM(hgroup,      true, true)
+  ELEM(hr,          true, ____)
+  ELEM(html,        ____, true)
+  ELEM(i,           ____, true)
+  ELEM(iframe,      ____, true)
+  ELEM(image,       ____, ____)
+  ELEM(img,         ____, ____)
+  ELEM(input,       ____, ____)
+  ELEM(ins,         ____, true)
+  ELEM(kbd,         ____, true)
+  ELEM(keygen,      ____, ____)
+  ELEM(label,       ____, true)
+  ELEM(legend,      ____, true)
+  ELEM(li,          true, true)
+  ELEM(link,        ____, ____)
+  ELEM(listing,     true, true)
+  ELEM(main,        true, true)
+  ELEM(map,         ____, true)
+  ELEM(mark,        ____, true)
+  ELEM(menu,        true, true)
+  ELEM(menuitem,    ____, true)
+  ELEM(meta,        ____, ____)
+  ELEM(meter,       ____, true)
+  ELEM(multicol,    true, true)
+  ELEM(nav,         true, true)
+  ELEM(nobr,        ____, true)
+  ELEM(noembed,     ____, true)
+  ELEM(noframes,    ____, true)
+  ELEM(noscript,    ____, true)
+  ELEM(object,      ____, true)
+  ELEM(ol,          true, true)
+  ELEM(optgroup,    ____, true)
+  ELEM(option,      ____, true)
+  ELEM(output,      ____, true)
+  ELEM(p,           true, true)
+  ELEM(param,       ____, ____)
+  ELEM(picture,     ____, true)
+  ELEM(plaintext,   ____, true)
+  ELEM(pre,         true, true)
+  ELEM(progress,    ____, true)
+  ELEM(q,           ____, true)
+  ELEM(rb,          ____, true)
+  ELEM(rp,          ____, true)
+  ELEM(rt,          ____, true)
+  ELEM(rtc,         ____, true)
+  ELEM(ruby,        ____, true)
+  ELEM(s,           ____, true)
+  ELEM(samp,        ____, true)
+  ELEM(script,      ____, true)
+  ELEM(section,     true, true)
+  ELEM(select,      ____, true)
+  ELEM(small,       ____, true)
+  ELEM(slot,        ____, true)
+  ELEM(source,      ____, ____)
+  ELEM(span,        ____, true)
+  ELEM(strike,      ____, true)
+  ELEM(strong,      ____, true)
+  ELEM(style,       ____, true)
+  ELEM(sub,         ____, true)
+  ELEM(summary,     true, true)
+  ELEM(sup,         ____, true)
+  ELEM(table,       true, true)
+  ELEM(tbody,       ____, true)
+  ELEM(td,          ____, true)
+  ELEM(textarea,    ____, true)
+  ELEM(tfoot,       ____, true)
+  ELEM(th,          ____, true)
+  ELEM(thead,       ____, true)
+  ELEM(template,    ____, true)
+  ELEM(time,        ____, true)
+  ELEM(title,       ____, true)
+  ELEM(tr,          ____, true)
+  ELEM(track,       ____, ____)
+  ELEM(tt,          ____, true)
+  ELEM(u,           ____, true)
+  ELEM(ul,          true, true)
+  ELEM(var,         ____, true)
+  ELEM(video,       ____, true)
+  ELEM(wbr,         ____, ____)
+  ELEM(xmp,         ____, true)
+  ELEM(text,        ____, ____)
+  ELEM(whitespace,  ____, ____)
+  ELEM(newline,     ____, ____)
+  ELEM(comment,     ____, true)
+  ELEM(entity,      ____, true)
+  ELEM(doctypeDecl, ____, true)
+  ELEM(markupDecl,  ____, true)
+  ELEM(instruction, ____, true)
+  ELEM(userdefined, ____, true)
+};
+
+#undef ELEM  // the row helpers are only meaningful for the table above
+#undef ____
+
+bool nsHTMLElement::IsContainer(nsHTMLTag aId)
+{
+  const HTMLElement& row = gHTMLElements[aId];  // aId is the table index
+  return row.mIsContainer;
+}
+
+bool nsHTMLElement::IsBlock(nsHTMLTag aId)
+{
+  const HTMLElement& row = gHTMLElements[aId];  // aId is the table index
+  return row.mIsBlock;
+}
+
+#ifdef DEBUG
+// Debug-only consistency check: row N of gHTMLElements must be for tag N.
+void
+CheckElementTable()
+{
+  for (nsHTMLTag t = eHTMLTag_unknown; t <= eHTMLTag_userdefined;
+       t = nsHTMLTag(t + 1)) {
+    MOZ_ASSERT(gHTMLElements[t].mTagID == t,
+               "gHTMLElements entries must match the tag list.");
+  }
+}
+#endif
diff --git a/components/htmlparser/src/nsElementTable.h b/components/htmlparser/src/nsElementTable.h
new file mode 100644
index 000000000..b456b5989
--- /dev/null
+++ b/components/htmlparser/src/nsElementTable.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsElementTable_h
+#define nsElementTable_h
+
+#include "nsHTMLTags.h"
+
+#ifdef DEBUG
+void CheckElementTable();  // asserts the element table matches the tag list
+#endif
+
+struct nsHTMLElement
+{
+  static bool IsContainer(nsHTMLTag aTag);  // mIsContainer flag for aTag
+  static bool IsBlock(nsHTMLTag aTag);      // mIsBlock flag for aTag
+};
+
+#endif // nsElementTable_h
diff --git a/components/htmlparser/src/nsExpatDriver.cpp b/components/htmlparser/src/nsExpatDriver.cpp
new file mode 100644
index 000000000..e35a1da25
--- /dev/null
+++ b/components/htmlparser/src/nsExpatDriver.cpp
@@ -0,0 +1,1412 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsExpatDriver.h"
+#include "nsCOMPtr.h"
+#include "nsParserCIID.h"
+#include "CParserContext.h"
+#include "nsIExpatSink.h"
+#include "nsIExtendedExpatSink.h"
+#include "nsIContentSink.h"
+#include "nsParserMsgUtils.h"
+#include "nsIURL.h"
+#include "nsIUnicharInputStream.h"
+#include "nsIProtocolHandler.h"
+#include "nsNetUtil.h"
+#include "prprf.h"
+#include "prmem.h"
+#include "nsTextFormatter.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsCRT.h"
+#include "nsIConsoleService.h"
+#include "nsIScriptError.h"
+#include "nsIContentPolicy.h"
+#include "nsContentPolicyUtils.h"
+#include "nsError.h"
+#include "nsXPCOMCIDInternal.h"
+#include "nsUnicharInputStream.h"
+#include "nsContentUtils.h"
+#include "nsNullPrincipal.h"
+
+#include "mozilla/IntegerTypeTraits.h"
+#include "mozilla/Logging.h"
+
+using mozilla::fallible;
+using mozilla::LogLevel;
+
+#define kExpatSeparatorChar 0xFFFF
+
+static const char16_t kUTF16[] = { 'U', 'T', 'F', '-', '1', '6', '\0' };
+
+static mozilla::LazyLogModule gExpatDriverLog("expatdriver");
+
+// The maximum tree depth used for XML-based files (xml/svg/etc.)
+static const uint16_t sMaxXMLDepth = 2048;
+
+/***************************** EXPAT CALL BACKS ******************************/
+// The callback handlers that get called from the expat parser.
+
+// Expat XML-declaration callback: forwards to the nsExpatDriver in aUserData.
+static void
+Driver_HandleXMLDeclaration(void *aUserData, const XML_Char *aVersion,
+                            const XML_Char *aEncoding, int aStandalone)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleXMLDeclaration(aVersion, aEncoding, aStandalone);
+}
+
+// Expat start-tag callback: forwards the element name and attribute array
+// to the nsExpatDriver passed as aUserData.
+static void
+Driver_HandleStartElement(void *aUserData, const XML_Char *aName,
+                          const XML_Char **aAtts)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleStartElement(aName, aAtts);
+  }
+}
+
+// Expat end-tag callback: relays aName to the driver passed as aUserData.
+static void
+Driver_HandleEndElement(void *aUserData, const XML_Char *aName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleEndElement(aName);
+  }
+}
+
+// Expat character-data callback: passes the text and its length to the driver.
+static void
+Driver_HandleCharacterData(void *aUserData, const XML_Char *aData, int aLength)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleCharacterData(aData, uint32_t(aLength));
+}
+
+// Expat comment callback: relays the comment text to the driver in aUserData.
+static void
+Driver_HandleComment(void *aUserData, const XML_Char *aName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleComment(aName);
+  }
+}
+
+// Expat processing-instruction callback: forwards the target and data to the
+// nsExpatDriver passed as aUserData.
+static void
+Driver_HandleProcessingInstruction(void *aUserData, const XML_Char *aTarget,
+                                   const XML_Char *aData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleProcessingInstruction(aTarget, aData);
+  }
+}
+
+// Expat default-handler callback: passes the data and its length to the driver.
+static void
+Driver_HandleDefault(void *aUserData, const XML_Char *aData, int aLength)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleDefault(aData, uint32_t(aLength));
+}
+
+static void  // Expat CDATA-section start callback: notifies the driver.
+Driver_HandleStartCdataSection(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleStartCdataSection();
+  }
+}
+
+static void  // Expat CDATA-section end callback: notifies the driver.
+Driver_HandleEndCdataSection(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleEndCdataSection();
+  }
+}
+
+// Expat DOCTYPE-start callback: forwards the doctype name and identifiers to
+// the driver in aUserData, turning the int subset flag into a bool.
+static void
+Driver_HandleStartDoctypeDecl(void *aUserData, const XML_Char *aDoctypeName,
+                              const XML_Char *aSysid, const XML_Char *aPubid,
+                              int aHasInternalSubset)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    const bool hasSubset = aHasInternalSubset != 0;
+    driver->HandleStartDoctypeDecl(aDoctypeName, aSysid, aPubid, hasSubset);
+  }
+}
+
+static void  // Expat DOCTYPE-end callback: notifies the driver.
+Driver_HandleEndDoctypeDecl(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleEndDoctypeDecl();
+  }
+}
+
+// Expat external-entity-reference callback. The first argument is the
+// nsExpatDriver; when it is missing this returns 1 without doing anything,
+// otherwise it returns whatever nsExpatDriver::HandleExternalEntityRef does.
+static int
+Driver_HandleExternalEntityRef(void *aExternalEntityRefHandler,
+                               const XML_Char *aOpenEntityNames,
+                               const XML_Char *aBase,
+                               const XML_Char *aSystemId,
+                               const XML_Char *aPublicId)
+{
+  NS_ASSERTION(aExternalEntityRefHandler, "expat driver should exist");
+  if (nsExpatDriver* driver =
+        static_cast<nsExpatDriver*>(aExternalEntityRefHandler)) {
+    return driver->HandleExternalEntityRef(aOpenEntityNames, aBase, aSystemId,
+                                           aPublicId);
+  }
+  return 1;
+}
+
+// Expat namespace-declaration start callback: forwards the prefix and URI to
+// the nsExpatDriver passed as aUserData.
+static void
+Driver_HandleStartNamespaceDecl(void *aUserData, const XML_Char *aPrefix,
+                                const XML_Char *aUri)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleStartNamespaceDecl(aPrefix, aUri);
+  }
+}
+
+// Expat namespace-declaration end callback: forwards the prefix to the
+// nsExpatDriver passed as aUserData.
+static void
+Driver_HandleEndNamespaceDecl(void *aUserData, const XML_Char *aPrefix)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData)) {
+    driver->HandleEndNamespaceDecl(aPrefix);
+  }
+}
+
+// Expat notation-declaration callback: forwards to the driver in aUserData.
+static void
+Driver_HandleNotationDecl(void *aUserData, const XML_Char *aNotationName,
+                          const XML_Char *aBase, const XML_Char *aSysid,
+                          const XML_Char *aPubid)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleNotationDecl(aNotationName, aBase, aSysid, aPubid);
+}
+
+// Expat unparsed-entity-declaration callback: forwards to the driver.
+static void
+Driver_HandleUnparsedEntityDecl(void *aUserData, const XML_Char *aEntityName,
+                                const XML_Char *aBase, const XML_Char *aSysid,
+                                const XML_Char *aPubid,
+                                const XML_Char *aNotationName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleUnparsedEntityDecl(aEntityName, aBase, aSysid, aPubid,
+                             aNotationName);
+}
+
+
+/***************************** END CALL BACKS ********************************/
+
+/***************************** CATALOG UTILS *********************************/
+
+// Initially added for bug 113400 to switch from the remote "XHTML 1.0 plus
+// MathML 2.0" DTD to the lightweight customized version that Mozilla uses.
+// Since Mozilla is not validating, no need to fetch a *huge* file at each
+// click.
+// XXX The cleanest solution here would be to fix Bug 98413: Implement XML
+// Catalogs.
+struct nsCatalogData {  // One row of kCatalogTable.
+  const char* mPublicID;    // Public identifier that LookupCatalogData compares against.
+  const char* mLocalDTD;    // File name GetLocalDTDURI appends to resource://gre/res/dtd/.
+  const char* mAgentSheet;  // Expected to be null: HandleEndDoctypeDecl asserts it is unset.
+};
+
+// The order of this table is guestimated to be in the optimum order
+static const nsCatalogData kCatalogTable[] = {  // { public id, local DTD file, agent sheet }
+  { "-//W3C//DTD XHTML 1.0 Transitional//EN",    "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML 1.1//EN",                 "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML 1.0 Strict//EN",          "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML 1.0 Frameset//EN",        "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML Basic 1.0//EN",           "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", "htmlmathml-f.ent", nullptr },
+  { "-//W3C//DTD MathML 2.0//EN",                "htmlmathml-f.ent", nullptr },
+  { "-//WAPFORUM//DTD XHTML Mobile 1.0//EN",     "htmlmathml-f.ent", nullptr },
+  { nullptr, nullptr, nullptr }  // Terminator: LookupCatalogData stops at a null mPublicID.
+};
+
+// Returns the kCatalogTable entry whose mPublicID equals aPublicID, or
+// nullptr when the public identifier is not in the catalog.
+static const nsCatalogData*
+LookupCatalogData(const char16_t* aPublicID)
+{
+  nsDependentString publicID(aPublicID);
+
+  // The table is tiny and terminated by an entry with a null mPublicID, so a
+  // plain linear scan is all that is needed (the fix for bug 98413 would get
+  // rid of this code anyway).
+  for (const nsCatalogData* entry = kCatalogTable; entry->mPublicID; ++entry) {
+    if (publicID.EqualsASCII(entry->mPublicID)) {
+      return entry;
+    }
+  }
+
+  return nullptr;
+}
+
+// This function provides a resource URI to a local DTD 
+// in resource://gre/res/dtd/ which may or may not exist.
+// If aCatalogData is provided, it is used to remap the
+// DTD instead of taking the filename from the URI.
+static void
+GetLocalDTDURI(const nsCatalogData* aCatalogData, nsIURI* aDTD,
+               nsIURI** aResult)
+{
+  NS_ASSERTION(aDTD, "Null parameter.");
+
+  // A catalog entry, when given, remaps the DTD to a known local file.
+  nsAutoCString dtdFile;
+  if (aCatalogData) {
+    dtdFile.Assign(aCatalogData->mLocalDTD);
+  }
+
+  if (dtdFile.IsEmpty()) {
+    // No predefined mapping: fall back to the filename.ext of the DTD's own
+    // URL. That way users only have to copy a DTD file into our special DTD
+    // directory for it to be picked.
+    nsCOMPtr<nsIURL> url = do_QueryInterface(aDTD);
+    if (url) {
+      url->GetFileName(dtdFile);
+    }
+    if (dtdFile.IsEmpty()) {
+      // Not a URL, or a URL without a file name: aResult is left untouched.
+      return;
+    }
+  }
+
+  // Build the resource URI. Nothing here checks that the file exists, and the
+  // outcome of NS_NewURI is not inspected.
+  nsAutoCString localSpec("resource://gre/res/dtd/");
+  localSpec.Append(dtdFile);
+  NS_NewURI(aResult, localSpec);
+}
+
+/***************************** END CATALOG UTILS *****************************/
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsExpatDriver)  // Interfaces nsExpatDriver answers to:
+  NS_INTERFACE_MAP_ENTRY(nsITokenizer)
+  NS_INTERFACE_MAP_ENTRY(nsIDTD)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIDTD)  // nsISupports is resolved through nsIDTD.
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsExpatDriver)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsExpatDriver)
+
+NS_IMPL_CYCLE_COLLECTION(nsExpatDriver, mSink, mExtendedSink)  // Members named for cycle collection.
+
+nsExpatDriver::nsExpatDriver()
+  : mExpatParser(nullptr)  // No expat parser exists yet.
+  , mInCData(false)
+  , mInInternalSubset(false)
+  , mInExternalDTD(false)
+  , mMadeFinalCallToExpat(false)
+  , mIsFinalChunk(false)
+  , mTagDepth(0)
+  , mInternalState(NS_OK)
+  , mExpatBuffered(0)
+  , mCatalogData(nullptr)
+  , mInnerWindowID(0)
+{
+}
+
+nsExpatDriver::~nsExpatDriver()
+{
+  if (XML_Parser parser = mExpatParser) {  // Nothing to free without a parser.
+    XML_ParserFree(parser);
+  }
+}
+
+void
+nsExpatDriver::HandleStartElement(const char16_t *aValue,
+                                  const char16_t **aAtts)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // Sanity check: the depth limit has to be representable in the type the tag
+  // depth tracker was declared as.
+  static_assert(sMaxXMLDepth <= mozilla::MaxValue<decltype(nsExpatDriver::mTagDepth)>::value,
+                "Maximum XML parsing depth type mismatch: value too large.");
+
+  // Work out how many entries aAtts holds. XML_GetSpecifiedAttributeCount only
+  // covers the specified attributes (it already counts name and value
+  // separately), so keep stepping over the name/value pairs of defaulted
+  // attributes until the terminating null entry.
+  uint32_t attrArrayLength = XML_GetSpecifiedAttributeCount(mExpatParser);
+  while (aAtts[attrArrayLength]) {
+    attrArrayLength += 2;
+  }
+
+  if (!mSink) {
+    return;
+  }
+
+  if (++mTagDepth >= sMaxXMLDepth) {
+    MaybeStopParser(NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP);
+    return;
+  }
+
+  nsresult rv =
+    mSink->HandleStartElement(aValue, aAtts, attrArrayLength,
+                              XML_GetCurrentLineNumber(mExpatParser));
+  MaybeStopParser(rv);
+}
+
+nsresult
+nsExpatDriver::HandleEndElement(const char16_t *aValue)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+  NS_ASSERTION(mInternalState != NS_ERROR_HTMLPARSER_BLOCK,
+               "Shouldn't block from HandleStartElement.");
+
+  if (!mSink || mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return NS_OK;  // No sink, or parsing was stopped: depth is left alone.
+  }
+  nsresult rv = mSink->HandleEndElement(aValue);
+  --mTagDepth;  // Balances the increment made in HandleStartElement.
+  MaybeStopParser(rv);
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleCharacterData(const char16_t *aValue,
+                                   const uint32_t aLength)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  if (mInCData) {
+    // Inside a CDATA section the text is buffered until the section ends.
+    if (!mCDataText.Append(aValue, aLength, fallible)) {
+      MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
+    }
+    return NS_OK;
+  }
+  if (mSink) {
+    MaybeStopParser(mSink->HandleCharacterData(aValue, aLength));
+  }
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleComment(const char16_t *aValue)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // Comments from external DTDs are ignored.
+  if (mInExternalDTD) {
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.AppendLiteral("<!--");
+    mInternalSubset.Append(aValue);
+    mInternalSubset.AppendLiteral("-->");
+    return NS_OK;
+  }
+  if (mSink) {
+    MaybeStopParser(mSink->HandleComment(aValue));
+  }
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleProcessingInstruction(const char16_t *aTarget,
+                                           const char16_t *aData)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // PIs in external DTDs are ignored for now. Eventually we want to pass
+  // them to the sink in a way that doesn't put them in the DOM.
+  if (mInExternalDTD) {
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.AppendLiteral("<?");
+    mInternalSubset.Append(aTarget);
+    mInternalSubset.Append(' ');
+    mInternalSubset.Append(aData);
+    mInternalSubset.AppendLiteral("?>");
+    return NS_OK;
+  }
+
+  if (mSink) {
+    MaybeStopParser(mSink->HandleProcessingInstruction(aTarget, aData));
+  }
+  return NS_OK;
+}
+
+// Passes the XML declaration to the sink; always reports NS_OK to the caller.
+nsresult
+nsExpatDriver::HandleXMLDeclaration(const char16_t *aVersion,
+                                    const char16_t *aEncoding,
+                                    int32_t aStandalone)
+{
+  if (mSink) {
+    MaybeStopParser(mSink->HandleXMLDeclaration(aVersion, aEncoding,
+                                                aStandalone));
+  }
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleDefault(const char16_t *aValue,
+                             const uint32_t aLength)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // Nothing from an external DTD is passed on.
+  if (mInExternalDTD) {
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.Append(aValue, aLength);
+    return NS_OK;
+  }
+  if (mSink) {
+    // Only '\n' and '\r' reach the sink, one per call, until a call fails.
+    nsresult rv = mInternalState;
+    for (uint32_t pos = 0; pos < aLength && NS_SUCCEEDED(rv); ++pos) {
+      if (aValue[pos] == '\n' || aValue[pos] == '\r') {
+        rv = mSink->HandleCharacterData(aValue + pos, 1);
+      }
+    }
+    MaybeStopParser(rv);
+  }
+  return NS_OK;
+}
+
+nsresult  // Always NS_OK.
+nsExpatDriver::HandleStartCdataSection()
+{
+  mInCData = true;  // HandleCharacterData now buffers text into mCDataText.
+
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleEndCdataSection()
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // The section is complete: deliver the buffered text in one piece, then
+  // empty the buffer whether or not a sink was there to receive it.
+  mInCData = false;
+  if (mSink) {
+    MaybeStopParser(mSink->HandleCDataSection(mCDataText.get(),
+                                              mCDataText.Length()));
+  }
+  mCDataText.Truncate();
+  return NS_OK;
+}
+
+// Relays a namespace declaration start to the extended sink, when present.
+nsresult
+nsExpatDriver::HandleStartNamespaceDecl(const char16_t* aPrefix,
+                                        const char16_t* aUri)
+{
+  if (mExtendedSink) {
+    MaybeStopParser(mExtendedSink->HandleStartNamespaceDecl(aPrefix, aUri));
+  }
+  return NS_OK;
+}
+
+// Relays a namespace declaration end, unless parsing has been stopped.
+nsresult
+nsExpatDriver::HandleEndNamespaceDecl(const char16_t* aPrefix)
+{
+  if (mExtendedSink && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+    MaybeStopParser(mExtendedSink->HandleEndNamespaceDecl(aPrefix));
+  }
+  return NS_OK;
+}
+
+// Relays a notation declaration to the extended sink; aBase is not passed on.
+nsresult
+nsExpatDriver::HandleNotationDecl(const char16_t* aNotationName,
+                                  const char16_t* aBase,
+                                  const char16_t* aSysid,
+                                  const char16_t* aPubid)
+{
+  if (mExtendedSink) {
+    MaybeStopParser(mExtendedSink->HandleNotationDecl(aNotationName, aSysid,
+                                                      aPubid));
+  }
+  return NS_OK;
+}
+
+// Relays an unparsed entity declaration to the extended sink (aBase unused).
+nsresult
+nsExpatDriver::HandleUnparsedEntityDecl(const char16_t* aEntityName,
+                                        const char16_t* aBase,
+                                        const char16_t* aSysid,
+                                        const char16_t* aPubid,
+                                        const char16_t* aNotationName)
+{
+  if (mExtendedSink) {
+    nsresult rv =
+      mExtendedSink->HandleUnparsedEntityDecl(aEntityName, aSysid, aPubid,
+                                              aNotationName);
+    MaybeStopParser(rv);
+  }
+  return NS_OK;
+}
+
+nsresult
+nsExpatDriver::HandleStartDoctypeDecl(const char16_t* aDoctypeName,
+                                      const char16_t* aSysid,
+                                      const char16_t* aPubid,
+                                      bool aHasInternalSubset)
+{
+  // Keep the declaration around; HandleEndDoctypeDecl passes it to the sink.
+  mDoctypeName = aDoctypeName;
+  mSystemID = aSysid;
+  mPublicID = aPubid;
+
+  if (mExtendedSink) {
+    MaybeStopParser(mExtendedSink->HandleStartDTD(aDoctypeName, aSysid,
+                                                  aPubid));
+  }
+
+  if (!aHasInternalSubset) {
+    // Distinguish missing internal subset from an empty one
+    mInternalSubset.SetIsVoid(true);
+    return NS_OK;
+  }
+
+  // Consuming a huge internal subset translates to numerous allocations, so
+  // reserve 1K up front ( just a guesstimate! ) to avoid too many of them.
+  mInInternalSubset = true;
+  mInternalSubset.SetCapacity(1024);
+  return NS_OK;
+}
+
+nsresult  // Always NS_OK; a sink failure is routed through MaybeStopParser.
+nsExpatDriver::HandleEndDoctypeDecl()
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  mInInternalSubset = false;  // The DOCTYPE is closed, and with it its internal subset.
+
+  if (mSink) {
+    // let the sink know any additional knowledge that we have about the
+    // document (currently, from bug 124570, we only expect to pass additional
+    // agent sheets needed to layout the XML vocabulary of the document)
+    nsCOMPtr<nsIURI> data;  // Stays null: the code that would fill it is compiled out below.
+#if 0
+    if (mCatalogData && mCatalogData->mAgentSheet) {
+      NS_NewURI(getter_AddRefs(data), mCatalogData->mAgentSheet);
+    }
+#endif
+
+    // The unused support for "catalog style sheets" was removed. It doesn't
+    // look like we'll ever fix bug 98413 either.
+    MOZ_ASSERT(!mCatalogData || !mCatalogData->mAgentSheet,
+               "Need to add back support for catalog style sheets");
+
+    // Note: mInternalSubset already doesn't include the [] around it.
+    nsresult rv = mSink->HandleDoctypeDecl(mInternalSubset, mDoctypeName,
+                                           mSystemID, mPublicID, data);
+    MaybeStopParser(rv);
+  }
+  
+  mInternalSubset.SetCapacity(0);  // NOTE(review): presumably releases the subset buffer - confirm.
+
+  return NS_OK;
+}
+
+// ReadSegments callback: hands one segment of UTF-16 text to the expat parser
+// passed as aClosure as a non-final chunk. On success the whole segment is
+// reported as written; on failure nothing is and NS_ERROR_FAILURE is returned.
+static nsresult
+ExternalDTDStreamReaderFunc(nsIUnicharInputStream* aIn,
+                            void* aClosure,
+                            const char16_t* aFromSegment,
+                            uint32_t aToOffset,
+                            uint32_t aCount,
+                            uint32_t *aWriteCount)
+{
+  XML_Parser parser = static_cast<XML_Parser>(aClosure);
+  const char* bytes = reinterpret_cast<const char*>(aFromSegment);
+  const bool parsed =
+    XML_Parse(parser, bytes, aCount * sizeof(char16_t), 0) == XML_STATUS_OK;
+
+  *aWriteCount = parsed ? aCount : 0;
+
+  return parsed ? NS_OK : NS_ERROR_FAILURE;
+}
+
+int  // 1 when the entity is skipped; otherwise the status of the final XML_Parse call.
+nsExpatDriver::HandleExternalEntityRef(const char16_t *openEntityNames,
+                                       const char16_t *base,
+                                       const char16_t *systemId,
+                                       const char16_t *publicId)
+{
+  if (mInInternalSubset && !mInExternalDTD && openEntityNames) {  // Record the reference as "%name;".
+    mInternalSubset.Append(char16_t('%'));
+    mInternalSubset.Append(nsDependentString(openEntityNames));
+    mInternalSubset.Append(char16_t(';'));
+  }
+
+  // Load the external entity into a buffer.
+  nsCOMPtr<nsIInputStream> in;
+  nsAutoString absURL;
+  nsresult rv = OpenInputStreamFromExternalDTD(publicId, systemId, base,
+                                               getter_AddRefs(in), absURL);
+  if (NS_FAILED(rv)) {  // Could not open the DTD: warn in debug builds, then skip the entity.
+#ifdef DEBUG
+    nsCString message("Failed to open external DTD: publicId \"");
+    AppendUTF16toUTF8(publicId, message);
+    message += "\" systemId \"";
+    AppendUTF16toUTF8(systemId, message);
+    message += "\" base \"";
+    AppendUTF16toUTF8(base, message);
+    message += "\" URL \"";
+    AppendUTF16toUTF8(absURL, message);
+    message += "\"";
+    NS_WARNING(message.get());
+#endif
+    return 1;
+  }
+
+  nsCOMPtr<nsIUnicharInputStream> uniIn;
+  rv = NS_NewUnicharInputStream(in, getter_AddRefs(uniIn));
+  NS_ENSURE_SUCCESS(rv, 1);
+
+  int result = 1;  // Kept when no stream or no entity parser could be created.
+  if (uniIn) {
+    XML_Parser entParser = XML_ExternalEntityParserCreate(mExpatParser, 0,
+                                                          kUTF16);
+    if (entParser) {
+      XML_SetBase(entParser, absURL.get());
+
+      mInExternalDTD = true;  // Comment, PI and default handlers return early while this is set.
+
+      uint32_t totalRead;
+      do {  // Pump segments into entParser until the stream is drained or a read fails.
+        rv = uniIn->ReadSegments(ExternalDTDStreamReaderFunc, entParser,
+                                 uint32_t(-1), &totalRead);
+      } while (NS_SUCCEEDED(rv) && totalRead > 0);
+
+      result = XML_Parse(entParser, nullptr, 0, 1);  // Final, empty chunk ends the entity parse.
+
+      mInExternalDTD = false;
+
+      XML_ParserFree(entParser);
+    }
+  }
+
+  return result;
+}
+
+nsresult  // Opens a stream for the external DTD and reports the URL actually used in aAbsURL.
+nsExpatDriver::OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
+                                              const char16_t* aURLStr,
+                                              const char16_t* aBaseURL,
+                                              nsIInputStream** aStream,
+                                              nsAString& aAbsURL)
+{
+  nsCOMPtr<nsIURI> baseURI;
+  nsresult rv = NS_NewURI(getter_AddRefs(baseURI),
+                          NS_ConvertUTF16toUTF8(aBaseURL));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsCOMPtr<nsIURI> uri;  // The DTD's URL, resolved against baseURI.
+  rv = NS_NewURI(getter_AddRefs(uri), NS_ConvertUTF16toUTF8(aURLStr), nullptr,
+                 baseURI);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // make sure the URI is allowed to be loaded in sync
+  bool isUIResource = false;
+  rv = NS_URIChainHasFlags(uri, nsIProtocolHandler::URI_IS_UI_RESOURCE,
+                           &isUIResource);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsCOMPtr<nsIURI> localURI;
+  if (!isUIResource) {  // Not a UI resource: only a local replacement may be loaded.
+    // Check to see if we can map the DTD to a known local DTD, or if a DTD
+    // file of the same name exists in the special DTD directory
+    if (aFPIStr) {
+      // see if the Formal Public Identifier (FPI) maps to a catalog entry
+      mCatalogData = LookupCatalogData(aFPIStr);
+      GetLocalDTDURI(mCatalogData, uri, getter_AddRefs(localURI));
+    }
+    if (!localURI) {
+      return NS_ERROR_NOT_IMPLEMENTED;  // No local replacement: the DTD is not loaded at all.
+    }
+  }
+
+  nsCOMPtr<nsIChannel> channel;
+  if (localURI) {  // Load the local replacement with the system principal.
+    localURI.swap(uri);  // From here on uri names the local DTD.
+    rv = NS_NewChannel(getter_AddRefs(channel),
+                       uri,
+                       nsContentUtils::GetSystemPrincipal(),
+                       nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL,
+                       nsIContentPolicy::TYPE_DTD);
+  }
+  else {  // UI resource: load it as requested, on behalf of the document when there is one.
+    NS_ASSERTION(mSink == nsCOMPtr<nsIExpatSink>(do_QueryInterface(mOriginalSink)),
+                 "In nsExpatDriver::OpenInputStreamFromExternalDTD: "
+                 "mOriginalSink not the same object as mSink?");
+    nsCOMPtr<nsIPrincipal> loadingPrincipal;
+    if (mOriginalSink) {
+      nsCOMPtr<nsIDocument> doc;
+      doc = do_QueryInterface(mOriginalSink->GetTarget());
+      if (doc) {
+        loadingPrincipal = doc->NodePrincipal();
+      }
+    }
+    if (!loadingPrincipal) {  // No document available: fall back to a null principal.
+      loadingPrincipal = nsNullPrincipal::Create();
+    }
+    rv = NS_NewChannel(getter_AddRefs(channel),
+                       uri,
+                       loadingPrincipal,
+                       nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_INHERITS |
+                       nsILoadInfo::SEC_ALLOW_CHROME,
+                       nsIContentPolicy::TYPE_DTD);
+  }
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsAutoCString absURL;
+  rv = uri->GetSpec(absURL);  // Spec of whichever URI the channel was created for.
+  NS_ENSURE_SUCCESS(rv, rv);
+  CopyUTF8toUTF16(absURL, aAbsURL);
+
+  channel->SetContentType(NS_LITERAL_CSTRING("application/xml"));
+  return channel->Open2(aStream);
+}
+
+// Formats the localized "XMLParsingError" message with the description,
+// source URL, line and column into aErrorString (which is emptied first).
+static nsresult
+CreateErrorText(const char16_t* aDescription,
+                const char16_t* aSourceURL,
+                const uint32_t aLineNumber,
+                const uint32_t aColNumber,
+                nsString& aErrorString)
+{
+  aErrorString.Truncate();
+
+  nsAutoString format;
+  nsresult rv =
+    nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES,
+                                               "XMLParsingError", format);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // XML Parsing Error: %1$S\nLocation: %2$S\nLine Number %3$u, Column %4$u:
+  char16_t *formatted = nsTextFormatter::smprintf(format.get(), aDescription,
+                                                   aSourceURL, aLineNumber,
+                                                   aColNumber);
+  if (formatted) {
+    aErrorString.Assign(formatted);
+    nsTextFormatter::smprintf_free(formatted);
+    return NS_OK;
+  }
+  return NS_ERROR_OUT_OF_MEMORY;
+}
+
+// Appends a newline and a row of '-' ending in '^' that points at one-based
+// column aColNumber of aSourceLine; a tab advances to the next multiple of 8.
+// aSourceLine is only inspected up to its terminating null, so a column past
+// the end of the line is padded with '-' instead of reading out of bounds.
+static nsresult
+AppendErrorPointer(const int32_t aColNumber,
+                   const char16_t *aSourceLine,
+                   nsString& aSourceString)
+{
+  aSourceString.Append(char16_t('\n'));
+
+  // Last character will be '^'.
+  const int32_t last = aColNumber - 1;
+  uint32_t minuses = 0;
+  bool pastEnd = false;
+  for (int32_t i = 0; i < last; ++i) {
+    pastEnd = pastEnd || aSourceLine[i] == '\0';
+    // Since this uses |white-space: pre;| a tab stop equals 8 spaces.
+    const bool isTab = !pastEnd && aSourceLine[i] == '\t';
+    const uint32_t add = isTab ? 8 - (minuses % 8) : 1;
+    aSourceString.AppendASCII("--------", add);
+    minuses += add;
+  }
+  aSourceString.Append(char16_t('^'));
+
+  return NS_OK;
+}
+
+nsresult  // Reports expat's current error to the sink and, unless suppressed, to the console.
+nsExpatDriver::HandleError()
+{
+  int32_t code = XML_GetErrorCode(mExpatParser);
+  NS_ASSERTION(code > XML_ERROR_NONE, "unexpected XML error code");
+
+  // Map Expat error code to an error string
+  // XXX Deal with error returns.
+  nsAutoString description;
+  nsParserMsgUtils::GetLocalizedStringByID(XMLPARSER_PROPERTIES, code,
+                                           description);
+
+  if (code == XML_ERROR_TAG_MISMATCH) {  // Extend the description with the tag that was expected.
+    /**
+     *  Expat can send the following:
+     *    localName
+     *    namespaceURI<separator>localName
+     *    namespaceURI<separator>localName<separator>prefix
+     *
+     *  and we use 0xFFFF for the <separator>.
+     *
+     */
+    const char16_t *mismatch = MOZ_XML_GetMismatchedTag(mExpatParser);
+    const char16_t *uriEnd = nullptr;   // First separator, if any.
+    const char16_t *nameEnd = nullptr;  // Second separator, if any.
+    const char16_t *pos;
+    for (pos = mismatch; *pos; ++pos) {  // After the loop pos is at the terminating null.
+      if (*pos == kExpatSeparatorChar) {
+        if (uriEnd) {
+          nameEnd = pos;
+        }
+        else {
+          uriEnd = pos;
+        }
+      }
+    }
+
+    nsAutoString tagName;  // Rebuilt as "prefix:localName" or just "localName".
+    if (uriEnd && nameEnd) {
+      // We have a prefix.
+      tagName.Append(nameEnd + 1, pos - nameEnd - 1);
+      tagName.Append(char16_t(':'));
+    }
+    const char16_t *nameStart = uriEnd ? uriEnd + 1 : mismatch;  // Local name follows the URI, if present.
+    tagName.Append(nameStart, (nameEnd ? nameEnd : pos) - nameStart);
+    
+    nsAutoString msg;
+    nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES,
+                                               "Expected", msg);
+
+    // . Expected: </%S>.
+    char16_t *message = nsTextFormatter::smprintf(msg.get(), tagName.get());
+    if (!message) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+
+    description.Append(message);
+
+    nsTextFormatter::smprintf_free(message);
+  }
+
+  // Adjust the column number so that it is one based rather than zero based.
+  uint32_t colNumber = XML_GetCurrentColumnNumber(mExpatParser) + 1;
+  uint32_t lineNumber = XML_GetCurrentLineNumber(mExpatParser);
+
+  nsAutoString errorText;
+  CreateErrorText(description.get(), XML_GetBase(mExpatParser), lineNumber,
+                  colNumber, errorText);
+
+  NS_ASSERTION(mSink, "no sink?");
+
+  nsAutoString sourceText(mLastLine);  // The offending line; the pointer row is appended next.
+  AppendErrorPointer(colNumber, mLastLine.get(), sourceText);
+
+  // Try to create and initialize the script error.
+  nsCOMPtr<nsIScriptError> serr(do_CreateInstance(NS_SCRIPTERROR_CONTRACTID));
+  nsresult rv = NS_ERROR_FAILURE;  // Remains a failure when serr could not be created.
+  if (serr) {
+    rv = serr->InitWithWindowID(errorText,
+                                mURISpec,
+                                mLastLine,
+                                lineNumber, colNumber,
+                                nsIScriptError::errorFlag, "malformed-xml",
+                                mInnerWindowID);
+  }
+
+  // If it didn't initialize, we can't do any logging.
+  bool shouldReportError = NS_SUCCEEDED(rv);
+
+  if (mSink && shouldReportError) {  // The sink receives shouldReportError as an out-parameter.
+    rv = mSink->ReportError(errorText.get(), 
+                            sourceText.get(), 
+                            serr, 
+                            &shouldReportError);
+    if (NS_FAILED(rv)) {  // Sink failed: make sure the error still reaches the console.
+      shouldReportError = true;
+    }
+  }
+
+  if (mOriginalSink) {
+    nsCOMPtr<nsIDocument> doc = do_QueryInterface(mOriginalSink->GetTarget());
+    if (doc && doc->SuppressParserErrorConsoleMessages()) {  // Document opted out of console messages.
+      shouldReportError = false;
+    }
+  }
+
+  if (shouldReportError) {
+    nsCOMPtr<nsIConsoleService> cs
+      (do_GetService(NS_CONSOLESERVICE_CONTRACTID));  
+    if (cs) {
+      cs->LogMessage(serr);
+    }
+  }
+
+  return NS_ERROR_HTMLPARSER_STOPPARSING;
+}
+
+void  // Feeds aBuffer to expat, or resumes it; *aConsumed receives the char16_t count consumed.
+nsExpatDriver::ParseBuffer(const char16_t *aBuffer,
+                           uint32_t aLength,
+                           bool aIsFinal,
+                           uint32_t *aConsumed)
+{
+  NS_ASSERTION((aBuffer && aLength != 0) || (!aBuffer && aLength == 0), "?");
+  NS_ASSERTION(mInternalState != NS_OK || aIsFinal || aBuffer,
+               "Useless call, we won't call Expat");
+  NS_PRECONDITION(!BlockedOrInterrupted() || !aBuffer,
+                  "Non-null buffer when resuming");
+  NS_PRECONDITION(XML_GetCurrentByteIndex(mExpatParser) % sizeof(char16_t) == 0,
+                  "Consumed part of a char16_t?");
+
+  if (mExpatParser && (mInternalState == NS_OK || BlockedOrInterrupted())) {
+    int32_t parserBytesBefore = XML_GetCurrentByteIndex(mExpatParser);  // Position before this call.
+    NS_ASSERTION(parserBytesBefore >= 0, "Unexpected value");
+
+    XML_Status status;
+    if (BlockedOrInterrupted()) {  // Resuming: no new buffer is handed to expat.
+      mInternalState = NS_OK; // Resume in case we're blocked.
+      status = XML_ResumeParser(mExpatParser);
+    }
+    else {
+      status = XML_Parse(mExpatParser,
+                         reinterpret_cast<const char*>(aBuffer),
+                         aLength * sizeof(char16_t), aIsFinal);  // Length is passed in bytes.
+    }
+
+    int32_t parserBytesConsumed = XML_GetCurrentByteIndex(mExpatParser);  // Position after the call.
+
+    NS_ASSERTION(parserBytesConsumed >= 0, "Unexpected value");
+    NS_ASSERTION(parserBytesConsumed >= parserBytesBefore,
+                 "How'd this happen?");
+    NS_ASSERTION(parserBytesConsumed % sizeof(char16_t) == 0,
+                 "Consumed part of a char16_t?");
+
+    // Consumed something.
+    *aConsumed = (parserBytesConsumed - parserBytesBefore) / sizeof(char16_t);  // Bytes to char16_t units.
+    NS_ASSERTION(*aConsumed <= aLength + mExpatBuffered,
+                 "Too many bytes consumed?");
+
+    NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(), 
+                 "Inconsistent expat suspension state.");
+
+    if (status == XML_STATUS_ERROR) {  // Expat reported an error: stop parsing.
+      mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
+    }
+  }
+  else {
+    *aConsumed = 0;  // Expat was not called at all.
+  }
+}
+
+NS_IMETHODIMP
+nsExpatDriver::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
+{ // Feeds the scanner's unread data to Expat; aFlushTokens is never touched.
+  // We keep the scanner pointing to the position where Expat will start
+  // parsing.
+  nsScannerIterator currentExpatPosition;
+  aScanner.CurrentPosition(currentExpatPosition);
+
+  // This is the start of the first buffer that we need to pass to Expat.
+  nsScannerIterator start = currentExpatPosition;
+  start.advance(mExpatBuffered);
+
+  // This is the end of the last buffer (at this point, more data could come in
+  // later).
+  nsScannerIterator end;
+  aScanner.EndReading(end);
+
+  MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+         ("Remaining in expat's buffer: %i, remaining in scanner: %i.",
+          mExpatBuffered, Distance(start, end)));
+
+  // We want to call Expat if we have more buffers, or if we know there won't
+  // be more buffers (and so we want to flush the remaining data), or if we're
+  // currently blocked and there's data in Expat's buffer.
+  while (start != end || (mIsFinalChunk && !mMadeFinalCallToExpat) ||
+         (BlockedOrInterrupted() && mExpatBuffered > 0)) {
+    bool noMoreBuffers = start == end && mIsFinalChunk;
+    bool blocked = BlockedOrInterrupted();
+
+    const char16_t *buffer;
+    uint32_t length;
+    if (blocked || noMoreBuffers) {
+      // If we're blocked we just resume Expat so we don't need a buffer, if
+      // there aren't any more buffers we pass a null buffer to Expat.
+      buffer = nullptr;
+      length = 0;
+
+      if (blocked) {
+        MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+               ("Resuming Expat, will parse data remaining in Expat's "
+                "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
+                NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+                                      mExpatBuffered).get()));
+      }
+      else {
+        NS_ASSERTION(mExpatBuffered == Distance(currentExpatPosition, end),
+                     "Didn't pass all the data to Expat?");
+        MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+               ("Last call to Expat, will parse data remaining in Expat's "
+                "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
+                NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+                                      mExpatBuffered).get()));
+      }
+    }
+    else {
+      buffer = start.get();
+      length = uint32_t(start.size_forward());
+
+      MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+             ("Calling Expat, will parse data remaining in Expat's buffer and "
+              "new data.\nContent of Expat's buffer:\n-----\n%s\n-----\nNew "
+              "data:\n-----\n%s\n-----\n",
+              NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+                                    mExpatBuffered).get(),
+              NS_ConvertUTF16toUTF8(start.get(), length).get()));
+    }
+
+    uint32_t consumed; // always assigned by ParseBuffer (0 when Expat isn't run)
+    ParseBuffer(buffer, length, noMoreBuffers, &consumed);
+    if (consumed > 0) {
+      nsScannerIterator oldExpatPosition = currentExpatPosition;
+      currentExpatPosition.advance(consumed);
+
+      // We consumed some data, we want to store the last line of data that
+      // was consumed in case we run into an error (to show the line in which
+      // the error occurred).
+
+      // The length of the last line that Expat has parsed.
+      XML_Size lastLineLength = XML_GetCurrentColumnNumber(mExpatParser);
+
+      if (lastLineLength <= consumed) {
+        // The length of the last line was less than what expat consumed, so
+        // there was at least one line break in the consumed data. Store the
+        // last line until the point where we stopped parsing.
+        nsScannerIterator startLastLine = currentExpatPosition;
+        startLastLine.advance(-((ptrdiff_t)lastLineLength));
+        if (!CopyUnicodeTo(startLastLine, currentExpatPosition, mLastLine)) {
+          return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
+        }
+      }
+      else {
+        // There was no line break in the consumed data, append the consumed
+        // data.
+        if (!AppendUnicodeTo(oldExpatPosition,
+                             currentExpatPosition,
+                             mLastLine)) {
+          return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
+        }
+      }
+    }
+
+    mExpatBuffered += length - consumed; // unsigned math: buffered + passed - consumed
+
+    if (BlockedOrInterrupted()) {
+      MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+             ("Blocked or interrupted parser (probably for loading linked "
+              "stylesheets or scripts)."));
+
+      aScanner.SetPosition(currentExpatPosition, true);
+      aScanner.Mark();
+
+      return mInternalState;
+    }
+
+    if (noMoreBuffers && mExpatBuffered == 0) {
+      mMadeFinalCallToExpat = true;
+    }
+
+    if (NS_FAILED(mInternalState)) {
+      if (XML_GetErrorCode(mExpatParser) != XML_ERROR_NONE) {
+        NS_ASSERTION(mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING,
+                     "Unexpected error");
+
+        // Look for the next newline after the last one we consumed, so that
+        // mLastLine holds the rest of the offending line for the error report.
+        nsScannerIterator lastLine = currentExpatPosition;
+        while (lastLine != end) {
+          length = uint32_t(lastLine.size_forward()); // reuses the outer 'length'
+          uint32_t endOffset = 0;
+          const char16_t *buffer = lastLine.get(); // shadows the outer 'buffer'
+          while (endOffset < length && buffer[endOffset] != '\n' &&
+                 buffer[endOffset] != '\r') {
+            ++endOffset;
+          }
+          mLastLine.Append(Substring(buffer, buffer + endOffset));
+          if (endOffset < length) {
+            // We found a newline.
+            break;
+          }
+
+          lastLine.advance(length);
+        }
+
+        HandleError();
+      }
+
+      return mInternalState;
+    }
+
+    // Either we have more buffers, or we were blocked (and we'll flush in the
+    // next iteration), or we should have emptied Expat's buffer.
+    NS_ASSERTION(!noMoreBuffers || blocked ||
+                 (mExpatBuffered == 0 && currentExpatPosition == end),
+                 "Unreachable data left in Expat's buffer");
+
+    start.advance(length);
+
+    // It's possible for start to have passed end if we received more data
+    // (e.g. if we spun the event loop in an inline script). Reload end now
+    // to compensate.
+    aScanner.EndReading(end);
+  }
+
+  aScanner.SetPosition(currentExpatPosition, true);
+  aScanner.Mark();
+
+  MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+         ("Remaining in expat's buffer: %i, remaining in scanner: %i.",
+          mExpatBuffered, Distance(currentExpatPosition, end)));
+
+  return NS_SUCCEEDED(mInternalState) ? kEOF : NS_OK;
+}
+
+NS_IMETHODIMP
+nsExpatDriver::WillBuildModel(const CParserContext& aParserContext,
+                              nsITokenizer* aTokenizer,
+                              nsIContentSink* aSink)
+{ // Binds the sinks, then creates and configures the Expat parser.
+  mSink = do_QueryInterface(aSink);
+  if (!mSink) {
+    NS_ERROR("nsExpatDriver didn't get an nsIExpatSink");
+    // Make sure future calls to us bail out as needed
+    mInternalState = NS_ERROR_UNEXPECTED;
+    return mInternalState;
+  }
+
+  mOriginalSink = aSink;
+
+  static const XML_Memory_Handling_Suite memsuite =
+    {
+      (void *(*)(size_t))PR_Malloc,
+      (void *(*)(void *, size_t))PR_Realloc,
+      PR_Free
+    };
+
+  static const char16_t kExpatSeparator[] = { kExpatSeparatorChar, '\0' };
+
+  mExpatParser = XML_ParserCreate_MM(kUTF16, &memsuite, kExpatSeparator);
+  NS_ENSURE_TRUE(mExpatParser, NS_ERROR_FAILURE);
+
+  XML_SetReturnNSTriplet(mExpatParser, XML_TRUE);
+
+#ifdef XML_DTD
+  XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+#endif
+
+  mURISpec = aParserContext.mScanner->GetFilename();
+
+  XML_SetBase(mExpatParser, mURISpec.get());
+
+  nsCOMPtr<nsIDocument> doc = do_QueryInterface(mOriginalSink->GetTarget());
+  if (doc) { // Look up the inner window ID, which HandleError uses for reporting.
+    nsCOMPtr<nsPIDOMWindowOuter> win = doc->GetWindow();
+    nsCOMPtr<nsPIDOMWindowInner> inner;
+    if (win) {
+      inner = win->GetCurrentInnerWindow();
+    } else {
+      bool aHasHadScriptHandlingObject; // out-param only; value is not read here
+      nsIScriptGlobalObject *global =
+        doc->GetScriptHandlingObject(aHasHadScriptHandlingObject);
+      if (global) {
+        inner = do_QueryInterface(global);
+      }
+    }
+    if (inner) {
+      mInnerWindowID = inner->WindowID();
+    }
+  }
+
+  // Set up the callbacks
+  XML_SetXmlDeclHandler(mExpatParser, Driver_HandleXMLDeclaration); 
+  XML_SetElementHandler(mExpatParser, Driver_HandleStartElement,
+                        Driver_HandleEndElement);
+  XML_SetCharacterDataHandler(mExpatParser, Driver_HandleCharacterData);
+  XML_SetProcessingInstructionHandler(mExpatParser,
+                                      Driver_HandleProcessingInstruction);
+  XML_SetDefaultHandlerExpand(mExpatParser, Driver_HandleDefault);
+  XML_SetExternalEntityRefHandler(mExpatParser,
+                                  (XML_ExternalEntityRefHandler)
+                                          Driver_HandleExternalEntityRef);
+  XML_SetExternalEntityRefHandlerArg(mExpatParser, this);
+  XML_SetCommentHandler(mExpatParser, Driver_HandleComment);
+  XML_SetCdataSectionHandler(mExpatParser, Driver_HandleStartCdataSection,
+                             Driver_HandleEndCdataSection);
+
+  XML_SetParamEntityParsing(mExpatParser, // NOTE(review): presumably supersedes the XML_DTD-only call above; confirm
+                            XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
+  XML_SetDoctypeDeclHandler(mExpatParser, Driver_HandleStartDoctypeDecl,
+                            Driver_HandleEndDoctypeDecl);
+
+  // If the sink is an nsIExtendedExpatSink,
+  // register some additional handlers.
+  mExtendedSink = do_QueryInterface(mSink);
+  if (mExtendedSink) {
+    XML_SetNamespaceDeclHandler(mExpatParser,
+                                Driver_HandleStartNamespaceDecl,
+                                Driver_HandleEndNamespaceDecl);
+    XML_SetUnparsedEntityDeclHandler(mExpatParser,
+                                     Driver_HandleUnparsedEntityDecl);
+    XML_SetNotationDeclHandler(mExpatParser,
+                               Driver_HandleNotationDecl);
+  }
+
+  // Set up the user data.
+  XML_SetUserData(mExpatParser, this);
+
+  return mInternalState;
+}
+
+NS_IMETHODIMP
+nsExpatDriver::BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink)
+{ // Does no work of its own; both arguments are ignored.
+  return mInternalState; // report the driver's current state
+}
+
+NS_IMETHODIMP
+nsExpatDriver::DidBuildModel(nsresult anErrorCode)
+{ // Drops every sink reference; anErrorCode is ignored.
+  mOriginalSink = nullptr;
+  mSink = nullptr;
+  mExtendedSink = nullptr;
+  return NS_OK; // unconditional success
+}
+
+NS_IMETHODIMP
+nsExpatDriver::WillTokenize(bool aIsFinalChunk)
+{ // Records whether this is the last chunk; ConsumeToken reads the flag.
+  mIsFinalChunk = aIsFinalChunk;
+  return NS_OK;
+}
+
+NS_IMETHODIMP_(void)
+nsExpatDriver::Terminate()
+{ // Stops Expat (if one was created) and marks the driver as stopped.
+  // XXX - not sure what happens to the unparsed data.
+  if (mExpatParser) {
+    XML_StopParser(mExpatParser, XML_FALSE); // XML_FALSE: stop, not a resumable pause
+  }
+  mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
+}
+
+NS_IMETHODIMP_(int32_t)
+nsExpatDriver::GetType()
+{ // This driver always reports the XML parser flag.
+  return NS_IPARSER_FLAG_XML;
+}
+
+NS_IMETHODIMP_(nsDTDMode)
+nsExpatDriver::GetMode() const
+{ // Always reports full standards mode.
+  return eDTDMode_full_standards;
+}
+
+/*************************** Unused methods **********************************/
+
+NS_IMETHODIMP_(bool)
+nsExpatDriver::IsContainer(int32_t aTag) const
+{ // Unused by this driver: aTag is ignored and the answer is always true.
+  return true;
+}
+
+NS_IMETHODIMP_(bool)
+nsExpatDriver::CanContain(int32_t aParent,int32_t aChild) const
+{ // Unused by this driver: both tags are ignored and the answer is always true.
+  return true;
+}
+
+void
+nsExpatDriver::MaybeStopParser(nsresult aState)
+{ // Folds aState into mInternalState and stops or pauses Expat on failure.
+  if (NS_FAILED(aState)) {
+    // If we had a failure we want to override NS_ERROR_HTMLPARSER_INTERRUPTED
+    // and we want to override NS_ERROR_HTMLPARSER_BLOCK but not with
+    // NS_ERROR_HTMLPARSER_INTERRUPTED.
+    if (NS_SUCCEEDED(mInternalState) ||
+        mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED ||
+        (mInternalState == NS_ERROR_HTMLPARSER_BLOCK &&
+         aState != NS_ERROR_HTMLPARSER_INTERRUPTED)) {
+      mInternalState = (aState == NS_ERROR_HTMLPARSER_INTERRUPTED ||
+                        aState == NS_ERROR_HTMLPARSER_BLOCK) ?
+                       aState : // block/interrupt codes are kept verbatim
+                       NS_ERROR_HTMLPARSER_STOPPARSING; // any other failure
+    }
+
+    // If we get an error then we need to stop Expat (by calling XML_StopParser
+    // with false as the last argument). If the parser should be blocked or
+    // interrupted we need to pause Expat (by calling XML_StopParser with
+    // true as the last argument).
+    XML_StopParser(mExpatParser, BlockedOrInterrupted());
+  }
+  else if (NS_SUCCEEDED(mInternalState)) {
+    // Only clobber mInternalState with the success code if we didn't block or
+    // interrupt before.
+    mInternalState = aState;
+  }
+}
diff --git a/components/htmlparser/src/nsExpatDriver.h b/components/htmlparser/src/nsExpatDriver.h
new file mode 100644
index 000000000..988409cfe
--- /dev/null
+++ b/components/htmlparser/src/nsExpatDriver.h
@@ -0,0 +1,145 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_EXPAT_DRIVER__
+#define NS_EXPAT_DRIVER__
+
+#include "expat_config.h"
+#include "expat.h"
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIDTD.h"
+#include "nsITokenizer.h"
+#include "nsIInputStream.h"
+#include "nsIParser.h"
+#include "nsCycleCollectionParticipant.h"
+
+class nsIExpatSink;
+class nsIExtendedExpatSink;
+struct nsCatalogData;
+
+class nsExpatDriver : public nsIDTD,
+                      public nsITokenizer
+{ // nsIDTD + nsITokenizer implementation that drives an Expat XML_Parser.
+  virtual ~nsExpatDriver();
+
+public:
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_NSIDTD
+  NS_DECL_NSITOKENIZER
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsExpatDriver, nsIDTD)
+
+  nsExpatDriver();
+
+  int HandleExternalEntityRef(const char16_t *aOpenEntityNames,
+                              const char16_t *aBase,
+                              const char16_t *aSystemId,
+                              const char16_t *aPublicId);
+  void HandleStartElement(const char16_t *aName, const char16_t **aAtts);
+  nsresult HandleEndElement(const char16_t *aName);
+  nsresult HandleCharacterData(const char16_t *aCData, const uint32_t aLength);
+  nsresult HandleComment(const char16_t *aName);
+  nsresult HandleProcessingInstruction(const char16_t *aTarget,
+                                       const char16_t *aData);
+  nsresult HandleXMLDeclaration(const char16_t *aVersion,
+                                const char16_t *aEncoding,
+                                int32_t aStandalone);
+  nsresult HandleDefault(const char16_t *aData, const uint32_t aLength);
+  nsresult HandleStartCdataSection();
+  nsresult HandleEndCdataSection();
+  nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
+                                  const char16_t* aSysid,
+                                  const char16_t* aPubid,
+                                  bool aHasInternalSubset);
+  nsresult HandleEndDoctypeDecl();
+  nsresult HandleStartNamespaceDecl(const char16_t* aPrefix,
+                                    const char16_t* aUri);
+  nsresult HandleEndNamespaceDecl(const char16_t* aPrefix);
+  nsresult HandleNotationDecl(const char16_t* aNotationName,
+                              const char16_t* aBase,
+                              const char16_t* aSysid,
+                              const char16_t* aPubid);
+  nsresult HandleUnparsedEntityDecl(const char16_t* aEntityName,
+                                    const char16_t* aBase,
+                                    const char16_t* aSysid,
+                                    const char16_t* aPubid,
+                                    const char16_t* aNotationName);
+
+private:
+  // Load up an external stream to get external entity information
+  nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
+                                          const char16_t* aURLStr,
+                                          const char16_t* aBaseURL,
+                                          nsIInputStream** aStream,
+                                          nsAString& aAbsURL);
+
+  /**
+   * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
+   * aLength should be 0. The result of the call will be stored in
+   * mInternalState. Expat will parse as much of the buffer as it can and store
+   * the rest in its internal buffer.
+   *
+   * @param aBuffer the buffer to pass to Expat. May be null.
+   * @param aLength the length of the buffer to pass to Expat (in number of
+   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
+   *                aBuffer is not null.
+   * @param aIsFinal whether there will definitely not be any more new buffers
+   *                 passed in to ParseBuffer
+   * @param aConsumed [out] the number of char16_t's that Expat consumed. This
+   *                        doesn't include the char16_t's that Expat stored in
+   *                        its buffer but didn't parse yet.
+   */
+  void ParseBuffer(const char16_t *aBuffer, uint32_t aLength, bool aIsFinal,
+                   uint32_t *aConsumed);
+  nsresult HandleError();
+
+  void MaybeStopParser(nsresult aState);
+
+  bool BlockedOrInterrupted() // true while mInternalState is BLOCK or INTERRUPTED
+  {
+    return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
+           mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
+  }
+
+  XML_Parser       mExpatParser; // created in WillBuildModel
+  nsString         mLastLine; // last line handed to Expat, kept for error reports
+  nsString         mCDataText;
+  // Various parts of a doctype
+  nsString         mDoctypeName;
+  nsString         mSystemID;
+  nsString         mPublicID;
+  nsString         mInternalSubset;
+  bool             mInCData;
+  bool             mInInternalSubset;
+  bool             mInExternalDTD;
+  bool             mMadeFinalCallToExpat; // set once the final call drained Expat's buffer
+
+  // Whether we're sure that we won't be getting more buffers to parse from
+  // Necko
+  bool             mIsFinalChunk;
+  
+  // The depth of nested parsing we are currently at
+  uint16_t         mTagDepth;
+
+  nsresult         mInternalState;
+
+  // The length of the data in Expat's buffer (in number of char16_t's).
+  uint32_t         mExpatBuffered;
+  
+  // These sinks all refer the same conceptual object. mOriginalSink is
+  // identical with the nsIContentSink* passed to WillBuildModel, and exists
+  // only to avoid QI-ing back to nsIContentSink*.
+  nsCOMPtr<nsIContentSink> mOriginalSink;
+  nsCOMPtr<nsIExpatSink> mSink;
+  nsCOMPtr<nsIExtendedExpatSink> mExtendedSink;
+
+  const nsCatalogData* mCatalogData; // weak
+  nsString         mURISpec; // scanner filename; also set as Expat's base
+
+  // Used for error reporting.
+  uint64_t         mInnerWindowID;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsHTMLEntities.cpp b/components/htmlparser/src/nsHTMLEntities.cpp
new file mode 100644
index 000000000..e8365c21f
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntities.cpp
@@ -0,0 +1,205 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ArrayUtils.h"
+
+#include "nsHTMLEntities.h"
+
+#include "nsString.h"
+#include "nsCRT.h"
+#include "PLDHashTable.h"
+
+using namespace mozilla;
+
+struct EntityNode { // one row of gEntityArray: entity name <-> code point
+  const char* mStr; // never owns buffer
+  int32_t       mUnicode; // numeric value paired with mStr
+};
+
+struct EntityNodeEntry : public PLDHashEntryHdr
+{ // hash-table slot used by both lookup tables
+  const EntityNode* node; // points into gEntityArray; null until AddRefTable fills it
+}; 
+
+static bool matchNodeString(const PLDHashEntryHdr* aHdr, const void* key)
+{ // Match callback for the name table: key is a C string holding the entity name.
+  const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
+  const char* str = static_cast<const char*>(key);
+  return (nsCRT::strcmp(entry->node->mStr, str) == 0); // exact, case-sensitive compare
+}
+
+static bool matchNodeUnicode(const PLDHashEntryHdr* aHdr, const void* key)
+{ // Match callback for the code-point table: key is an int32 packed into a pointer.
+  const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
+  const int32_t ucode = NS_PTR_TO_INT32(key);
+  return (entry->node->mUnicode == ucode);
+}
+
+static PLDHashNumber hashUnicodeValue(const void* key)
+{ // Hash callback for the code-point table: the value itself is the hash.
+  // key is actually the unicode value
+  return PLDHashNumber(NS_PTR_TO_INT32(key));
+}
+
+
+static const PLDHashTableOps EntityToUnicodeOps = { // name -> code point table
+  PLDHashTable::HashStringKey,
+  matchNodeString,
+  PLDHashTable::MoveEntryStub,
+  PLDHashTable::ClearEntryStub,
+  nullptr,
+}; 
+
+static const PLDHashTableOps UnicodeToEntityOps = { // code point -> name table
+  hashUnicodeValue,
+  matchNodeUnicode,
+  PLDHashTable::MoveEntryStub,
+  PLDHashTable::ClearEntryStub,
+  nullptr,
+};
+
+static PLDHashTable* gEntityToUnicode; // allocated in AddRefTable, freed in ReleaseTable
+static PLDHashTable* gUnicodeToEntity; // allocated in AddRefTable, freed in ReleaseTable
+static nsrefcnt gTableRefCnt = 0; // number of outstanding AddRefTable calls
+
+#define HTML_ENTITY(_name, _value) { #_name, _value },
+static const EntityNode gEntityArray[] = { // one EntityNode per HTML_ENTITY line
+#include "nsHTMLEntityList.h"
+};
+#undef HTML_ENTITY
+
+#define NS_HTML_ENTITY_COUNT ((int32_t)ArrayLength(gEntityArray))
+
+nsresult
+nsHTMLEntities::AddRefTable(void)
+{
+  // Builds both lookup tables on the first reference; later calls only bump
+  // gTableRefCnt. On OOM no reference is taken and the tables are freed.
+  if (!gTableRefCnt) {
+    gEntityToUnicode = new PLDHashTable(&EntityToUnicodeOps,
+                                        sizeof(EntityNodeEntry),
+                                        NS_HTML_ENTITY_COUNT);
+    gUnicodeToEntity = new PLDHashTable(&UnicodeToEntityOps,
+                                        sizeof(EntityNodeEntry),
+                                        NS_HTML_ENTITY_COUNT);
+    for (const EntityNode *node = gEntityArray,
+                 *node_end = ArrayEnd(gEntityArray);
+         node < node_end; ++node) {
+      auto byName = static_cast<EntityNodeEntry*>
+                               (gEntityToUnicode->Add(node->mStr, fallible));
+      auto byCode = static_cast<EntityNodeEntry*>
+        (gUnicodeToEntity->Add(NS_INT32_TO_PTR(node->mUnicode), fallible));
+      // Add() is fallible; the assertion is debug-only, so check for null too.
+      NS_ASSERTION(byName && byCode, "Error adding an entry");
+      if (!byName || !byCode) {
+        delete gEntityToUnicode;
+        delete gUnicodeToEntity;
+        gEntityToUnicode = gUnicodeToEntity = nullptr;
+        return NS_ERROR_OUT_OF_MEMORY;
+      }
+      // Prefer earlier entries when we have duplication.
+      if (!byName->node) byName->node = node;
+      if (!byCode->node) byCode->node = node;
+    }
+#ifdef DEBUG
+    gUnicodeToEntity->MarkImmutable();
+    gEntityToUnicode->MarkImmutable();
+#endif
+  }
+  ++gTableRefCnt;
+  return NS_OK;
+}
+
+void
+nsHTMLEntities::ReleaseTable(void)
+{ // Drops one reference; frees both tables when the count reaches zero.
+  if (--gTableRefCnt != 0) {
+    return; // other users still hold references
+  }
+
+  delete gEntityToUnicode;
+  delete gUnicodeToEntity;
+  gEntityToUnicode = nullptr; // lets a later AddRefTable rebuild from scratch
+  gUnicodeToEntity = nullptr;
+}
+
+int32_t
+nsHTMLEntities::EntityToUnicode(const nsCString& aEntity)
+{
+  NS_ASSERTION(gEntityToUnicode, "no lookup table, needs addref");
+  if (!gEntityToUnicode) {
+    return -1;
+  }
+
+  // Entities may or may not carry the terminating ';'. Strip it (repeatedly,
+  // via recursion) before the lookup. Last() must not be called on an empty
+  // string, so check for empty input first; an empty name maps to -1 below.
+  if (!aEntity.IsEmpty() && ';' == aEntity.Last()) {
+    nsAutoCString temp(aEntity);
+    temp.Truncate(aEntity.Length()-1);
+    return EntityToUnicode(temp);
+  }
+
+  auto entry =
+    static_cast<EntityNodeEntry*>(gEntityToUnicode->Search(aEntity.get()));
+
+  return entry ? entry->node->mUnicode : -1; // -1: no such entity
+}
+
+
+int32_t
+nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) {
+  nsAutoCString theEntity; theEntity.AssignWithConversion(aEntity);
+  // Strip trailing ';'; Last() must never see an empty string. Empty -> -1.
+  while (!theEntity.IsEmpty() && ';' == theEntity.Last()) {
+    theEntity.Truncate(theEntity.Length()-1);
+  }
+  return theEntity.IsEmpty() ? -1 : EntityToUnicode(theEntity);
+}
+
+
+const char*
+nsHTMLEntities::UnicodeToEntity(int32_t aUnicode)
+{ // Returns the entity name mapped to aUnicode, or nullptr when there is none.
+  NS_ASSERTION(gUnicodeToEntity, "no lookup table, needs addref");
+  if (!gUnicodeToEntity) return nullptr; // fail soft, as EntityToUnicode does
+  auto entry = static_cast<EntityNodeEntry*>
+                 (gUnicodeToEntity->Search(NS_INT32_TO_PTR(aUnicode)));
+
+  return entry ? entry->node->mStr : nullptr;
+}
+
+#ifdef DEBUG
+#include <stdio.h>
+
+class nsTestEntityTable { // DEBUG-only self-test; all checks run in the constructor
+public:
+   nsTestEntityTable() {
+     int32_t value;
+     nsHTMLEntities::AddRefTable(); // balanced by ReleaseTable() at the end
+
+     // Make sure we can find everything we are supposed to
+     for (int i = 0; i < NS_HTML_ENTITY_COUNT; ++i) {
+       nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr);
+
+       value = nsHTMLEntities::EntityToUnicode(entity);
+       NS_ASSERTION(value != -1, "can't find entity");
+       NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value");
+
+       entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value));
+       NS_ASSERTION(entity.EqualsASCII(gEntityArray[i].mStr), "bad entity name");
+     }
+
+     // Make sure we don't find things that aren't there
+     value = nsHTMLEntities::EntityToUnicode(nsAutoCString("@"));
+     NS_ASSERTION(value == -1, "found @");
+     value = nsHTMLEntities::EntityToUnicode(nsAutoCString("zzzzz"));
+     NS_ASSERTION(value == -1, "found zzzzz");
+     nsHTMLEntities::ReleaseTable();
+   }
+};
+//nsTestEntityTable validateEntityTable;
+#endif
+
diff --git a/components/htmlparser/src/nsHTMLEntities.h b/components/htmlparser/src/nsHTMLEntities.h
new file mode 100644
index 000000000..f38856bfa
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntities.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsHTMLEntities_h___
+#define nsHTMLEntities_h___
+
+#include "nsString.h"
+
+class nsHTMLEntities {
+public:
+
+  static nsresult AddRefTable(void); // must precede any lookup call
+  static void ReleaseTable(void);    // pair with each AddRefTable
+
+/**
+ * Translate an entity string into its Unicode value. This call
+ * returns -1 if the entity cannot be mapped. Note that the string
+ * passed in must NOT have the leading "&"; a trailing ";" is
+ * stripped by the implementation if present.
+ */
+  static int32_t EntityToUnicode(const nsAString& aEntity);
+  static int32_t EntityToUnicode(const nsCString& aEntity);
+
+/**
+ * Translate a Unicode value into an entity string. This call
+ * returns null if the value cannot be mapped.
+ * Note that the string returned DOES NOT have the leading "&" nor
+ * the trailing ";" in it.
+ */
+  static const char* UnicodeToEntity(int32_t aUnicode);
+};
+
+
+#endif /* nsHTMLEntities_h___ */
diff --git a/components/htmlparser/src/nsHTMLEntityList.h b/components/htmlparser/src/nsHTMLEntityList.h
new file mode 100644
index 000000000..fa05382bf
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntityList.h
@@ -0,0 +1,303 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/******
+
+  This file contains the list of all HTML entities 
+  See nsHTMLEntities.h for access to the enum values for entities
+
+  It is designed to be used as inline input to nsHTMLEntities.cpp *only*
+  through the magic of C preprocessing.
+
+  All entries must be enclosed in the macro HTML_ENTITY which will have cruel
+  and unusual things done to it
+
+  It is recommended (but not strictly necessary) to keep all entries
+  in alphabetical order
+
+  The first argument to HTML_ENTITY is the string value of the entity
+  The second argument to HTML_ENTITY is the Unicode value of the entity
+
+ ******/
+
+// ISO 8859-1 entities.
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(nbsp, 160)
+HTML_ENTITY(iexcl, 161)
+HTML_ENTITY(cent, 162)
+HTML_ENTITY(pound, 163)
+HTML_ENTITY(curren, 164)
+HTML_ENTITY(yen, 165)
+HTML_ENTITY(brvbar, 166)
+HTML_ENTITY(sect, 167)
+HTML_ENTITY(uml, 168)
+HTML_ENTITY(copy, 169)
+HTML_ENTITY(ordf, 170)
+HTML_ENTITY(laquo, 171)
+HTML_ENTITY(not, 172)
+HTML_ENTITY(shy, 173)
+HTML_ENTITY(reg, 174)
+HTML_ENTITY(macr, 175)
+HTML_ENTITY(deg, 176)
+HTML_ENTITY(plusmn, 177)
+HTML_ENTITY(sup2, 178)
+HTML_ENTITY(sup3, 179)
+HTML_ENTITY(acute, 180)
+HTML_ENTITY(micro, 181)
+HTML_ENTITY(para, 182)
+HTML_ENTITY(middot, 183)
+HTML_ENTITY(cedil, 184)
+HTML_ENTITY(sup1, 185)
+HTML_ENTITY(ordm, 186)
+HTML_ENTITY(raquo, 187)
+HTML_ENTITY(frac14, 188)
+HTML_ENTITY(frac12, 189)
+HTML_ENTITY(frac34, 190)
+HTML_ENTITY(iquest, 191)
+HTML_ENTITY(Agrave, 192)
+HTML_ENTITY(Aacute, 193)
+HTML_ENTITY(Acirc, 194)
+HTML_ENTITY(Atilde, 195)
+HTML_ENTITY(Auml, 196)
+HTML_ENTITY(Aring, 197)
+HTML_ENTITY(AElig, 198)
+HTML_ENTITY(Ccedil, 199)
+HTML_ENTITY(Egrave, 200)
+HTML_ENTITY(Eacute, 201)
+HTML_ENTITY(Ecirc, 202)
+HTML_ENTITY(Euml, 203)
+HTML_ENTITY(Igrave, 204)
+HTML_ENTITY(Iacute, 205)
+HTML_ENTITY(Icirc, 206)
+HTML_ENTITY(Iuml, 207)
+HTML_ENTITY(ETH, 208)
+HTML_ENTITY(Ntilde, 209)
+HTML_ENTITY(Ograve, 210)
+HTML_ENTITY(Oacute, 211)
+HTML_ENTITY(Ocirc, 212)
+HTML_ENTITY(Otilde, 213)
+HTML_ENTITY(Ouml, 214)
+HTML_ENTITY(times, 215)
+HTML_ENTITY(Oslash, 216)
+HTML_ENTITY(Ugrave, 217)
+HTML_ENTITY(Uacute, 218)
+HTML_ENTITY(Ucirc, 219)
+HTML_ENTITY(Uuml, 220)
+HTML_ENTITY(Yacute, 221)
+HTML_ENTITY(THORN, 222)
+HTML_ENTITY(szlig, 223)
+HTML_ENTITY(agrave, 224)
+HTML_ENTITY(aacute, 225)
+HTML_ENTITY(acirc, 226)
+HTML_ENTITY(atilde, 227)
+HTML_ENTITY(auml, 228)
+HTML_ENTITY(aring, 229)
+HTML_ENTITY(aelig, 230)
+HTML_ENTITY(ccedil, 231)
+HTML_ENTITY(egrave, 232)
+HTML_ENTITY(eacute, 233)
+HTML_ENTITY(ecirc, 234)
+HTML_ENTITY(euml, 235)
+HTML_ENTITY(igrave, 236)
+HTML_ENTITY(iacute, 237)
+HTML_ENTITY(icirc, 238)
+HTML_ENTITY(iuml, 239)
+HTML_ENTITY(eth, 240)
+HTML_ENTITY(ntilde, 241)
+HTML_ENTITY(ograve, 242)
+HTML_ENTITY(oacute, 243)
+HTML_ENTITY(ocirc, 244)
+HTML_ENTITY(otilde, 245)
+HTML_ENTITY(ouml, 246)
+HTML_ENTITY(divide, 247)
+HTML_ENTITY(oslash, 248)
+HTML_ENTITY(ugrave, 249)
+HTML_ENTITY(uacute, 250)
+HTML_ENTITY(ucirc, 251)
+HTML_ENTITY(uuml, 252)
+HTML_ENTITY(yacute, 253)
+HTML_ENTITY(thorn, 254)
+HTML_ENTITY(yuml, 255)
+
+// Symbols, mathematical symbols and Greek letters
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(fnof, 402)
+HTML_ENTITY(Alpha, 913)
+HTML_ENTITY(Beta, 914)
+HTML_ENTITY(Gamma, 915)
+HTML_ENTITY(Delta, 916)
+HTML_ENTITY(Epsilon, 917)
+HTML_ENTITY(Zeta, 918)
+HTML_ENTITY(Eta, 919)
+HTML_ENTITY(Theta, 920)
+HTML_ENTITY(Iota, 921)
+HTML_ENTITY(Kappa, 922)
+HTML_ENTITY(Lambda, 923)
+HTML_ENTITY(Mu, 924)
+HTML_ENTITY(Nu, 925)
+HTML_ENTITY(Xi, 926)
+HTML_ENTITY(Omicron, 927)
+HTML_ENTITY(Pi, 928)
+HTML_ENTITY(Rho, 929)
+HTML_ENTITY(Sigma, 931)
+HTML_ENTITY(Tau, 932)
+HTML_ENTITY(Upsilon, 933)
+HTML_ENTITY(Phi, 934)
+HTML_ENTITY(Chi, 935)
+HTML_ENTITY(Psi, 936)
+HTML_ENTITY(Omega, 937)
+HTML_ENTITY(alpha, 945)
+HTML_ENTITY(beta, 946)
+HTML_ENTITY(gamma, 947)
+HTML_ENTITY(delta, 948)
+HTML_ENTITY(epsilon, 949)
+HTML_ENTITY(zeta, 950)
+HTML_ENTITY(eta, 951)
+HTML_ENTITY(theta, 952)
+HTML_ENTITY(iota, 953)
+HTML_ENTITY(kappa, 954)
+HTML_ENTITY(lambda, 955)
+HTML_ENTITY(mu, 956)
+HTML_ENTITY(nu, 957)
+HTML_ENTITY(xi, 958)
+HTML_ENTITY(omicron, 959)
+HTML_ENTITY(pi, 960)
+HTML_ENTITY(rho, 961)
+HTML_ENTITY(sigmaf, 962)
+HTML_ENTITY(sigma, 963)
+HTML_ENTITY(tau, 964)
+HTML_ENTITY(upsilon, 965)
+HTML_ENTITY(phi, 966)
+HTML_ENTITY(chi, 967)
+HTML_ENTITY(psi, 968)
+HTML_ENTITY(omega, 969)
+HTML_ENTITY(thetasym, 977)
+HTML_ENTITY(upsih, 978)
+HTML_ENTITY(piv, 982)
+HTML_ENTITY(bull, 8226)
+HTML_ENTITY(hellip, 8230)
+HTML_ENTITY(prime, 8242)
+HTML_ENTITY(Prime, 8243)
+HTML_ENTITY(oline, 8254)
+HTML_ENTITY(frasl, 8260)
+HTML_ENTITY(weierp, 8472)
+HTML_ENTITY(image, 8465)
+HTML_ENTITY(real, 8476)
+HTML_ENTITY(trade, 8482)
+HTML_ENTITY(alefsym, 8501)
+HTML_ENTITY(larr, 8592)
+HTML_ENTITY(uarr, 8593)
+HTML_ENTITY(rarr, 8594)
+HTML_ENTITY(darr, 8595)
+HTML_ENTITY(harr, 8596)
+HTML_ENTITY(crarr, 8629)
+HTML_ENTITY(lArr, 8656)
+HTML_ENTITY(uArr, 8657)
+HTML_ENTITY(rArr, 8658)
+HTML_ENTITY(dArr, 8659)
+HTML_ENTITY(hArr, 8660)
+HTML_ENTITY(forall, 8704)
+HTML_ENTITY(part, 8706)
+HTML_ENTITY(exist, 8707)
+HTML_ENTITY(empty, 8709)
+HTML_ENTITY(nabla, 8711)
+HTML_ENTITY(isin, 8712)
+HTML_ENTITY(notin, 8713)
+HTML_ENTITY(ni, 8715)
+HTML_ENTITY(prod, 8719)
+HTML_ENTITY(sum, 8721)
+HTML_ENTITY(minus, 8722)
+HTML_ENTITY(lowast, 8727)
+HTML_ENTITY(radic, 8730)
+HTML_ENTITY(prop, 8733)
+HTML_ENTITY(infin, 8734)
+HTML_ENTITY(ang, 8736)
+HTML_ENTITY(and, 8743)
+HTML_ENTITY(or, 8744)
+HTML_ENTITY(cap, 8745)
+HTML_ENTITY(cup, 8746)
+HTML_ENTITY(int, 8747)
+HTML_ENTITY(there4, 8756)
+HTML_ENTITY(sim, 8764)
+HTML_ENTITY(cong, 8773)
+HTML_ENTITY(asymp, 8776)
+HTML_ENTITY(ne, 8800)
+HTML_ENTITY(equiv, 8801)
+HTML_ENTITY(le, 8804)
+HTML_ENTITY(ge, 8805)
+HTML_ENTITY(sub, 8834)
+HTML_ENTITY(sup, 8835)
+HTML_ENTITY(nsub, 8836)
+HTML_ENTITY(sube, 8838)
+HTML_ENTITY(supe, 8839)
+HTML_ENTITY(oplus, 8853)
+HTML_ENTITY(otimes, 8855)
+HTML_ENTITY(perp, 8869)
+HTML_ENTITY(sdot, 8901)
+HTML_ENTITY(lceil, 8968)
+HTML_ENTITY(rceil, 8969)
+HTML_ENTITY(lfloor, 8970)
+HTML_ENTITY(rfloor, 8971)
+// Bug 603716: expansions of &lang; and &rang; have been modified in HTML5.
+// See http://www.w3.org/2003/entities/2007/htmlmathml-f.ent
+HTML_ENTITY(lang, 0x27E8)
+HTML_ENTITY(rang, 0x27E9)
+HTML_ENTITY(loz, 9674)
+HTML_ENTITY(spades, 9824)
+HTML_ENTITY(clubs, 9827)
+HTML_ENTITY(hearts, 9829)
+HTML_ENTITY(diams, 9830)
+
+// Markup-significant and internationalization characters
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(quot, 34)
+HTML_ENTITY(amp, 38)
+HTML_ENTITY(lt, 60)
+HTML_ENTITY(gt, 62)
+HTML_ENTITY(OElig, 338)
+HTML_ENTITY(oelig, 339)
+HTML_ENTITY(Scaron, 352)
+HTML_ENTITY(scaron, 353)
+HTML_ENTITY(Yuml, 376)
+HTML_ENTITY(circ, 710)
+HTML_ENTITY(tilde, 732)
+HTML_ENTITY(ensp, 8194)
+HTML_ENTITY(emsp, 8195)
+HTML_ENTITY(thinsp, 8201)
+HTML_ENTITY(zwnj, 8204)
+HTML_ENTITY(zwj, 8205)
+HTML_ENTITY(lrm, 8206)
+HTML_ENTITY(rlm, 8207)
+HTML_ENTITY(ndash, 8211)
+HTML_ENTITY(mdash, 8212)
+HTML_ENTITY(lsquo, 8216)
+HTML_ENTITY(rsquo, 8217)
+HTML_ENTITY(sbquo, 8218)
+HTML_ENTITY(ldquo, 8220)
+HTML_ENTITY(rdquo, 8221)
+HTML_ENTITY(bdquo, 8222)
+HTML_ENTITY(dagger, 8224)
+HTML_ENTITY(Dagger, 8225)
+HTML_ENTITY(permil, 8240)
+HTML_ENTITY(lsaquo, 8249)
+HTML_ENTITY(rsaquo, 8250)
+HTML_ENTITY(euro, 8364)
+
+// Navigator entity extensions
+// This block of entities needs to be at the bottom of the list since it
+// contains duplicate Unicode codepoints.  The codepoint to entity name
+// mapping (used by Composer) must ignore them, which occurs only
+// because they are listed later.
+
+// apos is from XML
+HTML_ENTITY(apos, 39) 
+// The capitalized versions are required to handle non-standard input.
+HTML_ENTITY(AMP, 38)
+HTML_ENTITY(COPY, 169)
+HTML_ENTITY(GT, 62)
+HTML_ENTITY(LT, 60)
+HTML_ENTITY(QUOT, 34)
+HTML_ENTITY(REG, 174)
+
diff --git a/components/htmlparser/src/nsHTMLTagList.h b/components/htmlparser/src/nsHTMLTagList.h
new file mode 100644
index 000000000..4cb2a61e0
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTagList.h
@@ -0,0 +1,197 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// IWYU pragma: private, include "nsHTMLTags.h"
+
+/******
+
+  This file contains the list of all HTML tags.
+  See nsHTMLTags.h for access to the enum values for tags.
+
+  It is designed to be used as input to various places that will define the
+  HTML_TAG macro in useful ways through the magic of C preprocessing.
+  Additionally, it is consumed by the self-regeneration code in
+  ElementName.java from which nsHtml5ElementName.cpp/h is translated.
+  See parser/html/java/README.txt.
+
+  If you edit this list, you need to re-run ElementName.java
+  self-regeneration and the HTML parser Java to C++ translation.
+
+  All entries must be enclosed in the macro HTML_TAG which will have cruel
+  and unusual things done to it.
+
+  It is recommended (but not strictly necessary) to keep all entries
+  in alphabetical order.
+
+  The first argument to HTML_TAG is the tag name. The second argument is the
+  "creator" method of the form NS_New$TAGNAMEElement, that will be used by
+  nsHTMLContentSink.cpp to create a content object for a tag of that
+  type. Use NOTUSED, if the particular tag has a non-standard creator.
+  The third argument is the interface name specified for this element
+  in the HTML specification. It can be empty if the relevant interface name
+  is "HTMLElement".
+
+  The HTML_OTHER macro is for values in the nsHTMLTag enum that are
+  not strictly tags.
+
+  Entries *must* use only lowercase characters.
+
+  Don't forget to update /editor/libeditor/HTMLEditUtils.cpp as well.
+
+  ** Break these invariants and bad things will happen. **
+
+ ******/
+#define HTML_HTMLELEMENT_TAG(_tag) HTML_TAG(_tag, , )
+
+HTML_TAG(a, Anchor, Anchor)
+HTML_HTMLELEMENT_TAG(abbr)
+HTML_HTMLELEMENT_TAG(acronym)
+HTML_HTMLELEMENT_TAG(address)
+HTML_TAG(applet, SharedObject, Applet)
+HTML_TAG(area, Area, Area)
+HTML_HTMLELEMENT_TAG(article)
+HTML_HTMLELEMENT_TAG(aside)
+HTML_TAG(audio, Audio, Audio)
+HTML_HTMLELEMENT_TAG(b)
+HTML_TAG(base, Shared, Base)
+HTML_HTMLELEMENT_TAG(basefont)
+HTML_HTMLELEMENT_TAG(bdo)
+HTML_TAG(bgsound, Unknown, Unknown)
+HTML_HTMLELEMENT_TAG(big)
+HTML_TAG(blockquote, Shared, Quote)
+HTML_TAG(body, Body, Body)
+HTML_TAG(br, BR, BR)
+HTML_TAG(button, Button, Button)
+HTML_TAG(canvas, Canvas, Canvas)
+HTML_TAG(caption, TableCaption, TableCaption)
+HTML_HTMLELEMENT_TAG(center)
+HTML_HTMLELEMENT_TAG(cite)
+HTML_HTMLELEMENT_TAG(code)
+HTML_TAG(col, TableCol, TableCol)
+HTML_TAG(colgroup, TableCol, TableCol)
+HTML_TAG(data, Data, Data)
+HTML_TAG(datalist, DataList, DataList)
+HTML_HTMLELEMENT_TAG(dd)
+HTML_TAG(del, Mod, Mod)
+HTML_TAG(details, Details, Details)
+HTML_HTMLELEMENT_TAG(dfn)
+HTML_TAG(dialog, Dialog, Dialog)
+HTML_TAG(dir, Shared, Directory)
+HTML_TAG(div, Div, Div)
+HTML_TAG(dl, SharedList, DList)
+HTML_HTMLELEMENT_TAG(dt)
+HTML_HTMLELEMENT_TAG(em)
+HTML_TAG(embed, SharedObject, Embed)
+HTML_TAG(fieldset, FieldSet, FieldSet)
+HTML_HTMLELEMENT_TAG(figcaption)
+HTML_HTMLELEMENT_TAG(figure)
+HTML_TAG(font, Font, Font)
+HTML_HTMLELEMENT_TAG(footer)
+HTML_TAG(form, Form, Form)
+HTML_TAG(frame, Frame, Frame)
+HTML_TAG(frameset, FrameSet, FrameSet)
+HTML_TAG(h1, Heading, Heading)
+HTML_TAG(h2, Heading, Heading)
+HTML_TAG(h3, Heading, Heading)
+HTML_TAG(h4, Heading, Heading)
+HTML_TAG(h5, Heading, Heading)
+HTML_TAG(h6, Heading, Heading)
+HTML_TAG(head, Shared, Head)
+HTML_HTMLELEMENT_TAG(header)
+HTML_HTMLELEMENT_TAG(hgroup)
+HTML_TAG(hr, HR, HR)
+HTML_TAG(html, Shared, Html)
+HTML_HTMLELEMENT_TAG(i)
+HTML_TAG(iframe, IFrame, IFrame)
+HTML_HTMLELEMENT_TAG(image)
+HTML_TAG(img, Image, Image)
+HTML_TAG(input, Input, Input)
+HTML_TAG(ins, Mod, Mod)
+HTML_HTMLELEMENT_TAG(kbd)
+HTML_TAG(keygen, Span, Span)
+HTML_TAG(label, Label, Label)
+HTML_TAG(legend, Legend, Legend)
+HTML_TAG(li, LI, LI)
+HTML_TAG(link, Link, Link)
+HTML_TAG(listing, Pre, Pre)
+HTML_HTMLELEMENT_TAG(main)
+HTML_TAG(map, Map, Map)
+HTML_HTMLELEMENT_TAG(mark)
+HTML_TAG(menu, Menu, Menu)
+HTML_TAG(menuitem, MenuItem, MenuItem)
+HTML_TAG(meta, Meta, Meta)
+HTML_TAG(meter, Meter, Meter)
+HTML_TAG(multicol, Unknown, Unknown)
+HTML_HTMLELEMENT_TAG(nav)
+HTML_HTMLELEMENT_TAG(nobr)
+HTML_HTMLELEMENT_TAG(noembed)
+HTML_HTMLELEMENT_TAG(noframes)
+HTML_HTMLELEMENT_TAG(noscript)
+HTML_TAG(object, Object, Object)
+HTML_TAG(ol, SharedList, OList)
+HTML_TAG(optgroup, OptGroup, OptGroup)
+HTML_TAG(option, Option, Option)
+HTML_TAG(output, Output, Output)
+HTML_TAG(p, Paragraph, Paragraph)
+HTML_TAG(param, Shared, Param)
+HTML_TAG(picture, Picture, Picture)
+HTML_HTMLELEMENT_TAG(plaintext)
+HTML_TAG(pre, Pre, Pre)
+HTML_TAG(progress, Progress, Progress)
+HTML_TAG(q, Shared, Quote)
+HTML_HTMLELEMENT_TAG(rb)
+HTML_HTMLELEMENT_TAG(rp)
+HTML_HTMLELEMENT_TAG(rt)
+HTML_HTMLELEMENT_TAG(rtc)
+HTML_HTMLELEMENT_TAG(ruby)
+HTML_HTMLELEMENT_TAG(s)
+HTML_HTMLELEMENT_TAG(samp)
+HTML_TAG(script, Script, Script)
+HTML_HTMLELEMENT_TAG(section)
+HTML_TAG(select, Select, Select)
+HTML_HTMLELEMENT_TAG(small)
+HTML_TAG(slot, Slot, Slot)
+HTML_TAG(source, Source, Source)
+HTML_TAG(span, Span, Span)
+HTML_HTMLELEMENT_TAG(strike)
+HTML_HTMLELEMENT_TAG(strong)
+HTML_TAG(style, Style, Style)
+HTML_HTMLELEMENT_TAG(sub)
+HTML_TAG(summary, Summary, )
+HTML_HTMLELEMENT_TAG(sup)
+HTML_TAG(table, Table, Table)
+HTML_TAG(tbody, TableSection, TableSection)
+HTML_TAG(td, TableCell, TableCell)
+HTML_TAG(textarea, TextArea, TextArea)
+HTML_TAG(tfoot, TableSection, TableSection)
+HTML_TAG(th, TableCell, TableCell)
+HTML_TAG(thead, TableSection, TableSection)
+HTML_TAG(template, Template, Template)
+HTML_TAG(time, Time, Time)
+HTML_TAG(title, Title, Title)
+HTML_TAG(tr, TableRow, TableRow)
+HTML_TAG(track, Track, Track)
+HTML_HTMLELEMENT_TAG(tt)
+HTML_HTMLELEMENT_TAG(u)
+HTML_TAG(ul, SharedList, UList)
+HTML_HTMLELEMENT_TAG(var)
+HTML_TAG(video, Video, Video)
+HTML_HTMLELEMENT_TAG(wbr)
+HTML_TAG(xmp, Pre, Pre)
+
+
+/* These are not for tags. But they will be included in the nsHTMLTag
+   enum anyway */
+
+HTML_OTHER(text)
+HTML_OTHER(whitespace)
+HTML_OTHER(newline)
+HTML_OTHER(comment)
+HTML_OTHER(entity)
+HTML_OTHER(doctypeDecl)
+HTML_OTHER(markupDecl)
+HTML_OTHER(instruction)
+
+#undef HTML_HTMLELEMENT_TAG
diff --git a/components/htmlparser/src/nsHTMLTags.cpp b/components/htmlparser/src/nsHTMLTags.cpp
new file mode 100644
index 000000000..681c37489
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTags.cpp
@@ -0,0 +1,259 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHTMLTags.h"
+#include "nsCRT.h"
+#include "nsReadableUtils.h"
+#include "nsString.h"
+#include "nsStaticAtom.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/HashFunctions.h"
+#include <algorithm>
+
+using namespace mozilla;
+
+// Static array of UTF-16 tag names, one per HTML_TAG entry of
+// nsHTMLTagList.h, in list order. `u"" #_tag` stringifies the tag name and
+// concatenates it with an empty char16_t literal, yielding a char16_t
+// string. HTML_OTHER entries expand to nothing, so only real tags appear.
+// GetStringValue() indexes this array with (enum value - 1).
+#define HTML_TAG(_tag, _classname, _interfacename) (u"" #_tag),
+#define HTML_OTHER(_tag)
+const char16_t* const nsHTMLTags::sTagUnicodeTable[] = {
+#include "nsHTMLTagList.h"
+};
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// Static array of tag atoms; the slots are handed to the static atom
+// registration in RegisterAtoms() and read back by GetAtom() with
+// (enum value - 1) as the index.
+nsIAtom* nsHTMLTags::sTagAtomTable[eHTMLTag_userdefined - 1];
+
+// Reference count and the two lookup tables managed by
+// AddRefTable()/ReleaseTable(): tag name -> id and tag atom -> id.
+int32_t nsHTMLTags::gTableRefCount;
+PLHashTable* nsHTMLTags::gTagTable;
+PLHashTable* nsHTMLTags::gTagAtomTable;
+
+
+// Hash callback for the name -> id table. The key is a null-terminated
+// char16_t string.
+static PLHashNumber
+HTMLTagsHashCodeUCPtr(const void *key)
+{
+  const char16_t* tagName = static_cast<const char16_t*>(key);
+  return HashString(tagName);
+}
+
+// Key-equality callback for the name -> id table: yields non-zero when the
+// two null-terminated char16_t strings are identical, zero otherwise.
+static int
+HTMLTagsKeyCompareUCPtr(const void *key1, const void *key2)
+{
+  const char16_t* lhs = static_cast<const char16_t*>(key1);
+  const char16_t* rhs = static_cast<const char16_t*>(key2);
+
+  const bool sameName = (0 == nsCRT::strcmp(lhs, rhs));
+  return sameName;
+}
+
+// Hash callback for the atom -> id table. The key is the atom pointer
+// itself; its integer value shifted right by two bits is the hash.
+static PLHashNumber
+HTMLTagsHashCodeAtom(const void *key)
+{
+  const auto pointerBits = NS_PTR_TO_INT32(key);
+  return pointerBits >> 2;
+}
+
+#define NS_HTMLTAG_NAME_MAX_LENGTH 10
+
+// static
+// Registers one static atom per HTML tag listed in nsHTMLTagList.h and, in
+// DEBUG builds, sanity-checks the generated tables.
+void
+nsHTMLTags::RegisterAtoms(void)
+{
+// First pass over the tag list: one NS_STATIC_ATOM_BUFFER(Atombuffer_<tag>,
+// "<tag>") per HTML_TAG entry (presumably the backing string buffer for the
+// atom -- see nsStaticAtom.h). HTML_OTHER entries expand to nothing.
+#define HTML_TAG(_tag, _classname, _interfacename) NS_STATIC_ATOM_BUFFER(Atombuffer_##_tag, #_tag)
+#define HTML_OTHER(_tag)
+#include "nsHTMLTagList.h"
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// Second pass: static array of tag StaticAtom structs. Each entry pairs the
+// buffer declared above with the address of the tag's slot in sTagAtomTable
+// (index = enum value - 1).
+#define HTML_TAG(_tag, _classname, _interfacename) NS_STATIC_ATOM(Atombuffer_##_tag, &nsHTMLTags::sTagAtomTable[eHTMLTag_##_tag - 1]),
+#define HTML_OTHER(_tag)
+  static const nsStaticAtom sTagAtoms_info[] = {
+#include "nsHTMLTagList.h"
+  };
+#undef HTML_TAG
+#undef HTML_OTHER
+
+  // Fill in our static atom pointers
+  NS_RegisterStaticAtoms(sTagAtoms_info);
+
+
+#if defined(DEBUG)
+  {
+    // Verify that all names in the table are lowercase: lowercase one copy
+    // of each name and compare it with an untouched copy.
+    for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+      nsAutoString temp1((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+      nsAutoString temp2((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+      ToLowerCase(temp1);
+      NS_ASSERTION(temp1.Equals(temp2), "upper case char in table");
+    }
+
+    // Verify that the names in sTagUnicodeTable match the atom buffers
+    // entry by entry.
+    for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+      nsAutoString temp1(sTagUnicodeTable[i]);
+      nsAutoString temp2((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+      NS_ASSERTION(temp1.Equals(temp2), "Bad unicode tag name!");
+    }
+
+    // Verify that NS_HTMLTAG_NAME_MAX_LENGTH equals the length of the
+    // longest tag name; StringTagToId() sizes its stack buffer from it.
+    uint32_t maxTagNameLength = 0;
+    for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+      uint32_t len = NS_strlen(sTagUnicodeTable[i]);
+      maxTagNameLength = std::max(len, maxTagNameLength);
+    }
+    NS_ASSERTION(maxTagNameLength == NS_HTMLTAG_NAME_MAX_LENGTH,
+                 "NS_HTMLTAG_NAME_MAX_LENGTH not set correctly!");
+  }
+#endif
+}
+
+// static
+// Takes a reference on the tag lookup tables, building them on the first
+// reference.
+//
+// Returns NS_OK on success. Returns NS_ERROR_OUT_OF_MEMORY if either hash
+// table cannot be allocated; in that case no reference is held, no table is
+// left half-built, and a later call will retry the initialization. Callers
+// must not pair a failed call with ReleaseTable().
+nsresult
+nsHTMLTags::AddRefTable(void)
+{
+  if (gTableRefCount++ == 0) {
+    NS_ASSERTION(!gTagTable && !gTagAtomTable, "pre existing hash!");
+
+    gTagTable = PL_NewHashTable(64, HTMLTagsHashCodeUCPtr,
+                                HTMLTagsKeyCompareUCPtr, PL_CompareValues,
+                                nullptr, nullptr);
+    if (!gTagTable) {
+      NS_WARNING("failed to allocate the tag name table");
+      // Undo the increment so the next caller re-attempts initialization
+      // instead of running with null tables.
+      --gTableRefCount;
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+
+    gTagAtomTable = PL_NewHashTable(64, HTMLTagsHashCodeAtom,
+                                    PL_CompareValues, PL_CompareValues,
+                                    nullptr, nullptr);
+    if (!gTagAtomTable) {
+      NS_WARNING("failed to allocate the tag atom table");
+      // Roll back the first table as well: the two tables are always
+      // created and destroyed as a pair.
+      PL_HashTableDestroy(gTagTable);
+      gTagTable = nullptr;
+      --gTableRefCount;
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+
+    // Fill in gTagTable with the above static char16_t strings as
+    // keys and the value of the corresponding enum as the value in
+    // the table. gTagAtomTable gets the same ids keyed by atom pointer.
+    // Ids are index + 1 because enum value 0 is eHTMLTag_unknown.
+
+    for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+      PL_HashTableAdd(gTagTable, sTagUnicodeTable[i],
+                      NS_INT32_TO_PTR(i + 1));
+
+      PL_HashTableAdd(gTagAtomTable, sTagAtomTable[i],
+                      NS_INT32_TO_PTR(i + 1));
+    }
+  }
+
+  return NS_OK;
+}
+
+// static
+// Drops one reference on the tag lookup tables and destroys them when the
+// last reference goes away.
+void
+nsHTMLTags::ReleaseTable(void)
+{
+  if (0 != --gTableRefCount) {
+    return;
+  }
+
+  // Nothing to delete/free inside the tables (keys and values are static
+  // data or plain integers), so just destroy the tables themselves. Each
+  // pointer is checked on its own: after a failed initialization one table
+  // may exist without the other, and PL_HashTableDestroy must not be
+  // handed a null table.
+  if (gTagTable) {
+    PL_HashTableDestroy(gTagTable);
+    gTagTable = nullptr;
+  }
+  if (gTagAtomTable) {
+    PL_HashTableDestroy(gTagAtomTable);
+    gTagAtomTable = nullptr;
+  }
+}
+
+// static
+// Maps a tag name to its nsHTMLTag id, treating ASCII letters
+// case-insensitively. Names longer than NS_HTMLTAG_NAME_MAX_LENGTH cannot
+// be known tags and yield eHTMLTag_userdefined without a table lookup.
+nsHTMLTag
+nsHTMLTags::StringTagToId(const nsAString& aTagName)
+{
+  const uint32_t nameLength = aTagName.Length();
+  if (nameLength > NS_HTMLTAG_NAME_MAX_LENGTH) {
+    return eHTMLTag_userdefined;
+  }
+
+  // Room for the longest known tag name plus the terminating null.
+  char16_t lowered[NS_HTMLTAG_NAME_MAX_LENGTH + 1];
+
+  nsAString::const_iterator src;
+  aTagName.BeginReading(src);
+
+  // Copy the name into the local buffer, folding 'A'..'Z' to lowercase on
+  // the way; every other code unit is copied unchanged.
+  for (uint32_t pos = 0; pos < nameLength; ++pos, ++src) {
+    char16_t ch = *src;
+    if (ch >= 'A' && ch <= 'Z') {
+      ch |= 0x20;
+    }
+    lowered[pos] = ch;
+  }
+  lowered[nameLength] = 0;
+
+  return CaseSensitiveStringTagToId(lowered);
+}
+
+#ifdef DEBUG
+// Debug-only self test: checks that every tag round-trips through the
+// name/atom/id conversions and that unknown names and out-of-range ids are
+// rejected.
+void
+nsHTMLTags::TestTagTable()
+{
+  nsHTMLTags::AddRefTable();
+
+  // Make sure we can find everything we are supposed to
+  for (int idx = 0; idx < NS_HTML_TAG_MAX; ++idx) {
+    const char16_t* tagName = sTagUnicodeTable[idx];
+
+    nsHTMLTag tagId = StringTagToId(nsDependentString(tagName));
+    NS_ASSERTION(tagId != eHTMLTag_userdefined, "can't find tag id");
+
+    const char16_t* roundTrip = GetStringValue(tagId);
+    NS_ASSERTION(0 == nsCRT::strcmp(roundTrip, tagName), "can't map id back to tag");
+
+    // The upper-cased spelling must resolve to the same id.
+    nsAutoString upperName(tagName);
+    ToUpperCase(upperName);
+    NS_ASSERTION(tagId == StringTagToId(upperName), "wrong id");
+
+    NS_ASSERTION(tagId == CaseSensitiveStringTagToId(tagName), "wrong id");
+
+    nsCOMPtr<nsIAtom> tagAtom = NS_Atomize(tagName);
+    NS_ASSERTION(tagId == CaseSensitiveAtomTagToId(tagAtom), "wrong id");
+    NS_ASSERTION(tagAtom == GetAtom(tagId), "can't map id back to atom");
+  }
+
+  // Make sure we don't find things that aren't there
+  nsHTMLTag missingId = StringTagToId(NS_LITERAL_STRING("@"));
+  NS_ASSERTION(missingId == eHTMLTag_userdefined, "found @");
+  missingId = StringTagToId(NS_LITERAL_STRING("zzzzz"));
+  NS_ASSERTION(missingId == eHTMLTag_userdefined, "found zzzzz");
+
+  nsCOMPtr<nsIAtom> probeAtom = NS_Atomize("@");
+  missingId = CaseSensitiveAtomTagToId(probeAtom);
+  NS_ASSERTION(missingId == eHTMLTag_userdefined, "found @");
+  probeAtom = NS_Atomize("zzzzz");
+  missingId = CaseSensitiveAtomTagToId(probeAtom);
+  NS_ASSERTION(missingId == eHTMLTag_userdefined, "found zzzzz");
+
+  // Ids outside the valid tag range must not map to a name...
+  const char16_t* bogusName = GetStringValue(static_cast<nsHTMLTag>(0));
+  NS_ASSERTION(!bogusName, "found enum 0");
+  bogusName = GetStringValue(static_cast<nsHTMLTag>(-1));
+  NS_ASSERTION(!bogusName, "found enum -1");
+  bogusName = GetStringValue(static_cast<nsHTMLTag>(NS_HTML_TAG_MAX + 1));
+  NS_ASSERTION(!bogusName, "found past max enum");
+
+  // ...nor to an atom.
+  probeAtom = GetAtom(static_cast<nsHTMLTag>(0));
+  NS_ASSERTION(!probeAtom, "found enum 0");
+  probeAtom = GetAtom(static_cast<nsHTMLTag>(-1));
+  NS_ASSERTION(!probeAtom, "found enum -1");
+  probeAtom = GetAtom(static_cast<nsHTMLTag>(NS_HTML_TAG_MAX + 1));
+  NS_ASSERTION(!probeAtom, "found past max enum");
+
+  ReleaseTable();
+}
+
+#endif // DEBUG
diff --git a/components/htmlparser/src/nsHTMLTags.h b/components/htmlparser/src/nsHTMLTags.h
new file mode 100644
index 000000000..b21df55f8
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTags.h
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHTMLTags_h___
+#define nsHTMLTags_h___
+
+#include "nsIAtom.h"
+#include "nsString.h"
+#include "plhash.h"
+
+class nsIAtom;
+
+/*
+   Declare the enum list using the magic of preprocessing
+   enum values are "eHTMLTag_foo" (where foo is the tag)
+
+   To change the list of tags, see nsHTMLTagList.h
+
+   These enum values are used as the index of array in various places.
+   If we change the structure of the enum by adding entries to it or removing
+   entries from it _directly_, not via nsHTMLTagList.h, don't forget to update
+   dom/bindings/BindingUtils.cpp and dom/html/nsHTMLContentSink.cpp as well.
+ */
+// Both list macros expand an entry to an enumerator named eHTMLTag_<tag>,
+// so the real tags and the HTML_OTHER pseudo-entries receive consecutive
+// values, in list order, right after eHTMLTag_unknown.
+#define HTML_TAG(_tag, _classname, _interfacename) eHTMLTag_##_tag,
+#define HTML_OTHER(_tag) eHTMLTag_##_tag,
+enum nsHTMLTag {
+  /* this enum must be first and must be zero */
+  eHTMLTag_unknown = 0,
+#include "nsHTMLTagList.h"
+
+  /* can't be moved into nsHTMLTagList since gcc3.4 doesn't like a
+     comma at the end of enum list*/
+  /* Last enumerator; the nsHTMLTags lookup functions return it when a
+     name or atom is not a known tag. */
+  eHTMLTag_userdefined
+};
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// All tags before eHTMLTag_text are HTML tags
+#define NS_HTML_TAG_MAX int32_t(eHTMLTag_text - 1)
+
+/**
+ * Static-only helper that converts between HTML tag names, tag atoms and
+ * nsHTMLTag ids. The lookup tables must be acquired with AddRefTable()
+ * before any of the *ToId functions is used.
+ */
+class nsHTMLTags {
+public:
+  static void RegisterAtoms(void);
+  static nsresult AddRefTable(void);
+  static void ReleaseTable(void);
+
+  // ---- name / atom -> id ----
+
+  // Lookup by tag name; defined out of line.
+  static nsHTMLTag StringTagToId(const nsAString& aTagName);
+
+  // Lookup by atom, routed through StringTagToId on the atom's string.
+  static nsHTMLTag AtomTagToId(nsIAtom* aTagName)
+  {
+    nsDependentAtomString tagName(aTagName);
+    return StringTagToId(tagName);
+  }
+
+  // Exact-match lookup of a null-terminated char16_t tag name.
+  // Yields eHTMLTag_userdefined when the name is not in the table.
+  static nsHTMLTag CaseSensitiveStringTagToId(const char16_t* aTagName)
+  {
+    NS_ASSERTION(gTagTable, "no lookup table, needs addref");
+    NS_ASSERTION(aTagName, "null tagname!");
+
+    void* found = PL_HashTableLookupConst(gTagTable, aTagName);
+    if (!found) {
+      return eHTMLTag_userdefined;
+    }
+    return static_cast<nsHTMLTag>(NS_PTR_TO_INT32(found));
+  }
+
+  // Exact-match lookup keyed by atom pointer.
+  // Yields eHTMLTag_userdefined when the atom is not in the table.
+  static nsHTMLTag CaseSensitiveAtomTagToId(nsIAtom* aTagName)
+  {
+    NS_ASSERTION(gTagAtomTable, "no lookup table, needs addref");
+    NS_ASSERTION(aTagName, "null tagname!");
+
+    void* found = PL_HashTableLookupConst(gTagAtomTable, aTagName);
+    if (!found) {
+      return eHTMLTag_userdefined;
+    }
+    return static_cast<nsHTMLTag>(NS_PTR_TO_INT32(found));
+  }
+
+  // ---- id -> name / atom ----
+
+  // Name of a real tag, or nullptr when aEnum is outside
+  // (eHTMLTag_unknown, NS_HTML_TAG_MAX].
+  static const char16_t *GetStringValue(nsHTMLTag aEnum)
+  {
+    if (aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX) {
+      return nullptr;
+    }
+    return sTagUnicodeTable[aEnum - 1];
+  }
+
+  // Atom of a real tag, or nullptr when aEnum is outside
+  // (eHTMLTag_unknown, NS_HTML_TAG_MAX].
+  static nsIAtom *GetAtom(nsHTMLTag aEnum)
+  {
+    if (aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX) {
+      return nullptr;
+    }
+    return sTagAtomTable[aEnum - 1];
+  }
+
+#ifdef DEBUG
+  static void TestTagTable();
+#endif
+
+private:
+  // Per-tag data, indexed by (enum value - 1).
+  static nsIAtom* sTagAtomTable[eHTMLTag_userdefined - 1];
+  static const char16_t* const sTagUnicodeTable[];
+
+  // Lookup tables and the count of outstanding AddRefTable() calls.
+  static int32_t gTableRefCount;
+  static PLHashTable* gTagTable;
+  static PLHashTable* gTagAtomTable;
+};
+
+#endif /* nsHTMLTags_h___ */
diff --git a/components/htmlparser/src/nsHTMLTokenizer.cpp b/components/htmlparser/src/nsHTMLTokenizer.cpp
new file mode 100644
index 000000000..a40e11f0e
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTokenizer.cpp
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * @file nsHTMLTokenizer.cpp
+ * This is an implementation of the nsITokenizer interface.
+ * This file contains the implementation of a tokenizer to tokenize an HTML
+ * document. It attempts to do so, making tradeoffs between compatibility with
+ * older parsers and the SGML specification. Note that most of the real
+ * "tokenization" takes place in nsHTMLTokens.cpp.
+ */
+
+#include "nsHTMLTokenizer.h"
+#include "nsIParser.h"
+#include "nsParserConstants.h"
+
+/************************************************************************
+  And now for the main class -- nsHTMLTokenizer...
+ ************************************************************************/
+
+/**
+ * Satisfy the nsISupports interface.
+ */
+NS_IMPL_ISUPPORTS(nsHTMLTokenizer, nsITokenizer)
+
+/**
+ * Default constructor. Performs no initialization; the body is empty
+ * apart from the TODO below.
+ */
+nsHTMLTokenizer::nsHTMLTokenizer()
+{
+  // TODO Assert about:blank-ness.
+}
+
+/**
+ * No-op implementation: aIsFinalChunk is ignored and the call always
+ * succeeds.
+ *
+ * @param  aIsFinalChunk Unused.
+ * @return NS_OK
+ */
+nsresult
+nsHTMLTokenizer::WillTokenize(bool aIsFinalChunk)
+{
+  return NS_OK;
+}
+
+/**
+ * Stub implementation of token consumption. No input is read from the
+ * scanner and no token is produced: the method unconditionally reports
+ * end of input.
+ *
+ * @param  aScanner The source of our input (unused here).
+ * @param  aFlushTokens An OUT parameter meant to tell the caller whether it
+ *                      should process the tokens queued so far; this
+ *                      implementation never writes to it.
+ * @return Always kEOF
+ */
+nsresult
+nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
+{
+  return kEOF;
+}
diff --git a/components/htmlparser/src/nsHTMLTokenizer.h b/components/htmlparser/src/nsHTMLTokenizer.h
new file mode 100644
index 000000000..0d2940c5e
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTokenizer.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/1/98
+ * 
+ */
+
+#ifndef __NSHTMLTOKENIZER
+#define __NSHTMLTOKENIZER
+
+#include "mozilla/Attributes.h"
+#include "nsISupports.h"
+#include "nsITokenizer.h"
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4275 )
+#endif
+
+/**
+ * Concrete, non-derivable implementation of nsITokenizer. The interface
+ * methods are declared through the NS_DECL_* macros below.
+ */
+class nsHTMLTokenizer final : public nsITokenizer {
+  // Private (default class access) so the object cannot be deleted directly
+  // by outside code.
+  // NOTE(review): presumably destroyed via Release() from
+  // NS_DECL_ISUPPORTS -- confirm.
+  ~nsHTMLTokenizer() {}
+
+public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSITOKENIZER
+  nsHTMLTokenizer();
+};
+
+#endif
+
+
diff --git a/components/htmlparser/src/nsIContentSink.h b/components/htmlparser/src/nsIContentSink.h
new file mode 100644
index 000000000..56c70a1b4
--- /dev/null
+++ b/components/htmlparser/src/nsIContentSink.h
@@ -0,0 +1,132 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIContentSink_h___
+#define nsIContentSink_h___
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/1/98
+ * 
+ * This pure virtual interface is used as the "glue" that connects the parsing 
+ * process to the content model construction process.
+ *
+ * The icontentsink interface is a very lightweight wrapper that represents the
+ * content-sink model building process. There is another one that you may care 
+ * about more, which is the IHTMLContentSink interface. (See that file for details).
+ */
+#include "nsISupports.h"
+#include "nsString.h"
+#include "mozFlushType.h"
+#include "nsIDTD.h"
+
+class nsParserBase;
+
+#define NS_ICONTENT_SINK_IID \
+{ 0xcf9a7cbb, 0xfcbc, 0x4e13, \
+  { 0x8e, 0xf5, 0x18, 0xef, 0x2d, 0x3d, 0x58, 0x29 } }
+
+/**
+ * Interface through which the parser notifies a content sink about the
+ * progress of parsing. Pure virtual methods must be implemented by every
+ * sink; the others have do-nothing defaults.
+ */
+class nsIContentSink : public nsISupports {
+public:
+
+  NS_DECLARE_STATIC_IID_ACCESSOR(NS_ICONTENT_SINK_IID)
+
+  /**
+   * This method is called by the parser when it is entered from
+   * the event loop. The content sink wants to know how long the
+   * parser has been active since we last processed events on the
+   * main event loop and this call calibrates that measurement.
+   */
+  NS_IMETHOD WillParse(void)=0;
+
+  /**
+   * This method gets called when the parser begins the process
+   * of building the content model via the content sink.
+   *
+   * Default implementation provided since the sink should have the option of
+   * doing nothing in response to this call. The default ignores aDTDMode
+   * and returns NS_OK.
+   *
+   * @param aDTDMode the DTD mode in effect (see nsDTDMode in nsIDTD.h)
+   * @update 5/7/98 gess
+   */
+  NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) {
+    return NS_OK;
+  }
+
+  /**
+   * This method gets called when the parser concludes the process
+   * of building the content model via the content sink.
+   *
+   * Default implementation provided since the sink should have the option of
+   * doing nothing in response to this call. The default ignores aTerminated
+   * and returns NS_OK.
+   *
+   * @param aTerminated presumably true when parsing was terminated early
+   *                    rather than running to completion -- TODO confirm
+   * @update 5/7/98 gess
+   */
+  NS_IMETHOD DidBuildModel(bool aTerminated) {
+    return NS_OK;
+  }
+
+  /**
+   * This method gets called when the parser gets i/o blocked,
+   * and wants to notify the sink that it may be a while before
+   * more data is available.
+   *
+   * @update 5/7/98 gess
+   */
+  NS_IMETHOD WillInterrupt(void)=0;
+
+  /**
+   * This method gets called when the parser i/o gets unblocked,
+   * and we're about to start dumping content again to the sink.
+   *
+   * @update 5/7/98 gess
+   */
+  NS_IMETHOD WillResume(void)=0;
+
+  /**
+   * This method gets called by the parser so that the content
+   * sink can retain a reference to the parser. The expectation
+   * is that the content sink will drop the reference when it
+   * gets the DidBuildModel notification i.e. when parsing is done.
+   *
+   * @param aParser the parser driving this sink
+   */
+  NS_IMETHOD SetParser(nsParserBase* aParser)=0;
+
+  /**
+   * Flush content so that the content model is in sync with the state
+   * of the sink.
+   *
+   * @param aType the type of flush to perform
+   */
+  virtual void FlushPendingNotifications(mozFlushType aType)=0;
+
+  /**
+   * Set the document character set. This should be passed on to the
+   * document itself.
+   *
+   * @param aCharset the character set name; note that it is taken by
+   *                 non-const reference
+   */
+  NS_IMETHOD SetDocumentCharset(nsACString& aCharset)=0;
+
+  /**
+   * Returns the target object (often a document object) into which
+   * the content built by this content sink is being added, if any
+   * (IOW, may return null).
+   */
+  virtual nsISupports *GetTarget()=0;
+  
+  /**
+   * Returns true if there's currently script executing that we need to hold
+   * parsing for. The default implementation always returns false.
+   */
+  virtual bool IsScriptExecuting()
+  {
+    return false;
+  }
+  
+  /**
+   * Posts a runnable that continues parsing. The default implementation
+   * does nothing.
+   */
+  virtual void ContinueInterruptedParsingAsync() {}
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIContentSink, NS_ICONTENT_SINK_IID)
+
+#endif /* nsIContentSink_h___ */
diff --git a/components/htmlparser/src/nsIDTD.h b/components/htmlparser/src/nsIDTD.h
new file mode 100644
index 000000000..cbae4d507
--- /dev/null
+++ b/components/htmlparser/src/nsIDTD.h
@@ -0,0 +1,136 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsIDTD_h___
+#define nsIDTD_h___
+
+/**
+ * MODULE NOTES:
+ * @update  gess 7/20/98
+ *
+ * This interface defines the standard interface for DTDs. Note that this
+ * isn't HTML specific. DTDs have several functions within the parser
+ * system:
+ *      1) To coordinate the consumption of an input stream via the
+ *      parser
+ *      2) To serve as proxy to represent the containment rules of the
+ *      underlying document
+ *      3) To offer autodetection services to the parser (mainly for doc
+ *      conversion)
+ * */
+
+#include "nsISupports.h"
+#include "nsString.h"
+#include "nsITokenizer.h"
+
+#define NS_IDTD_IID \
+{ 0x3de05873, 0xefa7, 0x410d, \
+  { 0xa4, 0x61, 0x80, 0x33, 0xaf, 0xd9, 0xe3, 0x26 } }
+
+enum eAutoDetectResult {  // NOTE(review): presumably the outcome of DTD autodetection -- confirm
+    eUnknownDetect,
+    eValidDetect,
+    ePrimaryDetect,
+    eInvalidDetect
+};
+
+enum nsDTDMode {  // parsing mode; the one a DTD actually uses is reported by nsIDTD::GetMode()
+    eDTDMode_unknown = 0,
+    eDTDMode_quirks,        // pre 4.0 versions
+    eDTDMode_almost_standards,
+    eDTDMode_full_standards,
+    eDTDMode_autodetect,    // default aMode of nsIParser::Parse()
+    eDTDMode_fragment
+};
+
+
+class nsIContentSink;
+class CParserContext;
+
+class nsIDTD : public nsISupports
+{
+public:
+
+    NS_DECLARE_STATIC_IID_ACCESSOR(NS_IDTD_IID)
+    // Receives the parser context, tokenizer and sink; see GetMode() for the mode chosen.
+    NS_IMETHOD WillBuildModel(const CParserContext& aParserContext,
+                              nsITokenizer* aTokenizer,
+                              nsIContentSink* aSink) = 0;
+
+    /**
+     * Called by the parser after the parsing process has concluded
+     * @update  gess5/18/98
+     * @param   anErrorCode - contains error code resulting from parse process
+     * @return  nsresult status code
+     */
+    NS_IMETHOD DidBuildModel(nsresult anErrorCode) = 0;
+
+    /**
+     * Called (possibly repeatedly) by the parser to parse tokens and construct
+     * the document model via the sink.
+     *
+     * @param   aTokenizer - tokenizer providing the token stream to be parsed
+     * @param   aSink - content sink through which the document model is
+     *                  built
+     *
+     * NOTE(review): this comment used to describe aCountLines and aCharsetPtr
+     * parameters; the signature below takes neither of them.
+     */
+    NS_IMETHOD BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink) = 0;
+
+    /**
+     * This method is called to determine whether or not a tag of one
+     * type can contain a tag of another type.
+     *
+     * @update  gess 3/25/98
+     * @param   aParent -- int tag of parent container
+     * @param   aChild -- int tag of child container
+     * @return  true if parent can contain child
+     */
+    NS_IMETHOD_(bool) CanContain(int32_t aParent,int32_t aChild) const = 0;
+
+    /**
+     * This method gets called to determine whether a given
+     * tag is itself a container
+     *
+     * @update  gess 3/25/98
+     * @param   aTag -- tag to test for containership
+     * @return  true if given tag can contain other tags
+     */
+    NS_IMETHOD_(bool) IsContainer(int32_t aTag) const = 0;
+
+    /**
+     * Use this if you want to stop building the content model
+     * --------------[ Sets DTD to STOP mode ]----------------
+     * It's recommended to use this method in accordance with
+     * the parser's terminate() method.
+     *
+     * @update  harishd 07/22/99
+     * @param   (none)
+     * @return  (nothing)
+     */
+    NS_IMETHOD_(void) Terminate() = 0;
+
+    NS_IMETHOD_(int32_t) GetType() = 0;
+
+    /**
+     * Call this method after calling WillBuildModel to determine what mode the
+     * DTD actually is using, as it may differ from aParserContext.mDTDMode.
+     */
+    NS_IMETHOD_(nsDTDMode) GetMode() const = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIDTD, NS_IDTD_IID)
+// Declares an override of every nsIDTD method; expand inside an implementing class.
+#define NS_DECL_NSIDTD \
+    NS_IMETHOD WillBuildModel(  const CParserContext& aParserContext, nsITokenizer* aTokenizer, nsIContentSink* aSink) override;\
+    NS_IMETHOD DidBuildModel(nsresult anErrorCode) override;\
+    NS_IMETHOD BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink) override;\
+    NS_IMETHOD_(bool) CanContain(int32_t aParent,int32_t aChild) const override;\
+    NS_IMETHOD_(bool) IsContainer(int32_t aTag) const override;\
+    NS_IMETHOD_(void)  Terminate() override;\
+    NS_IMETHOD_(int32_t) GetType() override;\
+    NS_IMETHOD_(nsDTDMode) GetMode() const override;
+#endif /* nsIDTD_h___ */
diff --git a/components/htmlparser/src/nsIFragmentContentSink.h b/components/htmlparser/src/nsIFragmentContentSink.h
new file mode 100644
index 000000000..8d547ed66
--- /dev/null
+++ b/components/htmlparser/src/nsIFragmentContentSink.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIFragmentContentSink_h___
+#define nsIFragmentContentSink_h___
+
+#include "nsISupports.h"
+
+class nsIDOMDocumentFragment;
+class nsIDocument;
+
+#define NS_I_FRAGMENT_CONTENT_SINK_IID \
+  { 0x1a8ce30b, 0x63fc, 0x441a, \
+    { 0xa3, 0xaa, 0xf7, 0x16, 0xc0, 0xfe, 0x96, 0x69 } }
+
+/**
+ * The fragment sink allows a client to parse a fragment of content, possibly
+ * surrounded by context. Also see nsIParser::ParseFragment().
+ * Note: once you've parsed a fragment, the fragment sink must be re-set on
+ * the parser in order to parse another fragment.
+ */
+class nsIFragmentContentSink : public nsISupports {
+public:
+  NS_DECLARE_STATIC_IID_ACCESSOR(NS_I_FRAGMENT_CONTENT_SINK_IID)
+  /**
+   * This method is used to obtain the fragment created by
+   * a fragment content sink and to release resources held by the parser.
+   * @param aFragment out param that receives the created fragment
+   * The sink drops its reference to the fragment.
+   */
+  NS_IMETHOD FinishFragmentParsing(nsIDOMDocumentFragment** aFragment) = 0;
+
+  /**
+   * This method is used to set the target document for this fragment
+   * sink.  This document's nodeinfo manager will be used to create
+   * the content objects.  This MUST be called before the sink is used.
+   *
+   * @param aDocument the document the new nodes will belong to
+   * (should not be null)
+   */
+  NS_IMETHOD SetTargetDocument(nsIDocument* aDocument) = 0;
+
+  /**
+   * This method is used to indicate to the sink that we're done building
+   * the context and should start paying attention to the incoming content
+   */
+  NS_IMETHOD WillBuildContent() = 0;
+
+  /**
+   * This method is used to indicate to the sink that we're done building
+   * the real content. This is useful if you want to parse additional context
+   * (such as an end context).
+   */
+  NS_IMETHOD DidBuildContent() = 0;
+
+  /**
+   * This method is a total hack to help with parsing fragments. It is called to
+   * tell the fragment sink that a container from the context will be delivered
+   * after the call to WillBuildContent(). This is only relevant for HTML
+   * fragments that use nsHTMLTokenizer/CNavDTD.
+   */
+  NS_IMETHOD IgnoreFirstContainer() = 0;
+
+  /**
+   * Sets whether script elements are marked as unexecutable.
+   */
+  NS_IMETHOD SetPreventScriptExecution(bool aPreventScriptExecution) = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIFragmentContentSink,
+                              NS_I_FRAGMENT_CONTENT_SINK_IID)
+// NOTE(review): presumably creates an XML fragment sink, returned via the out-param -- confirm.
+nsresult
+NS_NewXMLFragmentContentSink(nsIFragmentContentSink** aInstancePtrResult);
+
+#endif
diff --git a/components/htmlparser/src/nsIHTMLContentSink.h b/components/htmlparser/src/nsIHTMLContentSink.h
new file mode 100644
index 000000000..bf08c4b5e
--- /dev/null
+++ b/components/htmlparser/src/nsIHTMLContentSink.h
@@ -0,0 +1,89 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIHTMLContentSink_h___
+#define nsIHTMLContentSink_h___
+
+/**
+ * This interface is OBSOLETE and in the process of being REMOVED.
+ * Do NOT implement!
+ *
+ * This file declares the nsIHTMLContentSink interface.
+ * This interface is used during the parsing process as the
+ * primary interface between the parser and the content
+ * model.
+ *
+ * After the tokenizer completes, the parser iterates over
+ * the known token list. As the parser identifies valid 
+ * elements, it calls the contentsink interface to notify
+ * the content model that a new node or child node is being
+ * created and added to the content model.
+ *
+ * The HTMLContentSink interface assumes 4 underlying
+ * containers: HTML, HEAD, BODY and FRAMESET. Before
+ * accessing any of these, the parser will call the appropriate
+ * OpennsIHTMLContentSink method: OpenHTML,OpenHead,OpenBody,OpenFrameSet;
+ * likewise, the ClosensIHTMLContentSink version will be called when the
+ * parser is done with a given section.
+ *
+ * IMPORTANT: The parser may Open each container more than
+ * once! This is due to the irregular nature of HTML files.
+ * For example, it is possible to encounter plain text at
+ * the start of an HTML document (that precedes the HTML tag).
+ * Such text is treated as if it were part of the body.
+ * In such cases, the parser will Open the body, pass the text-
+ * node in and then Close the body. The body will likely be
+ * re-Opened later when the actual <BODY> tag has been seen.
+ *
+ * Containers within the body are Opened and Closed
+ * using the OpenContainer(...) and CloseContainer(...) calls.
+ * It is assumed that the document or contentSink is 
+ * maintaining its state to manage where new content should 
+ * be added to the underlying document.
+ *
+ * NOTE: OpenHTML() and OpenBody() may get called multiple times
+ *       in the same document. That's fine, and it doesn't mean
+ *       that we have multiple bodies or HTML's.
+ *
+ * NOTE: I haven't figured out how sub-documents (non-frames)
+ *       are going to be handled. Stay tuned.
+ */
+#include "nsIContentSink.h"
+#include "nsHTMLTags.h"
+
+#define NS_IHTML_CONTENT_SINK_IID \
+  {0xefc5af86, 0x5cfd, 0x4918, {0x9d, 0xd3, 0x5f, 0x7a, 0xb2, 0x88, 0xb2, 0x68}}
+
+/**
+ * This interface is OBSOLETE and in the process of being REMOVED.
+ * Do NOT implement!
+ */
+class nsIHTMLContentSink : public nsIContentSink 
+{
+public:
+
+  NS_DECLARE_STATIC_IID_ACCESSOR(NS_IHTML_CONTENT_SINK_IID)
+
+  enum ElementType { eHTML, eBody };  // containers that can be opened/closed below
+
+  /**
+   * This method is used to open a generic container in the sink.
+   * @param  aNodeType - The container to open (eHTML or eBody).
+   * @update 4/1/98 gess
+   */
+  NS_IMETHOD OpenContainer(ElementType aNodeType) = 0;
+
+  /**
+   *  This method gets called by the parser when a close
+   *  container tag has been consumed and needs to be closed.
+   *
+   * @param  aTag - The container to be closed (eHTML or eBody).
+   */
+  NS_IMETHOD CloseContainer(ElementType aTag) = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIHTMLContentSink, NS_IHTML_CONTENT_SINK_IID)
+
+#endif /* nsIHTMLContentSink_h___ */
+
diff --git a/components/htmlparser/src/nsIParser.h b/components/htmlparser/src/nsIParser.h
new file mode 100644
index 000000000..4bf0b3370
--- /dev/null
+++ b/components/htmlparser/src/nsIParser.h
@@ -0,0 +1,272 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef NS_IPARSER___
+#define NS_IPARSER___
+
+
+ /**
+ * This GECKO-INTERNAL interface is on track to being REMOVED (or refactored
+ * to the point of being near-unrecognizable).
+ *
+ * Please DO NOT #include this file in comm-central code, in your XUL
+ * app or binary extensions.
+ *
+ * Please DO NOT #include this into new files even inside Gecko. It is more
+ * likely than not that #including this header is the wrong thing to do.
+ */
+
+#include "nsISupports.h"
+#include "nsIStreamListener.h"
+#include "nsIDTD.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsIAtom.h"
+#include "nsParserBase.h"
+
+#define NS_IPARSER_IID \
+{ 0x2c4ad90a, 0x740e, 0x4212, \
+  { 0xba, 0x3f, 0xfe, 0xac, 0xda, 0x4b, 0x92, 0x9e } }
+
+// {41421C60-310A-11d4-816F-000064657374}
+#define NS_IDEBUG_DUMP_CONTENT_IID \
+{ 0x41421c60, 0x310a, 0x11d4, \
+{ 0x81, 0x6f, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
+
+class nsIContentSink;
+class nsIRequestObserver;
+class nsString;
+class nsIURI;
+class nsIChannel;
+class nsIContent;
+
+enum eParserCommands {  // accepted by nsIParser::SetCommand(eParserCommands)
+  eViewNormal,
+  eViewSource,
+  eViewFragment,
+  eViewErrors
+};
+
+enum eParserDocType {  // NOTE(review): not referenced elsewhere in this header -- confirm users
+  ePlainText = 0,
+  eXML,
+  eHTML_Quirks,
+  eHTML_Strict
+};
+
+enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};  // NOTE(review): not referenced elsewhere in this header -- confirm users
+
+/**
+ * This GECKO-INTERNAL interface is on track to being REMOVED (or refactored
+ * to the point of being near-unrecognizable).
+ *
+ * Please DO NOT #include this file in comm-central code, in your XUL
+ * app or binary extensions.
+ *
+ * Please DO NOT #include this into new files even inside Gecko. It is more
+ * likely than not that #including this header is the wrong thing to do.
+ */
+class nsIParser : public nsParserBase {
+  public:
+
+    NS_DECLARE_STATIC_IID_ACCESSOR(NS_IPARSER_IID)
+
+    /**
+     * Select given content sink into parser for parser output
+     * @update  gess5/11/98
+     * @param   aSink is the new sink to be used by parser
+     * @return  nothing
+     */
+    NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink)=0;
+
+
+    /**
+     * Retrieve the sink set into the parser
+     * @update  gess5/11/98
+     * @return  current sink
+     */
+    NS_IMETHOD_(nsIContentSink*) GetContentSink(void)=0;
+
+    /**
+     *  Call SetCommand once you've created a parser, and want to instruct it
+     *  about the command which caused the parser to be constructed. For example,
+     *  this allows us to select a DTD which can do, say, view-source.
+     *  GetCommand presumably hands back the current command via aCommand.
+     *  @update  gess 3/25/98
+     *  @param   aCommand -- string that contains (or receives) the command
+     *  @return  nothing
+     */
+    NS_IMETHOD_(void) GetCommand(nsCString& aCommand)=0;
+    NS_IMETHOD_(void) SetCommand(const char* aCommand)=0;
+    NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand)=0;
+
+    /**
+     *  Call this method once you've created a parser, and want to instruct it
+     *  about what charset to load
+     *
+     *  @update  ftang 4/23/99
+     *  @param   aCharset - the charset of a document
+     *  @param   aSource - the source of the charset
+     *  @return  nothing
+     */
+    NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource)=0;
+    NS_IMETHOD_(void) GetDocumentCharset(nsACString& oCharset, int32_t& oSource)=0;
+
+    /**
+     * Get the channel associated with this parser
+     * @update harishd,gagan 07/17/01
+     * @param aChannel out param that will contain the result
+     * @return NS_OK if successful
+     */
+    NS_IMETHOD GetChannel(nsIChannel** aChannel) override = 0;
+
+    /**
+     * Get the DTD associated with this parser
+     * @update vidur 9/29/99
+     * @param aDTD out param that will contain the result
+     * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
+     */
+    NS_IMETHOD GetDTD(nsIDTD** aDTD) = 0;
+    
+    /**
+     * Get the nsIStreamListener for this parser
+     */
+    virtual nsIStreamListener* GetStreamListener() = 0;
+
+    /**************************************************************************
+     *  Parse methods always begin with an input source, and perform
+     *  conversions until you wind up being emitted to the given contentsink
+     *  (which may or may not be a proxy for the NGLayout content model).
+     ************************************************************************/
+    
+    // Call this method to resume the parser from an unblocked state.
+    // This can happen, for example, if parsing was interrupted and then the
+    // consumer needed to restart the parser without waiting for more data.
+    // This also happens after loading scripts, which unblock the parser in
+    // order to process the output of document.write() and then need to
+    // continue on with the page load on an enabled parser.
+    NS_IMETHOD ContinueInterruptedParsing() = 0;
+    
+    // Stops parsing temporarily.
+    NS_IMETHOD_(void) BlockParser() = 0;
+    
+    // Open up the parser for tokenization, building up the content
+    // model, etc. However, this method does not resume parsing
+    // automatically. It's the caller's responsibility to restart
+    // the parsing engine.
+    NS_IMETHOD_(void) UnblockParser() = 0;
+
+    /**
+     * Asynchronously continues parsing.
+     */
+    NS_IMETHOD_(void) ContinueInterruptedParsingAsync() = 0;
+
+    NS_IMETHOD_(bool) IsParserEnabled() override = 0;
+    NS_IMETHOD_(bool) IsComplete() = 0;
+    // Parse the document at aURL; aMode defaults to eDTDMode_autodetect.
+    NS_IMETHOD Parse(nsIURI* aURL,
+                     nsIRequestObserver* aListener = nullptr,
+                     void* aKey = 0,
+                     nsDTDMode aMode = eDTDMode_autodetect) = 0;
+
+    NS_IMETHOD Terminate(void) = 0;
+
+    /**
+     * This method gets called when you want to parse a fragment of HTML or XML
+     * surrounded by the context |aTagStack|. It requires that the parser have
+     * been given a fragment content sink.
+     *
+     * @param aSourceBuffer The XML or HTML that hasn't been parsed yet.
+     * @param aTagStack The context of the source buffer.
+     * @return Success or failure.
+     */
+    NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
+                             nsTArray<nsString>& aTagStack) = 0;
+
+    /**
+     * This method gets called when the tokens have been consumed, and it's time
+     * to build the model via the content sink.
+     * @update  gess5/11/98
+     * @return  error code -- 0 if model building went well.
+     */
+    NS_IMETHOD BuildModel(void) = 0;
+
+    /**
+     *  Call this method to cancel any pending parsing events.
+     *  Parsing events may be pending if all of the document's content
+     *  has been passed to the parser but the parser has been interrupted
+     *  because processing the tokens took too long.
+     *
+     *  @update  kmcclusk 05/18/01
+     *  @return  NS_OK if succeeded else ERROR.
+     */
+
+    NS_IMETHOD CancelParsingEvents() = 0;
+
+    virtual void Reset() = 0;
+
+    /**
+     * True if the insertion point (per HTML5) is defined.
+     */
+    virtual bool IsInsertionPointDefined() = 0;
+
+    /**
+     * Call immediately before starting to evaluate a parser-inserted script or
+     * in general when the spec says to define an insertion point.
+     */
+    virtual void PushDefinedInsertionPoint() = 0;
+
+    /**
+     * Call immediately after having evaluated a parser-inserted script or
+     * generally when you want to restore the state from before the last
+     * PushDefinedInsertionPoint call.
+     */
+    virtual void PopDefinedInsertionPoint() = 0;
+
+    /**
+     * Marks the HTML5 parser as not a script-created parser.
+     */
+    virtual void MarkAsNotScriptCreated(const char* aCommand) = 0;
+
+    /**
+     * True if this is a script-created HTML5 parser.
+     */
+    virtual bool IsScriptCreated() = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIParser, NS_IPARSER_IID)
+
+/* ===========================================================*
+  Some useful constants: short aliases for parser nsresult codes
+ * ===========================================================*/
+
+#include "nsError.h"
+
+const nsresult  kEOF              = NS_ERROR_HTMLPARSER_EOF;
+const nsresult  kUnknownError     = NS_ERROR_HTMLPARSER_UNKNOWN;
+const nsresult  kCantPropagate    = NS_ERROR_HTMLPARSER_CANTPROPAGATE;
+const nsresult  kContextMismatch  = NS_ERROR_HTMLPARSER_CONTEXTMISMATCH;
+const nsresult  kBadFilename      = NS_ERROR_HTMLPARSER_BADFILENAME;
+const nsresult  kBadURL           = NS_ERROR_HTMLPARSER_BADURL;
+const nsresult  kInvalidParserContext = NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
+const nsresult  kBlocked          = NS_ERROR_HTMLPARSER_BLOCK;
+const nsresult  kBadStringLiteral = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
+const nsresult  kHierarchyTooDeep = NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP;
+const nsresult  kFakeEndTag       = NS_ERROR_HTMLPARSER_FAKE_ENDTAG;
+const nsresult  kNotAComment      = NS_ERROR_HTMLPARSER_INVALID_COMMENT;
+// Parser flag values; every nonzero flag below is a distinct single bit.
+#define NS_IPARSER_FLAG_UNKNOWN_MODE         0x00000000
+#define NS_IPARSER_FLAG_QUIRKS_MODE          0x00000002
+#define NS_IPARSER_FLAG_STRICT_MODE          0x00000004
+#define NS_IPARSER_FLAG_AUTO_DETECT_MODE     0x00000010
+#define NS_IPARSER_FLAG_VIEW_NORMAL          0x00000020
+#define NS_IPARSER_FLAG_VIEW_SOURCE          0x00000040
+#define NS_IPARSER_FLAG_VIEW_ERRORS          0x00000080
+#define NS_IPARSER_FLAG_PLAIN_TEXT           0x00000100
+#define NS_IPARSER_FLAG_XML                  0x00000200
+#define NS_IPARSER_FLAG_HTML                 0x00000400
+#define NS_IPARSER_FLAG_SCRIPT_ENABLED       0x00000800
+#define NS_IPARSER_FLAG_FRAMES_ENABLED       0x00001000
+
+#endif 
diff --git a/components/htmlparser/src/nsIParserService.h b/components/htmlparser/src/nsIParserService.h
new file mode 100644
index 000000000..2906974e9
--- /dev/null
+++ b/components/htmlparser/src/nsIParserService.h
@@ -0,0 +1,98 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsIParserService_h__
+#define nsIParserService_h__
+
+#include "nsISupports.h"
+#include "nsString.h"
+#include "nsHTMLTags.h"
+
+class nsIParser;
+
+#define NS_PARSERSERVICE_CONTRACTID "@mozilla.org/parser/parser-service;1"
+
+// {90a92e37-abd6-441b-9b39-4064d98e1ede}
+#define NS_IPARSERSERVICE_IID \
+{ 0x90a92e37, 0xabd6, 0x441b, { 0x9b, 0x39, 0x40, 0x64, 0xd9, 0x8e, 0x1e, 0xde } }
+
+class nsIParserService : public nsISupports {
+ public:
+  NS_DECLARE_STATIC_IID_ACCESSOR(NS_IPARSERSERVICE_IID)
+
+  /**
+   * Looks up the nsHTMLTag enum value corresponding to the tag in aAtom. The
+   * lookup happens case insensitively.
+   *
+   * @param aAtom The tag to look up.
+   *
+   * @return int32_t The nsHTMLTag enum value corresponding to the tag in aAtom
+   *                 or eHTMLTag_userdefined if the tag does not correspond to
+   *                 any of the tag nsHTMLTag enum values.
+   */
+  virtual int32_t HTMLAtomTagToId(nsIAtom* aAtom) const = 0;
+
+  /**
+   * Looks up the nsHTMLTag enum value corresponding to the tag in aAtom. The
+   * lookup is case sensitive (contrast with HTMLAtomTagToId).
+   * @param aAtom The tag to look up.
+   *
+   * @return int32_t The nsHTMLTag enum value corresponding to the tag in aAtom
+   *                 or eHTMLTag_userdefined if the tag does not correspond to
+   *                 any of the tag nsHTMLTag enum values.
+   */
+  virtual int32_t HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const = 0;
+
+  /**
+   * Looks up the nsHTMLTag enum value corresponding to the tag in aTag. The
+   * lookup happens case insensitively.
+   *
+   * @param aTag The tag to look up.
+   *
+   * @return int32_t The nsHTMLTag enum value corresponding to the tag in aTag
+   *                 or eHTMLTag_userdefined if the tag does not correspond to
+   *                 any of the tag nsHTMLTag enum values.
+   */
+  virtual int32_t HTMLStringTagToId(const nsAString& aTag) const = 0;
+
+  /**
+   * Gets the tag corresponding to the nsHTMLTag enum value in aId. The
+   * returned tag will be in lowercase.
+   *
+   * @param aId The nsHTMLTag enum value to get the tag for.
+   *
+   * @return const char16_t* The tag corresponding to the nsHTMLTag enum
+   *                          value, or nullptr if the enum value doesn't
+   *                          correspond to a tag (eHTMLTag_unknown,
+   *                          eHTMLTag_userdefined, eHTMLTag_text, ...).
+   */
+  virtual const char16_t *HTMLIdToStringTag(int32_t aId) const = 0;
+
+  /**
+   * Gets the tag corresponding to the nsHTMLTag enum value in aId. The
+   * returned tag will be in lowercase.
+   *
+   * @param aId The nsHTMLTag enum value to get the tag for.
+   *
+   * @return nsIAtom* The tag corresponding to the nsHTMLTag enum value, or
+   *                  nullptr if the enum value doesn't correspond to a tag
+   *                  (eHTMLTag_unknown, eHTMLTag_userdefined, eHTMLTag_text,
+   *                  ...).
+   */
+  virtual nsIAtom *HTMLIdToAtomTag(int32_t aId) const = 0;
+  // Entity <-> Unicode conversions; the result comes back via the second (out) parameter.
+  NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity, 
+                                        int32_t* aUnicode) const = 0;
+
+  NS_IMETHOD HTMLConvertUnicodeToEntity(int32_t aUnicode,
+                                        nsCString& aEntity) const = 0;
+  // Tag classification queries for the tag id aId; the answer comes back via the bool out-param.
+  NS_IMETHOD IsContainer(int32_t aId, bool& aIsContainer) const = 0;
+  NS_IMETHOD IsBlock(int32_t aId, bool& aIsBlock) const = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIParserService, NS_IPARSERSERVICE_IID)
+
+#endif // nsIParserService_h__
diff --git a/components/htmlparser/src/nsITokenizer.h b/components/htmlparser/src/nsITokenizer.h
new file mode 100644
index 000000000..2ed09d410
--- /dev/null
+++ b/components/htmlparser/src/nsITokenizer.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/1/98
+ * 
+ */
+
+#ifndef __NSITOKENIZER__
+#define __NSITOKENIZER__
+
+#include "nsISupports.h"
+
+class nsScanner;
+
+#define NS_ITOKENIZER_IID      \
+{ 0Xae98a348, 0X5e91, 0X41a8, \
+  { 0Xa5, 0Xb4, 0Xd2, 0X20, 0Xf3, 0X1f, 0Xc4, 0Xab } }
+
+/***************************************************************
+  Notes: abstract tokenizer interface; ConsumeToken() is handed the nsScanner.
+ ***************************************************************/
+
+
+class nsITokenizer : public nsISupports {
+public:
+  NS_DECLARE_STATIC_IID_ACCESSOR(NS_ITOKENIZER_IID)
+  // NOTE(review): aIsFinalChunk / aFlushTokens semantics are not documented here -- see implementers.
+  NS_IMETHOD                     WillTokenize(bool aIsFinalChunk)=0;
+  NS_IMETHOD                     ConsumeToken(nsScanner& aScanner,bool& aFlushTokens)=0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsITokenizer, NS_ITOKENIZER_IID)
+// Declares overrides of both nsITokenizer methods. The last line's trailing backslash continues the macro onto the next line, so keep that line blank.
+#define NS_DECL_NSITOKENIZER \
+  NS_IMETHOD                     WillTokenize(bool aIsFinalChunk) override;\
+  NS_IMETHOD                     ConsumeToken(nsScanner& aScanner,bool& aFlushTokens) override;\
+
+
+#endif
diff --git a/components/htmlparser/src/nsParser.cpp b/components/htmlparser/src/nsParser.cpp
new file mode 100644
index 000000000..791ccf772
--- /dev/null
+++ b/components/htmlparser/src/nsParser.cpp
@@ -0,0 +1,1599 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIAtom.h"
+#include "nsParser.h"
+#include "nsString.h"
+#include "nsCRT.h"
+#include "nsScanner.h"
+#include "plstr.h"
+#include "nsIStringStream.h"
+#include "nsIChannel.h"
+#include "nsICachingChannel.h"
+#include "nsIInputStream.h"
+#include "CNavDTD.h"
+#include "prenv.h"
+#include "prlock.h"
+#include "prcvar.h"
+#include "nsParserCIID.h"
+#include "nsReadableUtils.h"
+#include "nsCOMPtr.h"
+#include "nsExpatDriver.h"
+#include "nsIServiceManager.h"
+#include "nsICategoryManager.h"
+#include "nsISupportsPrimitives.h"
+#include "nsIFragmentContentSink.h"
+#include "nsStreamUtils.h"
+#include "nsHTMLTokenizer.h"
+#include "nsDataHashtable.h"
+#include "nsXPCOMCIDInternal.h"
+#include "nsMimeTypes.h"
+#include "mozilla/CondVar.h"
+#include "mozilla/Mutex.h"
+#include "nsParserConstants.h"
+#include "nsCharsetSource.h"
+#include "nsContentUtils.h"
+#include "nsThreadUtils.h"
+#include "nsIHTMLContentSink.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/dom/ScriptLoader.h"
+#include "mozilla/BinarySearch.h"
+
+using namespace mozilla;
+using mozilla::dom::EncodingUtils;
+
+// Bit flags kept in nsParser::mFlags.
+
+// Cleared by BlockParser() and set again by UnblockParser(); ResumeParse()
+// only proceeds while it is set.
+#define NS_PARSER_FLAG_PARSER_ENABLED         0x00000002
+// Cleared by ParseFragment() while it runs and set again before it returns.
+#define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004
+// Set by PostContinueEvent() once a continue event has been dispatched;
+// cleared when that event is handled or revoked by CancelParsingEvents().
+#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
+// NOTE(review): not referenced in this part of the file -- meaning to be
+// confirmed against its users.
+#define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
+// Set by Initialize(); cleared by ResumeParse() when WillBuildModel() fails.
+#define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040
+
+//-------------- Begin ParseContinue Event Definition ------------------------
+/*
+The parser can be explicitly interrupted by passing a return value of
+NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
+the parser to stop processing and allow the application to return to the event
+loop. The data which was left at the time of interruption will be processed
+the next time OnDataAvailable is called. If the parser has received its final
+chunk of data then OnDataAvailable will no longer be called by the networking
+module, so the parser will schedule a nsParserContinueEvent which will call
+the parser to process the remaining data after returning to the event loop.
+If the parser is interrupted while processing the remaining data it will
+schedule another ParseContinueEvent. The processing of data followed by
+scheduling of the continue events will proceed until either:
+
+  1) All of the remaining data can be processed without interrupting
+  2) The parser has been cancelled.
+
+
+This capability is currently used in CNavDTD and nsHTMLContentSink. The
+nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
+processed and when each token is processed. The nsHTML content sink records
+the time when the chunk has started processing and will return
+NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
+threshold called max tokenizing processing time. This allows the content sink
+to limit how much data is processed in a single chunk which in turn gates how
+much time is spent away from the event loop. Processing smaller chunks of data
+also reduces the time spent in subsequent reflows.
+
+This capability is most apparent when loading large documents. If the maximum
+token processing time is set small enough the application will remain
+responsive during document load.
+
+A side-effect of this capability is that document load is not complete when
+the last chunk of data is passed to OnDataAvailable since  the parser may have
+been interrupted when the last chunk of data arrived. The document is complete
+when all of the document has been tokenized and there aren't any pending
+nsParserContinueEvents. This can cause problems if the application assumes
+that it can monitor the load requests to determine when the document load has
+been completed. This is what happens in Mozilla. The document is considered
+completely loaded when all of the load requests have been satisfied. To delay
+the document load until all of the parsing has been completed the
+nsHTMLContentSink adds a dummy parser load request which is not removed until
+the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
+DidBuildModel until the final chunk of data has been passed to the parser
+through the OnDataAvailable and there aren't any pending
+nsParserContinueEvents.
+
+Currently the parser ignores requests to be interrupted during the
+processing of script.  This is because a document.write followed by JavaScript
+calls to manipulate the DOM may fail if the parser was interrupted during the
+document.write.
+
+For more details @see bugzilla bug 76722
+*/
+
+
+/**
+ * Runnable that calls back into the parser so it can continue an interrupted
+ * parse.  It holds a strong reference to the parser for as long as the event
+ * object exists.
+ */
+class nsParserContinueEvent : public Runnable
+{
+public:
+  explicit nsParserContinueEvent(nsParser* aParser) : mParser(aParser) {}
+
+  // Hands this event to the parser, which compares it with the event it is
+  // currently waiting for before doing any work.
+  NS_IMETHOD Run() override
+  {
+    mParser->HandleParserContinueEvent(this);
+    return NS_OK;
+  }
+
+  // The parser to resume.
+  RefPtr<nsParser> mParser;
+};
+
+//-------------- End ParseContinue Event Definition ------------------------
+
+/**
+ *  Default constructor. All member setup is delegated to Initialize(), which
+ *  is told (via |true|) that it is running on behalf of the constructor.
+ */
+nsParser::nsParser()
+{
+  Initialize(true);
+}
+
+/**
+ *  Destructor. Cleanup() frees any parser contexts that are still linked
+ *  from mParserContext.
+ */
+nsParser::~nsParser()
+{
+  Cleanup();
+}
+
+/**
+ *  Puts the parser's members into their default state.
+ *
+ *  @param   aConstructor true when called from the constructor: only the raw
+ *           mParserContext pointer is nulled. When false, mObserver is
+ *           released and mUnusedInput is emptied instead, and mParserContext
+ *           is left untouched.
+ */
+void
+nsParser::Initialize(bool aConstructor)
+{
+  if (aConstructor) {
+    // Raw pointer
+    mParserContext = nullptr;
+  }
+  else {
+    // nsCOMPtrs
+    mObserver = nullptr;
+    mUnusedInput.Truncate();
+  }
+
+  mContinueEvent = nullptr;
+  mCharsetSource = kCharsetUninitialized;
+  mCharset.AssignLiteral("ISO-8859-1");
+  mInternalState = NS_OK;
+  mStreamStatus = NS_OK;
+  mCommand = eViewNormal;
+  // Start out enabled, with observers on and tokenizing allowed.
+  mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
+           NS_PARSER_FLAG_PARSER_ENABLED |
+           NS_PARSER_FLAG_CAN_TOKENIZE;
+
+  mProcessingNetworkData = false;
+  mIsAboutBlank = false;
+}
+
+/**
+ *  Frees every parser context still linked from mParserContext. DEBUG builds
+ *  warn first when more than one context is left.
+ */
+void
+nsParser::Cleanup()
+{
+#ifdef DEBUG
+  if (mParserContext && mParserContext->mPrevContext) {
+    NS_WARNING("Extra parser contexts still on the parser stack");
+  }
+#endif
+
+  // Walk the chain from the newest context to the oldest, deleting each one.
+  for (CParserContext* older; mParserContext; mParserContext = older) {
+    older = mParserContext->mPrevContext;
+    delete mParserContext;
+  }
+
+  // A pending nsParserContinueEvent owns a reference to |this|, so the
+  // pending flag cannot legitimately be set while we are being destroyed.
+  NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
+}
+
+// Cycle-collection and QueryInterface boilerplate for nsParser.
+NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
+
+// Unlink: the members listed here are dropped by the cycle collector.
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+
+// Traverse: reports the same three members, plus the tokenizer of every
+// parser context on the context chain.
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
+  CParserContext *pc = tmp->mParserContext;
+  while (pc) {
+    cb.NoteXPCOMChild(pc->mTokenizer);
+    pc = pc->mPrevContext;
+  }
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
+// Interfaces answered by QueryInterface; nsISupports is resolved through
+// nsIParser because it would otherwise be ambiguous.
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
+  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
+  NS_INTERFACE_MAP_ENTRY(nsIParser)
+  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
+  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
+NS_INTERFACE_MAP_END
+
+/**
+ *  Dispatches an nsParserContinueEvent to the current thread unless one is
+ *  already pending. According to the notes at the top of this file, this is
+ *  needed once all data has been handed to OnDataAvailable but the content
+ *  sink interrupted the parser because token processing took too long.
+ *
+ *  @return  always NS_OK; a dispatch failure is only reported as a warning.
+ */
+nsresult
+nsParser::PostContinueEvent()
+{
+  if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
+    // An event is already on its way; nothing more to do.
+    return NS_OK;
+  }
+
+  // If this flag isn't set, then there shouldn't be a live continue event!
+  NS_ASSERTION(!mContinueEvent, "bad");
+
+  // The event holds a reference to |this| and we hold one to the event; the
+  // resulting cycle is broken when the event fires.
+  nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
+  if (NS_SUCCEEDED(NS_DispatchToCurrentThread(event))) {
+    mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+    mContinueEvent = event;
+  } else {
+    NS_WARNING("failed to dispatch parser continuation event");
+  }
+
+  return NS_OK;
+}
+
+/**
+ *  Copies the command string last given to SetCommand(const char*) into
+ *  aCommand.
+ */
+NS_IMETHODIMP_(void)
+nsParser::GetCommand(nsCString& aCommand)
+{
+  aCommand.Assign(mCommandStr);
+}
+
+/**
+ *  Records the command string that caused this parser to be created and
+ *  derives the matching parser command from it: "view-source" selects
+ *  eViewSource, "view-fragment" selects eViewFragment, and anything else
+ *  selects eViewNormal.
+ *
+ *  @param   aCommand the command string to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(const char* aCommand)
+{
+  mCommandStr.Assign(aCommand);
+
+  eParserCommands parsedCommand = eViewNormal;
+  if (mCommandStr.EqualsLiteral("view-source")) {
+    parsedCommand = eViewSource;
+  } else if (mCommandStr.EqualsLiteral("view-fragment")) {
+    parsedCommand = eViewFragment;
+  }
+  mCommand = parsedCommand;
+}
+
+/**
+ *  Sets the parser command directly from its enum value. Unlike the string
+ *  overload, this does not touch the stored command string (mCommandStr).
+ *
+ *  @param   aParserCommand the command to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(eParserCommands aParserCommand)
+{
+  mCommand = aParserCommand;
+}
+
+/**
+ *  Stores the document charset and where it came from, and passes both on
+ *  to the current context's scanner when there is one.
+ *
+ *  @param   aCharset- the charset of a document
+ *  @param   aCharsetSource- the source of the charset
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
+{
+  mCharset = aCharset;
+  mCharsetSource = aCharsetSource;
+
+  if (!mParserContext || !mParserContext->mScanner) {
+    // No scanner to keep in sync.
+    return;
+  }
+
+  mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
+}
+
+/**
+ *  Passes aCharset on to the content sink; does nothing without a sink.
+ */
+void
+nsParser::SetSinkCharset(nsACString& aCharset)
+{
+  if (!mSink) {
+    return;
+  }
+
+  mSink->SetDocumentCharset(aCharset);
+}
+
+/**
+ *  Installs the content sink that this parser hands its output to and tells
+ *  the sink about this parser. If the sink also implements
+ *  nsIHTMLContentSink, mIsAboutBlank is set, which makes WillBuildModel()
+ *  choose quirks-mode HTML instead of XML when the DTD mode is undecided.
+ *
+ *  @param   aSink the sink to use; must not be null
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetContentSink(nsIContentSink* aSink)
+{
+  NS_PRECONDITION(aSink, "sink cannot be null!");
+  mSink = aSink;
+
+  if (!mSink) {
+    return;
+  }
+
+  mSink->SetParser(this);
+
+  nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
+  if (htmlSink) {
+    mIsAboutBlank = true;
+  }
+}
+
+/**
+ * Retrieve the sink that was set on the parser with SetContentSink().
+ * @return  current sink as a raw pointer (may be null if none is set)
+ */
+NS_IMETHODIMP_(nsIContentSink*)
+nsParser::GetContentSink()
+{
+  return mSink;
+}
+
+/**
+ * Creates the DTD for a parser context: an nsExpatDriver when the context's
+ * document type is eXML, a CNavDTD for everything else. The context's
+ * auto-detect status is marked ePrimaryDetect as a side effect.
+ *
+ * @param   aParserContext the context to pick a DTD for
+ * @return  a newly allocated DTD
+ */
+static nsIDTD*
+FindSuitableDTD(CParserContext& aParserContext)
+{
+  // We always find a DTD.
+  aParserContext.mAutoDetectStatus = ePrimaryDetect;
+
+  // Quick check for view source.
+  MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
+             "The old parser is not supposed to be used for View Source "
+             "anymore.");
+
+  if (aParserContext.mDocType == eXML) {
+    NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
+    return new nsExpatDriver();
+  }
+
+  // Anything that is not XML is treated as HTML.
+  return new CNavDTD();
+}
+
+/**
+ * Revokes a pending continue event, if there is one, by forgetting it and
+ * clearing the pending flag. HandleParserContinueEvent() ignores events that
+ * no longer match mContinueEvent.
+ *
+ * @return  always NS_OK
+ */
+NS_IMETHODIMP
+nsParser::CancelParsingEvents()
+{
+  if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
+    return NS_OK;
+  }
+
+  NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
+  // Revoke the pending continue parsing event
+  mContinueEvent = nullptr;
+  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+  return NS_OK;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/**
+ * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the value
+ * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
+ * (which could be success or failure).
+ *
+ * To understand the motivation for this construct, consider these example
+ * methods:
+ *
+ *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
+ *     nsresult rv = NS_OK;
+ *     ...
+ *     rv = obj->DoThatThing();
+ *     NS_ENSURE_SUCCESS(rv, rv);
+ *     ...
+ *     return rv;
+ *   }
+ *
+ *   nsresult nsCaller::MakeThingsHappen() {
+ *     return mSomething->DoThatThing(mWhatever);
+ *   }
+ *
+ * Suppose, for whatever reason*, we want to shift responsibility for calling
+ * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
+ * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
+ *
+ *   nsresult nsSomething::DoThatThing() {
+ *     nsresult rv = NS_OK;
+ *     ...
+ *     ...
+ *     return rv;
+ *   }
+ *
+ *   nsresult nsCaller::MakeThingsHappen() {
+ *     nsresult rv;
+ *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
+ *                              mWhatever->DoThatThing(),
+ *                              rv);
+ *     return rv;
+ *   }
+ *
+ * *Possible reasons include: nsCaller doesn't want to give mSomething access
+ * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
+ * be called regardless of how nsSomething::DoThatThing behaves, &c.
+ */
+#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
+  nsresult RV##__temp = EXPR1;                                                \
+  RV = EXPR2;                                                                 \
+  /* Only fall back to EXPR1's result when EXPR2 did not fail. */             \
+  if (NS_SUCCEEDED(RV)) {                                                     \
+    RV = RV##__temp;                                                          \
+  }                                                                           \
+}
+
+/**
+ * Called just before the model is built. The choice of DTD is put off until
+ * this point, which is meant to be the last step before parsing starts.
+ *
+ * Does nothing (and returns NS_OK) when the current context's auto-detect
+ * status is no longer eUnknownDetect. Otherwise it settles an undecided DTD
+ * mode, creates the DTD, fetches the context's tokenizer and notifies first
+ * the DTD and then the sink.
+ *
+ * @param   aFilename not used by this implementation
+ * @return  kInvalidParserContext without a parser context; the sink's error
+ *          if the sink fails; otherwise the DTD's result
+ */
+nsresult
+nsParser::WillBuildModel(nsString& aFilename)
+{
+  if (!mParserContext) {
+    return kInvalidParserContext;
+  }
+
+  if (mParserContext->mAutoDetectStatus != eUnknownDetect) {
+    return NS_OK;
+  }
+
+  switch (mParserContext->mDTDMode) {
+    case eDTDMode_unknown:
+    case eDTDMode_autodetect:
+      if (mIsAboutBlank) {
+        mParserContext->mDTDMode = eDTDMode_quirks;
+        mParserContext->mDocType = eHTML_Quirks;
+      } else {
+        mParserContext->mDTDMode = eDTDMode_full_standards;
+        mParserContext->mDocType = eXML;
+      }
+      break;
+    default:
+      // XML fragment with nested parser context
+      break;
+  }
+
+  NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
+               "Clobbering DTD for non-root parser context!");
+  mDTD = FindSuitableDTD(*mParserContext);
+  NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
+
+  nsITokenizer* tokenizer;
+  nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
+  nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
+
+  // The DTD used to call the sink's WillBuildModel itself and bail out with
+  // the sink's error. That call now lives here, so to keep the old error
+  // handling a sink failure takes precedence over the DTD's result.
+  if (NS_FAILED(sinkResult)) {
+    return sinkResult;
+  }
+  return rv;
+}
+
+/**
+ * Called when the parser is done with its input. The parser may have been
+ * entered recursively, so the DTD and sink are only closed out when no
+ * continue event is pending and the current context is the root context.
+ *
+ * @param   anErrorCode result to hand to the DTD; also returned unchanged
+ *          when nothing is closed out
+ * @return  the sink's error if the sink fails, otherwise the DTD's result;
+ *          anErrorCode when the DTD/sink were not notified
+ */
+nsresult
+nsParser::DidBuildModel(nsresult anErrorCode)
+{
+  if (!IsComplete() || !mParserContext || mParserContext->mPrevContext) {
+    return anErrorCode;
+  }
+
+  nsresult result = anErrorCode;
+
+  if (mDTD && mSink) {
+    // Let sink know if we're about to end load because we've been terminated.
+    // In that case we don't want it to run deferred scripts.
+    const bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
+
+    nsresult dtdResult = mDTD->DidBuildModel(anErrorCode);
+    nsresult sinkResult = mSink->DidBuildModel(terminated);
+
+    // The DTD used to call the sink's DidBuildModel itself and bail out with
+    // the sink's error. That call now lives here, so to keep the old error
+    // handling a sink failure takes precedence over the DTD's result.
+    result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
+  }
+
+  //Ref. to bug 61462.
+  mParserContext->mRequest = nullptr;
+
+  return result;
+}
+
+/**
+ * Makes aContext the current parser context. The context is expected to
+ * already link back to the previous current context through its
+ * mPrevContext member; this is asserted, not established, here.
+ *
+ * @param   aContext the new context; the parser stores its address
+ */
+void
+nsParser::PushContext(CParserContext& aContext)
+{
+  NS_ASSERTION(aContext.mPrevContext == mParserContext,
+               "Trying to push a context whose previous context differs from "
+               "the current parser context.");
+  mParserContext = &aContext;
+}
+
+/**
+ * Pops the topmost context off the stack and returns it to the caller
+ * without deleting it. The next context (if any) becomes the current one.
+ *
+ * @return  the context that was on top, or null if the stack was empty
+ */
+CParserContext*
+nsParser::PopContext()
+{
+  CParserContext* const popped = mParserContext;
+  if (!popped) {
+    return popped;
+  }
+
+  mParserContext = popped->mPrevContext;
+
+  // Carry the popped context's stream listener state over to the context
+  // that is now current, but never overwrite eOnStop: that state guarantees
+  // the call to DidBuildModel().
+  if (mParserContext && mParserContext->mStreamListenerState != eOnStop) {
+    mParserContext->mStreamListenerState = popped->mStreamListenerState;
+  }
+
+  return popped;
+}
+
+/**
+ *  Replaces the parser's stored unused input (mUnusedInput) with a copy of
+ *  aBuffer. That stored input is later fed back into a scanner by
+ *  Parse(const nsAString&, ...) and ResumeParse().
+ *
+ *  @param   aBuffer the input to keep for later
+ */
+void
+nsParser::SetUnusedInput(nsString& aBuffer)
+{
+  mUnusedInput = aBuffer;
+}
+
+/**
+ *  Forces the parser to stop parsing altogether. This is final: once
+ *  terminated, parsing cannot be resumed without starting over. Calling it a
+ *  second time is a no-op.
+ *
+ *  @return  NS_OK, or the sink's error when there is no DTD and the sink's
+ *           DidBuildModel() fails
+ */
+NS_IMETHODIMP
+nsParser::Terminate(void)
+{
+  // We should only call DidBuildModel once, so don't do anything if this is
+  // the second time that Terminate has been called.
+  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return NS_OK;
+  }
+
+  nsresult result = NS_ERROR_HTMLPARSER_STOPPARSING;
+  // XXX - [ until we figure out a way to break parser-sink circularity ]
+  // Hack - Hold a reference until we are completely done...
+  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+  mInternalState = result;
+
+  // CancelParsingEvents must be called to avoid leaking the nsParser object
+  // @see bug 108049
+  // It also clears NS_PARSER_FLAG_PENDING_CONTINUE_EVENT, which IsComplete()
+  // inspects, so that DidBuildModel below really does notify the DTD.
+  CancelParsingEvents();
+
+  // If we got interrupted in the middle of a document.write, more than one
+  // parser context may be stacked up. DidBuildModel only acts on the root
+  // context, so without this the sink's DidBuildModel would never run and
+  // the reference cycle would leak. Drop everything above the root context.
+  while (mParserContext && mParserContext->mPrevContext) {
+    CParserContext* const below = mParserContext->mPrevContext;
+    delete mParserContext;
+    mParserContext = below;
+  }
+
+  if (mDTD) {
+    mDTD->Terminate();
+    DidBuildModel(result);
+    return NS_OK;
+  }
+
+  if (mSink) {
+    // We have no parser context or no DTD yet (so we got terminated before we
+    // got any data).  Manually break the reference cycle with the sink.
+    result = mSink->DidBuildModel(true);
+    NS_ENSURE_SUCCESS(result, result);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Resumes a parse that was interrupted earlier, unless processing network
+ * data is currently not allowed.
+ *
+ * @return  NS_OK when nothing was done or ResumeParse() returned NS_OK;
+ *          otherwise the parser's internal state
+ */
+NS_IMETHODIMP
+nsParser::ContinueInterruptedParsing()
+{
+  // If there are scripts executing, then the content sink is jumping the gun
+  // (probably due to a synchronous XMLHttpRequest) and will re-enable us
+  // later, see bug 460706.
+  if (!IsOkToProcessNetworkData()) {
+    return NS_OK;
+  }
+
+  // If the stream has already finished, resuming may start closing things
+  // down. Keep ourselves and the sink alive until we are done here.
+  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+  nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+
+#ifdef DEBUG
+  if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
+    NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
+  }
+#endif
+
+  const bool isFinalChunk =
+    mParserContext && mParserContext->mStreamListenerState == eOnStop;
+
+  mProcessingNetworkData = true;
+  if (sinkDeathGrip) {
+    sinkDeathGrip->WillParse();
+  }
+  const nsresult resumeResult = ResumeParse(true, isFinalChunk); // Ref. bug 57999
+  mProcessingNetworkData = false;
+
+  // Anything other than NS_OK is reported as the parser's internal state.
+  return (resumeResult == NS_OK) ? resumeResult : mInternalState;
+}
+
+/**
+ *  Stops parsing temporarily by clearing NS_PARSER_FLAG_PARSER_ENABLED.
+ *  While the flag is clear, ResumeParse() does not build up the content
+ *  model. UnblockParser() sets the flag again.
+ */
+NS_IMETHODIMP_(void)
+nsParser::BlockParser()
+{
+  mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
+}
+
+/**
+ *  Re-enables the parser for tokenization and content model building after
+ *  BlockParser(). This does not restart parsing by itself; that is the
+ *  caller's responsibility. Warns when the parser was not blocked.
+ */
+NS_IMETHODIMP_(void)
+nsParser::UnblockParser()
+{
+  if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
+    NS_WARNING("Trying to unblock an unblocked parser.");
+    return;
+  }
+
+  mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
+}
+
+/**
+ * Asks the content sink to continue the interrupted parse asynchronously.
+ * Does nothing when no sink is set (mSink is null before SetContentSink()
+ * and after the cycle collector has unlinked it).
+ */
+NS_IMETHODIMP_(void)
+nsParser::ContinueInterruptedParsingAsync()
+{
+  if (mSink) {
+    mSink->ContinueInterruptedParsingAsync();
+  }
+}
+
+/**
+ * Tells whether the parser is currently enabled, i.e. not blocked through
+ * BlockParser().
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsParserEnabled()
+{
+  return !!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED);
+}
+
+/**
+ * Tells whether the parser considers itself done, which here means that no
+ * continue event is pending.
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsComplete()
+{
+  return (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) == 0;
+}
+
+
+/**
+ * Invoked by nsParserContinueEvent::Run(). Clears the pending state and
+ * continues the interrupted parse, provided that ev is still the event this
+ * parser is waiting for.
+ *
+ * @param   ev the event that is firing
+ */
+void
+nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
+{
+  // Ignore any revoked continue events...
+  if (mContinueEvent != ev) {
+    return;
+  }
+
+  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+  mContinueEvent = nullptr;
+
+  NS_ASSERTION(IsOkToProcessNetworkData(),
+               "Interrupted in the middle of a script?");
+  ContinueInterruptedParsing();
+}
+
+// This parser never reports a defined insertion point: always returns false.
+bool
+nsParser::IsInsertionPointDefined()
+{
+  return false;
+}
+
+// Intentionally empty: this parser keeps no insertion point state.
+void
+nsParser::PushDefinedInsertionPoint()
+{
+}
+
+// Intentionally empty: this parser keeps no insertion point state.
+void
+nsParser::PopDefinedInsertionPoint()
+{
+}
+
+// Intentionally empty; aCommand is ignored. IsScriptCreated() always
+// returns false for this parser, so there is nothing to record.
+void
+nsParser::MarkAsNotScriptCreated(const char* aCommand)
+{
+}
+
+// This parser is never reported as script-created: always returns false.
+bool
+nsParser::IsScriptCreated()
+{
+  return false;
+}
+
+/**
+ *  Prepares the parser for parsing the document at aURL: remembers the
+ *  observer and pushes a new URL-type parser context whose scanner is named
+ *  after the URL's spec. No data is parsed by this call itself.
+ *
+ *  @param   aURL the document's URL; must not be null
+ *  @param   aListener observer stored in mObserver and given to the context
+ *  @param   aKey key stored with the new context
+ *  @param   aMode DTD mode assigned to the new context
+ *  @return  NS_OK on success; kBadURL for a null URL; the GetSpec() result
+ *           when it is not NS_OK; NS_ERROR_HTMLPARSER_BADCONTEXT when the
+ *           context or scanner could not be created
+ */
+NS_IMETHODIMP
+nsParser::Parse(nsIURI* aURL,
+                nsIRequestObserver* aListener,
+                void* aKey,
+                nsDTDMode aMode)
+{
+  NS_PRECONDITION(aURL, "Error: Null URL given");
+
+  mObserver = aListener;
+
+  if (!aURL) {
+    return kBadURL;
+  }
+
+  nsAutoCString spec;
+  nsresult rv = aURL->GetSpec(spec);
+  if (rv != NS_OK) {
+    return rv;
+  }
+  NS_ConvertUTF8toUTF16 theName(spec);
+
+  nsScanner* theScanner = new nsScanner(theName, false);
+  CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
+                                          mCommand, aListener);
+  if (!pc || !theScanner) {
+    mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
+    return NS_ERROR_HTMLPARSER_BADCONTEXT;
+  }
+
+  pc->mMultipart = true;
+  pc->mContextType = CParserContext::eCTURL;
+  pc->mDTDMode = aMode;
+  PushContext(*pc);
+
+  return NS_OK;
+}
+
+/**
+ * Used by XML fragment parsing below. Appends aSourceBuffer to the scanner
+ * of the parser context identified by aKey, creating and pushing a new
+ * string-type context (MIME type "application/xml", full standards mode)
+ * when no context with that key is on the stack, and then resumes parsing.
+ *
+ * @param   aSourceBuffer contains a string-full of real content
+ * @param   aKey identifies the parser context this content belongs to
+ * @param   aLastCall true when no more content will follow for this key
+ * @return  NS_OK if the parser was terminated earlier or there was nothing
+ *          to do; otherwise the ResumeParse() result for a newly created
+ *          context. The ResumeParse() result for an existing context is not
+ *          propagated.
+ */
+nsresult
+nsParser::Parse(const nsAString& aSourceBuffer,
+                void* aKey,
+                bool aLastCall)
+{
+  nsresult result = NS_OK;
+
+  // Don't bother if we're never going to parse this.
+  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return result;
+  }
+
+  if (!aLastCall && aSourceBuffer.IsEmpty()) {
+    // Nothing is being passed to the parser so return
+    // immediately. mUnusedInput will get processed when
+    // some data is actually passed in.
+    // But if this is the last call, make sure to finish up
+    // stuff correctly.
+    return result;
+  }
+
+  // Maintain a reference to ourselves so we don't go away
+  // till we're completely done.
+  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+
+  // Given the early return above, this condition always holds here.
+  if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
+    // Note: The following code will always find the parser context associated
+    // with the given key, even if that context has been suspended (e.g., for
+    // another document.write call). This doesn't appear to be exactly what IE
+    // does in the case where this happens, but this makes more sense.
+    CParserContext* pc = mParserContext;
+    while (pc && pc->mKey != aKey) {
+      pc = pc->mPrevContext;
+    }
+
+    if (!pc) {
+      // Only make a new context if we don't have one, OR if we do, but has a
+      // different context key.
+      // The new scanner starts out with whatever unused input is stored.
+      nsScanner* theScanner = new nsScanner(mUnusedInput);
+      NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
+
+      eAutoDetectResult theStatus = eUnknownDetect;
+
+      // Inherit the auto-detect status when the current context is already
+      // an "application/xml" one.
+      if (mParserContext &&
+          mParserContext->mMimeType.EqualsLiteral("application/xml")) {
+        // Ref. Bug 90379
+        NS_ASSERTION(mDTD, "How come the DTD is null?");
+
+        if (mParserContext) {
+          theStatus = mParserContext->mAutoDetectStatus;
+          // Added this to fix bug 32022.
+        }
+      }
+
+      pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
+                              0, theStatus, aLastCall);
+      NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
+
+      PushContext(*pc);
+
+      // Multipart unless this is the last call, or if the previous context
+      // is multipart.
+      pc->mMultipart = !aLastCall; // By default
+      if (pc->mPrevContext) {
+        pc->mMultipart |= pc->mPrevContext->mMultipart;
+      }
+
+      // Start fix bug 40143
+      if (pc->mMultipart) {
+        pc->mStreamListenerState = eOnDataAvail;
+        if (pc->mScanner) {
+          pc->mScanner->SetIncremental(true);
+        }
+      } else {
+        pc->mStreamListenerState = eOnStop;
+        if (pc->mScanner) {
+          pc->mScanner->SetIncremental(false);
+        }
+      }
+      // end fix for 40143
+
+      pc->mContextType=CParserContext::eCTString;
+      pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
+      pc->mDTDMode = eDTDMode_full_standards;
+
+      // The unused input now lives in the new scanner.
+      mUnusedInput.Truncate();
+
+      pc->mScanner->Append(aSourceBuffer);
+      // Do not interrupt document.write() - bug 95487
+      result = ResumeParse(false, false, false);
+    } else {
+      // A context for this key already exists: just feed it more data.
+      pc->mScanner->Append(aSourceBuffer);
+      if (!pc->mPrevContext) {
+        // Set stream listener state to eOnStop, on the final context - Fix 68160,
+        // to guarantee DidBuildModel() call - Fix 36148
+        if (aLastCall) {
+          pc->mStreamListenerState = eOnStop;
+          pc->mScanner->SetIncremental(false);
+        }
+
+        if (pc == mParserContext) {
+          // If pc is not mParserContext, then this call to ResumeParse would
+          // do the wrong thing and try to continue parsing using
+          // mParserContext. We need to wait to actually resume parsing on pc.
+          ResumeParse(false, false, false);
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Parses aSourceBuffer as a fragment nested inside the elements named by
+ * aTagStack. The parse happens in up to three steps that share one parser
+ * context: the opening tags built from the tag stack, the fragment itself,
+ * and (for a non-empty tag stack) the matching end tags.
+ *
+ * Observers are disabled for the duration of the call and re-enabled on
+ * every return path.
+ *
+ * @param   aSourceBuffer the fragment's source text
+ * @param   aTagStack the enclosing tags, innermost first; an entry may carry
+ *          attributes after the tag name, separated by a space
+ * @return  the result of the last Parse() call that was made, or
+ *          NS_ERROR_HTMLPARSER_STOPPARSING if the sink went away
+ */
+NS_IMETHODIMP
+nsParser::ParseFragment(const nsAString& aSourceBuffer,
+                        nsTArray<nsString>& aTagStack)
+{
+  nsresult result = NS_OK;
+  nsAutoString  theContext;
+  uint32_t theCount = aTagStack.Length();
+  uint32_t theIndex = 0;
+
+  // Disable observers for fragments
+  mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+  // Emit the opening tags outermost first, i.e. in reverse stack order.
+  for (theIndex = 0; theIndex < theCount; theIndex++) {
+    theContext.Append('<');
+    theContext.Append(aTagStack[theCount - theIndex - 1]);
+    theContext.Append('>');
+  }
+
+  if (theCount == 0) {
+    // Ensure that the buffer is not empty. Because none of the DTDs care
+    // about leading whitespace, this doesn't change the result.
+    theContext.Assign(' ');
+  }
+
+  // First, parse the context to build up the DTD's tag stack. Note that we
+  // pass false for the aLastCall parameter. The address of theContext serves
+  // as the key that ties all Parse() calls below to the same parser context.
+  result = Parse(theContext,
+                 &theContext,
+                 false);
+  if (NS_FAILED(result)) {
+    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+    return result;
+  }
+
+  if (!mSink) {
+    // Parse must have failed in the XML case and so the sink was killed.
+    // Re-enable observers here as well, like every other return path does.
+    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+    return NS_ERROR_HTMLPARSER_STOPPARSING;
+  }
+
+  nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
+  NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
+
+  fragSink->WillBuildContent();
+  // Now, parse the actual content. Note that this is the last call
+  // for HTML content, but for XML, we will want to build and parse
+  // the end tags.  However, if tagStack is empty, it's the last call
+  // for XML as well.
+  if (theCount == 0) {
+    result = Parse(aSourceBuffer,
+                   &theContext,
+                   true);
+    fragSink->DidBuildContent();
+  } else {
+    // Add an end tag chunk, so expat will read the whole source buffer,
+    // and not worry about ']]' etc.
+    result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
+                   &theContext,
+                   false);
+    fragSink->DidBuildContent();
+
+    if (NS_SUCCEEDED(result)) {
+      nsAutoString endContext;
+      for (theIndex = 0; theIndex < theCount; theIndex++) {
+         // we already added an end tag chunk above
+        if (theIndex > 0) {
+          endContext.AppendLiteral("</");
+        }
+
+        nsString& thisTag = aTagStack[theIndex];
+        // was there an xmlns=?
+        // Only the tag name, up to the first space, goes into the end tag.
+        int32_t endOfTag = thisTag.FindChar(char16_t(' '));
+        if (endOfTag == -1) {
+          endContext.Append(thisTag);
+        } else {
+          endContext.Append(Substring(thisTag,0,endOfTag));
+        }
+
+        endContext.Append('>');
+      }
+
+      result = Parse(endContext,
+                     &theContext,
+                     true);
+    }
+  }
+
+  mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+  return result;
+}
+
+/**
+ *  This routine is called to cause the parser to continue parsing its
+ *  underlying stream.  This call allows the parse process to happen in
+ *  chunks, such as when the content is push based, and we need to parse in
+ *  pieces.
+ *
+ *  An interesting change in how the parser gets used has led us to add extra
+ *  processing to this method.  The case occurs when the parser is blocked in
+ *  one context, and gets a parse(string) call in another context.  In this
+ *  case, the parserContexts are linked. No problem.
+ *
+ *  The problem is that Parse(string) assumes that it can proceed unabated,
+ *  but if the parser is already blocked that assumption is false. So we
+ *  needed to add a mechanism here to allow the parser to continue to process
+ *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
+ *  out of contexts.
+ *
+ *  Nothing is done (and NS_OK is returned) unless the parser is enabled and
+ *  has not already been stopped.
+ *
+ *  @param   allowIteration : set to true if non-script resumption is requested
+ *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
+ *  @param   aCanInterrupt : not read anywhere in this function body.
+ *  @return  error code -- 0 if ok, non-zero if error. The "blocked",
+ *           "stop parsing" and "interrupted" conditions are all reported to
+ *           the caller as NS_OK.
+ */
+nsresult
+nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
+                      bool aCanInterrupt)
+{
+  nsresult result = NS_OK;
+
+  if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
+      mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+
+    // If WillBuildModel fails, tokenizing is switched off and the failure is
+    // handed straight back to the caller.
+    result = WillBuildModel(mParserContext->mScanner->GetFilename());
+    if (NS_FAILED(result)) {
+      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
+      return result;
+    }
+
+    if (mDTD) {
+      mSink->WillResume();
+      bool theIterationIsOk = true;
+
+      while (result == NS_OK && theIterationIsOk) {
+        if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
+          // -- Ref: Bug# 22485 --
+          // Insert the unused input into the source buffer
+          // as if it was read from the input stream.
+          // Adding UngetReadable() per vidur!!
+          mParserContext->mScanner->UngetReadable(mUnusedInput);
+          mUnusedInput.Truncate(0);
+        }
+
+        // Only allow parsing to be interrupted in the subsequent call to
+        // build model.
+        nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
+                                      ? Tokenize(aIsFinalChunk)
+                                      : NS_OK;
+        result = BuildModel();
+
+        // Interrupted while processing the final chunk: no further network
+        // callback will arrive, so a continue event is posted instead.
+        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
+          PostContinueEvent();
+        }
+
+        // Keep looping only while the tokenizer has not hit the end of its
+        // buffer and model building was not interrupted.
+        theIterationIsOk = theTokenizerResult != kEOF &&
+                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;
+
+        // Make sure not to stop parsing too early. Therefore, before shutting
+        // down the parser, it's important to check whether the input buffer
+        // has been scanned to completion (theTokenizerResult should be kEOF).
+        // kEOF -> End of buffer.
+
+        // If we're told to block the parser, we disable all further parsing
+        // (and cache any data coming in) until the parser is re-enabled.
+        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
+          mSink->WillInterrupt();
+          if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
+            // If we were blocked by a recursive invocation, don't re-block.
+            BlockParser();
+          }
+          return NS_OK;
+        }
+        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+          // Note: Parser Terminate() calls DidBuildModel.
+          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+            DidBuildModel(mStreamStatus);
+            mInternalState = result;
+          }
+
+          return NS_OK;
+        }
+        // Either the current buffer was consumed cleanly or the sink
+        // interrupted us: decide whether the current context is finished.
+        if ((NS_OK == result && theTokenizerResult == kEOF) ||
+             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+          bool theContextIsStringBased =
+            CParserContext::eCTString == mParserContext->mContextType;
+
+          if (mParserContext->mStreamListenerState == eOnStop ||
+              !mParserContext->mMultipart || theContextIsStringBased) {
+            if (!mParserContext->mPrevContext) {
+              // No previous context to return to: the model is complete only
+              // once the stream listener has seen OnStopRequest.
+              if (mParserContext->mStreamListenerState == eOnStop) {
+                DidBuildModel(mStreamStatus);
+                return NS_OK;
+              }
+            } else {
+              // A nested context is done: pop it, optionally carry its
+              // unconsumed input over into mUnusedInput, and free it.
+              CParserContext* theContext = PopContext();
+              if (theContext) {
+                theIterationIsOk = allowIteration && theContextIsStringBased;
+                if (theContext->mCopyUnused) {
+                  if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
+                    mInternalState = NS_ERROR_OUT_OF_MEMORY;
+                  }
+                }
+
+                delete theContext;
+              }
+
+              result = mInternalState;
+              // Recompute "final chunk" for the context that is current now.
+              aIsFinalChunk = mParserContext &&
+                              mParserContext->mStreamListenerState == eOnStop;
+              // ...then intentionally fall through to mSink->WillInterrupt()...
+            }
+          }
+        }
+
+        if (theTokenizerResult == kEOF ||
+            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+          // An interruption is not an error for the loop condition or caller.
+          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+          mSink->WillInterrupt();
+        }
+      }
+    } else {
+      // WillBuildModel succeeded but left us without a DTD.
+      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
+    }
+  }
+
+  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+}
+
+/**
+ *  Model-building step of a parse pass: fetches the tokenizer of the current
+ *  parser context and lets the DTD consume the tokens it holds, emitting
+ *  content through the sink.
+ *
+ *  @return the DTD's BuildModel() result; NS_ERROR_HTMLPARSER_BADTOKENIZER
+ *          (also stored in mInternalState) when no tokenizer is available;
+ *          the tokenizer lookup result when there is no DTD.
+ */
+nsresult
+nsParser::BuildModel()
+{
+  nsITokenizer* tokenizer = nullptr;
+  nsresult rv = mParserContext
+              ? mParserContext->GetTokenizer(mDTD, mSink, tokenizer)
+              : NS_OK;
+
+  if (NS_FAILED(rv)) {
+    mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+    return NS_ERROR_HTMLPARSER_BADTOKENIZER;
+  }
+
+  if (!mDTD) {
+    // Nothing to drive; report the (successful) lookup result unchanged.
+    return rv;
+  }
+
+  return mDTD->BuildModel(tokenizer, mSink);
+}
+
+/*******************************************************************
+  These methods are used to talk to the netlib system...
+ *******************************************************************/
+
+/**
+ * nsIRequestObserver callback invoked when a request starts feeding this
+ * parser. Forwards the notification to mObserver (if set), resets the stream
+ * and auto-detect state of the current parser context, remembers the request,
+ * drops the current DTD and records the channel's content type, if any, on
+ * the context.
+ *
+ * @return always NS_OK; a failure to read the content type is ignored.
+ */
+nsresult
+nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
+{
+  NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
+                  "Parser's nsIStreamListener API was not setup "
+                  "correctly in constructor.");
+  if (mObserver) {
+    mObserver->OnStartRequest(request, aContext);
+  }
+
+  mParserContext->mStreamListenerState = eOnStart;
+  mParserContext->mAutoDetectStatus = eUnknownDetect;
+  mParserContext->mRequest = request;
+
+  NS_ASSERTION(!mParserContext->mPrevContext,
+               "Clobbering DTD for non-root parser context!");
+  mDTD = nullptr;
+
+  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
+  if (!channel) {
+    // Not a channel: there is no content type to pick up.
+    return NS_OK;
+  }
+
+  nsAutoCString contentType;
+  if (NS_SUCCEEDED(channel->GetContentType(contentType))) {
+    mParserContext->SetMimeType(contentType);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Looks for an XML declaration ("<?xml ... ?>") at the very start of a byte
+ * buffer and pulls out the value of its encoding pseudo-attribute.
+ *
+ * The scan is deliberately cheap: it looks for the last letter of "version"
+ * and "encoding" and only then compares the preceding characters. A version
+ * value must be seen before an encoding is accepted. A value that compares
+ * equal to "UTF-16" (case-insensitively, over the value's own length) is
+ * rejected.
+ *
+ * @param aBytes   start of the buffer to inspect
+ * @param aLen     number of bytes available at aBytes
+ * @param oCharset receives the encoding label; always truncated first
+ * @return true if a non-empty label was stored in oCharset
+ */
+static bool
+ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
+                                 nsCString& oCharset)
+{
+  // This code is rather pointless to have. Might as well reuse expat as
+  // seen in nsHtml5StreamParser. -- hsivonen
+  oCharset.Truncate();
+
+  const char* text = reinterpret_cast<const char*>(aBytes);
+  if (aLen < 5 || 0 != PL_strncmp("<?xml", text, 5)) {
+    return false;
+  }
+
+  bool sawVersion = false;
+  bool sawEncoding = false;
+  for (int32_t pos = 6; pos < aLen && !sawEncoding; ++pos) {
+    const char cur = text[pos];
+
+    // end of XML declaration?
+    if (cur == '?' && (pos + 1) < aLen && text[pos + 1] == '>') {
+      break;
+    }
+
+    if (!sawVersion) {
+      // Version is required.
+      // Want to avoid string comparisons, hence looking for 'n'
+      // and only if found check the string leading to it. Not
+      // foolproof, but fast.
+      // The shortest string allowed before this is  (strlen==13):
+      // <?xml version
+      if (cur != 'n' || pos < 12 ||
+          0 != PL_strncmp("versio", text + pos - 6, 6)) {
+        continue;
+      }
+
+      // Fast forward through the quoted version value.
+      char openQuote = 0;
+      for (++pos; pos < aLen; ++pos) {
+        const char ch = text[pos];
+        if (ch != '\'' && ch != '"') {
+          continue;
+        }
+        if (openQuote && openQuote == ch) {
+          // ending quote
+          sawVersion = true;
+          break;
+        }
+        // starting quote
+        openQuote = ch;
+      }
+      continue;
+    }
+
+    // encoding must follow version
+    // Want to avoid string comparisons, hence looking for 'g'
+    // and only if found check the string leading to it. Not
+    // foolproof, but fast.
+    // The shortest allowed string before this (strlen==26):
+    // <?xml version="1" encoding
+    if (cur != 'g' || pos < 25 ||
+        0 != PL_strncmp("encodin", text + pos - 7, 7)) {
+      continue;
+    }
+
+    int32_t valueStart = 0;
+    char openQuote = 0;
+    for (++pos; pos < aLen; ++pos) {
+      const char ch = text[pos];
+      if (ch != '\'' && ch != '"') {
+        continue;
+      }
+      if (!openQuote || openQuote != ch) {
+        // (Re)start the value right after this quote character.
+        valueStart = pos + 1;
+        openQuote = ch;
+        continue;
+      }
+
+      // Matching closing quote: the value is [valueStart, pos).
+      const int32_t valueLen = pos - valueStart;
+      // encoding value is invalid if it is UTF-16
+      if (valueLen > 0 &&
+          PL_strncasecmp("UTF-16", text + valueStart, valueLen)) {
+        oCharset.Assign(text + valueStart, valueLen);
+      }
+      sawEncoding = true;
+      break;
+    }
+  }
+
+  return !oCharset.IsEmpty();
+}
+
+/**
+ * Advances aStart by one position and returns the character found there, or
+ * '\0' if the advance reached aEnd. aStart must not already equal aEnd.
+ */
+inline char
+GetNextChar(nsACString::const_iterator& aStart,
+            nsACString::const_iterator& aEnd)
+{
+  NS_ASSERTION(aStart != aEnd, "end of buffer");
+  ++aStart;
+  if (aStart != aEnd) {
+    return *aStart;
+  }
+  return '\0';
+}
+
+/**
+ * ReadSegments() callback that throws the data away: it reports the whole
+ * segment as consumed without looking at it.
+ */
+static nsresult
+NoOpParserWriteFunc(nsIInputStream* in,
+                    void* closure,
+                    const char* fromRawSegment,
+                    uint32_t toOffset,
+                    uint32_t count,
+                    uint32_t *writeCount)
+{
+  // Claim every byte so the stream keeps draining.
+  *writeCount = count;
+
+  return NS_OK;
+}
+
+/**
+ * Closure handed to ParserWriteFunc through ReadSegments().
+ */
+struct ParserWriteStruct {
+  // True until the first segment has been sniffed for a BOM / XML declaration.
+  bool mNeedCharsetCheck;
+  // Parser whose document charset is updated by the sniffing.
+  nsParser* mParser;
+  // Scanner that receives the raw bytes.
+  nsScanner* mScanner;
+  // Request the data belongs to.
+  nsIRequest* mRequest;
+};
+
+/*
+ * ReadSegments() callback: called once for each contiguous buffer of data in
+ * the underlying stream or pipe, which lets us hand the bytes to the scanner
+ * without an intermediate copy.
+ *
+ * For the first segment only (closure->mNeedCharsetCheck) the bytes are
+ * sniffed for a byte order mark or, when the current charset source ranks
+ * below kCharsetFromChannel, for an XML declaration; the parser's document
+ * and sink charsets are then updated.
+ *
+ * closure must point at a ParserWriteStruct; a null closure yields
+ * NS_ERROR_FAILURE. *writeCount is only set when the scanner accepted the
+ * data.
+ */
+static nsresult
+ParserWriteFunc(nsIInputStream* in,
+                void* closure,
+                const char* fromRawSegment,
+                uint32_t toOffset,
+                uint32_t count,
+                uint32_t *writeCount)
+{
+  ParserWriteStruct* state = static_cast<ParserWriteStruct*>(closure);
+  if (!state) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (state->mNeedCharsetCheck) {
+    state->mNeedCharsetCheck = false;
+
+    const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(fromRawSegment);
+    int32_t source;
+    nsAutoCString preferred;
+    nsAutoCString sniffed;
+    state->mParser->GetDocumentCharset(preferred, source);
+
+    // This code was bogus when I found it. It expects the BOM or the XML
+    // declaration to be entirely in the first network buffer. -- hsivonen
+    if (nsContentUtils::CheckForBOM(bytes, count, sniffed)) {
+      // The decoder will swallow the BOM. The UTF-16 will re-sniff for
+      // endianness. The value of preferred is now either "UTF-8" or "UTF-16".
+      preferred.Assign(sniffed);
+      source = kCharsetFromByteOrderMark;
+    } else if (source < kCharsetFromChannel) {
+      nsAutoCString declCharset;
+
+      if (ExtractCharsetFromXmlDeclaration(bytes, count, declCharset) &&
+          EncodingUtils::FindEncodingForLabel(declCharset, sniffed)) {
+        preferred.Assign(sniffed);
+        source = kCharsetFromMetaTag;
+      }
+    }
+
+    state->mParser->SetDocumentCharset(preferred, source);
+    state->mParser->SetSinkCharset(preferred);
+  }
+
+  nsresult rv = state->mScanner->Append(fromRawSegment, count);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  *writeCount = count;
+  return rv;
+}
+
+/**
+ * nsIStreamListener callback: aLength bytes are available on pIStream for
+ * `request`. The bytes are appended to the scanner of the parser context
+ * that owns the request and, if it is currently OK to process network data,
+ * parsing is resumed.
+ *
+ * @return NS_ERROR_UNEXPECTED if no parser context belongs to `request`;
+ *         otherwise the ReadSegments() failure, or the ResumeParse() result.
+ */
+nsresult
+nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
+                          nsIInputStream *pIStream, uint64_t sourceOffset,
+                          uint32_t aLength)
+{
+  NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
+                   eOnDataAvail == mParserContext->mStreamListenerState),
+            "Error: OnStartRequest() must be called before OnDataAvailable()");
+  NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
+                  "Must have a buffered input stream");
+
+  if (mIsAboutBlank) {
+    MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
+    // ... but if an extension tries to feed us data for about:blank in a
+    // release build, silently ignore the data.
+    uint32_t discarded;
+    return pIStream->ReadSegments(NoOpParserWriteFunc,
+                                  nullptr,
+                                  aLength,
+                                  &discarded);
+  }
+
+  // Find the context (current or an ancestor) that this request feeds.
+  CParserContext* ctx = mParserContext;
+  for (; ctx && ctx->mRequest != request; ctx = ctx->mPrevContext) {
+  }
+  if (!ctx) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  ctx->mStreamListenerState = eOnDataAvail;
+
+  if (eInvalidDetect == ctx->mAutoDetectStatus && ctx->mScanner) {
+    // Skip over everything buffered so far.
+    nsScannerIterator iter;
+    ctx->mScanner->EndReading(iter);
+    ctx->mScanner->SetPosition(iter, true);
+  }
+
+  ParserWriteStruct pws;
+  pws.mNeedCharsetCheck = true;
+  pws.mParser = this;
+  pws.mScanner = ctx->mScanner;
+  pws.mRequest = request;
+
+  uint32_t totalRead;
+  nsresult rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength,
+                                       &totalRead);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  if (IsOkToProcessNetworkData()) {
+    // Keep the parser and its sink alive across the re-entrant parse.
+    nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+    nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+    mProcessingNetworkData = true;
+    if (sinkDeathGrip) {
+      sinkDeathGrip->WillParse();
+    }
+    rv = ResumeParse();
+    mProcessingNetworkData = false;
+  }
+
+  return rv;
+}
+
+/**
+ *  nsIRequestObserver callback: the networking library has delivered the
+ *  last block of data for `request`. Marks the matching parser context as
+ *  stopped, records the final stream status, runs a final parse pass when
+ *  that is currently allowed, and forwards the notification to mObserver.
+ *
+ *  @return NS_OK, or the result of the final ResumeParse() call.
+ */
+nsresult
+nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
+                        nsresult status)
+{
+  // Flag the context (current or an ancestor) fed by this request.
+  for (CParserContext* pc = mParserContext; pc; pc = pc->mPrevContext) {
+    if (pc->mRequest != request) {
+      continue;
+    }
+    pc->mStreamListenerState = eOnStop;
+    pc->mScanner->SetIncremental(false);
+    break;
+  }
+
+  mStreamStatus = status;
+
+  nsresult rv = NS_OK;
+  if (IsOkToProcessNetworkData()) {
+    mProcessingNetworkData = true;
+    if (mSink) {
+      mSink->WillParse();
+    }
+    rv = ResumeParse(true, true);
+    mProcessingNetworkData = false;
+  }
+
+  // If the parser isn't enabled, we don't finish parsing till
+  // it is reenabled.
+
+  // XXX Should we wait to notify our observers as well if the
+  // parser isn't yet enabled?
+  if (mObserver) {
+    mObserver->OnStopRequest(request, aContext, status);
+  }
+
+  return rv;
+}
+
+
+/*******************************************************************
+  Here come the tokenization methods...
+ *******************************************************************/
+
+
+/**
+ *  Part of the code sandwich, this gets called right before
+ *  the tokenization process begins, giving the tokenizer of the
+ *  current context a chance to initialize itself.
+ *
+ *  @param  aIsFinalChunk forwarded to the tokenizer's WillTokenize()
+ *  @return true when there is no parser context or the tokenizer's
+ *          WillTokenize() succeeded; false when the tokenizer could not
+ *          be obtained or its WillTokenize() failed.
+ */
+bool
+nsParser::WillTokenize(bool aIsFinalChunk)
+{
+  if (!mParserContext) {
+    return true;
+  }
+
+  nsITokenizer* tokenizer = nullptr;
+  nsresult result = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
+  NS_ENSURE_SUCCESS(result, false);
+
+  const nsresult prepared = tokenizer->WillTokenize(aIsFinalChunk);
+  return NS_SUCCEEDED(prepared);
+}
+
+
+/**
+ * This is the primary control routine to consume tokens.
+ * It iteratively consumes tokens until an error occurs, the tokenizer asks
+ * for a flush, or you run out of data.
+ *
+ * @param  aIsFinalChunk forwarded to WillTokenize()
+ * @return the last ConsumeToken() result (kEOF when the buffer ran dry), the
+ *         Terminate() result when the tokenizer asked to stop parsing, or
+ *         NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in mInternalState)
+ *         when no tokenizer is available.
+ */
+nsresult nsParser::Tokenize(bool aIsFinalChunk)
+{
+  nsITokenizer* tokenizer = nullptr;
+
+  nsresult result = NS_ERROR_NOT_AVAILABLE;
+  if (mParserContext) {
+    result = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
+  }
+
+  if (NS_FAILED(result)) {
+    mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+    return NS_ERROR_HTMLPARSER_BADTOKENIZER;
+  }
+
+  bool flushTokens = false;
+
+  WillTokenize(aIsFinalChunk);
+  for (;;) {
+    mParserContext->mScanner->Mark();
+    result = tokenizer->ConsumeToken(*mParserContext->mScanner,
+                                     flushTokens);
+
+    if (NS_FAILED(result)) {
+      // Every failure ends the loop; undo the partially consumed token first.
+      mParserContext->mScanner->RewindToMark();
+      if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+        // Shut the parser down and let go of the sink.
+        result = Terminate();
+        mSink = nullptr;
+      }
+      break;
+    }
+
+    if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
+      // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
+      // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
+      // Also remember to update the marked position.
+      mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
+      mParserContext->mScanner->Mark();
+      break;
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Get the channel associated with this parser, i.e. the request of the
+ * current parser context queried for nsIChannel.
+ *
+ * @param aChannel out param that will contain the result
+ * @return NS_OK if successful; NS_ERROR_NOT_AVAILABLE when there is no
+ *         current context or it has no request; otherwise the
+ *         CallQueryInterface() result.
+ */
+NS_IMETHODIMP
+nsParser::GetChannel(nsIChannel** aChannel)
+{
+  if (!mParserContext || !mParserContext->mRequest) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  return CallQueryInterface(mParserContext->mRequest, aChannel);
+}
+
+/**
+ * Get the DTD associated with this parser.
+ *
+ * @param aDTD out param; receives an AddRef'ed pointer to the current DTD,
+ *             or nullptr when there is no parser context or no DTD has been
+ *             selected yet.
+ * @return always NS_OK
+ */
+NS_IMETHODIMP
+nsParser::GetDTD(nsIDTD** aDTD)
+{
+  // Always give the out param a defined value: NS_OK is returned even when
+  // there is no parser context, so the caller must never see an
+  // uninitialized pointer.
+  *aDTD = nullptr;
+
+  if (mParserContext) {
+    NS_IF_ADDREF(*aDTD = mDTD);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Get this as nsIStreamListener.
+ *
+ * The parser object itself is the listener; the pointer is returned without
+ * an additional AddRef.
+ */
+nsIStreamListener*
+nsParser::GetStreamListener()
+{
+  return this;
+}
diff --git a/components/htmlparser/src/nsParser.h b/components/htmlparser/src/nsParser.h
new file mode 100644
index 000000000..39bfe03b8
--- /dev/null
+++ b/components/htmlparser/src/nsParser.h
@@ -0,0 +1,398 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+/**
+ * MODULE NOTES:
+ * 
+ *  This class does two primary jobs:
+ *    1) It iterates the tokens provided during the
+ *       tokenization process, identifying where elements
+ *       begin and end (doing validation and normalization).
+ *    2) It controls and coordinates with an instance of
+ *       the IContentSink interface, to coordinate the
+ *       production of the content model.
+ *
+ *  The basic operation of this class assumes that an HTML
+ *  document is non-normalized. Therefore, we don't process
+ *  the document in a normalized way. Don't bother to look
+ *  for methods like: doHead() or doBody().
+ *
+ *  Instead, in order to be backward compatible, we must
+ *  scan the set of tokens and perform this basic set of
+ *  operations:
+ *    1)  Determine the token type (easy, since the tokens know)
+ *    2)  Determine the appropriate section of the HTML document
+ *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
+ *    3)  Insert content into our document (via the sink) into
+ *        the correct section.
+ *    4)  In the case of tags that belong in the BODY, we must
+ *        ensure that our underlying document state reflects
+ *        the appropriate context for our tag. 
+ *
+ *        For example,if we see a <TR>, we must ensure our 
+ *        document contains a table into which the row can
+ *        be placed. This may result in "implicit containers" 
+ *        created to ensure a well-formed document.
+ *         
+ */
+
+#ifndef NS_PARSER__
+#define NS_PARSER__
+
+#include "nsIParser.h"
+#include "nsDeque.h"
+#include "nsIURL.h"
+#include "CParserContext.h"
+#include "nsParserCIID.h"
+#include "nsITokenizer.h"
+#include "nsHTMLTags.h"
+#include "nsIContentSink.h"
+#include "nsCOMArray.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWeakReference.h"
+
+class nsIDTD;
+class nsIRunnable;
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4275 )
+#endif
+
+
+class nsParser final : public nsIParser,
+                       public nsIStreamListener,
+                       public nsSupportsWeakReference
+{
+    /**
+     * Destructor. Declared in the (default-private) leading section of the
+     * class, so it is not accessible to outside code.
+     * @update  gess5/11/98
+     */
+    virtual ~nsParser();
+
+  public:
+    /**
+     * Called on module init
+     */
+    static nsresult Init();
+
+    /**
+     * Called on module shutdown
+     */
+    static void Shutdown();
+
+    NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+    NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
+
+    /**
+     * default constructor
+     * @update  gess5/11/98
+     */
+    nsParser();
+
+    /**
+     * Select given content sink into parser for parser output
+     * @update  gess5/11/98
+     * @param   aSink is the new sink to be used by parser
+     */
+    NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
+
+    /**
+     * Retrieve the sink set into the parser
+     * @update  gess5/11/98
+     * @return  the content sink (presumably mSink; the implementation is not
+     *          part of this header)
+     */
+    NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override;
+    
+    /**
+     *  Call this method once you've created a parser, and want to instruct it
+     *  about the command which caused the parser to be constructed. For example,
+     *  this allows us to select a DTD which can do, say, view-source.
+     *
+     *  The command can be given either as a string or as an eParserCommands
+     *  value; GetCommand() hands the command string back through aCommand.
+     *
+     *  @update  gess 3/25/98
+     *  @param   aCommand -- ptrs to string that contains command
+     */
+    NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
+    NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
+    NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
+
+    /**
+     *  Call this method once you've created a parser, and want to instruct it
+     *  about what charset to load
+     *
+     *  @update  ftang 4/23/99
+     *  @param   aCharset- the charset of a document
+     *  @param   aSource- the source of the charset
+     */
+    NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource) override;
+
+    /**
+     *  Reports the charset currently recorded for the document.
+     *
+     *  @param   aCharset receives a copy of mCharset
+     *  @param   aSource receives mCharsetSource
+     */
+    NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) override
+    {
+         aCharset = mCharset;
+         aSource = mCharsetSource;
+    }
+
+    /**
+     * Cause parser to parse input from given URL
+     * @update  gess5/11/98
+     * @param   aURL is a descriptor for source document
+     * @param   aListener is a listener to forward notifications to
+     * @param   aKey NOTE(review): opaque key, presumably identifying the
+     *          parser context -- confirm against the implementation
+     * @param   aMode the DTD mode to use; defaults to autodetection
+     * @return  an nsresult status code
+     */
+    NS_IMETHOD Parse(nsIURI* aURL,
+                     nsIRequestObserver* aListener = nullptr,
+                     void* aKey = 0,
+                     nsDTDMode aMode = eDTDMode_autodetect) override;
+
+    /**
+     * Parses a fragment of markup.
+     * @param   aSourceBuffer the markup to parse
+     * @param   aTagStack the tags forming the context in which the fragment
+     *          is parsed. NOTE(review): ordering and exact format of the
+     *          entries are defined by the implementation in nsParser.cpp.
+     * @return  an nsresult status code
+     */
+    NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
+                             nsTArray<nsString>& aTagStack) override;
+                             
+    /**
+     * This method gets called when the tokens have been consumed, and it's time
+     * to build the model via the content sink.
+     * @update  gess5/11/98
+     * @return  an nsresult status code
+     */
+    NS_IMETHOD BuildModel(void) override;
+
+    NS_IMETHOD        ContinueInterruptedParsing() override;
+    NS_IMETHOD_(void) BlockParser() override;
+    NS_IMETHOD_(void) UnblockParser() override;
+    NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
+    NS_IMETHOD        Terminate(void) override;
+
+    /**
+     * Call this to query whether the parser is enabled or not.
+     *
+     *  @update  vidur 4/12/99
+     *  @return  current state
+     */
+    NS_IMETHOD_(bool) IsParserEnabled() override;
+
+    /**
+     * Call this to query whether the parser thinks it's done with parsing.
+     *
+     *  @update  rickg 5/12/01
+     *  @return  complete state
+     */
+    NS_IMETHOD_(bool) IsComplete() override;
+
+    /**
+     *  This rather arcane method (hack) is used as a signal between the
+     *  DTD and the parser. It allows the DTD to tell the parser that content
+     *  that comes through (parser::parser(string)) but not consumed should
+     *  propagate into the next string based parse call.
+     *
+     *  @update  gess 9/1/98
+     *  @param   aBuffer the unconsumed input (presumably stored in
+     *           mUnusedInput; the implementation is not part of this header)
+     */
+    void SetUnusedInput(nsString& aBuffer);
+
+    /**
+     * This method gets called (automatically) during incremental parsing
+     * @update  gess5/11/98
+     * @param   allowIteration set to true if non-script resumption is
+     *          requested
+     * @param   aIsFinalChunk true when the last chunk of data has been
+     *          provided
+     * @param   aCanInterrupt NOTE(review): see the implementation for how
+     *          (and whether) this flag is honoured
+     * @return  an nsresult status code
+     */
+    virtual nsresult ResumeParse(bool allowIteration = true, 
+                                 bool aIsFinalChunk = false,
+                                 bool aCanInterrupt = true);
+
+     //*********************************************
+      // These methods are callback methods used by
+      // net lib to let us know about our inputstream.
+      //*********************************************
+    // nsIRequestObserver methods:
+    NS_DECL_NSIREQUESTOBSERVER
+
+    // nsIStreamListener methods:
+    NS_DECL_NSISTREAMLISTENER
+
+    // Parser contexts form a stack; PeekContext() returns the current top
+    // (mParserContext) without modifying the stack.
+    void              PushContext(CParserContext& aContext);
+    CParserContext*   PopContext();
+    CParserContext*   PeekContext() {return mParserContext;}
+
+    /** 
+     * Get the channel associated with this parser
+     * @update harishd,gagan 07/17/01
+     * @param aChannel out param that will contain the result
+     * @return NS_OK if successful
+     */
+    NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
+
+    /** 
+     * Get the DTD associated with this parser
+     * @update vidur 9/29/99
+     * @param aDTD out param that will contain the result
+     * @return NS_OK
+     */
+    NS_IMETHOD GetDTD(nsIDTD** aDTD) override;
+  
+    /**
+     * Get the nsIStreamListener for this parser
+     */
+    virtual nsIStreamListener* GetStreamListener() override;
+
+    /**
+     * Passes a charset on to the content sink.
+     * NOTE(review): behaviour inferred from the name and from its use right
+     * after SetDocumentCharset(); the implementation is not in this header.
+     */
+    void SetSinkCharset(nsACString& aCharset);
+
+    /**
+     *  Removes continue parsing events
+     *  @update  kmcclusk 5/18/98
+     */
+
+    NS_IMETHOD CancelParsingEvents() override;
+
+    /**
+     * Return true.
+     */
+    virtual bool IsInsertionPointDefined() override;
+
+    /**
+     * No-op.
+     */
+    virtual void PushDefinedInsertionPoint() override;
+
+    /**
+     * No-op.
+     */
+    virtual void PopDefinedInsertionPoint() override;
+
+    /**
+     * No-op.
+     */
+    virtual void MarkAsNotScriptCreated(const char* aCommand) override;
+
+    /**
+     * Always false.
+     */
+    virtual bool IsScriptCreated() override;
+
+    /**  
+     *  Set the parser state to indicate whether parsing tokens can be interrupted
+     *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
+     *  @update  kmcclusk 5/18/98
+     */
+    void SetCanInterrupt(bool aCanInterrupt);
+
+    /**
+     * This is called when the final chunk has been
+     * passed to the parser and the content sink has
+     * interrupted token processing. It schedules
+     * a ParserContinue PL_Event which will ask the parser
+     * to HandleParserContinueEvent when it is handled.
+     * @update  kmcclusk6/1/2001
+     */
+    nsresult PostContinueEvent();
+
+    /**
+     *  Fired when the continue parse event is triggered.
+     *  @update  kmcclusk 5/18/98
+     */
+    void HandleParserContinueEvent(class nsParserContinueEvent *);
+
+    /**
+     * Returns the parser to its initial state by running Cleanup()
+     * followed by Initialize().
+     */
+    virtual void Reset() override {
+      Cleanup();
+      Initialize();
+    }
+
+    /**
+     * True when there is a sink and it reports that a script is executing.
+     */
+    bool IsScriptExecuting() {
+      return mSink && mSink->IsScriptExecuting();
+    }
+
+    /**
+     * Network data may be processed only while no script is executing and
+     * the parser is not already in the middle of processing network data.
+     */
+    bool IsOkToProcessNetworkData() {
+      return !IsScriptExecuting() && !mProcessingNetworkData;
+    }
+
+ protected:
+
+    // Set up / tear down the parser's state; Reset() runs Cleanup() and then
+    // Initialize() with the default argument.
+    void Initialize(bool aConstructor = false);
+    void Cleanup();
+
+    /**
+     * Called before the model is built.
+     * @update  gess5/18/98
+     * @param   aFilename the name of the source being parsed
+     * @return  an nsresult status code
+     */
+    nsresult WillBuildModel(nsString& aFilename);
+
+    /**
+     * Called once model building is finished or abandoned.
+     * @update  gess5/18/98
+     * @param   anErrorCode the status with which parsing ended
+     * @return  an nsresult status code
+     */
+    nsresult DidBuildModel(nsresult anErrorCode);
+
+private:
+
+    /*******************************************
+      These are the tokenization methods...
+     *******************************************/
+
+    /**
+     *  Part of the code sandwich, this gets called right before
+     *  the tokenization process begins. The main reason for
+     *  this call is to allow the delegate to do initialization.
+     *
+     *  @update  gess 3/25/98
+     *  @param   aIsFinalChunk true when the last chunk of data is being
+     *           tokenized
+     *  @return  TRUE if it's ok to proceed
+     */
+    bool WillTokenize(bool aIsFinalChunk = false);
+
+   
+    /**
+     *  This is the primary control routine. It iteratively
+     *  consumes tokens until an error occurs or you run out
+     *  of data.
+     *
+     *  @update  gess 3/25/98
+     *  @param   aIsFinalChunk true when the last chunk of data is being
+     *           tokenized
+     *  @return  error code
+     */
+    nsresult Tokenize(bool aIsFinalChunk = false);
+
+    /**
+     * Pushes XML fragment parsing data to expat without an input stream.
+     * @param   aSourceBuffer the text to parse
+     * @param   aKey NOTE(review): opaque key, presumably identifying the
+     *          parser context -- confirm against the implementation
+     * @param   aLastCall true if this is the final piece of input
+     */
+    nsresult Parse(const nsAString& aSourceBuffer,
+                   void* aKey,
+                   bool aLastCall);
+
+protected:
+    //*********************************************
+    // And now, some data members...
+    //*********************************************
+    
+      
+    CParserContext*              mParserContext;  // current (top) context
+    nsCOMPtr<nsIDTD>             mDTD;
+    nsCOMPtr<nsIRequestObserver> mObserver;
+    nsCOMPtr<nsIContentSink>     mSink;
+    nsIRunnable*                 mContinueEvent;  // weak ref
+
+    eParserCommands     mCommand;
+    nsresult            mInternalState;
+    nsresult            mStreamStatus;
+    int32_t             mCharsetSource;
+    
+    uint16_t            mFlags;           // NS_PARSER_FLAG_* bits
+
+    nsString            mUnusedInput;
+    nsCString           mCharset;
+    nsCString           mCommandStr;
+
+    bool                mProcessingNetworkData;
+    bool                mIsAboutBlank;
+};
+
+#endif 
+
diff --git a/components/htmlparser/src/nsParserBase.h b/components/htmlparser/src/nsParserBase.h
new file mode 100644
index 000000000..83b68c554
--- /dev/null
+++ b/components/htmlparser/src/nsParserBase.h
@@ -0,0 +1,20 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserBase_h_
+#define nsParserBase_h_
+
+#include "nsIChannel.h"
+
+// Parser base class. Defaults: parsing is enabled and no channel is available.
+class nsParserBase : public nsISupports {
+public:
+  NS_IMETHOD_(bool) IsParserEnabled() { return true; }
+  NS_IMETHOD GetChannel(nsIChannel** aChannel) {
+    *aChannel = nullptr;  // aChannel itself is not checked for null
+    return NS_OK;
+  }
+};
+
+#endif // nsParserBase_h_
diff --git a/components/htmlparser/src/nsParserCIID.h b/components/htmlparser/src/nsParserCIID.h
new file mode 100644
index 000000000..4a2b7b1ad
--- /dev/null
+++ b/components/htmlparser/src/nsParserCIID.h
@@ -0,0 +1,39 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserCIID_h__
+#define nsParserCIID_h__
+
+#include "nsISupports.h"
+#include "nsIFactory.h"
+#include "nsIComponentManager.h"
+
+// {2ce606b0-bee6-11d1-aad9-00805f8a3e14}
+#define NS_PARSER_CID      \
+{ 0x2ce606b0, 0xbee6, 0x11d1, { 0xaa, 0xd9, 0x0, 0x80, 0x5f, 0x8a, 0x3e, 0x14 } }
+
+// {9039c670-2717-11d2-9246-00805f8a7ab6}
+// XXX: Should not be exposed outside of the parser; remove once CNavDTD subclasses no longer need access.
+#define NS_PARSER_NODE_IID      \
+  {0x9039c670, 0x2717,  0x11d2,  \
+  {0x92, 0x46, 0x00,    0x80, 0x5f, 0x8a, 0x7a, 0xb6}}
+
+// {a6cf9107-15b3-11d2-932e-00805f8add32}
+#define NS_CNAVDTD_CID \
+{ 0xa6cf9107, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+// {FFF4FBE9-528A-4b37-819D-FC18F3A401A7}
+#define NS_EXPAT_DRIVER_CID \
+{ 0xfff4fbe9, 0x528a, 0x4b37, { 0x81, 0x9d, 0xfc, 0x18, 0xf3, 0xa4, 0x1, 0xa7 } }
+
+// {a6cf910f-15b3-11d2-932e-00805f8add32}
+#define NS_HTMLCONTENTSINKSTREAM_CID \
+{ 0xa6cf910f, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+// {a6cf9112-15b3-11d2-932e-00805f8add32}
+#define NS_PARSERSERVICE_CID \
+{ 0xa6cf9112, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+#endif
diff --git a/components/htmlparser/src/nsParserConstants.h b/components/htmlparser/src/nsParserConstants.h
new file mode 100644
index 000000000..2f2373c7f
--- /dev/null
+++ b/components/htmlparser/src/nsParserConstants.h
@@ -0,0 +1,38 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserConstants_h_
+#define nsParserConstants_h_
+const char16_t  kNewLine          = '\n';  // same value as kLF
+const char16_t  kCR               = '\r';
+const char16_t  kLF               = '\n';  // same value as kNewLine
+const char16_t  kTab              = '\t';
+const char16_t  kSpace            = ' ';
+const char16_t  kQuote            = '"';
+const char16_t  kApostrophe       = '\'';
+const char16_t  kLessThan         = '<';
+const char16_t  kGreaterThan      = '>';
+const char16_t  kAmpersand        = '&';
+const char16_t  kForwardSlash     = '/';
+const char16_t  kBackSlash        = '\\';
+const char16_t  kEqual            = '=';
+const char16_t  kMinus            = '-';
+const char16_t  kPlus             = '+';
+const char16_t  kExclamation      = '!';
+const char16_t  kSemicolon        = ';';
+const char16_t  kHashsign         = '#';
+const char16_t  kAsterisk         = '*';
+const char16_t  kUnderbar         = '_';
+const char16_t  kComma            = ',';
+const char16_t  kLeftParen        = '(';
+const char16_t  kRightParen       = ')';
+const char16_t  kLeftBrace        = '{';
+const char16_t  kRightBrace       = '}';
+const char16_t  kQuestionMark     = '?';
+const char16_t  kLeftSquareBracket  = '[';
+const char16_t  kRightSquareBracket = ']';
+const char16_t kNullCh           = '\0';  // the NUL code unit
+
+#endif // nsParserConstants_h_
diff --git a/components/htmlparser/src/nsParserModule.cpp b/components/htmlparser/src/nsParserModule.cpp
new file mode 100644
index 000000000..00c2d6c56
--- /dev/null
+++ b/components/htmlparser/src/nsParserModule.cpp
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIAtom.h"
+#include "nsString.h"
+#include "nspr.h"
+#include "nsCOMPtr.h"
+#include "mozilla/ModuleUtils.h"
+#include "nsParserCIID.h"
+#include "nsParser.h"
+#include "CNavDTD.h"
+#include "nsHTMLEntities.h"
+#include "nsHTMLTokenizer.h"
+//#include "nsTextTokenizer.h"
+#include "nsElementTable.h"
+#include "nsParserService.h"
+#include "nsSAXAttributes.h"
+#include "nsSAXLocator.h"
+#include "nsSAXXMLReader.h"
+
+#if defined(DEBUG)
+#include "nsExpatDriver.h"
+#endif
+
+// Factory constructors: each macro provides the <Class>Constructor named in kParserCIDs.
+
+#if defined(DEBUG)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsExpatDriver)
+#endif
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsParser)
+NS_GENERIC_FACTORY_CONSTRUCTOR(CNavDTD)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsParserService)
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsSAXAttributes)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsSAXXMLReader)
+// Named CIDs: each macro provides the kNS_*_CID constant used by the tables below.
+#if defined(DEBUG)
+NS_DEFINE_NAMED_CID(NS_EXPAT_DRIVER_CID);
+#endif
+NS_DEFINE_NAMED_CID(NS_PARSER_CID);
+NS_DEFINE_NAMED_CID(NS_CNAVDTD_CID);
+NS_DEFINE_NAMED_CID(NS_PARSERSERVICE_CID);
+NS_DEFINE_NAMED_CID(NS_SAXATTRIBUTES_CID);
+NS_DEFINE_NAMED_CID(NS_SAXXMLREADER_CID);
+
+static const mozilla::Module::CIDEntry kParserCIDs[] = {  // CID -> factory constructor
+#if defined(DEBUG)
+  { &kNS_EXPAT_DRIVER_CID, false, nullptr, nsExpatDriverConstructor },  // DEBUG builds only
+#endif
+  { &kNS_PARSER_CID, false, nullptr, nsParserConstructor },
+  { &kNS_CNAVDTD_CID, false, nullptr, CNavDTDConstructor },
+  { &kNS_PARSERSERVICE_CID, false, nullptr, nsParserServiceConstructor },
+  { &kNS_SAXATTRIBUTES_CID, false, nullptr, nsSAXAttributesConstructor },
+  { &kNS_SAXXMLREADER_CID, false, nullptr, nsSAXXMLReaderConstructor },
+  { nullptr }  // terminating entry
+};
+
+static const mozilla::Module::ContractIDEntry kParserContracts[] = {  // contract ID -> CID
+  { NS_PARSERSERVICE_CONTRACTID, &kNS_PARSERSERVICE_CID },
+  { NS_SAXATTRIBUTES_CONTRACTID, &kNS_SAXATTRIBUTES_CID },
+  { NS_SAXXMLREADER_CONTRACTID, &kNS_SAXXMLREADER_CID },
+  { nullptr }  // terminating entry; the parser, DTD and expat CIDs get no contract ID here
+};
+
+static nsresult
+Initialize()
+{
+  // Tag table first; if this fails there is nothing to undo yet.
+  nsresult rv = nsHTMLTags::AddRefTable();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Entity table second; on failure hand back the tag table reference
+  // taken above so the AddRefTable/ReleaseTable calls stay balanced.
+  rv = nsHTMLEntities::AddRefTable();
+  if (NS_FAILED(rv)) {
+    nsHTMLTags::ReleaseTable();
+    return rv;
+  }
+#ifdef DEBUG
+  // Debug builds additionally run the element and tag table checks.
+  CheckElementTable();
+  nsHTMLTags::TestTagTable();
+#endif
+  return rv;
+}
+
+static void
+Shutdown()
+{
+  nsHTMLTags::ReleaseTable();      // pairs with nsHTMLTags::AddRefTable() in Initialize()
+  nsHTMLEntities::ReleaseTable();  // pairs with nsHTMLEntities::AddRefTable() in Initialize()
+}
+
+static mozilla::Module kParserModule = {
+  mozilla::Module::kVersion,
+  kParserCIDs,
+  kParserContracts,
+  nullptr,     // presumably the category-entries slot, unused here -- confirm against mozilla::Module
+  nullptr,     // presumably the custom factory hook, unused here -- confirm against mozilla::Module
+  Initialize,  // takes the tag/entity table references (defined above)
+  Shutdown     // releases the tag/entity table references (defined above)
+};
+
+NSMODULE_DEFN(nsParserModule) = &kParserModule;
diff --git a/components/htmlparser/src/nsParserMsgUtils.cpp b/components/htmlparser/src/nsParserMsgUtils.cpp
new file mode 100644
index 000000000..627f57a0e
--- /dev/null
+++ b/components/htmlparser/src/nsParserMsgUtils.cpp
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIServiceManager.h"
+#include "nsIStringBundle.h"
+#include "nsXPIDLString.h"
+#include "nsParserMsgUtils.h"
+#include "nsNetCID.h"
+#include "mozilla/Services.h"
+
+// Creates the string bundle for aPropFileName and returns it through aBundle.
+// Fails with NS_ERROR_FAILURE when the string bundle service is unavailable.
+static nsresult GetBundle(const char * aPropFileName, nsIStringBundle **aBundle)
+{
+  NS_ENSURE_ARG_POINTER(aPropFileName);
+  NS_ENSURE_ARG_POINTER(aBundle);
+
+  nsCOMPtr<nsIStringBundleService> bundleService =
+    mozilla::services::GetStringBundleService();
+  if (bundleService) {
+    return bundleService->CreateBundle(aPropFileName, aBundle);
+  }
+  return NS_ERROR_FAILURE;
+}
+
+// Looks up aKey in the bundle at aPropFileName; oVal is cleared first and only set on success.
+nsresult
+nsParserMsgUtils::GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& oVal)
+{
+  oVal.Truncate();
+  NS_ENSURE_ARG_POINTER(aKey);
+
+  nsCOMPtr<nsIStringBundle> stringBundle;
+  nsresult rv = GetBundle(aPropFileName, getter_AddRefs(stringBundle));
+  if (NS_FAILED(rv) || !stringBundle) {
+    return rv;
+  }
+  nsAutoString wideKey; wideKey.AssignWithConversion(aKey);
+  nsXPIDLString localized;
+  rv = stringBundle->GetStringFromName(wideKey.get(), getter_Copies(localized));
+  if (NS_SUCCEEDED(rv) && localized) {
+    oVal.Assign(localized);
+  }
+  return rv;
+}
+
+// Looks up string aID in the bundle at aPropFileName; oVal is cleared first and only set on success.
+nsresult
+nsParserMsgUtils::GetLocalizedStringByID(const char * aPropFileName, uint32_t aID, nsString& oVal)
+{
+  oVal.Truncate();
+  nsCOMPtr<nsIStringBundle> stringBundle;
+  nsresult rv = GetBundle(aPropFileName, getter_AddRefs(stringBundle));
+  if (NS_FAILED(rv) || !stringBundle) {
+    return rv;
+  }
+  nsXPIDLString localized;
+  rv = stringBundle->GetStringFromID(aID, getter_Copies(localized));
+  if (NS_SUCCEEDED(rv) && localized) {
+    oVal.Assign(localized);
+  }
+  return rv;
+}
diff --git a/components/htmlparser/src/nsParserMsgUtils.h b/components/htmlparser/src/nsParserMsgUtils.h
new file mode 100644
index 000000000..adf3fda8a
--- /dev/null
+++ b/components/htmlparser/src/nsParserMsgUtils.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserMsgUtils_h
+#define nsParserMsgUtils_h
+
+#include "nsString.h"
+
+#define XMLPARSER_PROPERTIES "chrome://global/locale/layout/xmlparser.properties"
+
+class nsParserMsgUtils {
+  nsParserMsgUtils();  // Private: this class is not meant to be instantiated; use the static methods.
+  ~nsParserMsgUtils(); // Private as well. If performance requires it, this class could cache values.
+public:
+  static nsresult GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& aVal); // lookup by key name
+  static nsresult GetLocalizedStringByID(const char * aPropFileName, uint32_t aID, nsString& aVal);       // lookup by numeric ID
+};
+
+#endif
diff --git a/components/htmlparser/src/nsParserService.cpp b/components/htmlparser/src/nsParserService.cpp
new file mode 100644
index 000000000..5893f19a9
--- /dev/null
+++ b/components/htmlparser/src/nsParserService.cpp
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsError.h"
+#include "nsIAtom.h"
+#include "nsParserService.h"
+#include "nsHTMLEntities.h"
+#include "nsElementTable.h"
+#include "nsICategoryManager.h"
+#include "nsCategoryManagerUtils.h"
+
+nsParserService::nsParserService()
+{  // nothing to set up
+}
+
+nsParserService::~nsParserService()
+{  // nothing to tear down
+}
+
+NS_IMPL_ISUPPORTS(nsParserService, nsIParserService)  // nsISupports implementation exposing nsIParserService
+
+// Tag id for aAtom, resolved through the atom's string value.
+int32_t nsParserService::HTMLAtomTagToId(nsIAtom* aAtom) const {
+  const nsDependentAtomString tagName(aAtom);
+  return nsHTMLTags::StringTagToId(tagName);
+}
+
+// Tag id for aAtom via nsHTMLTags' case-sensitive atom lookup.
+int32_t
+nsParserService::HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const {
+  return nsHTMLTags::CaseSensitiveAtomTagToId(aAtom);
+}
+
+// Tag id for the tag name aTag, as reported by nsHTMLTags::StringTagToId.
+int32_t
+nsParserService::HTMLStringTagToId(const nsAString& aTag) const {
+  return nsHTMLTags::StringTagToId(aTag);
+}
+
+// String value nsHTMLTags keeps for the tag with id aId.
+const char16_t*
+nsParserService::HTMLIdToStringTag(int32_t aId) const {
+  return nsHTMLTags::GetStringValue(static_cast<nsHTMLTag>(aId));
+}
+  
+// Atom nsHTMLTags keeps for the tag with id aId.
+nsIAtom*
+nsParserService::HTMLIdToAtomTag(int32_t aId) const {
+  return nsHTMLTags::GetAtom(static_cast<nsHTMLTag>(aId));
+}
+
+// Writes nsHTMLEntities::EntityToUnicode(aEntity) to *aUnicode; always returns NS_OK.
+NS_IMETHODIMP
+nsParserService::HTMLConvertEntityToUnicode(const nsAString& aEntity,
+                                            int32_t* aUnicode) const
+{
+  *aUnicode = nsHTMLEntities::EntityToUnicode(aEntity);
+  return NS_OK;
+}
+
+// Assigns the entity name for aUnicode to aEntity when one exists; aEntity is
+// left untouched otherwise. Always returns NS_OK.
+NS_IMETHODIMP
+nsParserService::HTMLConvertUnicodeToEntity(int32_t aUnicode,
+                                            nsCString& aEntity) const
+{
+  if (const char* entityName = nsHTMLEntities::UnicodeToEntity(aUnicode)) {
+    aEntity.Assign(entityName);
+  }
+  return NS_OK;
+}
+
+// Sets aIsContainer from nsHTMLElement::IsContainer for tag id aId; always NS_OK.
+NS_IMETHODIMP
+nsParserService::IsContainer(int32_t aId, bool& aIsContainer) const
+{
+  aIsContainer = nsHTMLElement::IsContainer(static_cast<nsHTMLTag>(aId));
+  return NS_OK;
+}
+
+// Sets aIsBlock from nsHTMLElement::IsBlock for tag id aId; always NS_OK.
+NS_IMETHODIMP
+nsParserService::IsBlock(int32_t aId, bool& aIsBlock) const
+{
+  aIsBlock = nsHTMLElement::IsBlock(static_cast<nsHTMLTag>(aId));
+  return NS_OK;
+}
diff --git a/components/htmlparser/src/nsParserService.h b/components/htmlparser/src/nsParserService.h
new file mode 100644
index 000000000..0ea7ec98c
--- /dev/null
+++ b/components/htmlparser/src/nsParserService.h
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef NS_PARSERSERVICE_H__
+#define NS_PARSERSERVICE_H__
+
+#include "nsIParserService.h"
+
+extern "C" int MOZ_XMLIsLetter(const char* ptr);
+extern "C" int MOZ_XMLIsNCNameChar(const char* ptr);
+/**
+ * Decodes an entity into the UTF-16 encoding of a Unicode character. If a ';'
+ * is found between `ptr` and `end` it will try to decode the entity and set
+ * `*next` to point to the character after the ;. The resulting UTF-16 code
+ * units will be written in `*result`, so if the entity is a valid numeric
+ * entity there needs to be space for at least two char16_t at the location
+ * `result` points to.
+ *
+ * @param ptr pointer to the ampersand.
+ * @param end pointer to the position after the last character of the
+ *            string.
+ * @param next [out] will be set to the character after the ';' or null if
+ *                   the decoding was unsuccessful.
+ * @param result the buffer to write the resulting UTF-16 character in.
+ * @return the number of char16_t written to `*result`.
+ */
+extern "C" int MOZ_XMLTranslateEntity(const char* ptr, const char* end,
+                                      const char** next, char16_t* result);
+
+class nsParserService : public nsIParserService {
+  virtual ~nsParserService();  // private destructor
+
+public:
+  nsParserService();
+
+  NS_DECL_ISUPPORTS
+  // Conversions between tag atoms/names and tag ids; the ids are cast to nsHTMLTag.
+  int32_t HTMLAtomTagToId(nsIAtom* aAtom) const override;
+
+  int32_t HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const override;
+
+  int32_t HTMLStringTagToId(const nsAString& aTag) const override;
+
+  const char16_t *HTMLIdToStringTag(int32_t aId) const override;
+  
+  nsIAtom *HTMLIdToAtomTag(int32_t aId) const override;
+  // Entity conversions and element classification; these four always return NS_OK.
+  NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity, 
+                                        int32_t* aUnicode) const override;
+  NS_IMETHOD HTMLConvertUnicodeToEntity(int32_t aUnicode,
+                                        nsCString& aEntity) const override;
+  NS_IMETHOD IsContainer(int32_t aId, bool& aIsContainer) const override;
+  NS_IMETHOD IsBlock(int32_t aId, bool& aIsBlock) const override;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsScanner.cpp b/components/htmlparser/src/nsScanner.cpp
new file mode 100644
index 000000000..0fa8e43c6
--- /dev/null
+++ b/components/htmlparser/src/nsScanner.cpp
@@ -0,0 +1,408 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//#define __INCREMENTAL 1
+
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+
+#include "nsScanner.h"
+#include "nsDebug.h"
+#include "nsReadableUtils.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "nsUTF8Utils.h" // for LossyConvertEncoding
+#include "nsCRT.h"
+#include "nsParser.h"
+#include "nsCharsetSource.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+
+using mozilla::dom::EncodingUtils;
+
+nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
+  : mChars(aTerminateChars)
+  , mFilter(char16_t(~0)) // start with every bit set
+{
+  // mFilter ends up holding exactly the bits that no terminator has set.
+  // A character that shares a bit with mFilter therefore cannot be one of
+  // the terminators, which lets such characters be filtered out cheaply.
+  // This works very well because terminal chars often have only their
+  // lowest 4-6 bits set, while normal ASCII letters have the next bit set
+  // as well and other letters have even higher bits set.
+  //
+  // aTerminateChars must point at a NUL-terminated string: the NUL ends
+  // the scan below and is not itself folded into the filter.
+  for (const char16_t* cursor = aTerminateChars; *cursor; ++cursor) {
+    // Clear every bit this terminator uses.
+    mFilter &= ~*cursor;
+  }
+}
+
+/**
+ *  Builds a scanner whose entire input is the one string handed in at
+ *  construction time; the string's contents are copied into the scanner.
+ *  This short cut was added for Javascript.
+ *
+ *  @update  gess 5/12/98
+ *  @param   anHTMLString the text to scan
+ */
+nsScanner::nsScanner(const nsAString& anHTMLString)
+  : mSlidingBuffer(nullptr)
+  , mIncremental(false)
+  , mCharsetSource(kCharsetUninitialized)
+{
+  // mUnicodeDecoder is an nsCOMPtr member and starts out null by itself.
+  MOZ_COUNT_CTOR(nsScanner);
+
+  if (!AppendToBuffer(anHTMLString)) {
+    // Buffering failed: make the iterators look like an empty scanner.
+    /* XXX see hack below, re: bug 182067 */
+    memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+    mEndPosition = mCurrentPosition;
+  } else {
+    mSlidingBuffer->BeginReading(mCurrentPosition);
+  }
+  mMarkPosition = mCurrentPosition;
+}
+
+/**
+ *  Builds a scanner that receives its data through Append(). aFilename is
+ *  only stored (it may be empty) and aCreateStream is asserted to be false.
+ *  The scanner starts incremental, with UTF-8 requested as its charset.
+ */
+nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
+  : mSlidingBuffer(nullptr)
+  , mFilename(aFilename)
+  , mIncremental(true)
+  , mCharsetSource(kCharsetUninitialized)
+{
+  MOZ_COUNT_CTOR(nsScanner);
+  NS_ASSERTION(!aCreateStream, "This is always true.");
+
+  // XXX This is a big hack.  We need to initialize the iterators to something.
+  // What matters is that mCurrentPosition == mEndPosition, so that our methods
+  // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
+  // so that we have some hope of catching null pointer dereferences associated
+  // with this hack. --darin
+  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+  mEndPosition = mCurrentPosition;
+  mMarkPosition = mCurrentPosition;
+
+  // mUnicodeDecoder (an nsCOMPtr) is still null here; the call below is
+  // expected to install the real decoder.
+
+  // XML defaults to UTF-8 and about:blank is UTF-8, too.
+  SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
+}
+
+// Switches the decoder to aCharset unless aSource has lower priority than the
+// source of the current charset. Always returns NS_OK.
+nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
+{
+  if (aSource < mCharsetSource) {
+    return NS_OK; // priority is lower than the current one: ignore
+  }
+  mCharsetSource = aSource;
+
+  // Look up the encoding name that belongs to the given label.
+  nsCString encodingName;
+  mozilla::DebugOnly<bool> known =
+      EncodingUtils::FindEncodingForLabel(aCharset, encodingName);
+  MOZ_ASSERT(known, "Should never call with a bogus aCharset.");
+
+  const bool sameEncoding =
+    !mCharset.IsEmpty() && encodingName.Equals(mCharset);
+  if (!sameEncoding) {
+    mCharset.Assign(encodingName);
+    mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
+    // Ask the new decoder to signal input errors (kOnError_Signal).
+    mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+  }
+  return NS_OK;
+}
+
+
+/**
+ *  Destructor.
+ *  
+ *  @update  gess 3/25/98
+ *  Deletes the sliding buffer; mSlidingBuffer may still be null if no data
+ *  was ever buffered, in which case the delete is a no-op.
+ */
+nsScanner::~nsScanner() {
+
+  delete mSlidingBuffer;
+
+  MOZ_COUNT_DTOR(nsScanner);
+}
+
+/**
+ *  Moves the read position back to the most recently marked position.
+ *  This allows us to back up to that point if the need should arise,
+ *  such as when tokenization gets interrupted.
+ *  NOTE: IT IS REALLY BAD FORM TO CALL THIS WITHOUT CALLING MARK FIRST!
+ *  Does nothing while the scanner has no buffer.
+ *
+ *  @update  gess 5/12/98
+ */
+void nsScanner::RewindToMark(void)
+{
+  if (!mSlidingBuffer)
+    return;
+  mCurrentPosition = mMarkPosition;
+}
+
+
+/**
+ *  Records the current read position as the new mark and discards the
+ *  buffered data in front of it. This allows us to back up to this point
+ *  if the need should arise, such as when tokenization gets interrupted.
+ *
+ *  @update  gess 7/29/98
+ *  @return  distance between the old start of the buffer and the read
+ *           position; 0 when the scanner has no buffer
+ */
+int32_t nsScanner::Mark()
+{
+  if (!mSlidingBuffer) {
+    return 0;
+  }
+
+  nsScannerIterator bufferStart;
+  mSlidingBuffer->BeginReading(bufferStart);
+  const int32_t consumed = Distance(bufferStart, mCurrentPosition);
+  // Drop the consumed prefix, then restart reading at the new buffer start.
+  mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+  mSlidingBuffer->BeginReading(mCurrentPosition);
+  mMarkPosition = mCurrentPosition;
+  return consumed;
+}
+
+/** 
+ * Inserts aBuffer into the underlying input buffer at the current read
+ * position, as if it had been read from an input stream.
+ *
+ * @update  harishd 01/12/99
+ * @return  false if the scanner has no buffer yet, true otherwise
+ */
+bool nsScanner::UngetReadable(const nsAString& aBuffer)
+{
+  if (mSlidingBuffer) {
+    mSlidingBuffer->UngetReadable(aBuffer, mCurrentPosition);
+    // Insertion invalidated our iterators, so fetch fresh ones.
+    mSlidingBuffer->BeginReading(mCurrentPosition);
+    mSlidingBuffer->EndReading(mEndPosition);
+    return true;
+  }
+  return false;
+}
+
+/** 
+ * Appends a copy of aBuffer to the underlying input buffer, as if it had
+ * been read from an input stream.
+ *
+ * @update  gess4/3/98
+ * @return  NS_OK, or NS_ERROR_OUT_OF_MEMORY if the data could not be buffered
+ */
+nsresult nsScanner::Append(const nsAString& aBuffer)
+{
+  const bool appended = AppendToBuffer(aBuffer);
+  return appended ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
+}
+
+/**
+ *  Decodes aLen bytes at aBuffer with the current unicode decoder and appends
+ *  the resulting UTF-16 text to the input buffer.
+ *  @update  gess 5/21/98
+ *  @param   aBuffer the raw bytes to decode; aLen is their count
+ *  @return  NS_OK; NS_ERROR_FAILURE without a decoder; else the GetMaxLength or OOM error
+ */
+nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen)
+{
+  nsresult res = NS_OK;
+  if (mUnicodeDecoder) {
+    int32_t unicharBufLen = 0;
+    // The decoder reports how many char16_t the output may need at most.
+    nsresult rv = mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      return rv;
+    }
+
+    nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
+    NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
+    char16_t *unichars = buffer->DataStart();
+
+    int32_t totalChars = 0;
+    int32_t unicharLength = unicharBufLen;
+    // Each failed pass is patched with one 0xFFFF and decoding resumes after the offending byte.
+    do {
+      int32_t srcLength = aLen;
+      res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
+
+      totalChars += unicharLength;
+      // Continuation of failure case
+      if(NS_FAILED(res)) {
+        // if we failed, we consume one byte, replace it with the replacement
+        // character and try the conversion again.
+
+        // This is only needed because some decoders don't follow the
+        // nsIUnicodeDecoder contract: they return a failure when *aDestLength
+        // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
+        if ((unichars + unicharLength) >= buffer->DataEnd()) {
+          NS_ERROR("Unexpected end of destination buffer");
+          break;
+        }
+
+        // Since about:blank is empty, this line runs only for XML. Use a
+        // character that's illegal in XML instead of U+FFFD in order to make
+        // expat flag the error.
+        unichars[unicharLength++] = 0xFFFF;
+
+        unichars = unichars + unicharLength;
+        unicharLength = unicharBufLen - (++totalChars);
+
+        mUnicodeDecoder->Reset();
+
+        if(((uint32_t) (srcLength + 1)) > aLen) {
+          srcLength = aLen;
+        }
+        else {
+          ++srcLength;
+        }
+
+        aBuffer += srcLength;
+        aLen -= srcLength;
+      }
+    } while (NS_FAILED(res) && (aLen > 0));
+
+    buffer->SetDataLength(totalChars);
+    // Don't propagate return code of unicode decoder
+    // since it doesn't reflect on our success or failure
+    // - Ref. bug 87110
+    res = NS_OK; 
+    if (!AppendToBuffer(buffer))
+      res = NS_ERROR_OUT_OF_MEMORY;
+  }
+  else {
+    NS_WARNING("No decoder found.");
+    res = NS_ERROR_FAILURE;
+  }
+
+  return res;
+}
+
+/**
+ *  Reads the next character from the scanner's buffer and advances past it.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aChar receives the character, or 0 when nothing is left
+ *  @return  NS_OK on success, kEOF when no buffered data remains
+ */
+nsresult nsScanner::GetChar(char16_t& aChar)
+{
+  const bool exhausted = !mSlidingBuffer || mCurrentPosition == mEndPosition;
+  if (exhausted) {
+    aChar = 0;
+    return kEOF;
+  }
+  aChar = *mCurrentPosition++;
+  return NS_OK;
+}
+
+
+// Rebinds aSubstring to the part of the sliding buffer delimited by aStart and aEnd; the buffer must already exist.
+void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) {
+  aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
+}
+
+// Copies the position the scanner will read from next into aPosition.
+void nsScanner::CurrentPosition(nsScannerIterator& aPosition) {
+  aPosition = mCurrentPosition;
+}
+
+// Copies the current end of the scanner's buffered data into aPosition.
+void nsScanner::EndReading(nsScannerIterator& aPosition) {
+  aPosition = mEndPosition;
+}
+ 
+// Moves the read position to aPosition; with aTerminate, reaching the end also moves the mark and drops the prefix.
+void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate) {
+  if (!mSlidingBuffer)
+    return;
+  mCurrentPosition = aPosition;
+  if (aTerminate && (mCurrentPosition == mEndPosition)) {
+    mMarkPosition = mCurrentPosition;
+    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+  }
+}
+
+// Adds aBuf to the sliding buffer (creating it on first use) and refreshes the cached iterators.
+bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf)
+{
+  if (mSlidingBuffer) {
+    mSlidingBuffer->AppendBuffer(aBuf);
+    // If the read position sat at the old end, restart it at the buffer start.
+    if (mCurrentPosition == mEndPosition) {
+      mSlidingBuffer->BeginReading(mCurrentPosition);
+    }
+    mSlidingBuffer->EndReading(mEndPosition);
+    return true;
+  }
+  mSlidingBuffer = new nsScannerString(aBuf);
+  if (!mSlidingBuffer)
+    return false;
+  mSlidingBuffer->BeginReading(mCurrentPosition);
+  mMarkPosition = mCurrentPosition;
+  mSlidingBuffer->EndReading(mEndPosition);
+  return true;
+}
+
+/**
+ *  Copies the text that the tokenization process has not consumed yet,
+ *  i.e. everything between the read position and the end of the buffer.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   aCopyBuffer receives the copy; it is emptied if there is no buffer
+ *  @return  true if OK or false on OOM
+ */
+bool nsScanner::CopyUnusedData(nsString& aCopyBuffer)
+{
+  if (mSlidingBuffer) {
+    // Hand copies of the scanner's own iterators to CopyUnicodeTo.
+    nsScannerIterator unreadBegin = mCurrentPosition;
+    nsScannerIterator unreadEnd = mEndPosition;
+    return CopyUnicodeTo(unreadBegin, unreadEnd, aCopyBuffer);
+  }
+  // No buffer was ever created, so there is nothing left over.
+  aCopyBuffer.Truncate();
+  return true;
+}
+
+/**
+ *  Retrieve the name of the file that the scanner is reading from.
+ *  In some cases, it's just a given name, because the scanner isn't
+ *  really reading from a file.
+ *  
+ *  @update  gess 5/12/98
+ *  @return  a mutable reference to the stored name (mFilename)
+ */
+nsString& nsScanner::GetFilename(void) {
+  return mFilename;
+}
+
+/**
+ *  Placeholder self test: the body below is empty, even under _DEBUG.
+ *  Actually, selftesting for this class occurs in the parser selftest.
+ *  
+ *  @update  gess 3/25/98
+ *  Takes no parameters and returns nothing.
+ *  Note that the guard tests _DEBUG (with a leading underscore).
+ */
+
+void nsScanner::SelfTest(void) {
+#ifdef _DEBUG
+#endif
+}
diff --git a/components/htmlparser/src/nsScanner.h b/components/htmlparser/src/nsScanner.h
new file mode 100644
index 000000000..88edcf74e
--- /dev/null
+++ b/components/htmlparser/src/nsScanner.h
@@ -0,0 +1,190 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/1/98
+ * 
+ * The scanner is a low-level service class that knows
+ * how to consume characters out of an (internal) stream.
+ * This class also offers a series of utility methods
+ * that most tokenizers want, such as readUntil()
+ * and SkipWhitespace().
+ */
+
+
+#ifndef SCANNER
+#define SCANNER
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIParser.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
+
+class nsReadEndCondition {
+public:
+  const char16_t *mChars;  // the terminator string handed to the constructor (pointer stored, not copied)
+  char16_t mFilter;        // the bits that none of the terminator characters has set
+  explicit nsReadEndCondition(const char16_t* aTerminateChars);
+private:
+  nsReadEndCondition(const nsReadEndCondition& aOther); // No copying
+  void operator=(const nsReadEndCondition& aOther); // No assigning
+};
+
+class nsScanner {
+  public:
+
+      /**
+       *  Use this constructor for the XML fragment parsing case
+       */
+      explicit nsScanner(const nsAString& anHTMLString);
+
+      /**
+       *  Use this constructor if you want i/o to be based on 
+       *  a file (therefore a stream) or just data you provide via Append().
+       */
+      nsScanner(nsString& aFilename, bool aCreateStream);
+
+      ~nsScanner();
+
+      /**
+       *  retrieve next char from internal input stream
+       *  
+       *  @update  gess 3/25/98
+       *  @param   ch is the char to accept new value
+       *  @return  error code reflecting read status
+       */
+      nsresult GetChar(char16_t& ch);
+
+      /**
+       *  Records current offset position in input stream. This allows us
+       *  to back up to this point if the need should arise, such as when
+       *  tokenization gets interrupted.
+       *  
+       *  @update  gess 5/12/98
+       *  @return  distance between the buffer start and the marked
+       *           position; 0 when the scanner has no buffer
+       */
+      int32_t Mark(void);
+
+      /**
+       *  Resets current offset position of input stream to marked position. 
+       *  This allows us to back up to this point if the need should arise, 
+       *  such as when tokenization gets interrupted.
+       *  NOTE: IT IS REALLY BAD FORM TO CALL THIS WITHOUT CALLING MARK FIRST!
+       *  
+       *  @update  gess 5/12/98
+       *  Takes no parameters and returns nothing; it is a no-op while
+       *  the scanner has no buffer.
+       */
+      void RewindToMark(void);
+
+
+      /**
+       *  Inserts aBuffer into the input buffer at the current read
+       *  position, as if it had been read from an input stream.
+       *  @update  harishd 01/12/99
+       *  @param   aBuffer the text to insert
+       *  @return  false if the scanner has no buffer yet, true otherwise
+       */
+      bool UngetReadable(const nsAString& aBuffer);
+
+      /**
+       *  Appends a copy of aBuffer to the input buffer.
+       *  
+       *  @update  gess 5/13/98
+       *  @param   aBuffer the UTF-16 text to append
+       *  @return  NS_OK, or NS_ERROR_OUT_OF_MEMORY if buffering failed
+       */
+      nsresult Append(const nsAString& aBuffer);
+
+      /**
+       *  Decodes aLen bytes at aBuffer with the current unicode decoder
+       *  and appends the resulting text to the input buffer.
+       *  @update  gess 5/21/98
+       *  @param   aBuffer the raw bytes to decode; aLen is their count
+       *  @return  NS_OK, or an error (no decoder set, out of memory)
+       */
+      nsresult Append(const char* aBuffer, uint32_t aLen);
+
+      /**
+       *  Call this to copy bytes out of the scanner that have not yet been consumed
+       *  by the tokenization process.
+       *  
+       *  @update  gess 5/12/98
+       *  @param   aCopyBuffer is where the scanner buffer will be copied to
+       *  @return  true if OK or false on OOM
+       */
+      bool CopyUnusedData(nsString& aCopyBuffer);
+
+      /**
+       *  Retrieve the name of the file that the scanner is reading from.
+       *  In some cases, it's just a given name, because the scanner isn't
+       *  really reading from a file.
+       *  
+       *  @update  gess 5/12/98
+       *  @return  a mutable reference to the stored name
+       */
+      nsString& GetFilename(void);
+
+      static void SelfTest();
+
+      /**
+       *  Use this setter to change the scanner's unicode decoder
+       *
+       *  @update  ftang 3/02/99
+       *  @param   aCharset a normalized (alias resolved) charset name
+       *  @param   aSource where the charset info came from
+       *  @return  NS_OK
+       */
+      nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
+
+      void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
+      void CurrentPosition(nsScannerIterator& aPosition);
+      void EndReading(nsScannerIterator& aPosition);
+      void SetPosition(nsScannerIterator& aPosition,
+                       bool aTruncate = false);
+
+      /**
+       * Accessors for the incremental flag. The constructors set it to
+       * true for scanners fed through Append() and to false for the
+       * single-string constructor.
+       * @update  gess4/3/98
+       */
+      bool      IsIncremental(void) {return mIncremental;}
+      void      SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
+
+  protected:
+
+      bool AppendToBuffer(nsScannerString::Buffer* aBuffer);
+      bool AppendToBuffer(const nsAString& aStr)
+      {
+        nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
+        if (!buf)
+          return false;
+        // Report the real outcome of the append instead of assuming success.
+        return AppendToBuffer(buf);
+      }
+
+      nsScannerString*             mSlidingBuffer;
+      nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
+      nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
+      nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
+      nsString        mFilename;
+      bool            mIncremental;
+      int32_t         mCharsetSource;
+      nsCString       mCharset;
+      nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
+
+  private:
+      nsScanner &operator =(const nsScanner &); // Not implemented.
+};
+
+#endif
+
+
diff --git a/components/htmlparser/src/nsScannerString.cpp b/components/htmlparser/src/nsScannerString.cpp
new file mode 100644
index 000000000..53ac117f1
--- /dev/null
+++ b/components/htmlparser/src/nsScannerString.cpp
@@ -0,0 +1,650 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdlib.h>
+#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
+
+
+  /**
+   * nsScannerBufferList.  MAX_CAPACITY caps the character count accepted by
+   * AllocBuffer() so that its byte-size computation stays below UINT32_MAX.
+   */
+#define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
+                      (sizeof(Buffer) + sizeof(char16_t)))
+
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
+  {
+    // Allocate room for every character of aString, then copy them in.
+    // Returns nullptr when AllocBuffer() could not provide a buffer.
+    const uint32_t charCount = aString.Length();
+    Buffer* result = AllocBuffer(charCount);
+    if (!result)
+      return nullptr;
+    nsAString::const_iterator reader;
+    aString.BeginReading(reader);
+    nsCharTraits<char16_t>::copy(result->DataStart(), reader.get(), charCount);
+    return result;
+  }
+
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBuffer( uint32_t capacity )
+  {
+    // Returns a Buffer with room for |capacity| characters plus a terminator,
+    // or nullptr when |capacity| exceeds MAX_CAPACITY or malloc() fails.
+    if (capacity > MAX_CAPACITY)
+      return nullptr;
+    // The character data lives in the same malloc block, right after the header.
+    const size_t byteCount = sizeof(Buffer) + (capacity + 1) * sizeof(char16_t);
+    void* storage = malloc(byteCount);
+    if (!storage)
+      return nullptr;
+
+    Buffer* fresh = new (storage) Buffer();
+    fresh->mUsageCount = 0;
+    fresh->mDataEnd = fresh->DataStart() + capacity;
+    // XXX null terminate: nsScanner erroneously thinks it can dereference DataEnd
+    *fresh->mDataEnd = char16_t(0);
+    return fresh;
+  }
+
+void
+nsScannerBufferList::ReleaseAll()
+  {
+    // Unlink every buffer from the list and hand its malloc()ed storage
+    // back to the heap, front to back, until the list is empty.
+    for (; !mBuffers.isEmpty(); )
+      {
+        free(mBuffers.popFirst());
+      }
+  }
+
+void
+nsScannerBufferList::SplitBuffer( const Position& pos )
+  {
+    // Moves the characters from |pos| onward into a new buffer linked right
+    // behind pos.mBuffer, so existing references to pos.mBuffer stay valid.
+    // If the new buffer cannot be allocated, the list is left untouched.
+    Buffer* bufferToSplit = pos.mBuffer;
+    NS_ASSERTION(bufferToSplit, "null pointer");
+
+    uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
+    NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
+                 splitOffset <= bufferToSplit->DataLength(),
+                 "split offset is outside buffer");
+
+    const uint32_t tailLength = bufferToSplit->DataLength() - splitOffset;
+    Buffer* tail = AllocBuffer(tailLength);
+    if (!tail)
+      return;
+
+    nsCharTraits<char16_t>::copy(tail->DataStart(),
+                                 bufferToSplit->DataStart() + splitOffset,
+                                 tailLength);
+    InsertAfter(tail, bufferToSplit);
+    bufferToSplit->SetDataLength(splitOffset);
+  }
+
+void
+nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
+  {
+    // Frees leading unused buffers, but only if |aBuf| heads the list.
+    if (aBuf != Head())
+      return;
+    while (!mBuffers.isEmpty() && !Head()->IsInUse())
+      {
+        Buffer* front = Head();
+        front->remove();
+        free(front);
+      }
+  }
+
+size_t
+nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
+  {
+    // Number of characters from |aStart| up to |aEnd|.
+    // Same buffer: a plain pointer difference.
+    if (aStart.mBuffer == aEnd.mBuffer)
+      return aEnd.mPosition - aStart.mPosition;
+
+    // Otherwise: the rest of the first buffer, every whole buffer in
+    // between, and the part of the last buffer that precedes |aEnd|.
+    size_t total = aStart.mBuffer->DataEnd() - aStart.mPosition;
+    for (Buffer* cursor = aStart.mBuffer->Next(); cursor != aEnd.mBuffer;
+         cursor = cursor->Next())
+      total += cursor->DataLength();
+    total += aEnd.mPosition - aEnd.mBuffer->DataStart();
+    return total;
+  }
+
+
+/**
+ * nsScannerSubstring: the default constructor yields an unbound substring
+ * (null positions, no buffer list, zero length) whose cache is marked dirty.
+ */
+nsScannerSubstring::nsScannerSubstring()
+  : mStart(nullptr, nullptr)
+  , mEnd(nullptr, nullptr)
+  , mBufferList(nullptr)
+  , mLength(0)
+  , mIsDirty(true)
+  {
+  }
+
+nsScannerSubstring::nsScannerSubstring( const nsAString& s )
+  : mBufferList(nullptr)  // lets Rebind() skip releasing a previous list
+  , mIsDirty(true)
+  {
+    Rebind(s);            // allocates a private buffer list holding a copy of |s|
+  }
+
+nsScannerSubstring::~nsScannerSubstring()
+  {
+    release_ownership_of_buffer_list();  // does nothing when no list is bound
+  }
+
+int32_t
+nsScannerSubstring::CountChar( char16_t c ) const
+  {
+    // Walks the substring one fragment at a time and tallies how often |c|
+    // occurs.  (TODO from the original author: re-write this to use a
+    // counting sink.)
+    size_type matches = 0;
+    size_type remaining = Length();
+
+    nsScannerIterator cursor;
+    BeginReading(cursor);
+    while (true)
+      {
+        const char16_t* chunk = cursor.get();
+        int32_t chunkLength = cursor.size_forward();
+        matches += size_type(NS_COUNT(chunk, chunk + chunkLength, c));
+        remaining -= chunkLength;
+        if (remaining == 0)
+          return matches;
+        cursor.advance(chunkLength);
+      }
+    return 0;  // never reached; quiets warnings
+  }
+
+void
+nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
+                            const nsScannerIterator& aStart, 
+                            const nsScannerIterator& aEnd )
+  {
+    // Allow for the case where &aString == this: take the new reference
+    // before dropping the old one so a shared list is never freed in between.
+    aString.acquire_ownership_of_buffer_list();
+    release_ownership_of_buffer_list();
+    // Adopt aString's buffer list, restricted to the range [aStart, aEnd).
+    mStart      = aStart;
+    mEnd        = aEnd;
+    mBufferList = aString.mBufferList;
+    mLength     = Distance(aStart, aEnd);
+    mIsDirty    = true;
+  }
+
+void
+nsScannerSubstring::Rebind( const nsAString& aString )
+  {
+    release_ownership_of_buffer_list();
+    // NOTE(review): AllocBufferFromString() may return nullptr; not checked here.
+    mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
+    mIsDirty    = true;
+    // Span the whole new list, then register this object as one of its users.
+    init_range_from_buffer_list();
+    acquire_ownership_of_buffer_list();
+  }
+
+const nsSubstring&
+nsScannerSubstring::AsString() const
+  {
+    // Returns the substring as one contiguous string.  The flattened form is
+    // cached in mFlattenedRep and rebuilt only while mIsDirty is set.
+    if (!mIsDirty)
+      return mFlattenedRep;
+
+    nsScannerSubstring* self = const_cast<nsScannerSubstring*>(this);
+    if (mStart.mBuffer != mEnd.mBuffer)
+      {
+        // Several fragments: copy the data into a flattened buffer.
+        nsScannerIterator first, last;
+        BeginReading(first);
+        EndReading(last);
+        CopyUnicodeTo(first, last, self->mFlattenedRep);
+      }
+    else  // a single fragment: just expose it as a substring, no copy
+      self->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
+    self->mIsDirty = false;
+    return mFlattenedRep;
+  }
+
+nsScannerIterator&
+nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
+  {
+    // Aim |iter| at the first character of this substring.  Its fragment
+    // covers the first buffer, clipped at mEnd when that buffer is also the
+    // last one.
+    const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);
+    iter.mOwner = this;
+    iter.mPosition = mStart.mPosition;
+    iter.mFragment.mBuffer = mStart.mBuffer;
+    iter.mFragment.mFragmentStart = mStart.mPosition;
+    iter.mFragment.mFragmentEnd =
+      singleBuffer ? mEnd.mPosition : mStart.mBuffer->DataEnd();
+    iter.normalize_forward();
+    return iter;
+  }
+
+nsScannerIterator&
+nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
+  {
+    // Aim |iter| just past the last character of this substring.  Its
+    // fragment covers the last buffer, clipped at mStart when that buffer is
+    // also the first one.
+    const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);
+    iter.mOwner = this;
+    iter.mPosition = mEnd.mPosition;
+    iter.mFragment.mBuffer = mEnd.mBuffer;
+    iter.mFragment.mFragmentEnd = mEnd.mPosition;
+    iter.mFragment.mFragmentStart =
+      singleBuffer ? mStart.mPosition : mEnd.mBuffer->DataStart();
+    // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
+    return iter;
+  }
+
+bool
+nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
+  {
+    // Moves |frag| to the buffer that follows its current one and clips the
+    // slice to this substring's range.
+    // Returns false, leaving |frag| as it was, when |frag| is already in the
+    // substring's last buffer.
+    if (frag.mBuffer == mEnd.mBuffer)
+      return false;
+
+    const Buffer* next = frag.mBuffer->getNext();
+    frag.mBuffer = next;
+
+    // The first and last buffers may be only partially covered by the substring.
+    frag.mFragmentStart =
+      (next == mStart.mBuffer) ? mStart.mPosition : next->DataStart();
+    frag.mFragmentEnd =
+      (next == mEnd.mBuffer) ? mEnd.mPosition : next->DataEnd();
+
+    return true;
+  }
+
+bool
+nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
+  {
+    // Moves |frag| to the buffer that precedes its current one and clips the
+    // slice to this substring's range.
+    // Returns false, leaving |frag| as it was, when |frag| is already in the
+    // substring's first buffer.
+    if (frag.mBuffer == mStart.mBuffer)
+      return false;
+
+    const Buffer* previous = frag.mBuffer->getPrevious();
+    frag.mBuffer = previous;
+
+    // The first and last buffers may be only partially covered by the substring.
+    frag.mFragmentStart =
+      (previous == mStart.mBuffer) ? mStart.mPosition : previous->DataStart();
+    frag.mFragmentEnd =
+      (previous == mEnd.mBuffer) ? mEnd.mPosition : previous->DataEnd();
+
+    return true;
+  }
+
+
+  /**
+   * nsScannerString
+   */
+
+nsScannerString::nsScannerString( Buffer* aBuf )
+  {
+    mBufferList = new nsScannerBufferList(aBuf);
+    // Cover all of |aBuf|, then take a reference on the list and its head.
+    init_range_from_buffer_list();
+    acquire_ownership_of_buffer_list();
+  }
+
+void
+nsScannerString::AppendBuffer( Buffer* aBuf )
+  {
+    // Takes ownership of |aBuf|: it becomes the new last buffer of the list
+    // and the string grows by its character count.
+    const size_type addedLength = aBuf->DataLength();
+    mBufferList->Append(aBuf);
+    mEnd = Position(aBuf, aBuf->DataEnd());
+    mLength += addedLength;
+    mIsDirty = true;  // the flattened copy cached by AsString() is now stale
+  }
+
+void
+nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
+  {
+    // Drops everything before |aIter| from the string.
+    Position old_start(mStart);
+    mStart = aIter;
+    mLength -= Position::Distance(old_start, mStart);
+    // Pin the new first buffer before unpinning the old one (they may coincide).
+    mStart.mBuffer->IncrementUsageCount();
+    old_start.mBuffer->DecrementUsageCount();
+    // Free leading buffers that no longer have any users.
+    mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
+    mIsDirty = true;
+  }
+
+void
+nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
+    /*
+     * Inserts a copy of |aReadable| into the string at |aInsertPoint|.
+     *
+     * Warning: this routine manipulates the shared buffer list in an unexpected way.
+     *  The original design did not really allow for insertions, but this call promises
+     *  that if called for a point after the end of all extant token strings, that no token string
+     *  or the work string will be invalidated.
+     *
+     * If no buffer can be allocated for |aReadable|, nothing is inserted.
+     */
+  {
+    Position insertPos(aInsertPoint);
+    mBufferList->SplitBuffer(insertPos);
+      // splitting to the right keeps the work string and any extant token pointing to and
+      //  holding a reference count on the same buffer
+
+    Buffer* new_buffer = AllocBufferFromString(aReadable);
+      // a new buffer with all the data to insert (TODO: measure re-using free space in the split buffer)
+    if (new_buffer)
+      {
+        mBufferList->InsertAfter(new_buffer, insertPos.mBuffer);
+        mLength += aReadable.Length();
+      }
+
+    // the split (and the insertion, if any) may have changed the list's last buffer
+    mEnd.mBuffer = mBufferList->Tail();
+    mEnd.mPosition = mEnd.mBuffer->DataEnd();
+    mIsDirty = true;
+  }
+
+  /**
+   * nsScannerSharedSubstring
+   */
+
+void
+nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
+                              const nsScannerIterator &aEnd)
+{
+  // Points this object at the range [aStart, aEnd).
+  // If the start and end positions are inside the same buffer, we must
+  // acquire ownership of the buffer.  If not, we can optimize by not holding
+  // onto it.
+  Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
+  bool sameBuffer = buffer == aEnd.buffer();
+
+  nsScannerBufferList *bufferList;  // only assigned and read when sameBuffer
+
+  if (sameBuffer) {
+    bufferList = aStart.mOwner->mBufferList;
+    bufferList->AddRef();
+    buffer->IncrementUsageCount();
+  }
+  // Drop the previous buffer only after the new one has been acquired above.
+  if (mBufferList)
+    ReleaseBuffer();
+
+  if (sameBuffer) {
+    mBuffer = buffer;
+    mBufferList = bufferList;
+    mString.Rebind(aStart.mPosition, aEnd.mPosition);  // points into the scanner buffer
+  } else {
+    mBuffer = nullptr;
+    mBufferList = nullptr;
+    CopyUnicodeTo(aStart, aEnd, mString);  // range spans buffers: keep a copy
+  }
+}
+
+void
+nsScannerSharedSubstring::ReleaseBuffer()
+{
+  NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
+  mBuffer->DecrementUsageCount();                   // give up our use of the buffer
+  mBufferList->DiscardUnreferencedPrefix(mBuffer);  // let the list prune unused leading buffers
+  mBufferList->Release();                           // may delete the list itself
+}
+
+void
+nsScannerSharedSubstring::MakeMutable()
+{
+  nsString temp(mString); // this will force a copy of the data
+  mString.Assign(temp);   // mString will now share the just-allocated buffer
+  // The scanner buffer is no longer needed once mString has its own data.
+  ReleaseBuffer();
+  // Mark this object as detached; writable() and the destructor test mBufferList.
+  mBuffer = nullptr;
+  mBufferList = nullptr;
+}
+
+  /**
+   * utils -- based on code from nsReadableUtils.cpp
+   */
+
+// private helper function: copies the characters in [first, last) to |result|
+static inline
+nsAString::iterator&
+copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
+  {
+    typedef nsCharSourceTraits<nsScannerIterator> source_traits;
+    typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
+    // Copy one run per pass; |first| is advanced until it reaches |last|.
+    while ( first != last )
+      {
+        uint32_t distance = source_traits::readable_distance(first, last);
+        sink_traits::write(result, source_traits::read(first), distance);
+        NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
+        source_traits::advance(first, distance);
+      }
+    // NOTE(review): |result| is presumably advanced by write() -- confirm in the sink traits.
+    return result;
+  }
+
+bool
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+               const nsScannerIterator& aSrcEnd,
+               nsAString& aDest )
+  {
+    // Replaces the contents of |aDest| with the characters in
+    // [aSrcStart, aSrcEnd).  Returns false on length overflow (|aDest| is
+    // not touched) or when resizing fails (|aDest| is truncated).
+    mozilla::CheckedInt<nsAString::size_type> charCount(Distance(aSrcStart, aSrcEnd));
+    if (!charCount.isValid())
+      return false; // overflow detected
+    if (!aDest.SetLength(charCount.value(), mozilla::fallible))
+      {
+        aDest.Truncate();
+        return false; // out of memory
+      }
+    nsAString::iterator sink;
+    aDest.BeginWriting(sink);
+    nsScannerIterator source(aSrcStart);
+    copy_multifragment_string(source, aSrcEnd, sink);
+    return true;
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsScannerSharedSubstring& aDest )
+  {
+    // Appends the characters in [aSrcStart, aSrcEnd) to |aDest|.
+    if (!aDest.str().IsEmpty())
+      // The dest string is not empty, so it can't be a dependent substring:
+      // append a copy to its writable form instead.
+      return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
+    // Nothing stored yet, so just make |aDest| point to the buffer.
+    // This will take care of copying if the buffer spans fragments.
+    aDest.Rebind(aSrcStart, aSrcEnd);
+    return true;
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsAString& aDest )
+  {
+    // Grows |aDest| by the characters in [aSrcStart, aSrcEnd) and copies them
+    // in behind its current contents.  Returns false when the combined
+    // length overflows or the resize fails.
+    const nsAString::size_type existing = aDest.Length();
+    mozilla::CheckedInt<nsAString::size_type> combined(Distance(aSrcStart, aSrcEnd));
+    combined += existing;
+    if (!combined.isValid())
+      return false; // overflow detected
+    if (!aDest.SetLength(combined.value(), mozilla::fallible))
+      return false; // out of memory
+    nsAString::iterator sink;
+    aDest.BeginWriting(sink).advance(existing);
+    nsScannerIterator source(aSrcStart);
+    copy_multifragment_string(source, aSrcEnd, sink);
+    return true;
+  }
+
+bool
+FindCharInReadable( char16_t aChar,
+                    nsScannerIterator& aSearchStart,
+                    const nsScannerIterator& aSearchEnd )
+  {
+    // Scans [aSearchStart, aSearchEnd) for |aChar| one fragment at a time.
+    // On success |aSearchStart| is left on the match and true is returned;
+    // otherwise |aSearchStart| ends up equal to |aSearchEnd|.
+    while ( aSearchStart != aSearchEnd )
+      {
+        const char16_t* chunk = aSearchStart.get();
+        // Stop at |aSearchEnd| when it lies in the fragment being scanned.
+        const int32_t chunkLength = SameFragment(aSearchStart, aSearchEnd)
+                                  ? int32_t(aSearchEnd.get() - chunk)
+                                  : int32_t(aSearchStart.size_forward());
+
+        const char16_t* hit = nsCharTraits<char16_t>::find(chunk, chunkLength, aChar);
+        const bool found = (hit != nullptr);
+        aSearchStart.advance(found ? hit - chunk : chunkLength);
+        if (found)
+          return true;
+      }
+    return false;
+  }
+
+bool
+FindInReadable( const nsAString& aPattern,
+                nsScannerIterator& aSearchStart,
+                nsScannerIterator& aSearchEnd,
+                const nsStringComparator& compare )
+  {
+    // Searches [aSearchStart, aSearchEnd) for the first occurrence of aPattern;
+    // |compare| is treated as returning non-zero for a mismatch.  On success
+    // both iterators are narrowed to the match; on failure aSearchStart is
+    // left equal to aSearchEnd.  NOTE(review): an empty aPattern is not special-cased.
+    bool found_it = false;
+      // only bother searching at all if we're given a non-empty range to search
+    if ( aSearchStart != aSearchEnd )
+      {
+        nsAString::const_iterator aPatternStart, aPatternEnd;
+        aPattern.BeginReading(aPatternStart);
+        aPattern.EndReading(aPatternEnd);
+          // outer loop keeps searching till we find it or run out of string to search
+        while ( !found_it )
+          {
+              // fast inner loop (that's what it's called, not what it is) looks for a potential match
+            while ( aSearchStart != aSearchEnd &&
+                    compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
+              ++aSearchStart;
+
+              // if we broke out of the `fast' loop because we're out of string ... we're done: no match
+            if ( aSearchStart == aSearchEnd )
+              break;
+
+              // otherwise, we're at a potential match, let's see if we really hit one
+            nsAString::const_iterator testPattern(aPatternStart);
+            nsScannerIterator testSearch(aSearchStart);
+              // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
+            for(;;)
+              {
+                  // we already compared the first character in the outer loop,
+                  //  so we'll advance before the next comparison
+                ++testPattern;
+                ++testSearch;
+
+                  // if we verified all the way to the end of the pattern, then we found it!
+                if ( testPattern == aPatternEnd )
+                  {
+                    found_it = true;
+                    aSearchEnd = testSearch; // return the exact found range through the parameters
+                    break;
+                  }
+
+                  // if we got to end of the string we're searching before we hit the end of the
+                  //  pattern, we'll never find what we're looking for
+                if ( testSearch == aSearchEnd )
+                  {
+                    aSearchStart = aSearchEnd;
+                    break;
+                  }
+
+                  // else if we mismatched ... it's time to advance to the next search position
+                  //  and get back into the `fast' loop
+                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
+                  {
+                    ++aSearchStart;
+                    break;
+                  }
+              }
+          }
+      }
+    return found_it;
+  }
+
+  /**
+   * This implementation is simple, but does too much work.
+   * It searches the entire string from left to right, and returns the last match found, if any.
+   * This implementation will be replaced when I get |reverse_iterator|s working.
+   */
+bool
+RFindInReadable( const nsAString& aPattern,
+                 nsScannerIterator& aSearchStart,
+                 nsScannerIterator& aSearchEnd,
+                 const nsStringComparator& aComparator )
+  {
+    // Finds the last occurrence of |aPattern| in [aSearchStart, aSearchEnd)
+    // by repeating forward searches.  On success the two iterators delimit
+    // that occurrence; on failure they delimit an empty range.
+    const nsScannerIterator rangeEnd(aSearchEnd);
+    nsScannerIterator probeStart(aSearchStart);
+    nsScannerIterator probeEnd(aSearchEnd);
+    bool matched = false;
+
+    while ( probeStart != probeEnd )
+      {
+        if ( !FindInReadable(aPattern, probeStart, probeEnd, aComparator) )
+          continue; // nothing (more) found; the loop condition decides what happens next
+
+        // this is the best match so far, so remember it
+        matched = true;
+        aSearchStart = probeStart;
+        aSearchEnd = probeEnd;
+
+        // ...and get ready to search some more, starting one character later
+        //  (it's tempting to resume at |probeEnd| ... but that misses overlapping patterns)
+        ++probeStart;
+        probeEnd = rangeEnd;
+      }
+    // if we never found it, return an empty range
+    if ( !matched )
+      aSearchStart = aSearchEnd;
+    return matched;
+  }
diff --git a/components/htmlparser/src/nsScannerString.h b/components/htmlparser/src/nsScannerString.h
new file mode 100644
index 000000000..247c04c04
--- /dev/null
+++ b/components/htmlparser/src/nsScannerString.h
@@ -0,0 +1,604 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsScannerString_h___
+#define nsScannerString_h___
+
+#include "nsString.h"
+#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
+#include "mozilla/LinkedList.h"
+#include <algorithm>
+
+
+  /**
+   * NOTE: nsScannerString (and the other classes defined in this file) are
+   * not related to nsAString or any of the other xpcom/string classes.
+   *
+   * nsScannerString is based on the nsSlidingString implementation that used
+   * to live in xpcom/string.  Now that nsAString is limited to representing
+   * only single fragment strings, nsSlidingString can no longer be used.
+   *
+   * An advantage to this design is that it does not employ any virtual
+   * functions.
+   *
+   * This file uses SCC-style indenting in deference to the nsSlidingString
+   * code from which this code is derived ;-)
+   */
+
+class nsScannerIterator;
+class nsScannerSubstring;
+class nsScannerString;
+
+
+  /**
+   * nsScannerBufferList
+   *
+   * This class maintains a list of heap-allocated Buffer objects.  The buffers
+   * are maintained in a circular linked list.  Each buffer has a usage count
+   * that is decremented by the owning nsScannerSubstring.
+   *
+   * The buffer list itself is reference counted.  This allows the buffer list
+   * to be shared by multiple nsScannerSubstring objects.  The reference
+   * counting is not threadsafe, which is not at all a requirement.
+   *
+   * When a nsScannerSubstring releases its reference to a buffer list, it
+   * decrements the usage count of the first buffer in the buffer list that it
+   * was referencing.  It informs the buffer list that it can discard buffers
+   * starting at that prefix.  The buffer list will do so if the usage count of
+   * that buffer is 0 and if it is the first buffer in the list.  It will
+   * continue to prune buffers starting from the front of the buffer list until
+   * it finds a buffer that has a usage count that is non-zero.
+   */
+class nsScannerBufferList
+  {
+    public:
+
+        /**
+         * Buffer objects are directly followed by a data segment.  The start
+         * of the data segment is determined by incrementing the |this| pointer
+         * by 1 unit.
+         */
+      class Buffer : public mozilla::LinkedListElement<Buffer>
+        {
+          public:
+            // mUsageCount bookkeeping; a buffer with a zero count may be discarded.
+            void IncrementUsageCount() { ++mUsageCount; }
+            void DecrementUsageCount() { --mUsageCount; }
+
+            bool IsInUse() const { return mUsageCount != 0; }
+            // First character; the data sits immediately behind this header.
+            const char16_t* DataStart() const { return (const char16_t*) (this+1); }
+                  char16_t* DataStart()       { return (      char16_t*) (this+1); }
+            // One past the last character.
+            const char16_t* DataEnd() const { return mDataEnd; }
+                  char16_t* DataEnd()       { return mDataEnd; }
+            // Neighbouring buffers in the owning list.
+            const Buffer* Next() const { return getNext(); }
+                  Buffer* Next()       { return getNext(); }
+
+            const Buffer* Prev() const { return getPrevious(); }
+                  Buffer* Prev()       { return getPrevious(); }
+            // Character count; SetDataLength() only moves the end marker.
+            uint32_t DataLength() const { return mDataEnd - DataStart(); }
+            void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
+
+          private:
+
+            friend class nsScannerBufferList;
+
+            int32_t    mUsageCount;
+            char16_t* mDataEnd;
+        };
+
+        /**
+         * Position objects serve as lightweight pointers into a buffer list.
+         * The mPosition member must lie between mBuffer->DataStart()
+         * and mBuffer->DataEnd().
+         */
+      class Position
+        {
+          public:
+            // Leaves both members uninitialized.
+            Position() {}
+
+            Position( Buffer* buffer, char16_t* position )
+              : mBuffer(buffer)
+              , mPosition(position)
+              {}
+            // Build or assign a position from an iterator.
+            inline
+            explicit Position( const nsScannerIterator& aIter );
+
+            inline
+            Position& operator=( const nsScannerIterator& aIter );
+            // Number of characters from p1 to p2 within one buffer list.
+            static size_t Distance( const Position& p1, const Position& p2 );
+
+            Buffer*    mBuffer;
+            char16_t* mPosition;
+        };
+      // Both return nullptr when no buffer could be allocated.
+      static Buffer* AllocBufferFromString( const nsAString& );
+      static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
+      // Starts with a reference count of zero and |buf| as the only buffer.
+      explicit nsScannerBufferList( Buffer* buf )
+        : mRefCnt(0)
+        {
+          mBuffers.insertBack(buf);
+        }
+      // Plain (non-atomic) reference counting; the last Release() deletes the list.
+      void  AddRef()  { ++mRefCnt; }
+      void  Release() { if (--mRefCnt == 0) delete this; }
+
+      void  Append( Buffer* buf ) { mBuffers.insertBack(buf); }
+      void  InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
+      void  SplitBuffer( const Position& );
+      void  DiscardUnreferencedPrefix( Buffer* );
+
+            Buffer* Head()       { return mBuffers.getFirst(); }
+      const Buffer* Head() const { return mBuffers.getFirst(); }
+
+            Buffer* Tail()       { return mBuffers.getLast(); }
+      const Buffer* Tail() const { return mBuffers.getLast(); }
+
+    private:
+
+      friend class nsScannerSubstring;
+      // Private: within this class, destruction only happens through Release().
+      ~nsScannerBufferList() { ReleaseAll(); }
+      void ReleaseAll();
+
+      int32_t mRefCnt;
+      mozilla::LinkedList<Buffer> mBuffers;
+  };
+
+
+  /**
+   * nsScannerFragment represents a "slice" of a Buffer object.
+   */
+struct nsScannerFragment
+  {
+    typedef nsScannerBufferList::Buffer Buffer;
+
+    const Buffer*    mBuffer;         // buffer this slice lies in
+    const char16_t* mFragmentStart;  // first character of the slice
+    const char16_t* mFragmentEnd;    // one past the last character of the slice
+  };
+
+
+  /**
+   * nsScannerSubstring is the base class for nsScannerString.  It provides
+   * access to iterators and methods to bind the substring to another
+   * substring or nsAString instance.
+   *
+   * This class owns the buffer list.
+   */
+class nsScannerSubstring
+  {
+    public:
+      typedef nsScannerBufferList::Buffer      Buffer;
+      typedef nsScannerBufferList::Position    Position;
+      typedef uint32_t                         size_type;
+
+      nsScannerSubstring();
+      explicit nsScannerSubstring( const nsAString& s );
+
+      ~nsScannerSubstring();
+      // Set |iter| to the first / one-past-the-last character; both return |iter|.
+      nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
+      nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
+
+      size_type Length() const { return mLength; }
+      // Number of occurrences of the given character.
+      int32_t CountChar( char16_t ) const;
+      // Re-point this substring at a range of another one, or at a copy of a string.
+      void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
+      void Rebind( const nsAString& );
+      // Contiguous view of the contents; rebuilt only while mIsDirty is set.
+      const nsSubstring& AsString() const;
+      // Step the fragment to the adjacent buffer; false when it is already at that end.
+      bool GetNextFragment( nsScannerFragment& ) const;
+      bool GetPrevFragment( nsScannerFragment& ) const;
+
+      static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
+      static inline Buffer* AllocBuffer( size_type aCapacity )             { return nsScannerBufferList::AllocBuffer(aCapacity); }
+
+    protected:
+      // Adds a reference to the list and a use to the buffer holding mStart.
+      void acquire_ownership_of_buffer_list() const
+        {
+          mBufferList->AddRef();
+          mStart.mBuffer->IncrementUsageCount();
+        }
+      // Undoes acquire_ownership_of_buffer_list(); does nothing without a list.
+      void release_ownership_of_buffer_list()
+        {
+          if (mBufferList)
+            {
+              mStart.mBuffer->DecrementUsageCount();
+              mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
+              mBufferList->Release();
+            }
+        }
+      // Makes the range span every buffer currently in mBufferList.
+      void init_range_from_buffer_list()
+        {
+          mStart.mBuffer = mBufferList->Head();
+          mStart.mPosition = mStart.mBuffer->DataStart();
+
+          mEnd.mBuffer = mBufferList->Tail();
+          mEnd.mPosition = mEnd.mBuffer->DataEnd();
+
+          mLength = Position::Distance(mStart, mEnd);
+        }
+
+      Position             mStart;
+      Position             mEnd;
+      nsScannerBufferList *mBufferList;
+      size_type            mLength;
+
+      // these fields are used to implement AsString
+      nsDependentSubstring mFlattenedRep;
+      bool                 mIsDirty;
+
+      friend class nsScannerSharedSubstring;
+  };
+
+
+  /**
+   * nsScannerString provides methods to grow and modify a buffer list.
+   */
+class nsScannerString : public nsScannerSubstring
+  {
+    public:
+      // Creates a string whose buffer list initially holds only the given buffer.
+      explicit nsScannerString( Buffer* );
+
+        // you are giving ownership to the string, it takes and keeps your
+        // buffer, deleting it when done.
+        // Use AllocBuffer or AllocBufferFromString to create a Buffer object
+        // for use with this function.
+      void AppendBuffer( Buffer* );
+      // Removes the part of the string that precedes the given iterator.
+      void DiscardPrefix( const nsScannerIterator& );
+        // any other way you want to do this?
+      // Inserts a copy of aReadable at aCurrentPosition.
+      void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
+  };
+
+
+  /**
+   * nsScannerSharedSubstring implements copy-on-write semantics for
+   * nsScannerSubstring.  When you call .writable(), it will copy the data
+   * and return a mutable string object.  This class also manages releasing
+   * the reference to the scanner buffer when it is no longer needed.
+   */
+
+class nsScannerSharedSubstring
+  {
+    public:
+      nsScannerSharedSubstring()
+        : mBuffer(nullptr), mBufferList(nullptr) { }
+      // Drops the scanner buffer reference, if one is still held.
+      ~nsScannerSharedSubstring()
+        {
+          if (mBufferList)
+            ReleaseBuffer();
+        }
+
+        // Acquire a copy-on-write reference to the given substring.
+      void Rebind(const nsScannerIterator& aStart,
+                              const nsScannerIterator& aEnd);
+
+       // Get a mutable reference to this string
+      nsSubstring& writable()
+        {
+          if (mBufferList)
+            MakeMutable();
+
+          return mString;
+        }
+
+        // Get a const reference to this string
+      const nsSubstring& str() const { return mString; }
+
+    private:
+      typedef nsScannerBufferList::Buffer Buffer;
+      // Both helpers require a non-null mBufferList.
+      void ReleaseBuffer();
+      void MakeMutable();
+      // mBuffer/mBufferList are non-null only while mString points into mBuffer.
+      nsDependentSubstring  mString;
+      Buffer               *mBuffer;
+      nsScannerBufferList  *mBufferList;
+  };
+
+  /**
+   * nsScannerIterator works just like nsReadingIterator<CharT> except that
+   * it knows how to iterate over a list of scanner buffers.
+   */
+class nsScannerIterator
+  {
+    public:
+      typedef nsScannerIterator             self_type;
+      typedef ptrdiff_t                     difference_type;
+      typedef char16_t                     value_type;
+      typedef const char16_t*              pointer;
+      typedef const char16_t&              reference;
+      typedef nsScannerSubstring::Buffer    Buffer;
+
+    protected:
+      // Fragment being traversed, position inside it, and the substring iterated.
+      nsScannerFragment         mFragment;
+      const char16_t*          mPosition;
+      const nsScannerSubstring* mOwner;
+
+      friend class nsScannerSubstring;
+      friend class nsScannerSharedSubstring;
+
+    public:
+      // nsScannerIterator();                                       // auto-generate default constructor is OK
+      // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
+      // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
+
+      inline void normalize_forward();
+      inline void normalize_backward();
+      // Raw pointer to the current character.
+      pointer get() const
+        {
+          return mPosition;
+        }
+
+      char16_t operator*() const
+        {
+          return *get();
+        }
+
+      const nsScannerFragment& fragment() const
+        {
+          return mFragment;
+        }
+
+      const Buffer* buffer() const
+        {
+          return mFragment.mBuffer;
+        }
+      // Increment: step one character forward, then normalize_forward().
+      self_type& operator++()
+        {
+          ++mPosition;
+          normalize_forward();
+          return *this;
+        }
+
+      self_type operator++( int )
+        {
+          self_type result(*this);
+          ++mPosition;
+          normalize_forward();
+          return result;
+        }
+      // Decrement: normalize_backward() first, then step one character back.
+      self_type& operator--()
+        {
+          normalize_backward();
+          --mPosition;
+          return *this;
+        }
+
+      self_type operator--( int )
+        {
+          self_type result(*this);
+          normalize_backward();
+          --mPosition;
+          return result;
+        }
+      // Characters left before the end of the current fragment.
+      difference_type size_forward() const
+        {
+          return mFragment.mFragmentEnd - mPosition;
+        }
+      // Characters between the start of the current fragment and the position.
+      difference_type size_backward() const
+        {
+          return mPosition - mFragment.mFragmentStart;
+        }
+      // Moves by |n| characters (forward if positive, backward if negative), at most one fragment per hop.
+      self_type& advance( difference_type n )
+        {
+          while ( n > 0 )
+            {
+              difference_type one_hop = std::min(n, size_forward());
+
+              NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
+                // perhaps I should |break| if |!one_hop|?
+
+              mPosition += one_hop;
+              normalize_forward();
+              n -= one_hop;
+            }
+
+          while ( n < 0 )
+            {
+              normalize_backward();
+              difference_type one_hop = std::max(n, -size_backward());
+
+              NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
+                // perhaps I should |break| if |!one_hop|?
+
+              mPosition += one_hop;
+              n -= one_hop;
+            }
+
+          return *this;
+        }
+  };
+
+
+inline bool SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
+  {
+    const nsScannerFragment& fragA = a.fragment();
+    const nsScannerFragment& fragB = b.fragment();
+    return fragA.mFragmentStart == fragB.mFragmentStart;  // same start => same fragment
+  }
+
+
+  /**
+   * Specialization of nsCharSourceTraits for nsScannerIterator; this class is
+   * needed in order to make use of the methods in nsAlgorithm.h
+   */
+template <>
+struct nsCharSourceTraits<nsScannerIterator>
+  {
+    typedef nsScannerIterator::difference_type difference_type;
+
+    // Length of the run readable at |first| without leaving its fragment:
+    // the distance to |last| when both are in the same fragment, otherwise
+    // the distance to the end of |first|'s fragment.
+    static uint32_t
+    readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
+      {
+        if (SameFragment(first, last))
+          return uint32_t(last.get() - first.get());
+
+        return uint32_t(first.size_forward());
+      }
+
+    // Pointer to the character |iter| is positioned on.
+    static const nsScannerIterator::value_type*
+    read( const nsScannerIterator& iter ) { return iter.get(); }
+
+    // Moves |s| by |n| characters via nsScannerIterator::advance.
+    static void
+    advance( nsScannerIterator& s, difference_type n ) { s.advance(n); }
+  };
+
+
+  /**
+   * inline methods follow
+   */
+
+// Runs while mPosition equals the current fragment's end and the owner's
+// GetNextFragment() call succeeds: mPosition moves to the new fragment start.
+inline void nsScannerIterator::normalize_forward()
+  {
+    while (mFragment.mFragmentEnd == mPosition && mOwner->GetNextFragment(mFragment))
+      mPosition = mFragment.mFragmentStart;
+  }
+
+// Runs while mPosition equals the current fragment's start and the owner's
+// GetPrevFragment() call succeeds: mPosition moves to the new fragment end.
+inline void nsScannerIterator::normalize_backward()
+  {
+    while (mFragment.mFragmentStart == mPosition && mOwner->GetPrevFragment(mFragment))
+      mPosition = mFragment.mFragmentEnd;
+  }
+
+// Equality is decided by the character pointers alone.
+inline bool
+operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+  {
+    return rhs.get() == lhs.get();
+  }
+
+// Inequality is decided by the character pointers alone.
+inline bool
+operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+  {
+    return !(lhs.get() == rhs.get());
+  }
+
+
+// Captures the iterator's buffer and character pointer, casting away const.
+inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
+  : mBuffer(const_cast<Buffer*>(aIter.buffer())),
+    mPosition(const_cast<char16_t*>(aIter.get()))
+  {}
+
+// Re-points this Position at the buffer and character |aIter| refers to.
+inline nsScannerBufferList::Position&
+nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
+  {
+    mPosition = const_cast<char16_t*>(aIter.get());
+    mBuffer = const_cast<Buffer*>(aIter.buffer());
+    return *this;
+  }
+
+
+  /**
+   * scanner string utils
+   *
+   * These methods mimic the API provided by nsReadableUtils in xpcom/string.
+   * Here we provide only the methods that the htmlparser module needs.
+   */
+
+// Distance from aStart to aEnd, delegated to nsScannerBufferList::Position.
+inline size_t Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
+  {
+    const nsScannerBufferList::Position from(aStart);
+    const nsScannerBufferList::Position to(aEnd);
+    return nsScannerBufferList::Position::Distance(from, to);
+  }
+
+bool
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+               const nsScannerIterator& aSrcEnd,
+               nsAString& aDest );
+
+// Passes aSrc's BeginReading()/EndReading() iterators to the range overload.
+inline bool
+CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+  {
+    nsScannerIterator first, last;
+    return CopyUnicodeTo(aSrc.BeginReading(first), aSrc.EndReading(last), aDest);
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsAString& aDest );
+
+// Passes aSrc's BeginReading()/EndReading() iterators to the range overload.
+inline bool
+AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+  {
+    nsScannerIterator first, last;
+    return AppendUnicodeTo(aSrc.BeginReading(first), aSrc.EndReading(last), aDest);
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsScannerSharedSubstring& aDest );
+
+bool
+FindCharInReadable( char16_t aChar,
+                    nsScannerIterator& aStart,
+                    const nsScannerIterator& aEnd );
+
+bool
+FindInReadable( const nsAString& aPattern,
+                nsScannerIterator& aStart,
+                nsScannerIterator& aEnd,
+                const nsStringComparator& = nsDefaultStringComparator() );
+
+bool
+RFindInReadable( const nsAString& aPattern,
+                 nsScannerIterator& aStart,
+                 nsScannerIterator& aEnd,
+                 const nsStringComparator& = nsDefaultStringComparator() );
+
+// FindInReadable with an nsCaseInsensitiveStringComparator in place of the
+// default comparator; aStart/aEnd are passed through by reference.
+inline bool
+CaseInsensitiveFindInReadable( const nsAString& aPattern,
+                               nsScannerIterator& aStart,
+                               nsScannerIterator& aEnd )
+  {
+    return FindInReadable(aPattern, aStart, aEnd, nsCaseInsensitiveStringComparator());
+  }
+
+#endif // !defined(nsScannerString_h___)
diff --git a/components/htmlparser/src/nsToken.h b/components/htmlparser/src/nsToken.h
new file mode 100644
index 000000000..6221aca57
--- /dev/null
+++ b/components/htmlparser/src/nsToken.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTOKEN__
+#define CTOKEN__
+
+// Token type identifiers; enumerators take consecutive values starting at 0.
+enum eHTMLTokenTypes {
+  eToken_unknown = 0, eToken_start = 1, eToken_end, eToken_comment, eToken_entity,
+  eToken_whitespace, eToken_newline, eToken_text, eToken_attribute, eToken_instruction,
+  eToken_cdatasection, eToken_doctypeDecl, eToken_markupDecl,
+  eToken_last  // sentinel: make sure this stays the last token
+};
+
+#endif
+
+
diff --git a/components/moz.build b/components/moz.build
index b854260ef..22bfd42b7 100644
--- a/components/moz.build
+++ b/components/moz.build
@@ -32,6 +32,7 @@ DIRS += [
     'finalizationwitness',
     'formautofill',
     'find',
+    'htmlparser',
     'gfx',
     'global',
     'handling',