summaryrefslogtreecommitdiff
path: root/components
diff options
context:
space:
mode:
Diffstat (limited to 'components')
-rw-r--r--components/htmlparser/moz.build49
-rw-r--r--components/htmlparser/public/nsIExpatSink.idl109
-rw-r--r--components/htmlparser/public/nsIExtendedExpatSink.idl72
-rw-r--r--components/htmlparser/src/CNavDTD.cpp90
-rw-r--r--components/htmlparser/src/CNavDTD.h35
-rw-r--r--components/htmlparser/src/CParserContext.cpp85
-rw-r--r--components/htmlparser/src/CParserContext.h70
-rw-r--r--components/htmlparser/src/nsElementTable.cpp210
-rw-r--r--components/htmlparser/src/nsElementTable.h21
-rw-r--r--components/htmlparser/src/nsExpatDriver.cpp1412
-rw-r--r--components/htmlparser/src/nsExpatDriver.h145
-rw-r--r--components/htmlparser/src/nsHTMLEntities.cpp205
-rw-r--r--components/htmlparser/src/nsHTMLEntities.h35
-rw-r--r--components/htmlparser/src/nsHTMLEntityList.h303
-rw-r--r--components/htmlparser/src/nsHTMLTagList.h197
-rw-r--r--components/htmlparser/src/nsHTMLTags.cpp259
-rw-r--r--components/htmlparser/src/nsHTMLTags.h100
-rw-r--r--components/htmlparser/src/nsHTMLTokenizer.cpp59
-rw-r--r--components/htmlparser/src/nsHTMLTokenizer.h35
-rw-r--r--components/htmlparser/src/nsIContentSink.h132
-rw-r--r--components/htmlparser/src/nsIDTD.h136
-rw-r--r--components/htmlparser/src/nsIFragmentContentSink.h77
-rw-r--r--components/htmlparser/src/nsIHTMLContentSink.h89
-rw-r--r--components/htmlparser/src/nsIParser.h272
-rw-r--r--components/htmlparser/src/nsIParserService.h98
-rw-r--r--components/htmlparser/src/nsITokenizer.h44
-rw-r--r--components/htmlparser/src/nsParser.cpp1599
-rw-r--r--components/htmlparser/src/nsParser.h398
-rw-r--r--components/htmlparser/src/nsParserBase.h20
-rw-r--r--components/htmlparser/src/nsParserCIID.h39
-rw-r--r--components/htmlparser/src/nsParserConstants.h38
-rw-r--r--components/htmlparser/src/nsParserModule.cpp107
-rw-r--r--components/htmlparser/src/nsParserMsgUtils.cpp65
-rw-r--r--components/htmlparser/src/nsParserMsgUtils.h21
-rw-r--r--components/htmlparser/src/nsParserService.cpp90
-rw-r--r--components/htmlparser/src/nsParserService.h58
-rw-r--r--components/htmlparser/src/nsScanner.cpp408
-rw-r--r--components/htmlparser/src/nsScanner.h190
-rw-r--r--components/htmlparser/src/nsScannerString.cpp650
-rw-r--r--components/htmlparser/src/nsScannerString.h604
-rw-r--r--components/htmlparser/src/nsToken.h19
-rw-r--r--components/moz.build1
42 files changed, 8646 insertions, 0 deletions
diff --git a/components/htmlparser/moz.build b/components/htmlparser/moz.build
new file mode 100644
index 000000000..ddcad7b1a
--- /dev/null
+++ b/components/htmlparser/moz.build
@@ -0,0 +1,49 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# XPIDL interface definitions that belong to this component.
+XPIDL_SOURCES += [
+ 'public/nsIExpatSink.idl',
+ 'public/nsIExtendedExpatSink.idl',
+]
+
+# Headers from src/ listed for export.
+EXPORTS += [
+ 'src/nsElementTable.h',
+ 'src/nsHTMLTagList.h',
+ 'src/nsHTMLTags.h',
+ 'src/nsIContentSink.h',
+ 'src/nsIDTD.h',
+ 'src/nsIFragmentContentSink.h',
+ 'src/nsIHTMLContentSink.h',
+ 'src/nsIParser.h',
+ 'src/nsIParserService.h',
+ 'src/nsITokenizer.h',
+ 'src/nsParserBase.h',
+ 'src/nsParserCIID.h',
+ 'src/nsParserConstants.h',
+ 'src/nsScannerString.h',
+ 'src/nsToken.h',
+]
+
+# C++ translation units built for this component.
+SOURCES += [
+ 'src/CNavDTD.cpp',
+ 'src/CParserContext.cpp',
+ 'src/nsElementTable.cpp',
+ 'src/nsExpatDriver.cpp',
+ 'src/nsHTMLEntities.cpp',
+ 'src/nsHTMLTags.cpp',
+ 'src/nsHTMLTokenizer.cpp',
+ 'src/nsParser.cpp',
+ 'src/nsParserModule.cpp',
+ 'src/nsParserMsgUtils.cpp',
+ 'src/nsParserService.cpp',
+ 'src/nsScanner.cpp',
+ 'src/nsScannerString.cpp',
+]
+
+# With a GNU C++ compiler, keep shadowing diagnostics from being promoted to
+# errors (-Wno-error=shadow).
+if CONFIG['GNU_CXX']:
+ CXXFLAGS += ['-Wno-error=shadow']
+
+# The XPIDL module is named 'htmlparser'; FINAL_LIBRARY is set to 'xul'.
+XPIDL_MODULE = 'htmlparser'
+FINAL_LIBRARY = 'xul'
diff --git a/components/htmlparser/public/nsIExpatSink.idl b/components/htmlparser/public/nsIExpatSink.idl
new file mode 100644
index 000000000..df0b2d869
--- /dev/null
+++ b/components/htmlparser/public/nsIExpatSink.idl
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+interface nsIScriptError;
+
+/**
+ * This interface should be implemented by any content sink that wants
+ * to get output from expat and do something with it; in other words,
+ * by any sink that handles some sort of XML dialect.
+ */
+
+[scriptable, uuid(01f681af-0f22-4725-a914-0d396114daf0)]
+interface nsIExpatSink : nsISupports
+{
+ /**
+ * Called to handle the opening tag of an element.
+ * @param aName the fully qualified tagname of the element
+ * @param aAtts the array of attribute names and values. There are
+ * aAttsCount/2 names and aAttsCount/2 values, so the total number of
+ * elements in the array is aAttsCount. The names and values
+ * alternate. Thus, if we number attributes starting with 0,
+ * aAtts[2*k] is the name of the k-th attribute and aAtts[2*k+1] is
+ * the value of that attribute. Both explicitly specified attributes
+ * and attributes that are defined to have default values in a DTD are
+ * present in aAtts.
+ * @param aAttsCount the number of elements in aAtts.
+ * @param aLineNumber the line number of the start tag in the data stream.
+ */
+ void HandleStartElement(in wstring aName,
+ [array, size_is(aAttsCount)] in wstring aAtts,
+ in unsigned long aAttsCount,
+ in unsigned long aLineNumber);
+
+ /**
+ * Called to handle the closing tag of an element.
+ * @param aName the fully qualified tagname of the element
+ */
+ void HandleEndElement(in wstring aName);
+
+ /**
+ * Called to handle a comment
+ * @param aCommentText the text of the comment (not including the
+ * "<!--" and "-->")
+ */
+ void HandleComment(in wstring aCommentText);
+
+ /**
+ * Called to handle a CDATA section
+ * @param aData the text in the CDATA section. This is null-terminated.
+ * @param aLength the length of the aData string
+ */
+ void HandleCDataSection([size_is(aLength)] in wstring aData,
+ in unsigned long aLength);
+
+ /**
+ * Called to handle the doctype declaration
+ *
+ * NOTE(review): the parameters are not documented in this interface.
+ * Judging by their names they carry the internal subset, the doctype
+ * name, the system and public identifiers, and catalog data associated
+ * with the DTD -- confirm against the caller before relying on this.
+ */
+ void HandleDoctypeDecl(in AString aSubset,
+ in AString aName,
+ in AString aSystemId,
+ in AString aPublicId,
+ in nsISupports aCatalogData);
+
+ /**
+ * Called to handle character data. Note that this does NOT get
+ * called for the contents of CDATA sections.
+ * @param aData the data to handle. aData is NOT NULL-TERMINATED.
+ * @param aLength the length of the aData string
+ */
+ void HandleCharacterData([size_is(aLength)] in wstring aData,
+ in unsigned long aLength);
+
+ /**
+ * Called to handle a processing instruction
+ * @param aTarget the PI target (e.g. xml-stylesheet)
+ * @param aData all the rest of the data in the PI
+ */
+ void HandleProcessingInstruction(in wstring aTarget,
+ in wstring aData);
+
+ /**
+ * Handle the XML Declaration.
+ *
+ * @param aVersion The version string, can be null if not specified.
+ * @param aEncoding The encoding string, can be null if not specified.
+ * @param aStandalone -1, 0, or 1 indicating respectively that there was no
+ * standalone parameter in the declaration, that it was
+ * given as no, or that it was given as yes.
+ */
+ void HandleXMLDeclaration(in wstring aVersion,
+ in wstring aEncoding,
+ in long aStandalone);
+
+ /**
+ * Ask the content sink if the expat driver should log an error to the console.
+ *
+ * @param aErrorText Error message to pass to content sink.
+ * @param aSourceText Source text of the document we're parsing.
+ * @param aError Script error object with line number & column number
+ *
+ * @retval True if the expat driver should report the error.
+ */
+ boolean ReportError(in wstring aErrorText,
+ in wstring aSourceText,
+ in nsIScriptError aError);
+};
diff --git a/components/htmlparser/public/nsIExtendedExpatSink.idl b/components/htmlparser/public/nsIExtendedExpatSink.idl
new file mode 100644
index 000000000..d88f0d974
--- /dev/null
+++ b/components/htmlparser/public/nsIExtendedExpatSink.idl
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIExpatSink.idl"
+
+/**
+ * This interface provides notification of syntax-level events.
+ */
+[scriptable, uuid(5e3e4f0c-7b77-47ca-a7c5-a3d87f2a9c82)]
+interface nsIExtendedExpatSink : nsIExpatSink
+{
+ /**
+ * Called at the beginning of the DTD, before any entity or notation
+ * events.
+ * @param aDoctypeName The document type name.
+ * @param aSysid The declared system identifier for the external DTD subset,
+ * or null if none was declared.
+ * @param aPubid The declared public identifier for the external DTD subset,
+ * or null if none was declared.
+ */
+ void handleStartDTD(in wstring aDoctypeName,
+ in wstring aSysid,
+ in wstring aPubid);
+
+ /**
+ * Called when a prefix mapping starts to be in-scope, before any
+ * startElement events.
+ * @param aPrefix The Namespace prefix being declared. An empty string
+ * is used for the default element namespace, which has
+ * no prefix.
+ * @param aUri The Namespace URI the prefix is mapped to.
+ */
+ void handleStartNamespaceDecl(in wstring aPrefix,
+ in wstring aUri);
+
+ /**
+ * Called when a prefix mapping is no longer in-scope, after any
+ * endElement events.
+ * @param aPrefix The prefix that was being mapped. This is the empty string
+ * when a default mapping scope ends.
+ */
+ void handleEndNamespaceDecl(in wstring aPrefix);
+
+ /**
+ * This is called for a declaration of notation. The base argument is
+ * whatever was set by XML_SetBase. aNotationName will never be
+ * null. The other arguments can be.
+ * NOTE(review): this method takes no base argument; the sentence about
+ * XML_SetBase appears to describe the underlying expat callback instead.
+ * @param aNotationName The notation name.
+ * @param aSysid The notation's system identifier, or null if none was given.
+ * @param aPubid The notation's public identifier, or null if none was given.
+ */
+ void handleNotationDecl(in wstring aNotationName,
+ in wstring aSysid,
+ in wstring aPubid);
+
+ /**
+ * This is called for a declaration of an unparsed (NDATA) entity.
+ * aName, aSysid and aNotationName arguments will never be
+ * null. The other arguments may be.
+ * @param aName The unparsed entity's name.
+ * @param aSysid The unparsed entity's system identifier.
+ * @param aPubid The unparsed entity's public identifier, or null if none
+ * was given.
+ * @param aNotationName The name of the associated notation.
+ */
+ void handleUnparsedEntityDecl(in wstring aName,
+ in wstring aSysid,
+ in wstring aPubid,
+ in wstring aNotationName);
+
+};
diff --git a/components/htmlparser/src/CNavDTD.cpp b/components/htmlparser/src/CNavDTD.cpp
new file mode 100644
index 000000000..decc6a963
--- /dev/null
+++ b/components/htmlparser/src/CNavDTD.cpp
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsISupportsImpl.h"
+#include "nsIParser.h"
+#include "CNavDTD.h"
+#include "nsIHTMLContentSink.h"
+
+NS_IMPL_ISUPPORTS(CNavDTD, nsIDTD);
+
+// Default constructor; the body is intentionally empty.
+CNavDTD::CNavDTD()
+{
+}
+
+// Destructor; the body is intentionally empty.
+CNavDTD::~CNavDTD()
+{
+}
+
+// No preparation is performed here: all three arguments are ignored and
+// NS_OK is always returned.
+NS_IMETHODIMP
+CNavDTD::WillBuildModel(const CParserContext& aParserContext,
+ nsITokenizer* aTokenizer,
+ nsIContentSink* aSink)
+{
+ return NS_OK;
+}
+
+// Emits an empty document skeleton into the sink: the html and body
+// containers are opened and then closed again, innermost first. aTokenizer
+// is not consulted.
+//
+// Returns NS_ERROR_HTMLPARSER_STOPPARSING when aSink does not implement
+// nsIHTMLContentSink, the failing nsresult when either OpenContainer call
+// fails, and NS_OK otherwise. CloseContainer failures are only asserted on
+// (MOZ_ASSERT) and do not change the return value.
+NS_IMETHODIMP
+CNavDTD::BuildModel(nsITokenizer* aTokenizer,
+ nsIContentSink* aSink)
+{
+ // NB: It is important to throw STOPPARSING if the sink is the wrong type in
+ // order to make sure nsParser cleans up properly after itself.
+ nsCOMPtr<nsIHTMLContentSink> sink = do_QueryInterface(aSink);
+ if (!sink) {
+ return NS_ERROR_HTMLPARSER_STOPPARSING;
+ }
+
+ nsresult rv = sink->OpenContainer(nsIHTMLContentSink::eHTML);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = sink->OpenContainer(nsIHTMLContentSink::eBody);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = sink->CloseContainer(nsIHTMLContentSink::eBody);
+ MOZ_ASSERT(NS_SUCCEEDED(rv));
+ rv = sink->CloseContainer(nsIHTMLContentSink::eHTML);
+ MOZ_ASSERT(NS_SUCCEEDED(rv));
+
+ return NS_OK;
+}
+
+// Nothing to finish: anErrorCode is ignored and NS_OK is always returned.
+NS_IMETHODIMP
+CNavDTD::DidBuildModel(nsresult anErrorCode)
+{
+ return NS_OK;
+}
+
+// Intentionally a no-op.
+NS_IMETHODIMP_(void)
+CNavDTD::Terminate()
+{
+}
+
+
+// Always reports NS_IPARSER_FLAG_HTML.
+NS_IMETHODIMP_(int32_t)
+CNavDTD::GetType()
+{
+ return NS_IPARSER_FLAG_HTML;
+}
+
+// Always reports eDTDMode_quirks.
+NS_IMETHODIMP_(nsDTDMode)
+CNavDTD::GetMode() const
+{
+ return eDTDMode_quirks;
+}
+
+// Not expected to be called: the body crashes deliberately via MOZ_CRASH.
+// The trailing return follows the crash and only gives the function a value
+// of its declared return type.
+NS_IMETHODIMP_(bool)
+CNavDTD::CanContain(int32_t aParent,int32_t aChild) const
+{
+ MOZ_CRASH("nobody calls this");
+ return false;
+}
+
+// Not expected to be called: the body crashes deliberately via MOZ_CRASH.
+// The trailing return follows the crash and only gives the function a value
+// of its declared return type.
+NS_IMETHODIMP_(bool)
+CNavDTD::IsContainer(int32_t aTag) const
+{
+ MOZ_CRASH("nobody calls this");
+ return false;
+}
diff --git a/components/htmlparser/src/CNavDTD.h b/components/htmlparser/src/CNavDTD.h
new file mode 100644
index 000000000..b3c557e81
--- /dev/null
+++ b/components/htmlparser/src/CNavDTD.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_NAVHTMLDTD__
+#define NS_NAVHTMLDTD__
+
+#include "nsIDTD.h"
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+
+// MSVC only: warning 4275 is disabled while the class head (with its
+// nsIDTD base) is parsed and put back to its default right after.
+#ifdef _MSC_VER
+#pragma warning( disable : 4275 )
+#endif
+
+// nsIDTD implementation; the interface methods are declared through
+// NS_DECL_NSIDTD and reference counting through NS_DECL_ISUPPORTS.
+class CNavDTD : public nsIDTD
+{
+#ifdef _MSC_VER
+#pragma warning( default : 4275 )
+#endif
+
+ // Declared before "public:", so the destructor is private.
+ virtual ~CNavDTD();
+
+public:
+ CNavDTD();
+
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIDTD
+};
+
+#endif
+
+
+
diff --git a/components/htmlparser/src/CParserContext.cpp b/components/htmlparser/src/CParserContext.cpp
new file mode 100644
index 000000000..3b764d7e4
--- /dev/null
+++ b/components/htmlparser/src/CParserContext.cpp
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#include "nsIAtom.h"
+#include "CParserContext.h"
+#include "nsToken.h"
+#include "prenv.h"
+#include "nsIHTMLContentSink.h"
+#include "nsHTMLTokenizer.h"
+#include "nsMimeTypes.h"
+#include "nsHTMLTokenizer.h"
+
+// Builds a parser context around aScanner, chained to aPrevContext.
+//
+// The arguments are stored in the corresponding members; the remaining
+// state starts out as: unknown DTD mode, plain-text document type (the same
+// fallback SetMimeType() uses for unrecognised MIME types), no stream
+// listener state, no context type, and mMultipart == true.
+//
+// mDocType is initialised explicitly so that it never holds an
+// indeterminate value when it is read before SetMimeType() has been called.
+// The initialiser order follows the member declaration order in the class.
+CParserContext::CParserContext(CParserContext* aPrevContext,
+                               nsScanner* aScanner,
+                               void *aKey,
+                               eParserCommands aCommand,
+                               nsIRequestObserver* aListener,
+                               eAutoDetectResult aStatus,
+                               bool aCopyUnused)
+  : mListener(aListener),
+    mKey(aKey),
+    mPrevContext(aPrevContext),
+    mScanner(aScanner),
+    mDTDMode(eDTDMode_unknown),
+    mDocType(ePlainText),
+    mStreamListenerState(eNone),
+    mContextType(eCTNone),
+    mAutoDetectStatus(aStatus),
+    mParserCommand(aCommand),
+    mMultipart(true),
+    mCopyUnused(aCopyUnused)
+{
+  MOZ_COUNT_CTOR(CParserContext);
+}
+
+// The destructor body only records the destruction (MOZ_COUNT_DTOR);
+// mPrevContext is deliberately left alone.
+CParserContext::~CParserContext()
+{
+ // It's ok to simply ignore the PrevContext.
+ MOZ_COUNT_DTOR(CParserContext);
+}
+
+// Stores aMimeType in mMimeType and derives mDocType from it:
+// text/html maps to eHTML_Strict, the listed XML-family types map to eXML,
+// and everything else is treated as ePlainText.
+void
+CParserContext::SetMimeType(const nsACString& aMimeType)
+{
+  mMimeType.Assign(aMimeType);
+
+  if (mMimeType.EqualsLiteral(TEXT_HTML)) {
+    mDocType = eHTML_Strict;
+    return;
+  }
+
+  const bool isXMLFamily =
+    mMimeType.EqualsLiteral(TEXT_XML) ||
+    mMimeType.EqualsLiteral(APPLICATION_XML) ||
+    mMimeType.EqualsLiteral(APPLICATION_XHTML_XML) ||
+    mMimeType.EqualsLiteral(TEXT_XUL) ||
+    mMimeType.EqualsLiteral(IMAGE_SVG_XML) ||
+    mMimeType.EqualsLiteral(APPLICATION_MATHML_XML) ||
+    mMimeType.EqualsLiteral(APPLICATION_RDF_XML) ||
+    mMimeType.EqualsLiteral(APPLICATION_WAPXHTML_XML) ||
+    mMimeType.EqualsLiteral(TEXT_RDF);
+
+  if (isXMLFamily) {
+    mDocType = eXML;
+  } else {
+    mDocType = ePlainText;
+  }
+}
+
+// Hands back this context's tokenizer through aTokenizer, creating it on
+// first use. An HTML DTD (or no DTD at all), or the view-source command,
+// gets a new nsHTMLTokenizer; an XML DTD is itself queried for
+// nsITokenizer. Any other DTD type leaves the tokenizer unset.
+// aSink is not used. Returns NS_OK, or the do_QueryInterface result when
+// the XML path is taken.
+nsresult
+CParserContext::GetTokenizer(nsIDTD* aDTD,
+                             nsIContentSink* aSink,
+                             nsITokenizer*& aTokenizer)
+{
+  nsresult rv = NS_OK;
+  // Without a DTD the context is treated as HTML.
+  const int32_t dtdType = aDTD ? aDTD->GetType() : NS_IPARSER_FLAG_HTML;
+
+  if (!mTokenizer) {
+    const bool useHTMLTokenizer =
+      dtdType == NS_IPARSER_FLAG_HTML || mParserCommand == eViewSource;
+
+    if (useHTMLTokenizer) {
+      mTokenizer = new nsHTMLTokenizer;
+    } else if (dtdType == NS_IPARSER_FLAG_XML) {
+      mTokenizer = do_QueryInterface(aDTD, &rv);
+    }
+  }
+
+  aTokenizer = mTokenizer;
+  return rv;
+}
diff --git a/components/htmlparser/src/CParserContext.h b/components/htmlparser/src/CParserContext.h
new file mode 100644
index 000000000..8850b83d5
--- /dev/null
+++ b/components/htmlparser/src/CParserContext.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/1/98
+ *
+ */
+
+#ifndef __CParserContext
+#define __CParserContext
+
+#include "nsIParser.h"
+#include "nsIURL.h"
+#include "nsIDTD.h"
+#include "nsIStreamListener.h"
+#include "nsIRequest.h"
+#include "nsScanner.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+#include "nsAutoPtr.h"
+
+/**
+ * Note that the parser is given FULL access to all
+ * data in a parsercontext. Hey, that's what it's for!
+ */
+
+// Per-input parsing state. All data members are public.
+class CParserContext {
+public:
+ enum eContextType {eCTNone,eCTURL,eCTString,eCTStream};
+
+ CParserContext(CParserContext* aPrevContext,
+ nsScanner* aScanner,
+ void* aKey = 0,
+ eParserCommands aCommand = eViewNormal,
+ nsIRequestObserver* aListener = 0,
+ eAutoDetectResult aStatus = eUnknownDetect,
+ bool aCopyUnused = false);
+
+ ~CParserContext();
+
+ // Returns the tokenizer for this context through aTokenizer.
+ nsresult GetTokenizer(nsIDTD* aDTD,
+ nsIContentSink* aSink,
+ nsITokenizer*& aTokenizer);
+ // Records the MIME type and sets mDocType accordingly.
+ void SetMimeType(const nsACString& aMimeType);
+
+ nsCOMPtr<nsIRequest> mRequest; // provided by necko to differentiate different input streams
+ // why is mRequest strongly referenced? see bug 102376.
+ nsCOMPtr<nsIRequestObserver> mListener;
+ // Opaque key handed to the constructor.
+ void* const mKey;
+ nsCOMPtr<nsITokenizer> mTokenizer;
+ // Raw pointer to the previous context, as passed to the constructor.
+ CParserContext* const mPrevContext;
+ // Held in an nsAutoPtr.
+ nsAutoPtr<nsScanner> mScanner;
+
+ nsCString mMimeType;
+ nsDTDMode mDTDMode;
+
+ eParserDocType mDocType;
+ eStreamState mStreamListenerState;
+ eContextType mContextType;
+ eAutoDetectResult mAutoDetectStatus;
+ eParserCommands mParserCommand;
+
+ bool mMultipart;
+ bool mCopyUnused;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsElementTable.cpp b/components/htmlparser/src/nsElementTable.cpp
new file mode 100644
index 000000000..7ab4c48b1
--- /dev/null
+++ b/components/htmlparser/src/nsElementTable.cpp
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsElementTable.h"
+
+// One row of the element table: whether a tag is a block element and
+// whether it is a container. The tag id itself is stored only in DEBUG
+// builds.
+struct HTMLElement
+{
+#ifdef DEBUG
+ nsHTMLTag mTagID;
+#endif
+ bool mIsBlock;
+ bool mIsContainer;
+};
+
+// ELEM expands to one HTMLElement initializer followed by a comma. The tag
+// id (eHTMLTag_<tag>) is emitted only in DEBUG builds, matching the
+// DEBUG-only mTagID field; otherwise the tag argument is dropped.
+#ifdef DEBUG
+#define ELEM(tag, block, container) { eHTMLTag_##tag, block, container },
+#else
+#define ELEM(tag, block, container) { block, container },
+#endif
+
+#define ____ false // This makes the table easier to read.
+
+// Note that the mIsBlock field disagrees with
+// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements for
+// the following elements: center, details, dialog, dir, dt, figcaption,
+// listing, menu, multicol, noscript, output, summary, tfoot, video.
+//
+// mrbkap thinks that the field values were pulled from the old HTML4 DTD and
+// then got modified in mostly random ways to make the old parser's behavior
+// compatible with the web. So it might make sense to change the mIsBlock
+// values for the abovementioned tags at some point.
+//
+
+// Element properties, indexed directly by nsHTMLTag value (see
+// nsHTMLElement::IsContainer / IsBlock). Each row is
+// ELEM(tag, isBlock, isContainer). The rows must stay in nsHTMLTag order;
+// DEBUG builds can verify this with CheckElementTable().
+static const HTMLElement gHTMLElements[] = {
+ ELEM(unknown, ____, ____)
+ ELEM(a, ____, true)
+ ELEM(abbr, ____, true)
+ ELEM(acronym, ____, true)
+ ELEM(address, true, true)
+ ELEM(applet, ____, true)
+ ELEM(area, ____, ____)
+ ELEM(article, true, true)
+ ELEM(aside, true, true)
+ ELEM(audio, ____, true)
+ ELEM(b, ____, true)
+ ELEM(base, ____, ____)
+ ELEM(basefont, ____, ____)
+ ELEM(bdo, ____, true)
+ ELEM(bgsound, ____, ____)
+ ELEM(big, ____, true)
+ ELEM(blockquote, true, true)
+ ELEM(body, ____, true)
+ ELEM(br, ____, ____)
+ ELEM(button, ____, true)
+ ELEM(canvas, ____, true)
+ ELEM(caption, ____, true)
+ ELEM(center, true, true)
+ ELEM(cite, ____, true)
+ ELEM(code, ____, true)
+ ELEM(col, ____, ____)
+ ELEM(colgroup, ____, true)
+ ELEM(data, ____, true)
+ ELEM(datalist, ____, true)
+ ELEM(dd, ____, true)
+ ELEM(del, ____, true)
+ ELEM(details, true, true)
+ ELEM(dfn, ____, true)
+ ELEM(dialog, true, true)
+ ELEM(dir, true, true)
+ ELEM(div, true, true)
+ ELEM(dl, true, true)
+ ELEM(dt, ____, true)
+ ELEM(em, ____, true)
+ ELEM(embed, ____, ____)
+ ELEM(fieldset, true, true)
+ ELEM(figcaption, ____, true)
+ ELEM(figure, true, true)
+ ELEM(font, ____, true)
+ ELEM(footer, true, true)
+ ELEM(form, true, true)
+ ELEM(frame, ____, ____)
+ ELEM(frameset, ____, true)
+ ELEM(h1, true, true)
+ ELEM(h2, true, true)
+ ELEM(h3, true, true)
+ ELEM(h4, true, true)
+ ELEM(h5, true, true)
+ ELEM(h6, true, true)
+ ELEM(head, ____, true)
+ ELEM(header, true, true)
+ ELEM(hgroup, true, true)
+ ELEM(hr, true, ____)
+ ELEM(html, ____, true)
+ ELEM(i, ____, true)
+ ELEM(iframe, ____, true)
+ ELEM(image, ____, ____)
+ ELEM(img, ____, ____)
+ ELEM(input, ____, ____)
+ ELEM(ins, ____, true)
+ ELEM(kbd, ____, true)
+ ELEM(keygen, ____, ____)
+ ELEM(label, ____, true)
+ ELEM(legend, ____, true)
+ ELEM(li, true, true)
+ ELEM(link, ____, ____)
+ ELEM(listing, true, true)
+ ELEM(main, true, true)
+ ELEM(map, ____, true)
+ ELEM(mark, ____, true)
+ ELEM(menu, true, true)
+ ELEM(menuitem, ____, true)
+ ELEM(meta, ____, ____)
+ ELEM(meter, ____, true)
+ ELEM(multicol, true, true)
+ ELEM(nav, true, true)
+ ELEM(nobr, ____, true)
+ ELEM(noembed, ____, true)
+ ELEM(noframes, ____, true)
+ ELEM(noscript, ____, true)
+ ELEM(object, ____, true)
+ ELEM(ol, true, true)
+ ELEM(optgroup, ____, true)
+ ELEM(option, ____, true)
+ ELEM(output, ____, true)
+ ELEM(p, true, true)
+ ELEM(param, ____, ____)
+ ELEM(picture, ____, true)
+ ELEM(plaintext, ____, true)
+ ELEM(pre, true, true)
+ ELEM(progress, ____, true)
+ ELEM(q, ____, true)
+ ELEM(rb, ____, true)
+ ELEM(rp, ____, true)
+ ELEM(rt, ____, true)
+ ELEM(rtc, ____, true)
+ ELEM(ruby, ____, true)
+ ELEM(s, ____, true)
+ ELEM(samp, ____, true)
+ ELEM(script, ____, true)
+ ELEM(section, true, true)
+ ELEM(select, ____, true)
+ ELEM(small, ____, true)
+ ELEM(slot, ____, true)
+ ELEM(source, ____, ____)
+ ELEM(span, ____, true)
+ ELEM(strike, ____, true)
+ ELEM(strong, ____, true)
+ ELEM(style, ____, true)
+ ELEM(sub, ____, true)
+ ELEM(summary, true, true)
+ ELEM(sup, ____, true)
+ ELEM(table, true, true)
+ ELEM(tbody, ____, true)
+ ELEM(td, ____, true)
+ ELEM(textarea, ____, true)
+ ELEM(tfoot, ____, true)
+ ELEM(th, ____, true)
+ ELEM(thead, ____, true)
+ ELEM(template, ____, true)
+ ELEM(time, ____, true)
+ ELEM(title, ____, true)
+ ELEM(tr, ____, true)
+ ELEM(track, ____, ____)
+ ELEM(tt, ____, true)
+ ELEM(u, ____, true)
+ ELEM(ul, true, true)
+ ELEM(var, ____, true)
+ ELEM(video, ____, true)
+ ELEM(wbr, ____, ____)
+ ELEM(xmp, ____, true)
+ ELEM(text, ____, ____)
+ ELEM(whitespace, ____, ____)
+ ELEM(newline, ____, ____)
+ ELEM(comment, ____, true)
+ ELEM(entity, ____, true)
+ ELEM(doctypeDecl, ____, true)
+ ELEM(markupDecl, ____, true)
+ ELEM(instruction, ____, true)
+ ELEM(userdefined, ____, true)
+};
+
+#undef ELEM
+#undef ____
+
+// Returns the mIsContainer flag recorded for aId in gHTMLElements.
+// aId is used directly as the table index, so it must name a row of the
+// table; debug builds assert this instead of silently reading past the end.
+bool
+nsHTMLElement::IsContainer(nsHTMLTag aId)
+{
+  MOZ_ASSERT(static_cast<size_t>(aId) <
+               sizeof(gHTMLElements) / sizeof(gHTMLElements[0]),
+             "nsHTMLTag is out of range for gHTMLElements");
+  return gHTMLElements[aId].mIsContainer;
+}
+
+// Returns the mIsBlock flag recorded for aId in gHTMLElements.
+// aId is used directly as the table index, so it must name a row of the
+// table; debug builds assert this instead of silently reading past the end.
+bool
+nsHTMLElement::IsBlock(nsHTMLTag aId)
+{
+  MOZ_ASSERT(static_cast<size_t>(aId) <
+               sizeof(gHTMLElements) / sizeof(gHTMLElements[0]),
+             "nsHTMLTag is out of range for gHTMLElements");
+  return gHTMLElements[aId].mIsBlock;
+}
+
+#ifdef DEBUG
+// Debug-only consistency check: walks every tag from eHTMLTag_unknown to
+// eHTMLTag_userdefined and asserts that the table row at that index was
+// written for that very tag, i.e. that gHTMLElements is in nsHTMLTag order.
+void
+CheckElementTable()
+{
+  for (nsHTMLTag t = eHTMLTag_unknown;
+       t <= eHTMLTag_userdefined;
+       t = nsHTMLTag(t + 1)) {
+    // The message describes the failure that is reported when the
+    // condition does not hold.
+    MOZ_ASSERT(gHTMLElements[t].mTagID == t,
+               "gHTMLElements entries do not match tag list.");
+  }
+}
+#endif
diff --git a/components/htmlparser/src/nsElementTable.h b/components/htmlparser/src/nsElementTable.h
new file mode 100644
index 000000000..b456b5989
--- /dev/null
+++ b/components/htmlparser/src/nsElementTable.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsElementTable_h
+#define nsElementTable_h
+
+#include "nsHTMLTags.h"
+
+#ifdef DEBUG
+void CheckElementTable();
+#endif
+
+// Static-only accessors for per-tag element properties.
+struct nsHTMLElement
+{
+ // Whether aTag is recorded as a container element.
+ static bool IsContainer(nsHTMLTag aTag);
+ // Whether aTag is recorded as a block element.
+ static bool IsBlock(nsHTMLTag aTag);
+};
+
+#endif // nsElementTable_h
diff --git a/components/htmlparser/src/nsExpatDriver.cpp b/components/htmlparser/src/nsExpatDriver.cpp
new file mode 100644
index 000000000..e35a1da25
--- /dev/null
+++ b/components/htmlparser/src/nsExpatDriver.cpp
@@ -0,0 +1,1412 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsExpatDriver.h"
+#include "nsCOMPtr.h"
+#include "nsParserCIID.h"
+#include "CParserContext.h"
+#include "nsIExpatSink.h"
+#include "nsIExtendedExpatSink.h"
+#include "nsIContentSink.h"
+#include "nsParserMsgUtils.h"
+#include "nsIURL.h"
+#include "nsIUnicharInputStream.h"
+#include "nsIProtocolHandler.h"
+#include "nsNetUtil.h"
+#include "prprf.h"
+#include "prmem.h"
+#include "nsTextFormatter.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsCRT.h"
+#include "nsIConsoleService.h"
+#include "nsIScriptError.h"
+#include "nsIContentPolicy.h"
+#include "nsContentPolicyUtils.h"
+#include "nsError.h"
+#include "nsXPCOMCIDInternal.h"
+#include "nsUnicharInputStream.h"
+#include "nsContentUtils.h"
+#include "nsNullPrincipal.h"
+
+#include "mozilla/IntegerTypeTraits.h"
+#include "mozilla/Logging.h"
+
+using mozilla::fallible;
+using mozilla::LogLevel;
+
+#define kExpatSeparatorChar 0xFFFF
+
+static const char16_t kUTF16[] = { 'U', 'T', 'F', '-', '1', '6', '\0' };
+
+static mozilla::LazyLogModule gExpatDriverLog("expatdriver");
+
+// The maximum tree depth used for XML-based files (xml/svg/etc.)
+static const uint16_t sMaxXMLDepth = 2048;
+
+/***************************** EXPAT CALL BACKS ******************************/
+// The callback handlers that get called from the expat parser.
+
+// Expat callback for the XML declaration: forwards version, encoding and
+// the standalone flag to the nsExpatDriver passed as user data. Does
+// nothing (beyond asserting) when no driver was supplied.
+static void
+Driver_HandleXMLDeclaration(void *aUserData,
+                            const XML_Char *aVersion,
+                            const XML_Char *aEncoding,
+                            int aStandalone)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleXMLDeclaration(aVersion, aEncoding, aStandalone);
+}
+
+// Expat callback for a start tag: forwards the element name and attribute
+// array to the nsExpatDriver passed as user data. Does nothing (beyond
+// asserting) when no driver was supplied.
+static void
+Driver_HandleStartElement(void *aUserData,
+                          const XML_Char *aName,
+                          const XML_Char **aAtts)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleStartElement(aName, aAtts);
+}
+
+// Expat callback for an end tag: forwards the element name to the
+// nsExpatDriver passed as user data. Does nothing (beyond asserting) when
+// no driver was supplied.
+static void
+Driver_HandleEndElement(void *aUserData,
+                        const XML_Char *aName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleEndElement(aName);
+}
+
+// Expat callback for character data: forwards the buffer and its length
+// (converted to uint32_t) to the nsExpatDriver passed as user data. Does
+// nothing (beyond asserting) when no driver was supplied.
+static void
+Driver_HandleCharacterData(void *aUserData,
+                           const XML_Char *aData,
+                           int aLength)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleCharacterData(aData, static_cast<uint32_t>(aLength));
+}
+
+// Expat callback for a comment: forwards its single string argument to the
+// nsExpatDriver passed as user data. Does nothing (beyond asserting) when
+// no driver was supplied.
+static void
+Driver_HandleComment(void *aUserData,
+                     const XML_Char *aName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleComment(aName);
+}
+
+// Expat callback for a processing instruction: forwards target and data to
+// the nsExpatDriver passed as user data. Does nothing (beyond asserting)
+// when no driver was supplied.
+static void
+Driver_HandleProcessingInstruction(void *aUserData,
+                                   const XML_Char *aTarget,
+                                   const XML_Char *aData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleProcessingInstruction(aTarget, aData);
+}
+
+// Expat default callback: forwards the buffer and its length (converted to
+// uint32_t) to the nsExpatDriver passed as user data. Does nothing (beyond
+// asserting) when no driver was supplied.
+static void
+Driver_HandleDefault(void *aUserData,
+                     const XML_Char *aData,
+                     int aLength)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  static_cast<nsExpatDriver*>(aUserData)->
+    HandleDefault(aData, static_cast<uint32_t>(aLength));
+}
+
+// Expat callback for the start of a CDATA section: notifies the
+// nsExpatDriver passed as user data. Does nothing (beyond asserting) when
+// no driver was supplied.
+static void
+Driver_HandleStartCdataSection(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleStartCdataSection();
+}
+
+// Expat callback for the end of a CDATA section: notifies the
+// nsExpatDriver passed as user data. Does nothing (beyond asserting) when
+// no driver was supplied.
+static void
+Driver_HandleEndCdataSection(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleEndCdataSection();
+}
+
+// Expat callback for the start of a doctype declaration: forwards the
+// doctype name and identifiers to the nsExpatDriver passed as user data,
+// with the internal-subset flag reduced to a boolean. Does nothing (beyond
+// asserting) when no driver was supplied.
+static void
+Driver_HandleStartDoctypeDecl(void *aUserData,
+                              const XML_Char *aDoctypeName,
+                              const XML_Char *aSysid,
+                              const XML_Char *aPubid,
+                              int aHasInternalSubset)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  const bool hasInternalSubset = aHasInternalSubset != 0;
+  driver->HandleStartDoctypeDecl(aDoctypeName, aSysid, aPubid,
+                                 hasInternalSubset);
+}
+
+// Expat callback for the end of a doctype declaration: notifies the
+// nsExpatDriver passed as user data. Does nothing (beyond asserting) when
+// no driver was supplied.
+static void
+Driver_HandleEndDoctypeDecl(void *aUserData)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleEndDoctypeDecl();
+}
+
+// Expat callback for an external entity reference. The first argument is
+// the nsExpatDriver; the remaining arguments are passed through to it and
+// its result is returned. Returns 1 when no driver was supplied.
+static int
+Driver_HandleExternalEntityRef(void *aExternalEntityRefHandler,
+                               const XML_Char *aOpenEntityNames,
+                               const XML_Char *aBase,
+                               const XML_Char *aSystemId,
+                               const XML_Char *aPublicId)
+{
+  NS_ASSERTION(aExternalEntityRefHandler, "expat driver should exist");
+  if (!aExternalEntityRefHandler) {
+    return 1;
+  }
+
+  return static_cast<nsExpatDriver*>(aExternalEntityRefHandler)->
+    HandleExternalEntityRef(aOpenEntityNames, aBase, aSystemId, aPublicId);
+}
+
+// Expat callback for the start of a namespace declaration: forwards the
+// prefix and URI to the nsExpatDriver passed as user data. Does nothing
+// (beyond asserting) when no driver was supplied.
+static void
+Driver_HandleStartNamespaceDecl(void *aUserData,
+                                const XML_Char *aPrefix,
+                                const XML_Char *aUri)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleStartNamespaceDecl(aPrefix, aUri);
+}
+
+// Expat callback for the end of a namespace declaration: forwards the
+// prefix to the nsExpatDriver passed as user data. Does nothing (beyond
+// asserting) when no driver was supplied.
+static void
+Driver_HandleEndNamespaceDecl(void *aUserData,
+                              const XML_Char *aPrefix)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleEndNamespaceDecl(aPrefix);
+}
+
+// Expat callback for a notation declaration: forwards the notation name,
+// base and identifiers to the nsExpatDriver passed as user data. Does
+// nothing (beyond asserting) when no driver was supplied.
+static void
+Driver_HandleNotationDecl(void *aUserData,
+                          const XML_Char *aNotationName,
+                          const XML_Char *aBase,
+                          const XML_Char *aSysid,
+                          const XML_Char *aPubid)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleNotationDecl(aNotationName, aBase, aSysid, aPubid);
+}
+
+// Expat callback for an unparsed entity declaration: forwards the entity
+// name, base, identifiers and notation name to the nsExpatDriver passed as
+// user data. Does nothing (beyond asserting) when no driver was supplied.
+static void
+Driver_HandleUnparsedEntityDecl(void *aUserData,
+                                const XML_Char *aEntityName,
+                                const XML_Char *aBase,
+                                const XML_Char *aSysid,
+                                const XML_Char *aPubid,
+                                const XML_Char *aNotationName)
+{
+  NS_ASSERTION(aUserData, "expat driver should exist");
+  if (!aUserData) {
+    return;
+  }
+
+  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aUserData);
+  driver->HandleUnparsedEntityDecl(aEntityName, aBase, aSysid, aPubid,
+                                   aNotationName);
+}
+
+
+/***************************** END CALL BACKS ********************************/
+
+/***************************** CATALOG UTILS *********************************/
+
+// Initially added for bug 113400 to switch from the remote "XHTML 1.0 plus
+// MathML 2.0" DTD to the lightweight customized version that Mozilla uses.
+// Since Mozilla is not validating, no need to fetch a *huge* file at each
+// click.
+// XXX The cleanest solution here would be to fix Bug 98413: Implement XML
+// Catalogs.
+struct nsCatalogData { // one catalog entry: maps a public identifier to a bundled replacement
+ const char* mPublicID; // public identifier compared by LookupCatalogData(); nullptr marks the end of kCatalogTable
+ const char* mLocalDTD; // file name that GetLocalDTDURI() appends to resource://gre/res/dtd/
+ const char* mAgentSheet; // agent style sheet; nullptr in every kCatalogTable entry
+};
+
+// The order of this table is guesstimated to be in the optimum order
+static const nsCatalogData kCatalogTable[] = {
+ { "-//W3C//DTD XHTML 1.0 Transitional//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML 1.1//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML 1.0 Strict//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML 1.0 Frameset//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML Basic 1.0//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", "htmlmathml-f.ent", nullptr },
+ { "-//W3C//DTD MathML 2.0//EN", "htmlmathml-f.ent", nullptr },
+ { "-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "htmlmathml-f.ent", nullptr },
+ { nullptr, nullptr, nullptr } // sentinel: a null mPublicID ends the scan in LookupCatalogData()
+};
+
+// Returns the kCatalogTable entry whose public ID equals aPublicID, or
+// nullptr when the table has no such entry.
+static const nsCatalogData*
+LookupCatalogData(const char16_t* aPublicID)
+{
+  nsDependentString wanted(aPublicID);
+
+  // A linear scan is enough: the table is tiny, and the fix for bug 98413
+  // would get rid of this code anyway.
+  for (const nsCatalogData* entry = kCatalogTable; entry->mPublicID; ++entry) {
+    if (wanted.EqualsASCII(entry->mPublicID)) {
+      return entry;
+    }
+  }
+
+  return nullptr;
+}
+
+// Builds a resource://gre/res/dtd/<file> URI for a local copy of a DTD; the
+// file it names may or may not exist. The file name comes from aCatalogData
+// when an entry is supplied, otherwise from the file-name part of aDTD.
+// *aResult is left untouched when no file name can be determined.
+static void
+GetLocalDTDURI(const nsCatalogData* aCatalogData, nsIURI* aDTD,
+               nsIURI** aResult)
+{
+  NS_ASSERTION(aDTD, "Null parameter.");
+
+  nsAutoCString localName;
+  if (aCatalogData) {
+    // The catalog remaps this DTD to a known local file.
+    localName.Assign(aCatalogData->mLocalDTD);
+  }
+
+  if (localName.IsEmpty()) {
+    // No predefined mapping: fall back to the filename.ext of the DTD itself,
+    // so users only have to copy a DTD file into our special DTD directory
+    // for it to be picked up.
+    nsCOMPtr<nsIURL> url = do_QueryInterface(aDTD);
+    if (url) {
+      url->GetFileName(localName);
+    }
+    if (localName.IsEmpty()) {
+      return;
+    }
+  }
+
+  nsAutoCString resourceSpec("resource://gre/res/dtd/");
+  resourceSpec += localName;
+  NS_NewURI(aResult, resourceSpec);
+}
+
+/***************************** END CATALOG UTILS *****************************/
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsExpatDriver) // interface map for nsExpatDriver
+ NS_INTERFACE_MAP_ENTRY(nsITokenizer)
+ NS_INTERFACE_MAP_ENTRY(nsIDTD)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIDTD) // the class has two interface bases, so nsISupports is resolved through nsIDTD
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsExpatDriver)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsExpatDriver)
+
+NS_IMPL_CYCLE_COLLECTION(nsExpatDriver, mSink, mExtendedSink) // the two sink members are the ones named to the cycle collector
+
+nsExpatDriver::nsExpatDriver() // starts with no Expat parser, all flags cleared and a success state
+ : mExpatParser(nullptr), // created in WillBuildModel()
+ mInCData(false),
+ mInInternalSubset(false),
+ mInExternalDTD(false),
+ mMadeFinalCallToExpat(false),
+ mIsFinalChunk(false),
+ mTagDepth(0),
+ mInternalState(NS_OK),
+ mExpatBuffered(0), // char16_t units handed to Expat but not yet consumed (maintained by ConsumeToken())
+ mCatalogData(nullptr),
+ mInnerWindowID(0)
+{
+}
+
+// Frees the Expat parser, if one was ever created.
+nsExpatDriver::~nsExpatDriver()
+{
+  if (!mExpatParser) {
+    return;
+  }
+
+  XML_ParserFree(mExpatParser);
+}
+
+// Start-tag handler. Determines the full length of the attribute array
+// (specified plus defaulted attributes), enforces the maximum nesting depth
+// and passes the element on to the content sink.
+void
+nsExpatDriver::HandleStartElement(const char16_t *aValue,
+                                  const char16_t **aAtts)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  // XML_GetSpecifiedAttributeCount only accounts for the specified attrs
+  // (it returns twice their number, actually), so keep walking the
+  // null-terminated array to pick up any default attrs ourselves.
+  uint32_t attrArrayLength = XML_GetSpecifiedAttributeCount(mExpatParser);
+  while (aAtts[attrArrayLength]) {
+    attrArrayLength += 2;
+  }
+
+  if (!mSink) {
+    return;
+  }
+
+  // Sanity check: Make sure the limit fits in the type the tag depth tracker
+  // was declared as.
+  static_assert(sMaxXMLDepth <= mozilla::MaxValue<decltype(nsExpatDriver::mTagDepth)>::value,
+                "Maximum XML parsing depth type mismatch: value too large.");
+
+  ++mTagDepth;
+  if (mTagDepth >= sMaxXMLDepth) {
+    // Too deep: stop without telling the sink about this element.
+    MaybeStopParser(NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP);
+    return;
+  }
+
+  nsresult sinkResult =
+    mSink->HandleStartElement(aValue, aAtts, attrArrayLength,
+                              XML_GetCurrentLineNumber(mExpatParser));
+  MaybeStopParser(sinkResult);
+}
+
+// End-tag handler. Notifies the sink and decrements the tag depth, unless
+// there is no sink or parsing has already been stopped. Always returns NS_OK;
+// the sink's result is routed through MaybeStopParser().
+nsresult
+nsExpatDriver::HandleEndElement(const char16_t *aValue)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+  NS_ASSERTION(mInternalState != NS_ERROR_HTMLPARSER_BLOCK,
+               "Shouldn't block from HandleStartElement.");
+
+  if (!mSink || mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return NS_OK;
+  }
+
+  nsresult sinkResult = mSink->HandleEndElement(aValue);
+  --mTagDepth;
+  MaybeStopParser(sinkResult);
+
+  return NS_OK;
+}
+
+// Character-data handler. Inside a CDATA section the text is accumulated in
+// mCDataText; otherwise it goes straight to the sink. Always returns NS_OK;
+// failures are routed through MaybeStopParser().
+nsresult
+nsExpatDriver::HandleCharacterData(const char16_t *aValue,
+                                   const uint32_t aLength)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  if (mInCData) {
+    const bool appended = mCDataText.Append(aValue, aLength, fallible);
+    if (!appended) {
+      MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
+    }
+    return NS_OK;
+  }
+
+  if (mSink) {
+    MaybeStopParser(mSink->HandleCharacterData(aValue, aLength));
+  }
+
+  return NS_OK;
+}
+
+// Comment handler. Comments in an external DTD are dropped, comments in the
+// internal subset are re-serialized into mInternalSubset, and all others are
+// passed to the sink. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleComment(const char16_t *aValue)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  if (mInExternalDTD) {
+    // Ignore comments from external DTDs
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.AppendLiteral("<!--");
+    mInternalSubset.Append(aValue);
+    mInternalSubset.AppendLiteral("-->");
+    return NS_OK;
+  }
+
+  if (mSink) {
+    MaybeStopParser(mSink->HandleComment(aValue));
+  }
+
+  return NS_OK;
+}
+
+// Processing-instruction handler. PIs in an external DTD are dropped, PIs in
+// the internal subset are re-serialized into mInternalSubset, and all others
+// are passed to the sink. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleProcessingInstruction(const char16_t *aTarget,
+                                           const char16_t *aData)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  if (mInExternalDTD) {
+    // Ignore PIs in external DTDs for now. Eventually we want to
+    // pass them to the sink in a way that doesn't put them in the DOM
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.AppendLiteral("<?");
+    mInternalSubset.Append(aTarget);
+    mInternalSubset.Append(' ');
+    mInternalSubset.Append(aData);
+    mInternalSubset.AppendLiteral("?>");
+    return NS_OK;
+  }
+
+  if (mSink) {
+    MaybeStopParser(mSink->HandleProcessingInstruction(aTarget, aData));
+  }
+
+  return NS_OK;
+}
+
+// XML-declaration handler: forwards version, encoding and standalone flag to
+// the sink, if there is one. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleXMLDeclaration(const char16_t *aVersion,
+                                    const char16_t *aEncoding,
+                                    int32_t aStandalone)
+{
+  if (!mSink) {
+    return NS_OK;
+  }
+
+  MaybeStopParser(mSink->HandleXMLDeclaration(aVersion, aEncoding,
+                                              aStandalone));
+  return NS_OK;
+}
+
+// Default handler for data no other handler receives. Data from an external
+// DTD is dropped, data in the internal subset is appended to mInternalSubset,
+// and for anything else only the '\n' and '\r' characters are passed to the
+// sink, one at a time. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleDefault(const char16_t *aValue,
+                             const uint32_t aLength)
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  if (mInExternalDTD) {
+    // Ignore newlines in external DTDs
+    return NS_OK;
+  }
+
+  if (mInInternalSubset) {
+    mInternalSubset.Append(aValue, aLength);
+    return NS_OK;
+  }
+
+  if (!mSink) {
+    return NS_OK;
+  }
+
+  // Start from the current state so that nothing is sent once it is a failure.
+  nsresult rv = mInternalState;
+  const char16_t* const end = aValue + aLength;
+  for (const char16_t* cur = aValue; cur != end && NS_SUCCEEDED(rv); ++cur) {
+    if (*cur == '\n' || *cur == '\r') {
+      rv = mSink->HandleCharacterData(cur, 1);
+    }
+  }
+  MaybeStopParser(rv);
+
+  return NS_OK;
+}
+
+nsresult // marks the start of a CDATA section; always returns NS_OK
+nsExpatDriver::HandleStartCdataSection()
+{
+ mInCData = true; // HandleCharacterData() collects text in mCDataText while this is set
+
+ return NS_OK;
+}
+
+// Ends a CDATA section: hands the text accumulated in mCDataText to the sink
+// (when there is one) and then empties the buffer. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleEndCdataSection()
+{
+  NS_ASSERTION(mSink, "content sink not found!");
+
+  mInCData = false;
+
+  if (mSink) {
+    MaybeStopParser(mSink->HandleCDataSection(mCDataText.get(),
+                                              mCDataText.Length()));
+  }
+
+  // The buffer is cleared whether or not a sink received it.
+  mCDataText.Truncate();
+
+  return NS_OK;
+}
+
+// Forwards a namespace declaration start to the extended sink, if any.
+// Always returns NS_OK; the sink's result goes through MaybeStopParser().
+nsresult
+nsExpatDriver::HandleStartNamespaceDecl(const char16_t* aPrefix,
+                                        const char16_t* aUri)
+{
+  if (!mExtendedSink) {
+    return NS_OK;
+  }
+
+  MaybeStopParser(mExtendedSink->HandleStartNamespaceDecl(aPrefix, aUri));
+  return NS_OK;
+}
+
+// Forwards a namespace declaration end to the extended sink, unless there is
+// no extended sink or parsing has already been stopped. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleEndNamespaceDecl(const char16_t* aPrefix)
+{
+  if (!mExtendedSink ||
+      mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return NS_OK;
+  }
+
+  MaybeStopParser(mExtendedSink->HandleEndNamespaceDecl(aPrefix));
+  return NS_OK;
+}
+
+// Forwards a notation declaration to the extended sink, if any. aBase is
+// accepted but not passed on. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleNotationDecl(const char16_t* aNotationName,
+                                  const char16_t* aBase,
+                                  const char16_t* aSysid,
+                                  const char16_t* aPubid)
+{
+  if (!mExtendedSink) {
+    return NS_OK;
+  }
+
+  MaybeStopParser(mExtendedSink->HandleNotationDecl(aNotationName, aSysid,
+                                                    aPubid));
+  return NS_OK;
+}
+
+// Forwards an unparsed entity declaration to the extended sink, if any.
+// aBase is accepted but not passed on. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleUnparsedEntityDecl(const char16_t* aEntityName,
+                                        const char16_t* aBase,
+                                        const char16_t* aSysid,
+                                        const char16_t* aPubid,
+                                        const char16_t* aNotationName)
+{
+  if (!mExtendedSink) {
+    return NS_OK;
+  }
+
+  MaybeStopParser(mExtendedSink->HandleUnparsedEntityDecl(aEntityName,
+                                                          aSysid,
+                                                          aPubid,
+                                                          aNotationName));
+  return NS_OK;
+}
+
+// Start of the DOCTYPE declaration: remembers the doctype name and
+// identifiers, notifies the extended sink (if any) and prepares
+// mInternalSubset. Always returns NS_OK.
+nsresult
+nsExpatDriver::HandleStartDoctypeDecl(const char16_t* aDoctypeName,
+                                      const char16_t* aSysid,
+                                      const char16_t* aPubid,
+                                      bool aHasInternalSubset)
+{
+  mDoctypeName = aDoctypeName;
+  mSystemID = aSysid;
+  mPublicID = aPubid;
+
+  if (mExtendedSink) {
+    MaybeStopParser(mExtendedSink->HandleStartDTD(aDoctypeName, aSysid,
+                                                  aPubid));
+  }
+
+  if (!aHasInternalSubset) {
+    // Distinguish missing internal subset from an empty one
+    mInternalSubset.SetIsVoid(true);
+    return NS_OK;
+  }
+
+  // Consuming a huge internal subset translates to numerous allocations.
+  // To avoid too many of them, reserve 1K up front ( just a guesstimate! ).
+  mInInternalSubset = true;
+  mInternalSubset.SetCapacity(1024);
+
+  return NS_OK;
+}
+
+nsresult /*
+ * End of the DOCTYPE declaration: leaves internal-subset mode, hands the
+ * collected doctype data to the content sink (if any) and releases the
+ * internal-subset buffer. Always returns NS_OK; the sink's result is routed
+ * through MaybeStopParser().
+ */
+nsExpatDriver::HandleEndDoctypeDecl()
+{
+ NS_ASSERTION(mSink, "content sink not found!");
+
+ mInInternalSubset = false;
+
+ if (mSink) {
+ // let the sink know any additional knowledge that we have about the
+ // document (currently, from bug 124570, we only expect to pass additional
+ // agent sheets needed to layout the XML vocabulary of the document)
+ nsCOMPtr<nsIURI> data; // stays null: the only code that assigned it is compiled out below
+#if 0
+ if (mCatalogData && mCatalogData->mAgentSheet) {
+ NS_NewURI(getter_AddRefs(data), mCatalogData->mAgentSheet);
+ }
+#endif
+
+ // The unused support for "catalog style sheets" was removed. It doesn't
+ // look like we'll ever fix bug 98413 either.
+ MOZ_ASSERT(!mCatalogData || !mCatalogData->mAgentSheet,
+ "Need to add back support for catalog style sheets");
+
+ // Note: mInternalSubset already doesn't include the [] around it.
+ nsresult rv = mSink->HandleDoctypeDecl(mInternalSubset, mDoctypeName,
+ mSystemID, mPublicID, data);
+ MaybeStopParser(rv);
+ }
+
+ mInternalSubset.SetCapacity(0); // done whether or not a sink received the subset
+
+ return NS_OK;
+}
+
+// ReadSegments callback used while loading an external DTD: feeds one segment
+// to the Expat parser passed in aClosure. Reports the whole segment as
+// written on success, and nothing (with NS_ERROR_FAILURE) otherwise.
+static nsresult
+ExternalDTDStreamReaderFunc(nsIUnicharInputStream* aIn,
+                            void* aClosure,
+                            const char16_t* aFromSegment,
+                            uint32_t aToOffset,
+                            uint32_t aCount,
+                            uint32_t *aWriteCount)
+{
+  // Expat takes a byte buffer and a byte count (non-final call).
+  XML_Status status = XML_Parse((XML_Parser)aClosure,
+                                (const char *)aFromSegment,
+                                aCount * sizeof(char16_t), 0);
+  if (status != XML_STATUS_OK) {
+    *aWriteCount = 0;
+    return NS_ERROR_FAILURE;
+  }
+
+  *aWriteCount = aCount;
+  return NS_OK;
+}
+
+// External entity reference handler: opens the referenced external DTD and
+// parses it with a child Expat parser. Returns 1 when the DTD cannot be
+// opened or no child parser can be set up; otherwise returns the result of
+// the final XML_Parse call on the child parser.
+int
+nsExpatDriver::HandleExternalEntityRef(const char16_t *openEntityNames,
+                                       const char16_t *base,
+                                       const char16_t *systemId,
+                                       const char16_t *publicId)
+{
+  // Mirror the reference as "%name;" in the internal subset text.
+  if (mInInternalSubset && !mInExternalDTD && openEntityNames) {
+    mInternalSubset.Append(char16_t('%'));
+    mInternalSubset.Append(nsDependentString(openEntityNames));
+    mInternalSubset.Append(char16_t(';'));
+  }
+
+  // Load the external entity into a buffer.
+  nsCOMPtr<nsIInputStream> in;
+  nsAutoString absURL;
+  nsresult rv = OpenInputStreamFromExternalDTD(publicId, systemId, base,
+                                               getter_AddRefs(in), absURL);
+  if (NS_FAILED(rv)) {
+#ifdef DEBUG
+    nsCString message("Failed to open external DTD: publicId \"");
+    AppendUTF16toUTF8(publicId, message);
+    message += "\" systemId \"";
+    AppendUTF16toUTF8(systemId, message);
+    message += "\" base \"";
+    AppendUTF16toUTF8(base, message);
+    message += "\" URL \"";
+    AppendUTF16toUTF8(absURL, message);
+    message += "\"";
+    NS_WARNING(message.get());
+#endif
+    return 1;
+  }
+
+  nsCOMPtr<nsIUnicharInputStream> uniIn;
+  rv = NS_NewUnicharInputStream(in, getter_AddRefs(uniIn));
+  NS_ENSURE_SUCCESS(rv, 1);
+
+  if (!uniIn) {
+    return 1;
+  }
+
+  XML_Parser entParser = XML_ExternalEntityParserCreate(mExpatParser, 0,
+                                                        kUTF16);
+  if (!entParser) {
+    return 1;
+  }
+
+  XML_SetBase(entParser, absURL.get());
+
+  // Handlers consult this flag to ignore content coming from the DTD.
+  mInExternalDTD = true;
+
+  // Pump the stream through the child parser until it is drained or fails.
+  uint32_t totalRead;
+  do {
+    rv = uniIn->ReadSegments(ExternalDTDStreamReaderFunc, entParser,
+                             uint32_t(-1), &totalRead);
+  } while (NS_SUCCEEDED(rv) && totalRead > 0);
+
+  // Final call: no more data.
+  int result = XML_Parse(entParser, nullptr, 0, 1);
+
+  mInExternalDTD = false;
+
+  XML_ParserFree(entParser);
+
+  return result;
+}
+
+nsresult /*
+ * Resolves aURLStr against aBaseURL and opens an input stream for the
+ * external DTD it names.
+ *
+ * A URI whose chain carries URI_IS_UI_RESOURCE is opened as is. Any other URI
+ * is only opened after being remapped to a local resource://gre/res/dtd/
+ * file, which is attempted only when aFPIStr is non-null; if no local URI
+ * results, NS_ERROR_NOT_IMPLEMENTED is returned.
+ *
+ * aAbsURL receives the spec of the URI the channel was created for (set
+ * before the channel is opened). *aStream receives the opened stream.
+ */
+nsExpatDriver::OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
+ const char16_t* aURLStr,
+ const char16_t* aBaseURL,
+ nsIInputStream** aStream,
+ nsAString& aAbsURL)
+{
+ nsCOMPtr<nsIURI> baseURI;
+ nsresult rv = NS_NewURI(getter_AddRefs(baseURI),
+ NS_ConvertUTF16toUTF8(aBaseURL));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ nsCOMPtr<nsIURI> uri;
+ rv = NS_NewURI(getter_AddRefs(uri), NS_ConvertUTF16toUTF8(aURLStr), nullptr,
+ baseURI);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // make sure the URI is allowed to be loaded in sync
+ bool isUIResource = false;
+ rv = NS_URIChainHasFlags(uri, nsIProtocolHandler::URI_IS_UI_RESOURCE,
+ &isUIResource);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ nsCOMPtr<nsIURI> localURI;
+ if (!isUIResource) {
+ // Check to see if we can map the DTD to a known local DTD, or if a DTD
+ // file of the same name exists in the special DTD directory
+ if (aFPIStr) {
+ // see if the Formal Public Identifier (FPI) maps to a catalog entry
+ mCatalogData = LookupCatalogData(aFPIStr); // may be nullptr; GetLocalDTDURI then falls back to the DTD's file name
+ GetLocalDTDURI(mCatalogData, uri, getter_AddRefs(localURI));
+ }
+ if (!localURI) {
+ return NS_ERROR_NOT_IMPLEMENTED; // no local replacement, so the DTD is not loaded
+ }
+ }
+
+ nsCOMPtr<nsIChannel> channel;
+ if (localURI) {
+ localURI.swap(uri); // from here on |uri| is the local replacement
+ rv = NS_NewChannel(getter_AddRefs(channel),
+ uri,
+ nsContentUtils::GetSystemPrincipal(),
+ nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL,
+ nsIContentPolicy::TYPE_DTD);
+ }
+ else {
+ NS_ASSERTION(mSink == nsCOMPtr<nsIExpatSink>(do_QueryInterface(mOriginalSink)),
+ "In nsExpatDriver::OpenInputStreamFromExternalDTD: "
+ "mOriginalSink not the same object as mSink?");
+ nsCOMPtr<nsIPrincipal> loadingPrincipal;
+ if (mOriginalSink) {
+ nsCOMPtr<nsIDocument> doc;
+ doc = do_QueryInterface(mOriginalSink->GetTarget());
+ if (doc) {
+ loadingPrincipal = doc->NodePrincipal();
+ }
+ }
+ if (!loadingPrincipal) {
+ loadingPrincipal = nsNullPrincipal::Create(); // no document principal available: fall back to a null principal
+ }
+ rv = NS_NewChannel(getter_AddRefs(channel),
+ uri,
+ loadingPrincipal,
+ nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_INHERITS |
+ nsILoadInfo::SEC_ALLOW_CHROME,
+ nsIContentPolicy::TYPE_DTD);
+ }
+ NS_ENSURE_SUCCESS(rv, rv); // covers whichever NS_NewChannel call ran above
+
+ nsAutoCString absURL;
+ rv = uri->GetSpec(absURL);
+ NS_ENSURE_SUCCESS(rv, rv);
+ CopyUTF8toUTF16(absURL, aAbsURL);
+
+ channel->SetContentType(NS_LITERAL_CSTRING("application/xml"));
+ return channel->Open2(aStream);
+}
+
+// Formats the localized "XMLParsingError" message with the given
+// description, source URL, line and column, and stores it in aErrorString.
+// aErrorString is emptied first, so it is empty on any failure.
+static nsresult
+CreateErrorText(const char16_t* aDescription,
+                const char16_t* aSourceURL,
+                const uint32_t aLineNumber,
+                const uint32_t aColNumber,
+                nsString& aErrorString)
+{
+  aErrorString.Truncate();
+
+  nsAutoString format;
+  nsresult rv =
+    nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES,
+                                               "XMLParsingError", format);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // XML Parsing Error: %1$S\nLocation: %2$S\nLine Number %3$u, Column %4$u:
+  char16_t *formatted = nsTextFormatter::smprintf(format.get(), aDescription,
+                                                  aSourceURL, aLineNumber,
+                                                  aColNumber);
+  if (formatted) {
+    aErrorString.Assign(formatted);
+    nsTextFormatter::smprintf_free(formatted);
+    return NS_OK;
+  }
+
+  return NS_ERROR_OUT_OF_MEMORY;
+}
+
+// Appends a newline and a pointer line of the form "-----^" to aSourceString,
+// with the '^' placed under column aColNumber (one-based) of aSourceLine.
+// Tabs in the source line are expanded to the next multiple of 8 columns so
+// that the pointer lines up with the text above it.
+//
+// aSourceLine must be null-terminated. It is never read past its terminator:
+// if aColNumber points beyond the end of the line (or aSourceLine is null),
+// the remaining positions are filled with plain '-' characters.
+// Always returns NS_OK.
+static nsresult
+AppendErrorPointer(const int32_t aColNumber,
+                   const char16_t *aSourceLine,
+                   nsString& aSourceString)
+{
+  aSourceString.Append(char16_t('\n'));
+
+  // Last character will be '^'.
+  int32_t last = aColNumber - 1;
+  uint32_t minuses = 0;
+  // Becomes true once the terminator has been seen; after that aSourceLine
+  // must not be indexed any more.
+  bool lineEnded = !aSourceLine;
+  for (int32_t i = 0; i < last; ++i) {
+    if (!lineEnded && aSourceLine[i] == char16_t('\0')) {
+      lineEnded = true;
+    }
+
+    if (!lineEnded && aSourceLine[i] == '\t') {
+      // Since this uses |white-space: pre;| a tab stop equals 8 spaces.
+      uint32_t add = 8 - (minuses % 8);
+      aSourceString.AppendASCII("--------", add);
+      minuses += add;
+    }
+    else {
+      aSourceString.Append(char16_t('-'));
+      ++minuses;
+    }
+  }
+  aSourceString.Append(char16_t('^'));
+
+  return NS_OK;
+}
+
+nsresult /*
+ * Builds the localized message for the current Expat error, reports it to
+ * the content sink and, unless that is suppressed, logs it to the console
+ * service.
+ *
+ * Returns NS_ERROR_HTMLPARSER_STOPPARSING, or NS_ERROR_OUT_OF_MEMORY when the
+ * "Expected" text for a tag mismatch cannot be formatted.
+ */
+nsExpatDriver::HandleError()
+{
+ int32_t code = XML_GetErrorCode(mExpatParser);
+ NS_ASSERTION(code > XML_ERROR_NONE, "unexpected XML error code");
+
+ // Map Expat error code to an error string
+ // XXX Deal with error returns.
+ nsAutoString description;
+ nsParserMsgUtils::GetLocalizedStringByID(XMLPARSER_PROPERTIES, code,
+ description);
+
+ if (code == XML_ERROR_TAG_MISMATCH) {
+ /**
+ * Expat can send the following:
+ * localName
+ * namespaceURI<separator>localName
+ * namespaceURI<separator>localName<separator>prefix
+ *
+ * and we use 0xFFFF for the <separator>.
+ *
+ */
+ const char16_t *mismatch = MOZ_XML_GetMismatchedTag(mExpatParser);
+ const char16_t *uriEnd = nullptr; // first separator, if any
+ const char16_t *nameEnd = nullptr; // last later separator, if any
+ const char16_t *pos;
+ for (pos = mismatch; *pos; ++pos) {
+ if (*pos == kExpatSeparatorChar) {
+ if (uriEnd) {
+ nameEnd = pos;
+ }
+ else {
+ uriEnd = pos;
+ }
+ }
+ }
+
+ nsAutoString tagName; // rebuilt as "prefix:localName" or just "localName"
+ if (uriEnd && nameEnd) {
+ // We have a prefix.
+ tagName.Append(nameEnd + 1, pos - nameEnd - 1);
+ tagName.Append(char16_t(':'));
+ }
+ const char16_t *nameStart = uriEnd ? uriEnd + 1 : mismatch;
+ tagName.Append(nameStart, (nameEnd ? nameEnd : pos) - nameStart);
+
+ nsAutoString msg;
+ nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES,
+ "Expected", msg);
+
+ // . Expected: </%S>.
+ char16_t *message = nsTextFormatter::smprintf(msg.get(), tagName.get());
+ if (!message) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ description.Append(message);
+
+ nsTextFormatter::smprintf_free(message);
+ }
+
+ // Adjust the column number so that it is one based rather than zero based.
+ uint32_t colNumber = XML_GetCurrentColumnNumber(mExpatParser) + 1;
+ uint32_t lineNumber = XML_GetCurrentLineNumber(mExpatParser);
+
+ nsAutoString errorText;
+ CreateErrorText(description.get(), XML_GetBase(mExpatParser), lineNumber,
+ colNumber, errorText); // result ignored; errorText is left empty if this fails
+
+ NS_ASSERTION(mSink, "no sink?");
+
+ nsAutoString sourceText(mLastLine); // the offending line, followed by the "---^" pointer line appended below
+ AppendErrorPointer(colNumber, mLastLine.get(), sourceText);
+
+ // Try to create and initialize the script error.
+ nsCOMPtr<nsIScriptError> serr(do_CreateInstance(NS_SCRIPTERROR_CONTRACTID));
+ nsresult rv = NS_ERROR_FAILURE;
+ if (serr) {
+ rv = serr->InitWithWindowID(errorText,
+ mURISpec,
+ mLastLine,
+ lineNumber, colNumber,
+ nsIScriptError::errorFlag, "malformed-xml",
+ mInnerWindowID);
+ }
+
+ // If it didn't initialize, we can't do any logging.
+ bool shouldReportError = NS_SUCCEEDED(rv);
+
+ if (mSink && shouldReportError) {
+ rv = mSink->ReportError(errorText.get(),
+ sourceText.get(),
+ serr,
+ &shouldReportError); // the sink may clear shouldReportError
+ if (NS_FAILED(rv)) {
+ shouldReportError = true; // the sink failed, so still log to the console
+ }
+ }
+
+ if (mOriginalSink) {
+ nsCOMPtr<nsIDocument> doc = do_QueryInterface(mOriginalSink->GetTarget());
+ if (doc && doc->SuppressParserErrorConsoleMessages()) {
+ shouldReportError = false;
+ }
+ }
+
+ if (shouldReportError) {
+ nsCOMPtr<nsIConsoleService> cs
+ (do_GetService(NS_CONSOLESERVICE_CONTRACTID));
+ if (cs) {
+ cs->LogMessage(serr);
+ }
+ }
+
+ return NS_ERROR_HTMLPARSER_STOPPARSING;
+}
+
+void /*
+ * Runs Expat on aBuffer (aLength char16_t units), or resumes a suspended
+ * parse when the driver is blocked or interrupted, and stores in *aConsumed
+ * the number of char16_t units by which Expat's byte index advanced.
+ *
+ * Expat is only called when a parser exists and mInternalState is NS_OK or a
+ * blocked/interrupted state; otherwise *aConsumed is set to 0. An Expat error
+ * sets mInternalState to NS_ERROR_HTMLPARSER_STOPPARSING.
+ */
+nsExpatDriver::ParseBuffer(const char16_t *aBuffer,
+ uint32_t aLength,
+ bool aIsFinal,
+ uint32_t *aConsumed)
+{
+ NS_ASSERTION((aBuffer && aLength != 0) || (!aBuffer && aLength == 0), "?");
+ NS_ASSERTION(mInternalState != NS_OK || aIsFinal || aBuffer,
+ "Useless call, we won't call Expat");
+ NS_PRECONDITION(!BlockedOrInterrupted() || !aBuffer,
+ "Non-null buffer when resuming");
+ NS_PRECONDITION(XML_GetCurrentByteIndex(mExpatParser) % sizeof(char16_t) == 0,
+ "Consumed part of a char16_t?");
+
+ if (mExpatParser && (mInternalState == NS_OK || BlockedOrInterrupted())) {
+ int32_t parserBytesBefore = XML_GetCurrentByteIndex(mExpatParser);
+ NS_ASSERTION(parserBytesBefore >= 0, "Unexpected value");
+
+ XML_Status status;
+ if (BlockedOrInterrupted()) {
+ mInternalState = NS_OK; // Resume in case we're blocked.
+ status = XML_ResumeParser(mExpatParser);
+ }
+ else {
+ status = XML_Parse(mExpatParser,
+ reinterpret_cast<const char*>(aBuffer),
+ aLength * sizeof(char16_t), aIsFinal); // Expat takes a byte pointer and a byte count
+ }
+
+ int32_t parserBytesConsumed = XML_GetCurrentByteIndex(mExpatParser); // byte index after the call, not a delta
+
+ NS_ASSERTION(parserBytesConsumed >= 0, "Unexpected value");
+ NS_ASSERTION(parserBytesConsumed >= parserBytesBefore,
+ "How'd this happen?");
+ NS_ASSERTION(parserBytesConsumed % sizeof(char16_t) == 0,
+ "Consumed part of a char16_t?");
+
+ // Consumed something.
+ *aConsumed = (parserBytesConsumed - parserBytesBefore) / sizeof(char16_t);
+ NS_ASSERTION(*aConsumed <= aLength + mExpatBuffered,
+ "Too many bytes consumed?");
+
+ NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(),
+ "Inconsistent expat suspension state.");
+
+ if (status == XML_STATUS_ERROR) {
+ mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
+ }
+ }
+ else {
+ *aConsumed = 0;
+ }
+}
+
+NS_IMETHODIMP /*
+ * Feeds Expat the scanner data it has not seen yet, one scanner buffer per
+ * loop iteration, resuming a blocked or interrupted parse first. While doing
+ * so it keeps mLastLine (the text of the line Expat is currently on) up to
+ * date for error reporting and tracks in mExpatBuffered how much data Expat
+ * holds without having consumed it.
+ *
+ * Returns mInternalState when the parser blocks, is interrupted or fails
+ * (HandleError() is called first for Expat errors), NS_ERROR_OUT_OF_MEMORY
+ * when mLastLine cannot be updated, and otherwise kEOF once the loop ends
+ * with a success state. aFlushTokens is not used.
+ */
+nsExpatDriver::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
+{
+ // We keep the scanner pointing to the position where Expat will start
+ // parsing.
+ nsScannerIterator currentExpatPosition;
+ aScanner.CurrentPosition(currentExpatPosition);
+
+ // This is the start of the first buffer that we need to pass to Expat.
+ nsScannerIterator start = currentExpatPosition;
+ start.advance(mExpatBuffered); // skip what Expat already holds in its own buffer
+
+ // This is the end of the last buffer (at this point, more data could come in
+ // later).
+ nsScannerIterator end;
+ aScanner.EndReading(end);
+
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Remaining in expat's buffer: %i, remaining in scanner: %i.",
+ mExpatBuffered, Distance(start, end)));
+
+ // We want to call Expat if we have more buffers, or if we know there won't
+ // be more buffers (and so we want to flush the remaining data), or if we're
+ // currently blocked and there's data in Expat's buffer.
+ while (start != end || (mIsFinalChunk && !mMadeFinalCallToExpat) ||
+ (BlockedOrInterrupted() && mExpatBuffered > 0)) {
+ bool noMoreBuffers = start == end && mIsFinalChunk;
+ bool blocked = BlockedOrInterrupted();
+
+ const char16_t *buffer;
+ uint32_t length;
+ if (blocked || noMoreBuffers) {
+ // If we're blocked we just resume Expat so we don't need a buffer, if
+ // there aren't any more buffers we pass a null buffer to Expat.
+ buffer = nullptr;
+ length = 0;
+
+ if (blocked) {
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Resuming Expat, will parse data remaining in Expat's "
+ "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
+ NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+ mExpatBuffered).get()));
+ }
+ else {
+ NS_ASSERTION(mExpatBuffered == Distance(currentExpatPosition, end),
+ "Didn't pass all the data to Expat?");
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Last call to Expat, will parse data remaining in Expat's "
+ "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
+ NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+ mExpatBuffered).get()));
+ }
+ }
+ else {
+ buffer = start.get();
+ length = uint32_t(start.size_forward());
+
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Calling Expat, will parse data remaining in Expat's buffer and "
+ "new data.\nContent of Expat's buffer:\n-----\n%s\n-----\nNew "
+ "data:\n-----\n%s\n-----\n",
+ NS_ConvertUTF16toUTF8(currentExpatPosition.get(),
+ mExpatBuffered).get(),
+ NS_ConvertUTF16toUTF8(start.get(), length).get()));
+ }
+
+ uint32_t consumed;
+ ParseBuffer(buffer, length, noMoreBuffers, &consumed); // always sets |consumed|
+ if (consumed > 0) {
+ nsScannerIterator oldExpatPosition = currentExpatPosition;
+ currentExpatPosition.advance(consumed);
+
+ // We consumed some data, we want to store the last line of data that
+ // was consumed in case we run into an error (to show the line in which
+ // the error occurred).
+
+ // The length of the last line that Expat has parsed.
+ XML_Size lastLineLength = XML_GetCurrentColumnNumber(mExpatParser);
+
+ if (lastLineLength <= consumed) {
+ // The length of the last line was less than what expat consumed, so
+ // there was at least one line break in the consumed data. Store the
+ // last line until the point where we stopped parsing.
+ nsScannerIterator startLastLine = currentExpatPosition;
+ startLastLine.advance(-((ptrdiff_t)lastLineLength));
+ if (!CopyUnicodeTo(startLastLine, currentExpatPosition, mLastLine)) {
+ return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
+ }
+ }
+ else {
+ // There was no line break in the consumed data, append the consumed
+ // data.
+ if (!AppendUnicodeTo(oldExpatPosition,
+ currentExpatPosition,
+ mLastLine)) {
+ return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
+ }
+ }
+ }
+
+ mExpatBuffered += length - consumed; // data passed in but not consumed stays buffered inside Expat
+
+ if (BlockedOrInterrupted()) {
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Blocked or interrupted parser (probably for loading linked "
+ "stylesheets or scripts)."));
+
+ aScanner.SetPosition(currentExpatPosition, true);
+ aScanner.Mark();
+
+ return mInternalState;
+ }
+
+ if (noMoreBuffers && mExpatBuffered == 0) {
+ mMadeFinalCallToExpat = true;
+ }
+
+ if (NS_FAILED(mInternalState)) {
+ if (XML_GetErrorCode(mExpatParser) != XML_ERROR_NONE) {
+ NS_ASSERTION(mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING,
+ "Unexpected error");
+
+ // Look for the next newline after the last one we consumed
+ nsScannerIterator lastLine = currentExpatPosition;
+ while (lastLine != end) {
+ length = uint32_t(lastLine.size_forward());
+ uint32_t endOffset = 0;
+ const char16_t *buffer = lastLine.get(); // shadows the outer |buffer|
+ while (endOffset < length && buffer[endOffset] != '\n' &&
+ buffer[endOffset] != '\r') {
+ ++endOffset;
+ }
+ mLastLine.Append(Substring(buffer, buffer + endOffset)); // complete the error line with the not-yet-consumed text
+ if (endOffset < length) {
+ // We found a newline.
+ break;
+ }
+
+ lastLine.advance(length);
+ }
+
+ HandleError();
+ }
+
+ return mInternalState;
+ }
+
+ // Either we have more buffers, or we were blocked (and we'll flush in the
+ // next iteration), or we should have emptied Expat's buffer.
+ NS_ASSERTION(!noMoreBuffers || blocked ||
+ (mExpatBuffered == 0 && currentExpatPosition == end),
+ "Unreachable data left in Expat's buffer");
+
+ start.advance(length);
+
+ // It's possible for start to have passed end if we received more data
+ // (e.g. if we spun the event loop in an inline script). Reload end now
+ // to compensate.
+ aScanner.EndReading(end);
+ }
+
+ aScanner.SetPosition(currentExpatPosition, true);
+ aScanner.Mark();
+
+ MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
+ ("Remaining in expat's buffer: %i, remaining in scanner: %i.",
+ mExpatBuffered, Distance(currentExpatPosition, end)));
+
+ return NS_SUCCEEDED(mInternalState) ? kEOF : NS_OK; // NOTE(review): a failed state yields NS_OK here; this is only reachable when the loop body did not return — confirm intent
+}
+
+NS_IMETHODIMP /*
+ * Prepares the driver for a parse: binds the content sink, creates the Expat
+ * parser, records the document URI and inner window ID used for error
+ * reporting, and registers the Driver_* callbacks with |this| as user data.
+ *
+ * Returns NS_ERROR_UNEXPECTED (also stored in mInternalState) when aSink is
+ * not an nsIExpatSink, NS_ERROR_FAILURE when the Expat parser cannot be
+ * created, and mInternalState otherwise. aTokenizer is not used.
+ */
+nsExpatDriver::WillBuildModel(const CParserContext& aParserContext,
+ nsITokenizer* aTokenizer,
+ nsIContentSink* aSink)
+{
+ mSink = do_QueryInterface(aSink);
+ if (!mSink) {
+ NS_ERROR("nsExpatDriver didn't get an nsIExpatSink");
+ // Make sure future calls to us bail out as needed
+ mInternalState = NS_ERROR_UNEXPECTED;
+ return mInternalState;
+ }
+
+ mOriginalSink = aSink;
+
+ static const XML_Memory_Handling_Suite memsuite = // Expat allocates through the PR_* allocator functions
+ {
+ (void *(*)(size_t))PR_Malloc,
+ (void *(*)(void *, size_t))PR_Realloc,
+ PR_Free
+ };
+
+ static const char16_t kExpatSeparator[] = { kExpatSeparatorChar, '\0' };
+
+ mExpatParser = XML_ParserCreate_MM(kUTF16, &memsuite, kExpatSeparator);
+ NS_ENSURE_TRUE(mExpatParser, NS_ERROR_FAILURE);
+
+ XML_SetReturnNSTriplet(mExpatParser, XML_TRUE);
+
+#ifdef XML_DTD
+ XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+#endif
+
+ mURISpec = aParserContext.mScanner->GetFilename();
+
+ XML_SetBase(mExpatParser, mURISpec.get());
+
+ nsCOMPtr<nsIDocument> doc = do_QueryInterface(mOriginalSink->GetTarget());
+ if (doc) {
+ nsCOMPtr<nsPIDOMWindowOuter> win = doc->GetWindow();
+ nsCOMPtr<nsPIDOMWindowInner> inner;
+ if (win) {
+ inner = win->GetCurrentInnerWindow();
+ } else {
+ bool aHasHadScriptHandlingObject; // required out-parameter; its value is not used here
+ nsIScriptGlobalObject *global =
+ doc->GetScriptHandlingObject(aHasHadScriptHandlingObject);
+ if (global) {
+ inner = do_QueryInterface(global);
+ }
+ }
+ if (inner) {
+ mInnerWindowID = inner->WindowID(); // passed to InitWithWindowID() by HandleError()
+ }
+ }
+
+ // Set up the callbacks
+ XML_SetXmlDeclHandler(mExpatParser, Driver_HandleXMLDeclaration);
+ XML_SetElementHandler(mExpatParser, Driver_HandleStartElement,
+ Driver_HandleEndElement);
+ XML_SetCharacterDataHandler(mExpatParser, Driver_HandleCharacterData);
+ XML_SetProcessingInstructionHandler(mExpatParser,
+ Driver_HandleProcessingInstruction);
+ XML_SetDefaultHandlerExpand(mExpatParser, Driver_HandleDefault);
+ XML_SetExternalEntityRefHandler(mExpatParser,
+ (XML_ExternalEntityRefHandler)
+ Driver_HandleExternalEntityRef);
+ XML_SetExternalEntityRefHandlerArg(mExpatParser, this); // this handler gets the driver through its own argument
+ XML_SetCommentHandler(mExpatParser, Driver_HandleComment);
+ XML_SetCdataSectionHandler(mExpatParser, Driver_HandleStartCdataSection,
+ Driver_HandleEndCdataSection);
+
+ XML_SetParamEntityParsing(mExpatParser,
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); // NOTE(review): second call; presumably replaces the XML_DTD-only setting made above — confirm
+ XML_SetDoctypeDeclHandler(mExpatParser, Driver_HandleStartDoctypeDecl,
+ Driver_HandleEndDoctypeDecl);
+
+ // If the sink is an nsIExtendedExpatSink,
+ // register some additional handlers.
+ mExtendedSink = do_QueryInterface(mSink);
+ if (mExtendedSink) {
+ XML_SetNamespaceDeclHandler(mExpatParser,
+ Driver_HandleStartNamespaceDecl,
+ Driver_HandleEndNamespaceDecl);
+ XML_SetUnparsedEntityDeclHandler(mExpatParser,
+ Driver_HandleUnparsedEntityDecl);
+ XML_SetNotationDeclHandler(mExpatParser,
+ Driver_HandleNotationDecl);
+ }
+
+ // Set up the user data.
+ XML_SetUserData(mExpatParser, this);
+
+ return mInternalState;
+}
+
+NS_IMETHODIMP // does no work of its own; just reports the current parser state
+nsExpatDriver::BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink)
+{
+ return mInternalState;
+}
+
+NS_IMETHODIMP // drops all three sink references; anErrorCode is ignored and NS_OK is always returned
+nsExpatDriver::DidBuildModel(nsresult anErrorCode)
+{
+ mOriginalSink = nullptr;
+ mSink = nullptr;
+ mExtendedSink = nullptr;
+ return NS_OK;
+}
+
+NS_IMETHODIMP // records whether the upcoming data is the final chunk; always returns NS_OK
+nsExpatDriver::WillTokenize(bool aIsFinalChunk)
+{
+ mIsFinalChunk = aIsFinalChunk; // read by ConsumeToken() to decide when to make the final call to Expat
+ return NS_OK;
+}
+
+NS_IMETHODIMP_(void) // stops Expat (if a parser exists) and marks the driver as stopped
+nsExpatDriver::Terminate()
+{
+ // XXX - not sure what happens to the unparsed data.
+ if (mExpatParser) {
+ XML_StopParser(mExpatParser, XML_FALSE); // XML_FALSE: stop for good rather than suspend
+ }
+ mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING; // set even when no parser exists
+}
+
+NS_IMETHODIMP_(int32_t) // always reports the XML parser flag
+nsExpatDriver::GetType()
+{
+ return NS_IPARSER_FLAG_XML;
+}
+
+NS_IMETHODIMP_(nsDTDMode) // always reports full standards mode
+nsExpatDriver::GetMode() const
+{
+ return eDTDMode_full_standards;
+}
+
+/*************************** Unused methods **********************************/
+
+NS_IMETHODIMP_(bool) // listed among this file's "Unused methods": ignores aTag and always answers true
+nsExpatDriver::IsContainer(int32_t aTag) const
+{
+ return true;
+}
+
+NS_IMETHODIMP_(bool) // listed among this file's "Unused methods": ignores both tags and always answers true
+nsExpatDriver::CanContain(int32_t aParent,int32_t aChild) const
+{
+ return true;
+}
+
+// Folds the result of a sink callback into mInternalState and, when that
+// result is a failure, stops or suspends Expat accordingly.
+void
+nsExpatDriver::MaybeStopParser(nsresult aState)
+{
+  if (NS_SUCCEEDED(aState)) {
+    // Only clobber mInternalState with the success code if we didn't block or
+    // interrupt before.
+    if (NS_SUCCEEDED(mInternalState)) {
+      mInternalState = aState;
+    }
+    return;
+  }
+
+  const bool newIsInterrupted = aState == NS_ERROR_HTMLPARSER_INTERRUPTED;
+  const bool newIsBlock = aState == NS_ERROR_HTMLPARSER_BLOCK;
+
+  // A failure replaces a success state and NS_ERROR_HTMLPARSER_INTERRUPTED.
+  // It also replaces NS_ERROR_HTMLPARSER_BLOCK, but not when the new state is
+  // NS_ERROR_HTMLPARSER_INTERRUPTED.
+  const bool mayOverride =
+    NS_SUCCEEDED(mInternalState) ||
+    mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED ||
+    (mInternalState == NS_ERROR_HTMLPARSER_BLOCK && !newIsInterrupted);
+  if (mayOverride) {
+    // Block and interrupt are kept as they are; every other failure is
+    // recorded as a request to stop parsing.
+    mInternalState = (newIsInterrupted || newIsBlock)
+                       ? aState
+                       : NS_ERROR_HTMLPARSER_STOPPARSING;
+  }
+
+  // If we get an error then we need to stop Expat (by calling XML_StopParser
+  // with false as the last argument). If the parser should be blocked or
+  // interrupted we need to pause Expat (by calling XML_StopParser with
+  // true as the last argument).
+  XML_StopParser(mExpatParser, BlockedOrInterrupted());
+}
diff --git a/components/htmlparser/src/nsExpatDriver.h b/components/htmlparser/src/nsExpatDriver.h
new file mode 100644
index 000000000..988409cfe
--- /dev/null
+++ b/components/htmlparser/src/nsExpatDriver.h
@@ -0,0 +1,145 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_EXPAT_DRIVER__
+#define NS_EXPAT_DRIVER__
+
+#include "expat_config.h"
+#include "expat.h"
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIDTD.h"
+#include "nsITokenizer.h"
+#include "nsIInputStream.h"
+#include "nsIParser.h"
+#include "nsCycleCollectionParticipant.h"
+
+class nsIExpatSink;
+class nsIExtendedExpatSink;
+struct nsCatalogData;
+
+// Implements both nsIDTD and nsITokenizer on top of an Expat XML_Parser
+// (mExpatParser). The public Handle* methods receive parsed constructs as
+// char16_t strings; the private section holds the buffering and state
+// bookkeeping.
+class nsExpatDriver : public nsIDTD,
+ public nsITokenizer
+{
+ virtual ~nsExpatDriver();
+
+public:
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_NSIDTD
+ NS_DECL_NSITOKENIZER
+ NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsExpatDriver, nsIDTD)
+
+ nsExpatDriver();
+
+ int HandleExternalEntityRef(const char16_t *aOpenEntityNames,
+ const char16_t *aBase,
+ const char16_t *aSystemId,
+ const char16_t *aPublicId);
+ void HandleStartElement(const char16_t *aName, const char16_t **aAtts);
+ nsresult HandleEndElement(const char16_t *aName);
+ nsresult HandleCharacterData(const char16_t *aCData, const uint32_t aLength);
+ nsresult HandleComment(const char16_t *aName);
+ nsresult HandleProcessingInstruction(const char16_t *aTarget,
+ const char16_t *aData);
+ nsresult HandleXMLDeclaration(const char16_t *aVersion,
+ const char16_t *aEncoding,
+ int32_t aStandalone);
+ nsresult HandleDefault(const char16_t *aData, const uint32_t aLength);
+ nsresult HandleStartCdataSection();
+ nsresult HandleEndCdataSection();
+ nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
+ const char16_t* aSysid,
+ const char16_t* aPubid,
+ bool aHasInternalSubset);
+ nsresult HandleEndDoctypeDecl();
+ nsresult HandleStartNamespaceDecl(const char16_t* aPrefix,
+ const char16_t* aUri);
+ nsresult HandleEndNamespaceDecl(const char16_t* aPrefix);
+ nsresult HandleNotationDecl(const char16_t* aNotationName,
+ const char16_t* aBase,
+ const char16_t* aSysid,
+ const char16_t* aPubid);
+ nsresult HandleUnparsedEntityDecl(const char16_t* aEntityName,
+ const char16_t* aBase,
+ const char16_t* aSysid,
+ const char16_t* aPubid,
+ const char16_t* aNotationName);
+
+private:
+ // Load up an external stream to get external entity information
+ nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
+ const char16_t* aURLStr,
+ const char16_t* aBaseURL,
+ nsIInputStream** aStream,
+ nsAString& aAbsURL);
+
+ /**
+ * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
+ * aLength should be 0. The result of the call will be stored in
+ * mInternalState. Expat will parse as much of the buffer as it can and store
+ * the rest in its internal buffer.
+ *
+ * @param aBuffer the buffer to pass to Expat. May be null.
+ * @param aLength the length of the buffer to pass to Expat (in number of
+ * char16_t's). Must be 0 if aBuffer is null and > 0 if
+ * aBuffer is not null.
+ * @param aIsFinal whether there will definitely not be any more new buffers
+ * passed in to ParseBuffer
+ * @param aConsumed [out] the number of char16_t's that Expat consumed. This
+ * doesn't include the char16_t's that Expat stored in
+ * its buffer but didn't parse yet.
+ */
+ void ParseBuffer(const char16_t *aBuffer, uint32_t aLength, bool aIsFinal,
+ uint32_t *aConsumed);
+ nsresult HandleError();
+
+ // Folds aState into mInternalState; on a failure code it also stops or
+ // pauses Expat (see the definition in nsExpatDriver.cpp).
+ void MaybeStopParser(nsresult aState);
+
+ // True when mInternalState is currently the "block" or "interrupted" code.
+ bool BlockedOrInterrupted()
+ {
+ return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
+ mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
+ }
+
+ XML_Parser mExpatParser;
+ nsString mLastLine;
+ nsString mCDataText;
+ // Various parts of a doctype
+ nsString mDoctypeName;
+ nsString mSystemID;
+ nsString mPublicID;
+ nsString mInternalSubset;
+ bool mInCData;
+ bool mInInternalSubset;
+ bool mInExternalDTD;
+ bool mMadeFinalCallToExpat;
+
+ // Whether we're sure that we won't be getting more buffers to parse from
+ // Necko
+ bool mIsFinalChunk;
+
+ // The depth of nested parsing we are currently at
+ uint16_t mTagDepth;
+
+ nsresult mInternalState;
+
+ // The length of the data in Expat's buffer (in number of char16_t's).
+ uint32_t mExpatBuffered;
+
+ // These sinks all refer to the same conceptual object. mOriginalSink is
+ // identical with the nsIContentSink* passed to WillBuildModel, and exists
+ // only to avoid QI-ing back to nsIContentSink*.
+ nsCOMPtr<nsIContentSink> mOriginalSink;
+ nsCOMPtr<nsIExpatSink> mSink;
+ nsCOMPtr<nsIExtendedExpatSink> mExtendedSink;
+
+ const nsCatalogData* mCatalogData; // weak
+ nsString mURISpec;
+
+ // Used for error reporting.
+ uint64_t mInnerWindowID;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsHTMLEntities.cpp b/components/htmlparser/src/nsHTMLEntities.cpp
new file mode 100644
index 000000000..e8365c21f
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntities.cpp
@@ -0,0 +1,205 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ArrayUtils.h"
+
+#include "nsHTMLEntities.h"
+
+#include "nsString.h"
+#include "nsCRT.h"
+#include "PLDHashTable.h"
+
+using namespace mozilla;
+
+// One row of the static entity table: an entity name paired with the Unicode
+// value it stands for.
+struct EntityNode {
+ const char* mStr; // never owns buffer
+ int32_t mUnicode; // Unicode value of the entity
+};
+
+// Hash table entry used by both lookup tables; it only points at a row of the
+// static entity array.
+struct EntityNodeEntry : public PLDHashEntryHdr
+{
+ const EntityNode* node;
+};
+
+// Match callback of the name-keyed table: aHdr is the stored entry and key is
+// the NUL-terminated entity name being looked up.
+static bool matchNodeString(const PLDHashEntryHdr* aHdr, const void* key)
+{
+  const char* wanted = static_cast<const char*>(key);
+  const EntityNode* stored = static_cast<const EntityNodeEntry*>(aHdr)->node;
+  return 0 == nsCRT::strcmp(stored->mStr, wanted);
+}
+
+// Match callback of the code-point-keyed table: the key pointer carries the
+// Unicode value itself rather than pointing at anything.
+static bool matchNodeUnicode(const PLDHashEntryHdr* aHdr, const void* key)
+{
+  const EntityNode* stored = static_cast<const EntityNodeEntry*>(aHdr)->node;
+  const int32_t wanted = NS_PTR_TO_INT32(key);
+  return wanted == stored->mUnicode;
+}
+
+// Hash callback of the code-point-keyed table: the Unicode value smuggled in
+// the key pointer is used directly as the hash number.
+static PLDHashNumber hashUnicodeValue(const void* key)
+{
+ // key is actually the unicode value
+ return PLDHashNumber(NS_PTR_TO_INT32(key));
+}
+
+
+// Callbacks for the table keyed by entity name (C string).
+static const PLDHashTableOps EntityToUnicodeOps = {
+ PLDHashTable::HashStringKey,
+ matchNodeString,
+ PLDHashTable::MoveEntryStub,
+ PLDHashTable::ClearEntryStub,
+ nullptr,
+};
+
+// Callbacks for the table keyed by Unicode value (stored in the key pointer).
+static const PLDHashTableOps UnicodeToEntityOps = {
+ hashUnicodeValue,
+ matchNodeUnicode,
+ PLDHashTable::MoveEntryStub,
+ PLDHashTable::ClearEntryStub,
+ nullptr,
+};
+
+// The two lookup tables and the reference count that controls their
+// lifetime; see AddRefTable() and ReleaseTable().
+static PLDHashTable* gEntityToUnicode;
+static PLDHashTable* gUnicodeToEntity;
+static nsrefcnt gTableRefCnt = 0;
+
+// Each HTML_ENTITY(name, value) line of nsHTMLEntityList.h expands to one
+// { "name", value } initializer of gEntityArray.
+#define HTML_ENTITY(_name, _value) { #_name, _value },
+static const EntityNode gEntityArray[] = {
+#include "nsHTMLEntityList.h"
+};
+#undef HTML_ENTITY
+
+// Number of rows in gEntityArray.
+#define NS_HTML_ENTITY_COUNT ((int32_t)ArrayLength(gEntityArray))
+
+// Takes one reference on the entity lookup tables, building both tables from
+// gEntityArray when this is the first reference.
+//
+// Returns NS_OK on success. If an entry cannot be added (the fallible Add()
+// returned null) the partially built tables are destroyed, no reference is
+// taken and NS_ERROR_OUT_OF_MEMORY is returned.
+nsresult
+nsHTMLEntities::AddRefTable(void)
+{
+  if (!gTableRefCnt) {
+    gEntityToUnicode = new PLDHashTable(&EntityToUnicodeOps,
+                                        sizeof(EntityNodeEntry),
+                                        NS_HTML_ENTITY_COUNT);
+    gUnicodeToEntity = new PLDHashTable(&UnicodeToEntityOps,
+                                        sizeof(EntityNodeEntry),
+                                        NS_HTML_ENTITY_COUNT);
+    bool filled = true;
+    for (const EntityNode *node = gEntityArray,
+                          *node_end = ArrayEnd(gEntityArray);
+         node < node_end; ++node) {
+
+      // add to Entity->Unicode table
+      auto entry = static_cast<EntityNodeEntry*>
+                     (gEntityToUnicode->Add(node->mStr, fallible));
+      NS_ASSERTION(entry, "Error adding an entry");
+      if (!entry) {
+        // The assertion compiles away in release builds, so the null result
+        // must be handled rather than dereferenced.
+        filled = false;
+        break;
+      }
+      // Prefer earlier entries when we have duplication.
+      if (!entry->node)
+        entry->node = node;
+
+      // add to Unicode->Entity table
+      entry = static_cast<EntityNodeEntry*>
+                (gUnicodeToEntity->Add(NS_INT32_TO_PTR(node->mUnicode),
+                                       fallible));
+      NS_ASSERTION(entry, "Error adding an entry");
+      if (!entry) {
+        filled = false;
+        break;
+      }
+      // Prefer earlier entries when we have duplication.
+      if (!entry->node)
+        entry->node = node;
+    }
+
+    if (!filled) {
+      // Leave the module exactly as it was before the call: no tables and a
+      // zero reference count, so a later AddRefTable() can retry.
+      delete gEntityToUnicode;
+      delete gUnicodeToEntity;
+      gEntityToUnicode = nullptr;
+      gUnicodeToEntity = nullptr;
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+#ifdef DEBUG
+    gUnicodeToEntity->MarkImmutable();
+    gEntityToUnicode->MarkImmutable();
+#endif
+  }
+  ++gTableRefCnt;
+  return NS_OK;
+}
+
+// Drops one reference on the entity lookup tables; both tables are destroyed
+// and their pointers cleared when the count reaches zero.
+void
+nsHTMLEntities::ReleaseTable(void)
+{
+  if (0 == --gTableRefCnt) {
+    delete gEntityToUnicode;
+    delete gUnicodeToEntity;
+    gUnicodeToEntity = nullptr;
+    gEntityToUnicode = nullptr;
+  }
+}
+
+// Looks up the Unicode value of the entity named aEntity (without the leading
+// '&'). Trailing ';' characters are tolerated and stripped before the lookup.
+// Returns -1 when the tables have not been set up, when the name is empty, or
+// when the entity is unknown.
+int32_t
+nsHTMLEntities::EntityToUnicode(const nsCString& aEntity)
+{
+  NS_ASSERTION(gEntityToUnicode, "no lookup table, needs addref");
+  if (!gEntityToUnicode) {
+    return -1;
+  }
+
+  // No entity has an empty name, and Last() must not be called on an empty
+  // string, so answer "not found" right away.
+  if (aEntity.IsEmpty()) {
+    return -1;
+  }
+
+  // This little piece of code exists because entities may or may not have the
+  // terminating ';'. If we see it, strip it off for this test...
+  if (';' == aEntity.Last()) {
+    nsAutoCString temp(aEntity);
+    temp.Truncate(aEntity.Length() - 1);
+    return EntityToUnicode(temp);
+  }
+
+  auto entry =
+    static_cast<EntityNodeEntry*>(gEntityToUnicode->Search(aEntity.get()));
+
+  return entry ? entry->node->mUnicode : -1;
+}
+
+
+// UTF-16 convenience overload: converts aEntity to a narrow string, strips a
+// trailing ';' if present and defers to the nsCString overload.
+int32_t
+nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) {
+  nsAutoCString theEntity;
+  theEntity.AssignWithConversion(aEntity);
+  // Guard the Last() call: the converted name may be empty.
+  if (!theEntity.IsEmpty() && ';' == theEntity.Last()) {
+    theEntity.Truncate(theEntity.Length() - 1);
+  }
+
+  return EntityToUnicode(theEntity);
+}
+
+
+// Looks up the entity name for the Unicode value aUnicode. Returns the name
+// (without '&' or ';'), or nullptr when the tables have not been set up or no
+// entity maps to that value.
+const char*
+nsHTMLEntities::UnicodeToEntity(int32_t aUnicode)
+{
+  NS_ASSERTION(gUnicodeToEntity, "no lookup table, needs addref");
+  // Mirror EntityToUnicode(): the assertion vanishes in release builds, so a
+  // missing table has to be reported instead of dereferenced.
+  if (!gUnicodeToEntity) {
+    return nullptr;
+  }
+
+  auto entry =
+    static_cast<EntityNodeEntry*>
+               (gUnicodeToEntity->Search(NS_INT32_TO_PTR(aUnicode)));
+
+  return entry ? entry->node->mStr : nullptr;
+}
+
+#ifdef DEBUG
+#include <stdio.h>
+
+// Debug-only self test: constructing an instance round-trips every row of
+// gEntityArray through both lookup directions and checks two unknown names.
+// The only instantiation (below the class) is commented out.
+class nsTestEntityTable {
+public:
+ nsTestEntityTable() {
+ int32_t value;
+ nsHTMLEntities::AddRefTable();
+
+ // Make sure we can find everything we are supposed to
+ for (int i = 0; i < NS_HTML_ENTITY_COUNT; ++i) {
+ nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr);
+
+ value = nsHTMLEntities::EntityToUnicode(entity);
+ NS_ASSERTION(value != -1, "can't find entity");
+ NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value");
+
+ // NOTE(review): AddRefTable() keeps the earliest row for a duplicated
+ // code point, so for later duplicates (e.g. AMP after amp) this reverse
+ // check looks like it would fire - confirm before re-enabling the test.
+ entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value));
+ NS_ASSERTION(entity.EqualsASCII(gEntityArray[i].mStr), "bad entity name");
+ }
+
+ // Make sure we don't find things that aren't there
+ value = nsHTMLEntities::EntityToUnicode(nsAutoCString("@"));
+ NS_ASSERTION(value == -1, "found @");
+ value = nsHTMLEntities::EntityToUnicode(nsAutoCString("zzzzz"));
+ NS_ASSERTION(value == -1, "found zzzzz");
+ nsHTMLEntities::ReleaseTable();
+ }
+};
+//nsTestEntityTable validateEntityTable;
+#endif
+
diff --git a/components/htmlparser/src/nsHTMLEntities.h b/components/htmlparser/src/nsHTMLEntities.h
new file mode 100644
index 000000000..f38856bfa
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntities.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsHTMLEntities_h___
+#define nsHTMLEntities_h___
+
+#include "nsString.h"
+
+// Static-only access to the HTML entity name <-> Unicode value tables.
+class nsHTMLEntities {
+public:
+
+ // Reference-counted setup and teardown of the lookup tables. The lookup
+ // functions below need a prior successful AddRefTable().
+ static nsresult AddRefTable(void);
+ static void ReleaseTable(void);
+
+/**
+ * Translate an entity string into its unicode value. This call
+ * returns -1 if the entity cannot be mapped. Note that the string
+ * passed in must NOT have the leading "&" nor the trailing ";"
+ * in it (the implementation in nsHTMLEntities.cpp nevertheless strips a
+ * trailing ";" when it sees one).
+ */
+ static int32_t EntityToUnicode(const nsAString& aEntity);
+ static int32_t EntityToUnicode(const nsCString& aEntity);
+
+/**
+ * Translate a unicode value into an entity string. This call
+ * returns null if the entity cannot be mapped.
+ * Note that the string returned DOES NOT have the leading "&" nor
+ * the trailing ";" in it.
+ */
+ static const char* UnicodeToEntity(int32_t aUnicode);
+};
+
+
+#endif /* nsHTMLEntities_h___ */
diff --git a/components/htmlparser/src/nsHTMLEntityList.h b/components/htmlparser/src/nsHTMLEntityList.h
new file mode 100644
index 000000000..fa05382bf
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLEntityList.h
@@ -0,0 +1,303 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/******
+
+ This file contains the list of all HTML entities
+ See nsHTMLEntities.h for access to the enum values for entities
+
+ It is designed to be used as inline input to nsHTMLEntities.cpp *only*
+ through the magic of C preprocessing.
+
+ All entries must be enclosed in the macro HTML_ENTITY which will have cruel
+ and unusual things done to it
+
+ It is recommended (but not strictly necessary) to keep all entries
+ in alphabetical order
+
+ The first argument to HTML_ENTITY is the string value of the entity
+ The second argument to HTML_ENTITY is the unicode value of the entity
+
+ ******/
+
+// ISO 8859-1 entities.
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(nbsp, 160)
+HTML_ENTITY(iexcl, 161)
+HTML_ENTITY(cent, 162)
+HTML_ENTITY(pound, 163)
+HTML_ENTITY(curren, 164)
+HTML_ENTITY(yen, 165)
+HTML_ENTITY(brvbar, 166)
+HTML_ENTITY(sect, 167)
+HTML_ENTITY(uml, 168)
+HTML_ENTITY(copy, 169)
+HTML_ENTITY(ordf, 170)
+HTML_ENTITY(laquo, 171)
+HTML_ENTITY(not, 172)
+HTML_ENTITY(shy, 173)
+HTML_ENTITY(reg, 174)
+HTML_ENTITY(macr, 175)
+HTML_ENTITY(deg, 176)
+HTML_ENTITY(plusmn, 177)
+HTML_ENTITY(sup2, 178)
+HTML_ENTITY(sup3, 179)
+HTML_ENTITY(acute, 180)
+HTML_ENTITY(micro, 181)
+HTML_ENTITY(para, 182)
+HTML_ENTITY(middot, 183)
+HTML_ENTITY(cedil, 184)
+HTML_ENTITY(sup1, 185)
+HTML_ENTITY(ordm, 186)
+HTML_ENTITY(raquo, 187)
+HTML_ENTITY(frac14, 188)
+HTML_ENTITY(frac12, 189)
+HTML_ENTITY(frac34, 190)
+HTML_ENTITY(iquest, 191)
+HTML_ENTITY(Agrave, 192)
+HTML_ENTITY(Aacute, 193)
+HTML_ENTITY(Acirc, 194)
+HTML_ENTITY(Atilde, 195)
+HTML_ENTITY(Auml, 196)
+HTML_ENTITY(Aring, 197)
+HTML_ENTITY(AElig, 198)
+HTML_ENTITY(Ccedil, 199)
+HTML_ENTITY(Egrave, 200)
+HTML_ENTITY(Eacute, 201)
+HTML_ENTITY(Ecirc, 202)
+HTML_ENTITY(Euml, 203)
+HTML_ENTITY(Igrave, 204)
+HTML_ENTITY(Iacute, 205)
+HTML_ENTITY(Icirc, 206)
+HTML_ENTITY(Iuml, 207)
+HTML_ENTITY(ETH, 208)
+HTML_ENTITY(Ntilde, 209)
+HTML_ENTITY(Ograve, 210)
+HTML_ENTITY(Oacute, 211)
+HTML_ENTITY(Ocirc, 212)
+HTML_ENTITY(Otilde, 213)
+HTML_ENTITY(Ouml, 214)
+HTML_ENTITY(times, 215)
+HTML_ENTITY(Oslash, 216)
+HTML_ENTITY(Ugrave, 217)
+HTML_ENTITY(Uacute, 218)
+HTML_ENTITY(Ucirc, 219)
+HTML_ENTITY(Uuml, 220)
+HTML_ENTITY(Yacute, 221)
+HTML_ENTITY(THORN, 222)
+HTML_ENTITY(szlig, 223)
+HTML_ENTITY(agrave, 224)
+HTML_ENTITY(aacute, 225)
+HTML_ENTITY(acirc, 226)
+HTML_ENTITY(atilde, 227)
+HTML_ENTITY(auml, 228)
+HTML_ENTITY(aring, 229)
+HTML_ENTITY(aelig, 230)
+HTML_ENTITY(ccedil, 231)
+HTML_ENTITY(egrave, 232)
+HTML_ENTITY(eacute, 233)
+HTML_ENTITY(ecirc, 234)
+HTML_ENTITY(euml, 235)
+HTML_ENTITY(igrave, 236)
+HTML_ENTITY(iacute, 237)
+HTML_ENTITY(icirc, 238)
+HTML_ENTITY(iuml, 239)
+HTML_ENTITY(eth, 240)
+HTML_ENTITY(ntilde, 241)
+HTML_ENTITY(ograve, 242)
+HTML_ENTITY(oacute, 243)
+HTML_ENTITY(ocirc, 244)
+HTML_ENTITY(otilde, 245)
+HTML_ENTITY(ouml, 246)
+HTML_ENTITY(divide, 247)
+HTML_ENTITY(oslash, 248)
+HTML_ENTITY(ugrave, 249)
+HTML_ENTITY(uacute, 250)
+HTML_ENTITY(ucirc, 251)
+HTML_ENTITY(uuml, 252)
+HTML_ENTITY(yacute, 253)
+HTML_ENTITY(thorn, 254)
+HTML_ENTITY(yuml, 255)
+
+// Symbols, mathematical symbols and Greek letters
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(fnof, 402)
+HTML_ENTITY(Alpha, 913)
+HTML_ENTITY(Beta, 914)
+HTML_ENTITY(Gamma, 915)
+HTML_ENTITY(Delta, 916)
+HTML_ENTITY(Epsilon, 917)
+HTML_ENTITY(Zeta, 918)
+HTML_ENTITY(Eta, 919)
+HTML_ENTITY(Theta, 920)
+HTML_ENTITY(Iota, 921)
+HTML_ENTITY(Kappa, 922)
+HTML_ENTITY(Lambda, 923)
+HTML_ENTITY(Mu, 924)
+HTML_ENTITY(Nu, 925)
+HTML_ENTITY(Xi, 926)
+HTML_ENTITY(Omicron, 927)
+HTML_ENTITY(Pi, 928)
+HTML_ENTITY(Rho, 929)
+HTML_ENTITY(Sigma, 931)
+HTML_ENTITY(Tau, 932)
+HTML_ENTITY(Upsilon, 933)
+HTML_ENTITY(Phi, 934)
+HTML_ENTITY(Chi, 935)
+HTML_ENTITY(Psi, 936)
+HTML_ENTITY(Omega, 937)
+HTML_ENTITY(alpha, 945)
+HTML_ENTITY(beta, 946)
+HTML_ENTITY(gamma, 947)
+HTML_ENTITY(delta, 948)
+HTML_ENTITY(epsilon, 949)
+HTML_ENTITY(zeta, 950)
+HTML_ENTITY(eta, 951)
+HTML_ENTITY(theta, 952)
+HTML_ENTITY(iota, 953)
+HTML_ENTITY(kappa, 954)
+HTML_ENTITY(lambda, 955)
+HTML_ENTITY(mu, 956)
+HTML_ENTITY(nu, 957)
+HTML_ENTITY(xi, 958)
+HTML_ENTITY(omicron, 959)
+HTML_ENTITY(pi, 960)
+HTML_ENTITY(rho, 961)
+HTML_ENTITY(sigmaf, 962)
+HTML_ENTITY(sigma, 963)
+HTML_ENTITY(tau, 964)
+HTML_ENTITY(upsilon, 965)
+HTML_ENTITY(phi, 966)
+HTML_ENTITY(chi, 967)
+HTML_ENTITY(psi, 968)
+HTML_ENTITY(omega, 969)
+HTML_ENTITY(thetasym, 977)
+HTML_ENTITY(upsih, 978)
+HTML_ENTITY(piv, 982)
+HTML_ENTITY(bull, 8226)
+HTML_ENTITY(hellip, 8230)
+HTML_ENTITY(prime, 8242)
+HTML_ENTITY(Prime, 8243)
+HTML_ENTITY(oline, 8254)
+HTML_ENTITY(frasl, 8260)
+HTML_ENTITY(weierp, 8472)
+HTML_ENTITY(image, 8465)
+HTML_ENTITY(real, 8476)
+HTML_ENTITY(trade, 8482)
+HTML_ENTITY(alefsym, 8501)
+HTML_ENTITY(larr, 8592)
+HTML_ENTITY(uarr, 8593)
+HTML_ENTITY(rarr, 8594)
+HTML_ENTITY(darr, 8595)
+HTML_ENTITY(harr, 8596)
+HTML_ENTITY(crarr, 8629)
+HTML_ENTITY(lArr, 8656)
+HTML_ENTITY(uArr, 8657)
+HTML_ENTITY(rArr, 8658)
+HTML_ENTITY(dArr, 8659)
+HTML_ENTITY(hArr, 8660)
+HTML_ENTITY(forall, 8704)
+HTML_ENTITY(part, 8706)
+HTML_ENTITY(exist, 8707)
+HTML_ENTITY(empty, 8709)
+HTML_ENTITY(nabla, 8711)
+HTML_ENTITY(isin, 8712)
+HTML_ENTITY(notin, 8713)
+HTML_ENTITY(ni, 8715)
+HTML_ENTITY(prod, 8719)
+HTML_ENTITY(sum, 8721)
+HTML_ENTITY(minus, 8722)
+HTML_ENTITY(lowast, 8727)
+HTML_ENTITY(radic, 8730)
+HTML_ENTITY(prop, 8733)
+HTML_ENTITY(infin, 8734)
+HTML_ENTITY(ang, 8736)
+HTML_ENTITY(and, 8743)
+HTML_ENTITY(or, 8744)
+HTML_ENTITY(cap, 8745)
+HTML_ENTITY(cup, 8746)
+HTML_ENTITY(int, 8747)
+HTML_ENTITY(there4, 8756)
+HTML_ENTITY(sim, 8764)
+HTML_ENTITY(cong, 8773)
+HTML_ENTITY(asymp, 8776)
+HTML_ENTITY(ne, 8800)
+HTML_ENTITY(equiv, 8801)
+HTML_ENTITY(le, 8804)
+HTML_ENTITY(ge, 8805)
+HTML_ENTITY(sub, 8834)
+HTML_ENTITY(sup, 8835)
+HTML_ENTITY(nsub, 8836)
+HTML_ENTITY(sube, 8838)
+HTML_ENTITY(supe, 8839)
+HTML_ENTITY(oplus, 8853)
+HTML_ENTITY(otimes, 8855)
+HTML_ENTITY(perp, 8869)
+HTML_ENTITY(sdot, 8901)
+HTML_ENTITY(lceil, 8968)
+HTML_ENTITY(rceil, 8969)
+HTML_ENTITY(lfloor, 8970)
+HTML_ENTITY(rfloor, 8971)
+// Bug 603716: expansions of &lang; and &rang; have been modified in HTML5.
+// See http://www.w3.org/2003/entities/2007/htmlmathml-f.ent
+HTML_ENTITY(lang, 0x27E8)
+HTML_ENTITY(rang, 0x27E9)
+HTML_ENTITY(loz, 9674)
+HTML_ENTITY(spades, 9824)
+HTML_ENTITY(clubs, 9827)
+HTML_ENTITY(hearts, 9829)
+HTML_ENTITY(diams, 9830)
+
+// Markup-significant and internationalization characters
+// See the HTML4.0 spec for this list in its DTD form
+HTML_ENTITY(quot, 34)
+HTML_ENTITY(amp, 38)
+HTML_ENTITY(lt, 60)
+HTML_ENTITY(gt, 62)
+HTML_ENTITY(OElig, 338)
+HTML_ENTITY(oelig, 339)
+HTML_ENTITY(Scaron, 352)
+HTML_ENTITY(scaron, 353)
+HTML_ENTITY(Yuml, 376)
+HTML_ENTITY(circ, 710)
+HTML_ENTITY(tilde, 732)
+HTML_ENTITY(ensp, 8194)
+HTML_ENTITY(emsp, 8195)
+HTML_ENTITY(thinsp, 8201)
+HTML_ENTITY(zwnj, 8204)
+HTML_ENTITY(zwj, 8205)
+HTML_ENTITY(lrm, 8206)
+HTML_ENTITY(rlm, 8207)
+HTML_ENTITY(ndash, 8211)
+HTML_ENTITY(mdash, 8212)
+HTML_ENTITY(lsquo, 8216)
+HTML_ENTITY(rsquo, 8217)
+HTML_ENTITY(sbquo, 8218)
+HTML_ENTITY(ldquo, 8220)
+HTML_ENTITY(rdquo, 8221)
+HTML_ENTITY(bdquo, 8222)
+HTML_ENTITY(dagger, 8224)
+HTML_ENTITY(Dagger, 8225)
+HTML_ENTITY(permil, 8240)
+HTML_ENTITY(lsaquo, 8249)
+HTML_ENTITY(rsaquo, 8250)
+HTML_ENTITY(euro, 8364)
+
+// Navigator entity extensions
+// This block of entities needs to be at the bottom of the list since it
+// contains duplicate Unicode codepoints. The codepoint to entity name
+// mapping (used by Composer) must ignore them, which happens only
+// because they are listed later.
+
+// apos is from XML
+HTML_ENTITY(apos, 39)
+// The capitalized versions are required to handle non-standard input.
+HTML_ENTITY(AMP, 38)
+HTML_ENTITY(COPY, 169)
+HTML_ENTITY(GT, 62)
+HTML_ENTITY(LT, 60)
+HTML_ENTITY(QUOT, 34)
+HTML_ENTITY(REG, 174)
+
diff --git a/components/htmlparser/src/nsHTMLTagList.h b/components/htmlparser/src/nsHTMLTagList.h
new file mode 100644
index 000000000..4cb2a61e0
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTagList.h
@@ -0,0 +1,197 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// IWYU pragma: private, include "nsHTMLTags.h"
+
+/******
+
+ This file contains the list of all HTML tags.
+ See nsHTMLTags.h for access to the enum values for tags.
+
+ It is designed to be used as input to various places that will define the
+ HTML_TAG macro in useful ways through the magic of C preprocessing.
+ Additionally, it is consumed by the self-regeneration code in
+ ElementName.java from which nsHtml5ElementName.cpp/h is translated.
+ See parser/html/java/README.txt.
+
+ If you edit this list, you need to re-run ElementName.java
+ self-regeneration and the HTML parser Java to C++ translation.
+
+ All entries must be enclosed in the macro HTML_TAG which will have cruel
+ and unusual things done to it.
+
+ It is recommended (but not strictly necessary) to keep all entries
+ in alphabetical order.
+
+ The first argument to HTML_TAG is the tag name. The second argument is the
+ "creator" method of the form NS_New$TAGNAMEElement, that will be used by
+ nsHTMLContentSink.cpp to create a content object for a tag of that
+ type. Use NOTUSED, if the particular tag has a non-standard creator.
+ The third argument is the interface name specified for this element
+ in the HTML specification. It can be empty if the relevant interface name
+ is "HTMLElement".
+
+ The HTML_OTHER macro is for values in the nsHTMLTag enum that are
+ not strictly tags.
+
+ Entries *must* use only lowercase characters.
+
+ Don't forget to update /editor/libeditor/HTMLEditUtils.cpp as well.
+
+ ** Break these invariants and bad things will happen. **
+
+ ******/
+#define HTML_HTMLELEMENT_TAG(_tag) HTML_TAG(_tag, , )
+
+HTML_TAG(a, Anchor, Anchor)
+HTML_HTMLELEMENT_TAG(abbr)
+HTML_HTMLELEMENT_TAG(acronym)
+HTML_HTMLELEMENT_TAG(address)
+HTML_TAG(applet, SharedObject, Applet)
+HTML_TAG(area, Area, Area)
+HTML_HTMLELEMENT_TAG(article)
+HTML_HTMLELEMENT_TAG(aside)
+HTML_TAG(audio, Audio, Audio)
+HTML_HTMLELEMENT_TAG(b)
+HTML_TAG(base, Shared, Base)
+HTML_HTMLELEMENT_TAG(basefont)
+HTML_HTMLELEMENT_TAG(bdo)
+HTML_TAG(bgsound, Unknown, Unknown)
+HTML_HTMLELEMENT_TAG(big)
+HTML_TAG(blockquote, Shared, Quote)
+HTML_TAG(body, Body, Body)
+HTML_TAG(br, BR, BR)
+HTML_TAG(button, Button, Button)
+HTML_TAG(canvas, Canvas, Canvas)
+HTML_TAG(caption, TableCaption, TableCaption)
+HTML_HTMLELEMENT_TAG(center)
+HTML_HTMLELEMENT_TAG(cite)
+HTML_HTMLELEMENT_TAG(code)
+HTML_TAG(col, TableCol, TableCol)
+HTML_TAG(colgroup, TableCol, TableCol)
+HTML_TAG(data, Data, Data)
+HTML_TAG(datalist, DataList, DataList)
+HTML_HTMLELEMENT_TAG(dd)
+HTML_TAG(del, Mod, Mod)
+HTML_TAG(details, Details, Details)
+HTML_HTMLELEMENT_TAG(dfn)
+HTML_TAG(dialog, Dialog, Dialog)
+HTML_TAG(dir, Shared, Directory)
+HTML_TAG(div, Div, Div)
+HTML_TAG(dl, SharedList, DList)
+HTML_HTMLELEMENT_TAG(dt)
+HTML_HTMLELEMENT_TAG(em)
+HTML_TAG(embed, SharedObject, Embed)
+HTML_TAG(fieldset, FieldSet, FieldSet)
+HTML_HTMLELEMENT_TAG(figcaption)
+HTML_HTMLELEMENT_TAG(figure)
+HTML_TAG(font, Font, Font)
+HTML_HTMLELEMENT_TAG(footer)
+HTML_TAG(form, Form, Form)
+HTML_TAG(frame, Frame, Frame)
+HTML_TAG(frameset, FrameSet, FrameSet)
+HTML_TAG(h1, Heading, Heading)
+HTML_TAG(h2, Heading, Heading)
+HTML_TAG(h3, Heading, Heading)
+HTML_TAG(h4, Heading, Heading)
+HTML_TAG(h5, Heading, Heading)
+HTML_TAG(h6, Heading, Heading)
+HTML_TAG(head, Shared, Head)
+HTML_HTMLELEMENT_TAG(header)
+HTML_HTMLELEMENT_TAG(hgroup)
+HTML_TAG(hr, HR, HR)
+HTML_TAG(html, Shared, Html)
+HTML_HTMLELEMENT_TAG(i)
+HTML_TAG(iframe, IFrame, IFrame)
+HTML_HTMLELEMENT_TAG(image)
+HTML_TAG(img, Image, Image)
+HTML_TAG(input, Input, Input)
+HTML_TAG(ins, Mod, Mod)
+HTML_HTMLELEMENT_TAG(kbd)
+HTML_TAG(keygen, Span, Span)
+HTML_TAG(label, Label, Label)
+HTML_TAG(legend, Legend, Legend)
+HTML_TAG(li, LI, LI)
+HTML_TAG(link, Link, Link)
+HTML_TAG(listing, Pre, Pre)
+HTML_HTMLELEMENT_TAG(main)
+HTML_TAG(map, Map, Map)
+HTML_HTMLELEMENT_TAG(mark)
+HTML_TAG(menu, Menu, Menu)
+HTML_TAG(menuitem, MenuItem, MenuItem)
+HTML_TAG(meta, Meta, Meta)
+HTML_TAG(meter, Meter, Meter)
+HTML_TAG(multicol, Unknown, Unknown)
+HTML_HTMLELEMENT_TAG(nav)
+HTML_HTMLELEMENT_TAG(nobr)
+HTML_HTMLELEMENT_TAG(noembed)
+HTML_HTMLELEMENT_TAG(noframes)
+HTML_HTMLELEMENT_TAG(noscript)
+HTML_TAG(object, Object, Object)
+HTML_TAG(ol, SharedList, OList)
+HTML_TAG(optgroup, OptGroup, OptGroup)
+HTML_TAG(option, Option, Option)
+HTML_TAG(output, Output, Output)
+HTML_TAG(p, Paragraph, Paragraph)
+HTML_TAG(param, Shared, Param)
+HTML_TAG(picture, Picture, Picture)
+HTML_HTMLELEMENT_TAG(plaintext)
+HTML_TAG(pre, Pre, Pre)
+HTML_TAG(progress, Progress, Progress)
+HTML_TAG(q, Shared, Quote)
+HTML_HTMLELEMENT_TAG(rb)
+HTML_HTMLELEMENT_TAG(rp)
+HTML_HTMLELEMENT_TAG(rt)
+HTML_HTMLELEMENT_TAG(rtc)
+HTML_HTMLELEMENT_TAG(ruby)
+HTML_HTMLELEMENT_TAG(s)
+HTML_HTMLELEMENT_TAG(samp)
+HTML_TAG(script, Script, Script)
+HTML_HTMLELEMENT_TAG(section)
+HTML_TAG(select, Select, Select)
+HTML_HTMLELEMENT_TAG(small)
+HTML_TAG(slot, Slot, Slot)
+HTML_TAG(source, Source, Source)
+HTML_TAG(span, Span, Span)
+HTML_HTMLELEMENT_TAG(strike)
+HTML_HTMLELEMENT_TAG(strong)
+HTML_TAG(style, Style, Style)
+HTML_HTMLELEMENT_TAG(sub)
+HTML_TAG(summary, Summary, )
+HTML_HTMLELEMENT_TAG(sup)
+HTML_TAG(table, Table, Table)
+HTML_TAG(tbody, TableSection, TableSection)
+HTML_TAG(td, TableCell, TableCell)
+HTML_TAG(textarea, TextArea, TextArea)
+HTML_TAG(tfoot, TableSection, TableSection)
+HTML_TAG(th, TableCell, TableCell)
+HTML_TAG(thead, TableSection, TableSection)
+HTML_TAG(template, Template, Template)
+HTML_TAG(time, Time, Time)
+HTML_TAG(title, Title, Title)
+HTML_TAG(tr, TableRow, TableRow)
+HTML_TAG(track, Track, Track)
+HTML_HTMLELEMENT_TAG(tt)
+HTML_HTMLELEMENT_TAG(u)
+HTML_TAG(ul, SharedList, UList)
+HTML_HTMLELEMENT_TAG(var)
+HTML_TAG(video, Video, Video)
+HTML_HTMLELEMENT_TAG(wbr)
+HTML_TAG(xmp, Pre, Pre)
+
+
+/* These are not for tags. But they will be included in the nsHTMLTag
+ enum anyway */
+
+HTML_OTHER(text)
+HTML_OTHER(whitespace)
+HTML_OTHER(newline)
+HTML_OTHER(comment)
+HTML_OTHER(entity)
+HTML_OTHER(doctypeDecl)
+HTML_OTHER(markupDecl)
+HTML_OTHER(instruction)
+
+#undef HTML_HTMLELEMENT_TAG
diff --git a/components/htmlparser/src/nsHTMLTags.cpp b/components/htmlparser/src/nsHTMLTags.cpp
new file mode 100644
index 000000000..681c37489
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTags.cpp
@@ -0,0 +1,259 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHTMLTags.h"
+#include "nsCRT.h"
+#include "nsReadableUtils.h"
+#include "nsString.h"
+#include "nsStaticAtom.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/HashFunctions.h"
+#include <algorithm>
+
+using namespace mozilla;
+
+// static array of unicode tag names
+// Each HTML_TAG entry of nsHTMLTagList.h contributes one char16_t string
+// literal; HTML_OTHER entries expand to nothing and so are left out.
+#define HTML_TAG(_tag, _classname, _interfacename) (u"" #_tag),
+#define HTML_OTHER(_tag)
+const char16_t* const nsHTMLTags::sTagUnicodeTable[] = {
+#include "nsHTMLTagList.h"
+};
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// static array of tag atoms
+nsIAtom* nsHTMLTags::sTagAtomTable[eHTMLTag_userdefined - 1];
+
+// Reference count and the two lookup tables managed by AddRefTable() and
+// ReleaseTable().
+int32_t nsHTMLTags::gTableRefCount;
+PLHashTable* nsHTMLTags::gTagTable;
+PLHashTable* nsHTMLTags::gTagAtomTable;
+
+
+// char16_t* -> id hash
+// Hash callback for gTagTable: hashes the NUL-terminated char16_t string that
+// key points at.
+static PLHashNumber
+HTMLTagsHashCodeUCPtr(const void *key)
+{
+ return HashString(static_cast<const char16_t*>(key));
+}
+
+// Key comparator for gTagTable: yields non-zero when the two NUL-terminated
+// char16_t strings are equal and zero otherwise.
+static int
+HTMLTagsKeyCompareUCPtr(const void *key1, const void *key2)
+{
+  const char16_t *lhs = static_cast<const char16_t*>(key1);
+  const char16_t *rhs = static_cast<const char16_t*>(key2);
+  const bool same = (0 == nsCRT::strcmp(lhs, rhs));
+
+  return same;
+}
+
+// nsIAtom* -> id hash
+// Hash callback for gTagAtomTable: the hash is derived from the pointer value
+// itself (truncated to 32 bits and shifted right by two).
+static PLHashNumber
+HTMLTagsHashCodeAtom(const void *key)
+{
+ return NS_PTR_TO_INT32(key) >> 2;
+}
+
+#define NS_HTMLTAG_NAME_MAX_LENGTH 10
+
+// static
+// Registers one static atom per HTML_TAG entry of nsHTMLTagList.h and stores
+// the resulting atom pointers in sTagAtomTable. In DEBUG builds it also
+// cross-checks the atom names against sTagUnicodeTable and validates
+// NS_HTMLTAG_NAME_MAX_LENGTH.
+void
+nsHTMLTags::RegisterAtoms(void)
+{
+// First expansion of the tag list: one static atom buffer per tag.
+#define HTML_TAG(_tag, _classname, _interfacename) NS_STATIC_ATOM_BUFFER(Atombuffer_##_tag, #_tag)
+#define HTML_OTHER(_tag)
+#include "nsHTMLTagList.h"
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// static array of tag StaticAtom structs
+// Second expansion: tie each buffer to its slot in sTagAtomTable (enum value
+// minus one, because eHTMLTag_unknown occupies zero).
+#define HTML_TAG(_tag, _classname, _interfacename) NS_STATIC_ATOM(Atombuffer_##_tag, &nsHTMLTags::sTagAtomTable[eHTMLTag_##_tag - 1]),
+#define HTML_OTHER(_tag)
+ static const nsStaticAtom sTagAtoms_info[] = {
+#include "nsHTMLTagList.h"
+ };
+#undef HTML_TAG
+#undef HTML_OTHER
+
+ // Fill in our static atom pointers
+ NS_RegisterStaticAtoms(sTagAtoms_info);
+
+
+#if defined(DEBUG)
+ {
+ // let's verify that all names in the table are lowercase...
+ for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+ nsAutoString temp1((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+ nsAutoString temp2((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+ ToLowerCase(temp1);
+ NS_ASSERTION(temp1.Equals(temp2), "upper case char in table");
+ }
+
+ // let's verify that all names in the unicode strings above are
+ // correct.
+ for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+ nsAutoString temp1(sTagUnicodeTable[i]);
+ nsAutoString temp2((char16_t*)sTagAtoms_info[i].mStringBuffer->Data());
+ NS_ASSERTION(temp1.Equals(temp2), "Bad unicode tag name!");
+ }
+
+ // let's verify that NS_HTMLTAG_NAME_MAX_LENGTH is correct
+ uint32_t maxTagNameLength = 0;
+ for (int32_t i = 0; i < NS_HTML_TAG_MAX; ++i) {
+ uint32_t len = NS_strlen(sTagUnicodeTable[i]);
+ maxTagNameLength = std::max(len, maxTagNameLength);
+ }
+ NS_ASSERTION(maxTagNameLength == NS_HTMLTAG_NAME_MAX_LENGTH,
+ "NS_HTMLTAG_NAME_MAX_LENGTH not set correctly!");
+ }
+#endif
+}
+
+// static
+// Takes one reference on the tag lookup tables. The first reference creates
+// gTagTable (char16_t* name -> id) and gTagAtomTable (nsIAtom* -> id) and
+// fills them from sTagUnicodeTable / sTagAtomTable.
+//
+// Returns NS_OK on success. If a table cannot be created or an entry cannot
+// be added, everything built so far is destroyed, the reference is rolled
+// back and NS_ERROR_OUT_OF_MEMORY is returned, so a later call can retry
+// instead of finding a non-zero count with missing tables.
+nsresult
+nsHTMLTags::AddRefTable(void)
+{
+  if (gTableRefCount++ != 0) {
+    // Tables already exist; only the count changes.
+    return NS_OK;
+  }
+
+  NS_ASSERTION(!gTagTable && !gTagAtomTable, "pre existing hash!");
+
+  gTagTable = PL_NewHashTable(64, HTMLTagsHashCodeUCPtr,
+                              HTMLTagsKeyCompareUCPtr, PL_CompareValues,
+                              nullptr, nullptr);
+  bool ok = gTagTable != nullptr;
+
+  if (ok) {
+    gTagAtomTable = PL_NewHashTable(64, HTMLTagsHashCodeAtom,
+                                    PL_CompareValues, PL_CompareValues,
+                                    nullptr, nullptr);
+    ok = gTagAtomTable != nullptr;
+  }
+
+  // Fill in gTagTable with the static char16_t strings as keys and the value
+  // of the corresponding enum as the value in the table; gTagAtomTable gets
+  // the same ids keyed by atom pointer. Ids are index + 1 because enum value
+  // zero is eHTMLTag_unknown.
+  for (int32_t i = 0; ok && i < NS_HTML_TAG_MAX; ++i) {
+    ok = PL_HashTableAdd(gTagTable, sTagUnicodeTable[i],
+                         NS_INT32_TO_PTR(i + 1)) &&
+         PL_HashTableAdd(gTagAtomTable, sTagAtomTable[i],
+                         NS_INT32_TO_PTR(i + 1));
+  }
+
+  if (!ok) {
+    NS_WARNING("failed to build the HTML tag lookup tables");
+    if (gTagTable) {
+      PL_HashTableDestroy(gTagTable);
+      gTagTable = nullptr;
+    }
+    if (gTagAtomTable) {
+      PL_HashTableDestroy(gTagAtomTable);
+      gTagAtomTable = nullptr;
+    }
+    --gTableRefCount;
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  return NS_OK;
+}
+
+// static
+// Drops one reference on the tag lookup tables and destroys both tables once
+// the count reaches zero.
+void
+nsHTMLTags::ReleaseTable(void)
+{
+  if (--gTableRefCount != 0) {
+    return;
+  }
+
+  if (!gTagTable) {
+    return;
+  }
+
+  // The tables own neither their keys nor their values, so destroying the
+  // tables themselves is all the cleanup there is.
+  PL_HashTableDestroy(gTagTable);
+  PL_HashTableDestroy(gTagAtomTable);
+  gTagTable = nullptr;
+  gTagAtomTable = nullptr;
+}
+
+// static
+// Maps a tag name to its nsHTMLTag id, treating ASCII upper-case letters as
+// their lower-case counterparts. Names longer than
+// NS_HTMLTAG_NAME_MAX_LENGTH yield eHTMLTag_userdefined without a lookup;
+// everything else is answered by CaseSensitiveStringTagToId().
+nsHTMLTag
+nsHTMLTags::StringTagToId(const nsAString& aTagName)
+{
+  const uint32_t nameLen = aTagName.Length();
+
+  if (nameLen > NS_HTMLTAG_NAME_MAX_LENGTH) {
+    return eHTMLTag_userdefined;
+  }
+
+  // Room for the longest accepted name plus the terminating zero.
+  char16_t lowered[NS_HTMLTAG_NAME_MAX_LENGTH + 1];
+
+  nsAString::const_iterator src;
+  aTagName.BeginReading(src);
+
+  // Copy the name into the local buffer, lower-casing 'A'..'Z' on the way.
+  for (uint32_t pos = 0; pos < nameLen; ++pos, ++src) {
+    char16_t ch = *src;
+
+    if (ch >= 'A' && ch <= 'Z') {
+      ch |= 0x20; // Lowercase the ASCII character.
+    }
+
+    lowered[pos] = ch;
+  }
+
+  lowered[nameLen] = 0;
+
+  return CaseSensitiveStringTagToId(lowered);
+}
+
+#ifdef DEBUG
+// Debug-only self test of the tag tables: checks that every known tag maps to
+// an id and back (by string, upper-cased string and atom), that unknown names
+// map to eHTMLTag_userdefined, and that out-of-range ids map to nothing.
+void
+nsHTMLTags::TestTagTable()
+{
+ const char16_t *tag;
+ nsHTMLTag id;
+ nsCOMPtr<nsIAtom> atom;
+
+ nsHTMLTags::AddRefTable();
+ // Make sure we can find everything we are supposed to
+ for (int i = 0; i < NS_HTML_TAG_MAX; ++i) {
+ tag = sTagUnicodeTable[i];
+ id = StringTagToId(nsDependentString(tag));
+ NS_ASSERTION(id != eHTMLTag_userdefined, "can't find tag id");
+ const char16_t* check = GetStringValue(id);
+ NS_ASSERTION(0 == nsCRT::strcmp(check, tag), "can't map id back to tag");
+
+ // The upper-cased spelling must resolve to the same id.
+ nsAutoString uname(tag);
+ ToUpperCase(uname);
+ NS_ASSERTION(id == StringTagToId(uname), "wrong id");
+
+ NS_ASSERTION(id == CaseSensitiveStringTagToId(tag), "wrong id");
+
+ atom = NS_Atomize(tag);
+ NS_ASSERTION(id == CaseSensitiveAtomTagToId(atom), "wrong id");
+ NS_ASSERTION(atom == GetAtom(id), "can't map id back to atom");
+ }
+
+ // Make sure we don't find things that aren't there
+ id = StringTagToId(NS_LITERAL_STRING("@"));
+ NS_ASSERTION(id == eHTMLTag_userdefined, "found @");
+ id = StringTagToId(NS_LITERAL_STRING("zzzzz"));
+ NS_ASSERTION(id == eHTMLTag_userdefined, "found zzzzz");
+
+ atom = NS_Atomize("@");
+ id = CaseSensitiveAtomTagToId(atom);
+ NS_ASSERTION(id == eHTMLTag_userdefined, "found @");
+ atom = NS_Atomize("zzzzz");
+ id = CaseSensitiveAtomTagToId(atom);
+ NS_ASSERTION(id == eHTMLTag_userdefined, "found zzzzz");
+
+ // Ids outside the range of real tags must not map to a name...
+ tag = GetStringValue((nsHTMLTag) 0);
+ NS_ASSERTION(!tag, "found enum 0");
+ tag = GetStringValue((nsHTMLTag) -1);
+ NS_ASSERTION(!tag, "found enum -1");
+ tag = GetStringValue((nsHTMLTag) (NS_HTML_TAG_MAX + 1));
+ NS_ASSERTION(!tag, "found past max enum");
+
+ // ...nor to an atom.
+ atom = GetAtom((nsHTMLTag) 0);
+ NS_ASSERTION(!atom, "found enum 0");
+ atom = GetAtom((nsHTMLTag) -1);
+ NS_ASSERTION(!atom, "found enum -1");
+ atom = GetAtom((nsHTMLTag) (NS_HTML_TAG_MAX + 1));
+ NS_ASSERTION(!atom, "found past max enum");
+
+ ReleaseTable();
+}
+
+#endif // DEBUG
diff --git a/components/htmlparser/src/nsHTMLTags.h b/components/htmlparser/src/nsHTMLTags.h
new file mode 100644
index 000000000..b21df55f8
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTags.h
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHTMLTags_h___
+#define nsHTMLTags_h___
+
+#include "nsIAtom.h"
+#include "nsString.h"
+#include "plhash.h"
+
+class nsIAtom;
+
+/*
+ Declare the enum list using the magic of preprocessing
+ enum values are "eHTMLTag_foo" (where foo is the tag)
+
+ To change the list of tags, see nsHTMLTagList.h
+
+ These enum values are used as the index of array in various places.
+ If we change the structure of the enum by adding entries to it or removing
+ entries from it _directly_, not via nsHTMLTagList.h, don't forget to update
+ dom/bindings/BindingUtils.cpp and dom/html/nsHTMLContentSink.cpp as well.
+ */
+#define HTML_TAG(_tag, _classname, _interfacename) eHTMLTag_##_tag,
+#define HTML_OTHER(_tag) eHTMLTag_##_tag,
+enum nsHTMLTag {
+ /* this enum must be first and must be zero */
+ eHTMLTag_unknown = 0,
+#include "nsHTMLTagList.h"
+
+ /* can't be moved into nsHTMLTagList since gcc3.4 doesn't like a
+ comma at the end of enum list*/
+ eHTMLTag_userdefined
+};
+#undef HTML_TAG
+#undef HTML_OTHER
+
+// All tags before eHTMLTag_text are HTML tags
+#define NS_HTML_TAG_MAX int32_t(eHTMLTag_text - 1)
+
+class nsHTMLTags {
+public:
+ static void RegisterAtoms(void);
+ static nsresult AddRefTable(void);
+ static void ReleaseTable(void);
+
+ // Functions for converting string or atom to id
+ static nsHTMLTag StringTagToId(const nsAString& aTagName);
+ static nsHTMLTag AtomTagToId(nsIAtom* aTagName)
+ {
+ return StringTagToId(nsDependentAtomString(aTagName));
+ }
+
+ static nsHTMLTag CaseSensitiveStringTagToId(const char16_t* aTagName)
+ {
+ NS_ASSERTION(gTagTable, "no lookup table, needs addref");
+ NS_ASSERTION(aTagName, "null tagname!");
+
+ void* tag = PL_HashTableLookupConst(gTagTable, aTagName);
+
+ return tag ? (nsHTMLTag)NS_PTR_TO_INT32(tag) : eHTMLTag_userdefined;
+ }
+ static nsHTMLTag CaseSensitiveAtomTagToId(nsIAtom* aTagName)
+ {
+ NS_ASSERTION(gTagAtomTable, "no lookup table, needs addref");
+ NS_ASSERTION(aTagName, "null tagname!");
+
+ void* tag = PL_HashTableLookupConst(gTagAtomTable, aTagName);
+
+ return tag ? (nsHTMLTag)NS_PTR_TO_INT32(tag) : eHTMLTag_userdefined;
+ }
+
+ // Functions for converting an id to a string or atom
+ static const char16_t *GetStringValue(nsHTMLTag aEnum)
+ {
+ return aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX ?
+ nullptr : sTagUnicodeTable[aEnum - 1];
+ }
+ static nsIAtom *GetAtom(nsHTMLTag aEnum)
+ {
+ return aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX ?
+ nullptr : sTagAtomTable[aEnum - 1];
+ }
+
+#ifdef DEBUG
+ static void TestTagTable();
+#endif
+
+private:
+ static nsIAtom* sTagAtomTable[eHTMLTag_userdefined - 1];
+ static const char16_t* const sTagUnicodeTable[];
+
+ static int32_t gTableRefCount;
+ static PLHashTable* gTagTable;
+ static PLHashTable* gTagAtomTable;
+};
+
+#endif /* nsHTMLTags_h___ */
diff --git a/components/htmlparser/src/nsHTMLTokenizer.cpp b/components/htmlparser/src/nsHTMLTokenizer.cpp
new file mode 100644
index 000000000..a40e11f0e
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTokenizer.cpp
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * @file nsHTMLTokenizer.cpp
+ * This is an implementation of the nsITokenizer interface.
+ * This file contains the implementation of a tokenizer to tokenize an HTML
+ * document. It attempts to do so, making tradeoffs between compatibility with
+ * older parsers and the SGML specification. Note that most of the real
+ * "tokenization" takes place in nsHTMLTokens.cpp.
+ */
+
+#include "nsHTMLTokenizer.h"
+#include "nsIParser.h"
+#include "nsParserConstants.h"
+
+/************************************************************************
+ And now for the main class -- nsHTMLTokenizer...
+ ************************************************************************/
+
+/**
+ * Satisfy the nsISupports interface.
+ */
+NS_IMPL_ISUPPORTS(nsHTMLTokenizer, nsITokenizer)
+
+/**
+ * Default constructor
+ */
+nsHTMLTokenizer::nsHTMLTokenizer()
+{
+ // TODO Assert about:blank-ness.
+}
+
+nsresult
+nsHTMLTokenizer::WillTokenize(bool aIsFinalChunk)
+{
+ return NS_OK;
+}
+
+/**
+ * Token-consumption entry point of this nsITokenizer implementation.
+ * NOTE: in this file the method is a stub -- it does not read from
+ * aScanner, never assigns aFlushTokens, and unconditionally
+ * returns kEOF.
+ *
+ * @param aScanner The source of our input (not read by this stub).
+ * @param aFlushTokens An OUT parameter to tell the caller whether it should
+ * process our queued tokens up to now (e.g., when we
+ * reach a <script>). Left untouched by this stub.
+ * @return Always kEOF.
+ */
+nsresult
+nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
+{
+ return kEOF;
+}
diff --git a/components/htmlparser/src/nsHTMLTokenizer.h b/components/htmlparser/src/nsHTMLTokenizer.h
new file mode 100644
index 000000000..0d2940c5e
--- /dev/null
+++ b/components/htmlparser/src/nsHTMLTokenizer.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/1/98
+ *
+ */
+
+#ifndef __NSHTMLTOKENIZER
+#define __NSHTMLTOKENIZER
+
+#include "mozilla/Attributes.h"
+#include "nsISupports.h"
+#include "nsITokenizer.h"
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4275 )
+#endif
+
+class nsHTMLTokenizer final : public nsITokenizer {
+ ~nsHTMLTokenizer() {}
+
+public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSITOKENIZER
+ nsHTMLTokenizer();
+};
+
+#endif
+
+
diff --git a/components/htmlparser/src/nsIContentSink.h b/components/htmlparser/src/nsIContentSink.h
new file mode 100644
index 000000000..56c70a1b4
--- /dev/null
+++ b/components/htmlparser/src/nsIContentSink.h
@@ -0,0 +1,132 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIContentSink_h___
+#define nsIContentSink_h___
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/1/98
+ *
+ * This pure virtual interface is used as the "glue" that connects the parsing
+ * process to the content model construction process.
+ *
+ * The icontentsink interface is a very lightweight wrapper that represents the
+ * content-sink model building process. There is another one that you may care
+ * about more, which is the IHTMLContentSink interface. (See that file for details).
+ */
+#include "nsISupports.h"
+#include "nsString.h"
+#include "mozFlushType.h"
+#include "nsIDTD.h"
+
+class nsParserBase;
+
+#define NS_ICONTENT_SINK_IID \
+{ 0xcf9a7cbb, 0xfcbc, 0x4e13, \
+ { 0x8e, 0xf5, 0x18, 0xef, 0x2d, 0x3d, 0x58, 0x29 } }
+
+class nsIContentSink : public nsISupports {
+public:
+
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_ICONTENT_SINK_IID)
+
+ /**
+ * This method is called by the parser when it is entered from
+ * the event loop. The content sink wants to know how long the
+ * parser has been active since we last processed events on the
+ * main event loop and this call calibrates that measurement.
+ */
+ NS_IMETHOD WillParse(void)=0;
+
+ /**
+ * This method gets called when the parser begins the process
+ * of building the content model via the content sink.
+ *
+ * Default implementation provided since the sink should have the option of
+ * doing nothing in response to this call.
+ *
+ * @update 5/7/98 gess
+ */
+ NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) {
+ return NS_OK;
+ }
+
+ /**
+ * This method gets called when the parser concludes the process
+ * of building the content model via the content sink.
+ *
+ * Default implementation provided since the sink should have the option of
+ * doing nothing in response to this call.
+ *
+ * @update 5/7/98 gess
+ */
+ NS_IMETHOD DidBuildModel(bool aTerminated) {
+ return NS_OK;
+ }
+
+ /**
+ * This method gets called when the parser gets i/o blocked,
+ * and wants to notify the sink that it may be a while before
+ * more data is available.
+ *
+ * @update 5/7/98 gess
+ */
+ NS_IMETHOD WillInterrupt(void)=0;
+
+ /**
+ * This method gets called when the parser i/o gets unblocked,
+ * and we're about to start dumping content again to the sink.
+ *
+ * @update 5/7/98 gess
+ */
+ NS_IMETHOD WillResume(void)=0;
+
+ /**
+ * This method gets called by the parser so that the content
+ * sink can retain a reference to the parser. The expectation
+ * is that the content sink will drop the reference when it
+ * gets the DidBuildModel notification i.e. when parsing is done.
+ */
+ NS_IMETHOD SetParser(nsParserBase* aParser)=0;
+
+ /**
+ * Flush content so that the content model is in sync with the state
+ * of the sink.
+ *
+ * @param aType the type of flush to perform
+ */
+ virtual void FlushPendingNotifications(mozFlushType aType)=0;
+
+ /**
+ * Set the document character set. This should be passed on to the
+ * document itself.
+ */
+ NS_IMETHOD SetDocumentCharset(nsACString& aCharset)=0;
+
+ /**
+ * Returns the target object (often a document object) into which
+ * the content built by this content sink is being added, if any
+ * (IOW, may return null).
+ */
+ virtual nsISupports *GetTarget()=0;
+
+ /**
+ * Returns true if there's currently script executing that we need to hold
+ * parsing for.
+ */
+ virtual bool IsScriptExecuting()
+ {
+ return false;
+ }
+
+ /**
+ * Posts a runnable that continues parsing.
+ */
+ virtual void ContinueInterruptedParsingAsync() {}
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIContentSink, NS_ICONTENT_SINK_IID)
+
+#endif /* nsIContentSink_h___ */
diff --git a/components/htmlparser/src/nsIDTD.h b/components/htmlparser/src/nsIDTD.h
new file mode 100644
index 000000000..cbae4d507
--- /dev/null
+++ b/components/htmlparser/src/nsIDTD.h
@@ -0,0 +1,136 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsIDTD_h___
+#define nsIDTD_h___
+
+/**
+ * MODULE NOTES:
+ * @update gess 7/20/98
+ *
+ * This interface defines standard interface for DTD's. Note that this
+ * isn't HTML specific. DTD's have several functions within the parser
+ * system:
+ * 1) To coordinate the consumption of an input stream via the
+ * parser
+ * 2) To serve as proxy to represent the containment rules of the
+ * underlying document
+ * 3) To offer autodetection services to the parser (mainly for doc
+ * conversion)
+ * */
+
+#include "nsISupports.h"
+#include "nsString.h"
+#include "nsITokenizer.h"
+
+#define NS_IDTD_IID \
+{ 0x3de05873, 0xefa7, 0x410d, \
+ { 0xa4, 0x61, 0x80, 0x33, 0xaf, 0xd9, 0xe3, 0x26 } }
+
+enum eAutoDetectResult {
+ eUnknownDetect,
+ eValidDetect,
+ ePrimaryDetect,
+ eInvalidDetect
+};
+
+enum nsDTDMode {
+ eDTDMode_unknown = 0,
+ eDTDMode_quirks, //pre 4.0 versions
+ eDTDMode_almost_standards,
+ eDTDMode_full_standards,
+ eDTDMode_autodetect,
+ eDTDMode_fragment
+};
+
+
+class nsIContentSink;
+class CParserContext;
+
+class nsIDTD : public nsISupports
+{
+public:
+
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_IDTD_IID)
+
+ NS_IMETHOD WillBuildModel(const CParserContext& aParserContext,
+ nsITokenizer* aTokenizer,
+ nsIContentSink* aSink) = 0;
+
+ /**
+ * Called by the parser after the parsing process has concluded
+ * @update gess5/18/98
+ * @param anErrorCode - contains error code resulting from parse process
+ * @return
+ */
+ NS_IMETHOD DidBuildModel(nsresult anErrorCode) = 0;
+
+ /**
+ * Called (possibly repeatedly) by the parser to parse tokens and construct
+ * the document model via the sink provided to WillBuildModel.
+ *
+ * @param aTokenizer - tokenizer providing the token stream to be parsed
+ * @param aSink - content sink supplied by the caller
+ * (NOTE(review): the summary above mentions the sink given
+ * to WillBuildModel -- confirm they are the same object)
+ * NOTE: aCountLines and aCharsetPtr, previously documented here, are not
+ * parameters of this method.
+ */
+ NS_IMETHOD BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink) = 0;
+
+ /**
+ * This method is called to determine whether or not a tag of one
+ * type can contain a tag of another type.
+ *
+ * @update gess 3/25/98
+ * @param aParent -- int tag of parent container
+ * @param aChild -- int tag of child container
+ * @return true if parent can contain child
+ */
+ NS_IMETHOD_(bool) CanContain(int32_t aParent,int32_t aChild) const = 0;
+
+ /**
+ * This method gets called to determine whether a given
+ * tag is itself a container
+ *
+ * @update gess 3/25/98
+ * @param aTag -- tag to test for containership
+ * @return true if given tag can contain other tags
+ */
+ NS_IMETHOD_(bool) IsContainer(int32_t aTag) const = 0;
+
+ /**
+ * Use this if you want to stop building the content model
+ * --------------[ Sets DTD to STOP mode ]----------------
+ * It's recommended to use this method in accordance with
+ * the parser's terminate() method.
+ *
+ * @update harishd 07/22/99
+ * @param
+ * @return
+ */
+ NS_IMETHOD_(void) Terminate() = 0;
+
+ NS_IMETHOD_(int32_t) GetType() = 0;
+
+ /**
+ * Call this method after calling WillBuildModel to determine what mode the
+ * DTD actually is using, as it may differ from aParserContext.mDTDMode.
+ */
+ NS_IMETHOD_(nsDTDMode) GetMode() const = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIDTD, NS_IDTD_IID)
+
+#define NS_DECL_NSIDTD \
+ NS_IMETHOD WillBuildModel( const CParserContext& aParserContext, nsITokenizer* aTokenizer, nsIContentSink* aSink) override;\
+ NS_IMETHOD DidBuildModel(nsresult anErrorCode) override;\
+ NS_IMETHOD BuildModel(nsITokenizer* aTokenizer, nsIContentSink* aSink) override;\
+ NS_IMETHOD_(bool) CanContain(int32_t aParent,int32_t aChild) const override;\
+ NS_IMETHOD_(bool) IsContainer(int32_t aTag) const override;\
+ NS_IMETHOD_(void) Terminate() override;\
+ NS_IMETHOD_(int32_t) GetType() override;\
+ NS_IMETHOD_(nsDTDMode) GetMode() const override;
+#endif /* nsIDTD_h___ */
diff --git a/components/htmlparser/src/nsIFragmentContentSink.h b/components/htmlparser/src/nsIFragmentContentSink.h
new file mode 100644
index 000000000..8d547ed66
--- /dev/null
+++ b/components/htmlparser/src/nsIFragmentContentSink.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIFragmentContentSink_h___
+#define nsIFragmentContentSink_h___
+
+#include "nsISupports.h"
+
+class nsIDOMDocumentFragment;
+class nsIDocument;
+
+#define NS_I_FRAGMENT_CONTENT_SINK_IID \
+ { 0x1a8ce30b, 0x63fc, 0x441a, \
+ { 0xa3, 0xaa, 0xf7, 0x16, 0xc0, 0xfe, 0x96, 0x69 } }
+
+/**
+ * The fragment sink allows a client to parse a fragment of HTML or XML,
+ * possibly surrounded by context. Also see nsIParser::ParseFragment().
+ * Note: once you've parsed a fragment, the fragment sink must be re-set on
+ * the parser in order to parse another fragment.
+ */
+class nsIFragmentContentSink : public nsISupports {
+public:
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_I_FRAGMENT_CONTENT_SINK_IID)
+ /**
+ * This method is used to obtain the fragment created by
+ * a fragment content sink and to release resources held by the parser.
+ *
+ * The sink drops its reference to the fragment.
+ */
+ NS_IMETHOD FinishFragmentParsing(nsIDOMDocumentFragment** aFragment) = 0;
+
+ /**
+ * This method is used to set the target document for this fragment
+ * sink. This document's nodeinfo manager will be used to create
+ * the content objects. This MUST be called before the sink is used.
+ *
+ * @param aDocument the document the new nodes will belong to
+ * (should not be null)
+ */
+ NS_IMETHOD SetTargetDocument(nsIDocument* aDocument) = 0;
+
+ /**
+ * This method is used to indicate to the sink that we're done building
+ * the context and should start paying attention to the incoming content
+ */
+ NS_IMETHOD WillBuildContent() = 0;
+
+ /**
+ * This method is used to indicate to the sink that we're done building
+ * the real content. This is useful if you want to parse additional context
+ * (such as an end context).
+ */
+ NS_IMETHOD DidBuildContent() = 0;
+
+ /**
+ * This method is a total hack to help with parsing fragments. It is called to
+ * tell the fragment sink that a container from the context will be delivered
+ * after the call to WillBuildContent(). This is only relevant for HTML
+ * fragments that use nsHTMLTokenizer/CNavDTD.
+ */
+ NS_IMETHOD IgnoreFirstContainer() = 0;
+
+ /**
+ * Sets whether script elements are marked as unexecutable.
+ */
+ NS_IMETHOD SetPreventScriptExecution(bool aPreventScriptExecution) = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIFragmentContentSink,
+ NS_I_FRAGMENT_CONTENT_SINK_IID)
+
+nsresult
+NS_NewXMLFragmentContentSink(nsIFragmentContentSink** aInstancePtrResult);
+
+#endif
diff --git a/components/htmlparser/src/nsIHTMLContentSink.h b/components/htmlparser/src/nsIHTMLContentSink.h
new file mode 100644
index 000000000..bf08c4b5e
--- /dev/null
+++ b/components/htmlparser/src/nsIHTMLContentSink.h
@@ -0,0 +1,89 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsIHTMLContentSink_h___
+#define nsIHTMLContentSink_h___
+
+/**
+ * This interface is OBSOLETE and in the process of being REMOVED.
+ * Do NOT implement!
+ *
+ * This file declares the abstract nsIHTMLContentSink interface.
+ * This class is used during the parsing process as the
+ * primary interface between the parser and the content
+ * model.
+ *
+ * After the tokenizer completes, the parser iterates over
+ * the known token list. As the parser identifies valid
+ * elements, it calls the contentsink interface to notify
+ * the content model that a new node or child node is being
+ * created and added to the content model.
+ *
+ * The HTMLContentSink interface assumes 4 underlying
+ * containers: HTML, HEAD, BODY and FRAMESET. Before
+ * accessing any of these, the parser will call the appropriate
+ * OpennsIHTMLContentSink method: OpenHTML,OpenHead,OpenBody,OpenFrameSet;
+ * likewise, the ClosensIHTMLContentSink version will be called when the
+ * parser is done with a given section.
+ *
+ * IMPORTANT: The parser may Open each container more than
+ * once! This is due to the irregular nature of HTML files.
+ * For example, it is possible to encounter plain text at
+ * the start of an HTML document (that precedes the HTML tag).
+ * Such text is treated as if it were part of the body.
+ * In such cases, the parser will Open the body, pass the text-
+ * node in and then Close the body. The body will likely be
+ * re-Opened later when the actual <BODY> tag has been seen.
+ *
+ * Containers within the body are Opened and Closed
+ * using the OpenContainer(...) and CloseContainer(...) calls.
+ * It is assumed that the document or contentSink is
+ * maintaining its state to manage where new content should
+ * be added to the underlying document.
+ *
+ * NOTE: OpenHTML() and OpenBody() may get called multiple times
+ * in the same document. That's fine, and it doesn't mean
+ * that we have multiple bodies or HTML's.
+ *
+ * NOTE: I haven't figured out how sub-documents (non-frames)
+ * are going to be handled. Stay tuned.
+ */
+#include "nsIContentSink.h"
+#include "nsHTMLTags.h"
+
+#define NS_IHTML_CONTENT_SINK_IID \
+ {0xefc5af86, 0x5cfd, 0x4918, {0x9d, 0xd3, 0x5f, 0x7a, 0xb2, 0x88, 0xb2, 0x68}}
+
+/**
+ * This interface is OBSOLETE and in the process of being REMOVED.
+ * Do NOT implement!
+ */
+class nsIHTMLContentSink : public nsIContentSink
+{
+public:
+
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_IHTML_CONTENT_SINK_IID)
+
+ enum ElementType { eHTML, eBody };
+
+ /**
+ * This method is used to open a generic container in the sink.
+ *
+ * @update 4/1/98 gess
+ */
+ NS_IMETHOD OpenContainer(ElementType aNodeType) = 0;
+
+ /**
+ * This method gets called by the parser when a close
+ * container tag has been consumed and needs to be closed.
+ *
+ * @param aTag - The tag to be closed.
+ */
+ NS_IMETHOD CloseContainer(ElementType aTag) = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIHTMLContentSink, NS_IHTML_CONTENT_SINK_IID)
+
+#endif /* nsIHTMLContentSink_h___ */
+
diff --git a/components/htmlparser/src/nsIParser.h b/components/htmlparser/src/nsIParser.h
new file mode 100644
index 000000000..4bf0b3370
--- /dev/null
+++ b/components/htmlparser/src/nsIParser.h
@@ -0,0 +1,272 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef NS_IPARSER___
+#define NS_IPARSER___
+
+
+ /**
+ * This GECKO-INTERNAL interface is on track to being REMOVED (or refactored
+ * to the point of being near-unrecognizable).
+ *
+ * Please DO NOT #include this file in comm-central code, in your XUL
+ * app or binary extensions.
+ *
+ * Please DO NOT #include this into new files even inside Gecko. It is more
+ * likely than not that #including this header is the wrong thing to do.
+ */
+
+#include "nsISupports.h"
+#include "nsIStreamListener.h"
+#include "nsIDTD.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsIAtom.h"
+#include "nsParserBase.h"
+
+#define NS_IPARSER_IID \
+{ 0x2c4ad90a, 0x740e, 0x4212, \
+ { 0xba, 0x3f, 0xfe, 0xac, 0xda, 0x4b, 0x92, 0x9e } }
+
+// {41421C60-310A-11d4-816F-000064657374}
+#define NS_IDEBUG_DUMP_CONTENT_IID \
+{ 0x41421c60, 0x310a, 0x11d4, \
+{ 0x81, 0x6f, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
+
+class nsIContentSink;
+class nsIRequestObserver;
+class nsString;
+class nsIURI;
+class nsIChannel;
+class nsIContent;
+
+enum eParserCommands {
+ eViewNormal,
+ eViewSource,
+ eViewFragment,
+ eViewErrors
+};
+
+enum eParserDocType {
+ ePlainText = 0,
+ eXML,
+ eHTML_Quirks,
+ eHTML_Strict
+};
+
+enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
+
+/**
+ * This GECKO-INTERNAL interface is on track to being REMOVED (or refactored
+ * to the point of being near-unrecognizable).
+ *
+ * Please DO NOT #include this file in comm-central code, in your XUL
+ * app or binary extensions.
+ *
+ * Please DO NOT #include this into new files even inside Gecko. It is more
+ * likely than not that #including this header is the wrong thing to do.
+ */
+class nsIParser : public nsParserBase {
+ public:
+
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_IPARSER_IID)
+
+ /**
+ * Select given content sink into parser for parser output
+ * @update gess5/11/98
+ * @param aSink is the new sink to be used by parser
+ * @return
+ */
+ NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink)=0;
+
+
+ /**
+ * retrieve the sink set into the parser
+ * @update gess5/11/98
+ * @return current sink
+ */
+ NS_IMETHOD_(nsIContentSink*) GetContentSink(void)=0;
+
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about the command which caused the parser to be constructed. For example,
+ * this allows us to select a DTD which can do, say, view-source.
+ *
+ * @update gess 3/25/98
+ * @param aCommand -- ptrs to string that contains command
+ * @return nada
+ */
+ NS_IMETHOD_(void) GetCommand(nsCString& aCommand)=0;
+ NS_IMETHOD_(void) SetCommand(const char* aCommand)=0;
+ NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand)=0;
+
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of a document
+ * @param aSource- the source of the charset
+ * @return nada
+ */
+ NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource)=0;
+ NS_IMETHOD_(void) GetDocumentCharset(nsACString& oCharset, int32_t& oSource)=0;
+
+ /**
+ * Get the channel associated with this parser
+ * @update harishd,gagan 07/17/01
+ * @param aChannel out param that will contain the result
+ * @return NS_OK if successful
+ */
+ NS_IMETHOD GetChannel(nsIChannel** aChannel) override = 0;
+
+ /**
+ * Get the DTD associated with this parser
+ * @update vidur 9/29/99
+ * @param aDTD out param that will contain the result
+ * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
+ */
+ NS_IMETHOD GetDTD(nsIDTD** aDTD) = 0;
+
+ /**
+ * Get the nsIStreamListener for this parser
+ */
+ virtual nsIStreamListener* GetStreamListener() = 0;
+
+ /**************************************************************************
+ * Parse methods always begin with an input source, and perform
+ * conversions until you wind up being emitted to the given contentsink
+ * (which may or may not be a proxy for the NGLayout content model).
+ ************************************************************************/
+
+ // Call this method to resume the parser from an unblocked state.
+ // This can happen, for example, if parsing was interrupted and then the
+ // consumer needed to restart the parser without waiting for more data.
+ // This also happens after loading scripts, which unblock the parser in
+ // order to process the output of document.write() and then need to
+ // continue on with the page load on an enabled parser.
+ NS_IMETHOD ContinueInterruptedParsing() = 0;
+
+ // Stops parsing temporarily.
+ NS_IMETHOD_(void) BlockParser() = 0;
+
+ // Open up the parser for tokenization, building up content
+ // model..etc. However, this method does not resume parsing
+ // automatically. It's the callers' responsibility to restart
+ // the parsing engine.
+ NS_IMETHOD_(void) UnblockParser() = 0;
+
+ /**
+ * Asynchronously continues parsing.
+ */
+ NS_IMETHOD_(void) ContinueInterruptedParsingAsync() = 0;
+
+ NS_IMETHOD_(bool) IsParserEnabled() override = 0;
+ NS_IMETHOD_(bool) IsComplete() = 0;
+
+ NS_IMETHOD Parse(nsIURI* aURL,
+ nsIRequestObserver* aListener = nullptr,
+ void* aKey = 0,
+ nsDTDMode aMode = eDTDMode_autodetect) = 0;
+
+ NS_IMETHOD Terminate(void) = 0;
+
+ /**
+ * This method gets called when you want to parse a fragment of HTML or XML
+ * surrounded by the context |aTagStack|. It requires that the parser have
+ * been given a fragment content sink.
+ *
+ * @param aSourceBuffer The XML or HTML that hasn't been parsed yet.
+ * @param aTagStack The context of the source buffer.
+ * @return Success or failure.
+ */
+ NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
+ nsTArray<nsString>& aTagStack) = 0;
+
+ /**
+ * This method gets called when the tokens have been consumed, and it's time
+ * to build the model via the content sink.
+ * @update gess5/11/98
+ * @return error code -- 0 if model building went well.
+ */
+ NS_IMETHOD BuildModel(void) = 0;
+
+ /**
+ * Call this method to cancel any pending parsing events.
+ * Parsing events may be pending if all of the document's content
+ * has been passed to the parser but the parser has been interrupted
+ * because processing the tokens took too long.
+ *
+ * @update kmcclusk 05/18/01
+ * @return NS_OK if succeeded else ERROR.
+ */
+
+ NS_IMETHOD CancelParsingEvents() = 0;
+
+ virtual void Reset() = 0;
+
+ /**
+ * True if the insertion point (per HTML5) is defined.
+ */
+ virtual bool IsInsertionPointDefined() = 0;
+
+ /**
+ * Call immediately before starting to evaluate a parser-inserted script or
+ * in general when the spec says to define an insertion point.
+ */
+ virtual void PushDefinedInsertionPoint() = 0;
+
+ /**
+ * Call immediately after having evaluated a parser-inserted script or
+ * generally want to restore to the state before the last
+ * PushDefinedInsertionPoint call.
+ */
+ virtual void PopDefinedInsertionPoint() = 0;
+
+ /**
+ * Marks the HTML5 parser as not a script-created parser.
+ */
+ virtual void MarkAsNotScriptCreated(const char* aCommand) = 0;
+
+ /**
+ * True if this is a script-created HTML5 parser.
+ */
+ virtual bool IsScriptCreated() = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIParser, NS_IPARSER_IID)
+
+/* ===========================================================*
+ Some useful constants...
+ * ===========================================================*/
+
+#include "nsError.h"
+
+const nsresult kEOF = NS_ERROR_HTMLPARSER_EOF;
+const nsresult kUnknownError = NS_ERROR_HTMLPARSER_UNKNOWN;
+const nsresult kCantPropagate = NS_ERROR_HTMLPARSER_CANTPROPAGATE;
+const nsresult kContextMismatch = NS_ERROR_HTMLPARSER_CONTEXTMISMATCH;
+const nsresult kBadFilename = NS_ERROR_HTMLPARSER_BADFILENAME;
+const nsresult kBadURL = NS_ERROR_HTMLPARSER_BADURL;
+const nsresult kInvalidParserContext = NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
+const nsresult kBlocked = NS_ERROR_HTMLPARSER_BLOCK;
+const nsresult kBadStringLiteral = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
+const nsresult kHierarchyTooDeep = NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP;
+const nsresult kFakeEndTag = NS_ERROR_HTMLPARSER_FAKE_ENDTAG;
+const nsresult kNotAComment = NS_ERROR_HTMLPARSER_INVALID_COMMENT;
+
+#define NS_IPARSER_FLAG_UNKNOWN_MODE 0x00000000
+#define NS_IPARSER_FLAG_QUIRKS_MODE 0x00000002
+#define NS_IPARSER_FLAG_STRICT_MODE 0x00000004
+#define NS_IPARSER_FLAG_AUTO_DETECT_MODE 0x00000010
+#define NS_IPARSER_FLAG_VIEW_NORMAL 0x00000020
+#define NS_IPARSER_FLAG_VIEW_SOURCE 0x00000040
+#define NS_IPARSER_FLAG_VIEW_ERRORS 0x00000080
+#define NS_IPARSER_FLAG_PLAIN_TEXT 0x00000100
+#define NS_IPARSER_FLAG_XML 0x00000200
+#define NS_IPARSER_FLAG_HTML 0x00000400
+#define NS_IPARSER_FLAG_SCRIPT_ENABLED 0x00000800
+#define NS_IPARSER_FLAG_FRAMES_ENABLED 0x00001000
+
+#endif
diff --git a/components/htmlparser/src/nsIParserService.h b/components/htmlparser/src/nsIParserService.h
new file mode 100644
index 000000000..2906974e9
--- /dev/null
+++ b/components/htmlparser/src/nsIParserService.h
@@ -0,0 +1,98 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsIParserService_h__
+#define nsIParserService_h__
+
+#include "nsISupports.h"
+#include "nsString.h"
+#include "nsHTMLTags.h"
+
+class nsIParser;
+
+#define NS_PARSERSERVICE_CONTRACTID "@mozilla.org/parser/parser-service;1"
+
+// {90a92e37-abd6-441b-9b39-4064d98e1ede}
+#define NS_IPARSERSERVICE_IID \
+{ 0x90a92e37, 0xabd6, 0x441b, { 0x9b, 0x39, 0x40, 0x64, 0xd9, 0x8e, 0x1e, 0xde } }
+
+/**
+ * Service interface for converting between HTML tag names (atoms or strings)
+ * and nsHTMLTag enum values, plus a few entity and element-classification
+ * queries. Obtainable through NS_PARSERSERVICE_CONTRACTID.
+ */
+class nsIParserService : public nsISupports {
+ public:
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_IPARSERSERVICE_IID)
+
+ /**
+ * Looks up the nsHTMLTag enum value corresponding to the tag in aAtom. The
+ * lookup happens case insensitively.
+ *
+ * @param aAtom The tag to look up.
+ *
+ * @return int32_t The nsHTMLTag enum value corresponding to the tag in aAtom
+ * or eHTMLTag_userdefined if the tag does not correspond to
+ * any of the tag nsHTMLTag enum values.
+ */
+ virtual int32_t HTMLAtomTagToId(nsIAtom* aAtom) const = 0;
+
+ /**
+ * Looks up the nsHTMLTag enum value corresponding to the tag in aAtom.
+ *
+ * @param aAtom The tag to look up.
+ *
+ * @return int32_t The nsHTMLTag enum value corresponding to the tag in aAtom
+ * or eHTMLTag_userdefined if the tag does not correspond to
+ * any of the tag nsHTMLTag enum values.
+ */
+ virtual int32_t HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const = 0;
+
+ /**
+ * Looks up the nsHTMLTag enum value corresponding to the tag in aTag. The
+ * lookup happens case insensitively.
+ *
+ * @param aTag The tag to look up.
+ *
+ * @return int32_t The nsHTMLTag enum value corresponding to the tag in aTag
+ * or eHTMLTag_userdefined if the tag does not correspond to
+ * any of the tag nsHTMLTag enum values.
+ */
+ virtual int32_t HTMLStringTagToId(const nsAString& aTag) const = 0;
+
+ /**
+ * Gets the tag corresponding to the nsHTMLTag enum value in aId. The
+ * returned tag will be in lowercase.
+ *
+ * @param aId The nsHTMLTag enum value to get the tag for.
+ *
+ * @return const char16_t* The tag corresponding to the nsHTMLTag enum
+ * value, or nullptr if the enum value doesn't
+ * correspond to a tag (eHTMLTag_unknown,
+ * eHTMLTag_userdefined, eHTMLTag_text, ...).
+ */
+ virtual const char16_t *HTMLIdToStringTag(int32_t aId) const = 0;
+
+ /**
+ * Gets the tag corresponding to the nsHTMLTag enum value in aId. The
+ * returned tag will be in lowercase.
+ *
+ * @param aId The nsHTMLTag enum value to get the tag for.
+ *
+ * @return nsIAtom* The tag corresponding to the nsHTMLTag enum value, or
+ * nullptr if the enum value doesn't correspond to a tag
+ * (eHTMLTag_unknown, eHTMLTag_userdefined, eHTMLTag_text,
+ * ...).
+ */
+ virtual nsIAtom *HTMLIdToAtomTag(int32_t aId) const = 0;
+
+ /**
+ * Converts an entity name to a numeric value written to *aUnicode.
+ * NOTE(review): presumably aEntity is the bare entity name and *aUnicode
+ * receives its code point; the not-found behavior is not visible here --
+ * confirm against the implementation.
+ */
+ NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity,
+ int32_t* aUnicode) const = 0;
+
+ /**
+ * Reverse of HTMLConvertEntityToUnicode: writes an entity string for
+ * aUnicode into aEntity.
+ * NOTE(review): exact output format (with or without '&' / ';') is not
+ * visible here -- confirm against the implementation.
+ */
+ NS_IMETHOD HTMLConvertUnicodeToEntity(int32_t aUnicode,
+ nsCString& aEntity) const = 0;
+
+ /**
+ * Element classification queries; the answer is returned through the
+ * bool out-parameter. aId is presumably an nsHTMLTag enum value, as in the
+ * lookup methods above -- TODO confirm.
+ */
+ NS_IMETHOD IsContainer(int32_t aId, bool& aIsContainer) const = 0;
+ NS_IMETHOD IsBlock(int32_t aId, bool& aIsBlock) const = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIParserService, NS_IPARSERSERVICE_IID)
+
+#endif // nsIParserService_h__
diff --git a/components/htmlparser/src/nsITokenizer.h b/components/htmlparser/src/nsITokenizer.h
new file mode 100644
index 000000000..2ed09d410
--- /dev/null
+++ b/components/htmlparser/src/nsITokenizer.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/1/98
+ *
+ */
+
+#ifndef __NSITOKENIZER__
+#define __NSITOKENIZER__
+
+#include "nsISupports.h"
+
+class nsScanner;
+
+#define NS_ITOKENIZER_IID \
+{ 0Xae98a348, 0X5e91, 0X41a8, \
+ { 0Xa5, 0Xb4, 0Xd2, 0X20, 0Xf3, 0X1f, 0Xc4, 0Xab } }
+
+/***************************************************************
+ Notes:
+ ***************************************************************/
+
+
+/**
+ * Interface implemented by tokenizers that consume input from an nsScanner.
+ */
+class nsITokenizer : public nsISupports {
+public:
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_ITOKENIZER_IID)
+
+ // Called with a flag telling whether the chunk about to be tokenized is the
+ // final one. NOTE(review): call timing relative to ConsumeToken is decided
+ // by the caller and is not visible here.
+ NS_IMETHOD WillTokenize(bool aIsFinalChunk)=0;
+ // Consumes input from aScanner; aFlushTokens is an in/out flag.
+ // NOTE(review): the precise meaning of aFlushTokens is not visible here.
+ NS_IMETHOD ConsumeToken(nsScanner& aScanner,bool& aFlushTokens)=0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsITokenizer, NS_ITOKENIZER_IID)
+
+// Declares overrides of the two nsITokenizer methods; intended for use
+// inside the body of a class implementing nsITokenizer.
+// The final line deliberately has no trailing backslash: a dangling
+// continuation would splice whatever line follows into the macro.
+#define NS_DECL_NSITOKENIZER \
+  NS_IMETHOD WillTokenize(bool aIsFinalChunk) override;\
+  NS_IMETHOD ConsumeToken(nsScanner& aScanner,bool& aFlushTokens) override;
+
+
+#endif
diff --git a/components/htmlparser/src/nsParser.cpp b/components/htmlparser/src/nsParser.cpp
new file mode 100644
index 000000000..791ccf772
--- /dev/null
+++ b/components/htmlparser/src/nsParser.cpp
@@ -0,0 +1,1599 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIAtom.h"
+#include "nsParser.h"
+#include "nsString.h"
+#include "nsCRT.h"
+#include "nsScanner.h"
+#include "plstr.h"
+#include "nsIStringStream.h"
+#include "nsIChannel.h"
+#include "nsICachingChannel.h"
+#include "nsIInputStream.h"
+#include "CNavDTD.h"
+#include "prenv.h"
+#include "prlock.h"
+#include "prcvar.h"
+#include "nsParserCIID.h"
+#include "nsReadableUtils.h"
+#include "nsCOMPtr.h"
+#include "nsExpatDriver.h"
+#include "nsIServiceManager.h"
+#include "nsICategoryManager.h"
+#include "nsISupportsPrimitives.h"
+#include "nsIFragmentContentSink.h"
+#include "nsStreamUtils.h"
+#include "nsHTMLTokenizer.h"
+#include "nsDataHashtable.h"
+#include "nsXPCOMCIDInternal.h"
+#include "nsMimeTypes.h"
+#include "mozilla/CondVar.h"
+#include "mozilla/Mutex.h"
+#include "nsParserConstants.h"
+#include "nsCharsetSource.h"
+#include "nsContentUtils.h"
+#include "nsThreadUtils.h"
+#include "nsIHTMLContentSink.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/dom/ScriptLoader.h"
+#include "mozilla/BinarySearch.h"
+
+using namespace mozilla;
+using mozilla::dom::EncodingUtils;
+
+#define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
+#define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
+#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
+#define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
+#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
+
+//-------------- Begin ParseContinue Event Definition ------------------------
+/*
+The parser can be explicitly interrupted by passing a return value of
+NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
+the parser to stop processing and allow the application to return to the event
+loop. The data which was left at the time of interruption will be processed
+the next time OnDataAvailable is called. If the parser has received its final
+chunk of data then OnDataAvailable will no longer be called by the networking
+module, so the parser will schedule a nsParserContinueEvent which will call
+the parser to process the remaining data after returning to the event loop.
+If the parser is interrupted while processing the remaining data it will
+schedule another ParseContinueEvent. The processing of data followed by
+scheduling of the continue events will proceed until either:
+
+ 1) All of the remaining data can be processed without interrupting
+ 2) The parser has been cancelled.
+
+
+This capability is currently used in CNavDTD and nsHTMLContentSink. The
+nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
+processed and when each token is processed. The nsHTML content sink records
+the time when the chunk has started processing and will return
+NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
+threshold called max tokenizing processing time. This allows the content sink
+to limit how much data is processed in a single chunk which in turn gates how
+much time is spent away from the event loop. Processing smaller chunks of data
+also reduces the time spent in subsequent reflows.
+
+This capability is most apparent when loading large documents. If the maximum
+token processing time is set small enough the application will remain
+responsive during document load.
+
+A side-effect of this capability is that document load is not complete when
+the last chunk of data is passed to OnDataAvailable since the parser may have
+been interrupted when the last chunk of data arrived. The document is complete
+when all of the document has been tokenized and there aren't any pending
+nsParserContinueEvents. This can cause problems if the application assumes
+that it can monitor the load requests to determine when the document load has
+been completed. This is what happens in Mozilla. The document is considered
+completely loaded when all of the load requests have been satisfied. To delay
+the document load until all of the parsing has been completed the
+nsHTMLContentSink adds a dummy parser load request which is not removed until
+the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
+DidBuildModel until the final chunk of data has been passed to the parser
+through the OnDataAvailable and there aren't any pending
+nsParserContinueEvents.
+
+Currently the parser ignores requests to be interrupted during the
+processing of script. This is because a document.write followed by JavaScript
+calls to manipulate the DOM may fail if the parser was interrupted during the
+document.write.
+
+For more details @see bugzilla bug 76722
+*/
+
+
+// Runnable used to resume an interrupted parse from the event loop. It holds
+// an owning reference to the parser for as long as the event exists.
+class nsParserContinueEvent : public Runnable
+{
+public:
+  explicit nsParserContinueEvent(nsParser* aParser)
+    : mParser(aParser)
+  {
+  }
+
+  // Hands this event back to the parser, which decides whether it is still
+  // the current (non-revoked) continue event.
+  NS_IMETHOD Run() override
+  {
+    mParser->HandleParserContinueEvent(this);
+    return NS_OK;
+  }
+
+  RefPtr<nsParser> mParser;
+};
+
+//-------------- End ParseContinue Event Definition ------------------------
+
+/**
+ * Default constructor. Delegates to Initialize(true), which sets up the
+ * parser's initial state.
+ */
+nsParser::nsParser()
+{
+ Initialize(true);
+}
+
+// Destructor. Cleanup() deletes any parser contexts still on the stack.
+nsParser::~nsParser()
+{
+ Cleanup();
+}
+
+// Puts the parser into its initial state.
+//
+// @param aConstructor true when called from the constructor (the context
+//                     stack pointer is nulled); false when re-initializing
+//                     (the observer and unused input are cleared instead).
+void
+nsParser::Initialize(bool aConstructor)
+{
+  if (!aConstructor) {
+    // Re-initialization: drop what a previous use left behind.
+    mObserver = nullptr;
+    mUnusedInput.Truncate();
+  } else {
+    // First-time setup: the context stack is a raw pointer and starts empty.
+    mParserContext = nullptr;
+  }
+
+  // Parsing, observers and tokenizing are all enabled by default.
+  mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
+           NS_PARSER_FLAG_PARSER_ENABLED |
+           NS_PARSER_FLAG_CAN_TOKENIZE;
+
+  mCommand = eViewNormal;
+  mInternalState = NS_OK;
+  mStreamStatus = NS_OK;
+  mContinueEvent = nullptr;
+
+  mCharset.AssignLiteral("ISO-8859-1");
+  mCharsetSource = kCharsetUninitialized;
+
+  mIsAboutBlank = false;
+  mProcessingNetworkData = false;
+}
+
+// Deletes every parser context remaining on the context stack.
+void
+nsParser::Cleanup()
+{
+#ifdef DEBUG
+  // More than one context at teardown means a nested parse never unwound.
+  if (mParserContext && mParserContext->mPrevContext) {
+    NS_WARNING("Extra parser contexts still on the parser stack");
+  }
+#endif
+
+  // Unwind the stack from the top, one context at a time.
+  while (mParserContext) {
+    CParserContext* below = mParserContext->mPrevContext;
+    delete mParserContext;
+    mParserContext = below;
+  }
+
+  // A pending nsParserContinueEvent owns a reference to |this|, so the
+  // pending flag cannot still be set while we are being destroyed.
+  NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
+}
+
+// Cycle-collection support for nsParser: the DTD, sink and observer members
+// are both unlinked and traversed.
+NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
+
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
+ // Additionally report the tokenizer of every context on the context stack.
+ // Note that these tokenizers are traversed but not unlinked above.
+ CParserContext *pc = tmp->mParserContext;
+ while (pc) {
+ cb.NoteXPCOMChild(pc->mTokenizer);
+ pc = pc->mPrevContext;
+ }
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+
+// Reference counting and QueryInterface; nsISupports is resolved through
+// nsIParser because it is reachable via more than one base interface.
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
+ NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
+ NS_INTERFACE_MAP_ENTRY(nsIParser)
+ NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
+ NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
+NS_INTERFACE_MAP_END
+
+// The parser continue event is posted only if
+// all of the data to parse has been passed to ::OnDataAvailable
+// and the parser has been interrupted by the content sink
+// because the processing of tokens took too long.
+
+// Dispatches an nsParserContinueEvent to the current thread unless one is
+// already pending. Always returns NS_OK; a dispatch failure only produces a
+// warning.
+nsresult
+nsParser::PostContinueEvent()
+{
+  // At most one continue event may be outstanding.
+  if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
+    return NS_OK;
+  }
+
+  // If this flag isn't set, then there shouldn't be a live continue event!
+  NS_ASSERTION(!mContinueEvent, "bad");
+
+  // The event owns a reference to |this|; that cycle is broken when the
+  // event fires.
+  nsCOMPtr<nsIRunnable> continueEvent = new nsParserContinueEvent(this);
+  if (NS_SUCCEEDED(NS_DispatchToCurrentThread(continueEvent))) {
+    mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+    mContinueEvent = continueEvent;
+  } else {
+    NS_WARNING("failed to dispatch parser continuation event");
+  }
+  return NS_OK;
+}
+
+/**
+ * Copies the command string stored by SetCommand(const char*) into aCommand.
+ *
+ * @param aCommand receives the stored command string
+ */
+NS_IMETHODIMP_(void)
+nsParser::GetCommand(nsCString& aCommand)
+{
+ aCommand = mCommandStr;
+}
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about the command which caused the parser to be constructed. For example,
+ * this allows us to select a DTD which can do, say, view-source.
+ *
+ * @param aCommand the command string to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(const char* aCommand)
+{
+  mCommandStr.Assign(aCommand);
+
+  // Anything other than the two recognized view commands is treated as a
+  // normal view.
+  eParserCommands parsedCommand = eViewNormal;
+  if (mCommandStr.EqualsLiteral("view-source")) {
+    parsedCommand = eViewSource;
+  } else if (mCommandStr.EqualsLiteral("view-fragment")) {
+    parsedCommand = eViewFragment;
+  }
+  mCommand = parsedCommand;
+}
+
+/**
+ * Enum overload of SetCommand: stores the parser command directly.
+ * Unlike the string overload, this does not modify the stored command
+ * string (mCommandStr), so GetCommand() is unaffected by this call.
+ *
+ * @param aParserCommand the command to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(eParserCommands aParserCommand)
+{
+ mCommand = aParserCommand;
+}
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @param aCharset- the charset of a document
+ * @param aCharsetSource- the source of the charset
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
+{
+  mCharset = aCharset;
+  mCharsetSource = aCharsetSource;
+
+  // Nothing further to update when no scanner is active.
+  if (!mParserContext || !mParserContext->mScanner) {
+    return;
+  }
+
+  // Keep the current context's scanner in sync with the parser.
+  mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
+}
+
+// Forwards aCharset to the content sink; does nothing without a sink.
+void
+nsParser::SetSinkCharset(nsACString& aCharset)
+{
+  if (!mSink) {
+    return;
+  }
+  mSink->SetDocumentCharset(aCharset);
+}
+
+/**
+ * This method gets called in order to set the content
+ * sink for this parser to dump nodes to.
+ *
+ * @param nsIContentSink interface for node receiver
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetContentSink(nsIContentSink* aSink)
+{
+  NS_PRECONDITION(aSink, "sink cannot be null!");
+
+  mSink = aSink;
+  if (!mSink) {
+    // Tolerated in release builds: nothing to wire up.
+    return;
+  }
+
+  mSink->SetParser(this);
+
+  // A sink that implements nsIHTMLContentSink marks this parser as an
+  // about:blank parser.
+  nsCOMPtr<nsIHTMLContentSink> asHtmlSink = do_QueryInterface(mSink);
+  if (asHtmlSink) {
+    mIsAboutBlank = true;
+  }
+}
+
+/**
+ * Retrieves the sink previously set into the parser.
+ *
+ * @return the current sink as a raw pointer; may be null if no sink is set
+ */
+NS_IMETHODIMP_(nsIContentSink*)
+nsParser::GetContentSink()
+{
+ return mSink;
+}
+
+// Creates the DTD for aParserContext: nsExpatDriver for XML, CNavDTD for
+// everything else. Also marks the context as detected (ePrimaryDetect).
+static nsIDTD*
+FindSuitableDTD(CParserContext& aParserContext)
+{
+  // We always find a DTD.
+  aParserContext.mAutoDetectStatus = ePrimaryDetect;
+
+  // Quick check for view source.
+  MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
+             "The old parser is not supposed to be used for View Source "
+             "anymore.");
+
+  // As far as this parser is concerned, "HTML" simply means "not XML".
+  if (aParserContext.mDocType == eXML) {
+    return new nsExpatDriver();
+  }
+  return new CNavDTD();
+}
+
+// Revokes the pending continue event, if any. Always returns NS_OK.
+NS_IMETHODIMP
+nsParser::CancelParsingEvents()
+{
+  if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
+    // Nothing pending, nothing to revoke.
+    return NS_OK;
+  }
+
+  NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
+
+  // Forgetting the event revokes it: HandleParserContinueEvent ignores any
+  // event that is no longer mContinueEvent.
+  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+  mContinueEvent = nullptr;
+  return NS_OK;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/**
+ * Evaluates EXPR1 and EXPR2 exactly once each, in that order. Stores the value
+ * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
+ * (which could be success or failure).
+ *
+ * To understand the motivation for this construct, consider these example
+ * methods:
+ *
+ * nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
+ * nsresult rv = NS_OK;
+ * ...
+ * rv = obj->DoThatThing();
+ * NS_ENSURE_SUCCESS(rv, rv);
+ * ...
+ * return rv;
+ * }
+ *
+ * void nsCaller::MakeThingsHappen() {
+ * return mSomething->DoThatThing(mWhatever);
+ * }
+ *
+ * Suppose, for whatever reason*, we want to shift responsibility for calling
+ * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
+ * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows:
+ *
+ * nsresult nsSomething::DoThatThing() {
+ * nsresult rv = NS_OK;
+ * ...
+ * ...
+ * return rv;
+ * }
+ *
+ * void nsCaller::MakeThingsHappen() {
+ * nsresult rv;
+ * PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
+ * mWhatever->DoThatThing(),
+ * rv);
+ * return rv;
+ * }
+ *
+ * *Possible reasons include: nsCaller doesn't want to give mSomething access
+ * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
+ * be called regardless of how nsSomething::DoThatThing behaves, &c.
+ */
+// Evaluates EXPR1 then EXPR2 (each exactly once). RV ends up holding EXPR2's
+// result when EXPR2 failed; otherwise it holds EXPR1's result (success or
+// failure). The previous version tested NS_FAILED here, which did the
+// opposite: it discarded the latter error code instead of preferring it.
+#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) { \
+  nsresult RV##__temp = EXPR1; \
+  RV = EXPR2; \
+  if (NS_SUCCEEDED(RV)) { \
+    RV = RV##__temp; \
+  } \
+}
+
+/**
+ * This gets called just prior to the model actually
+ * being constructed. It's important to make this the
+ * last thing that happens right before parsing, so we
+ * can delay until the last moment the resolution of
+ * which DTD to use (unless of course we're assigned one).
+ */
+nsresult
+nsParser::WillBuildModel(nsString& aFilename)
+{
+  if (!mParserContext) {
+    return kInvalidParserContext;
+  }
+
+  // Detection has already run for this context; nothing more to do.
+  if (mParserContext->mAutoDetectStatus != eUnknownDetect) {
+    return NS_OK;
+  }
+
+  const bool modeUnresolved =
+    mParserContext->mDTDMode == eDTDMode_unknown ||
+    mParserContext->mDTDMode == eDTDMode_autodetect;
+  if (modeUnresolved) {
+    // about:blank parsers are HTML in quirks mode; everything else that
+    // reaches this parser is XML in full standards mode.
+    const bool blank = mIsAboutBlank;
+    mParserContext->mDTDMode = blank ? eDTDMode_quirks
+                                     : eDTDMode_full_standards;
+    mParserContext->mDocType = blank ? eHTML_Quirks : eXML;
+  } // else XML fragment with nested parser context
+
+  NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
+               "Clobbering DTD for non-root parser context!");
+  mDTD = FindSuitableDTD(*mParserContext);
+  NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
+
+  nsITokenizer* tokenizer;
+  nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // The DTD is notified first, then the sink. nsIDTD::WillBuildModel used to
+  // call the sink itself and NS_ENSURE_SUCCESS the result, so a sink failure
+  // still takes precedence over the DTD's result.
+  rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
+  nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
+  if (NS_FAILED(sinkResult)) {
+    return sinkResult;
+  }
+  return rv;
+}
+
+/**
+ * This gets called when the parser is done with its input.
+ * Note that the parser may have been called recursively, so we
+ * have to check for a prev. context before closing out the DTD/sink.
+ */
+nsresult
+nsParser::DidBuildModel(nsresult anErrorCode)
+{
+  // Nothing to close out while a continue event is pending, when there is no
+  // context at all, or when the current context is a nested (non-root) one.
+  if (!IsComplete() || !mParserContext || mParserContext->mPrevContext) {
+    return anErrorCode;
+  }
+
+  nsresult result = anErrorCode;
+  if (mDTD && mSink) {
+    // Let the sink know whether the load ends because we were terminated;
+    // in that case it should not run deferred scripts.
+    const bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
+
+    // DTD first, then sink. The DTD used to call the sink itself and
+    // NS_ENSURE_SUCCESS the result, so a sink failure still wins over the
+    // DTD's result.
+    nsresult dtdResult = mDTD->DidBuildModel(anErrorCode);
+    nsresult sinkResult = mSink->DidBuildModel(terminated);
+    result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
+  }
+
+  // Ref. to bug 61462.
+  mParserContext->mRequest = nullptr;
+  return result;
+}
+
+/**
+ * Makes aContext the current (topmost) parser context.
+ *
+ * This method does not link the contexts itself: aContext.mPrevContext must
+ * already point at the current context (asserted in debug builds). Only the
+ * address of aContext is stored.
+ *
+ * @param aContext the new topmost context
+ */
+void
+nsParser::PushContext(CParserContext& aContext)
+{
+ NS_ASSERTION(aContext.mPrevContext == mParserContext,
+ "Trying to push a context whose previous context differs from "
+ "the current parser context.");
+ mParserContext = &aContext;
+}
+
+/**
+ * This method pops the topmost context off the stack,
+ * returning it to the user. The next context (if any)
+ * becomes the current context.
+ * @update gess7/22/98
+ * @return prev. context
+ */
+CParserContext*
+nsParser::PopContext()
+{
+  CParserContext* popped = mParserContext;
+  if (!popped) {
+    // Empty stack: nothing to pop.
+    return nullptr;
+  }
+
+  mParserContext = popped->mPrevContext;
+
+  // Hand the popped context's stream listener state down to the context that
+  // is now current, but never override eOnStop there, so that the call to
+  // DidBuildModel() stays guaranteed.
+  if (mParserContext && mParserContext->mStreamListenerState != eOnStop) {
+    mParserContext->mStreamListenerState = popped->mStreamListenerState;
+  }
+
+  return popped;
+}
+
+/**
+ * Replaces the parser's stored unused input (mUnusedInput) with a copy of
+ * aBuffer. That buffer is later used to seed the scanner of a newly created
+ * string parsing context.
+ *
+ * @param aBuffer the input to remember
+ */
+void
+nsParser::SetUnusedInput(nsString& aBuffer)
+{
+ mUnusedInput = aBuffer;
+}
+
+/**
+ * Call this when you want to *force* the parser to terminate the
+ * parsing process altogether. This is binary -- so once you terminate
+ * you can't resume without restarting altogether.
+ */
+NS_IMETHODIMP
+nsParser::Terminate(void)
+{
+  // DidBuildModel must only be called once, so a repeated Terminate() is a
+  // no-op.
+  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+    return NS_OK;
+  }
+
+  // XXX - [ until we figure out a way to break parser-sink circularity ]
+  // Hack - hold a reference to ourselves until we are completely done.
+  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+
+  nsresult result = NS_ERROR_HTMLPARSER_STOPPARSING;
+  mInternalState = result;
+
+  // Revoke any pending continue event, both to avoid leaking the nsParser
+  // (bug 108049) and because DidBuildModel() checks IsComplete(), which
+  // looks at the pending-continue-event flag.
+  CancelParsingEvents();
+
+  // When interrupted in the middle of a document.write there can be several
+  // contexts on the stack, which would turn DidBuildModel into a no-op and
+  // leave the sink cycle unbroken. Collapse the stack to its root context.
+  while (mParserContext && mParserContext->mPrevContext) {
+    CParserContext* below = mParserContext->mPrevContext;
+    delete mParserContext;
+    mParserContext = below;
+  }
+
+  if (mDTD) {
+    mDTD->Terminate();
+    DidBuildModel(result);
+    return NS_OK;
+  }
+
+  if (mSink) {
+    // No DTD yet (terminated before any data arrived): break the reference
+    // cycle with the sink by hand.
+    result = mSink->DidBuildModel(true);
+    NS_ENSURE_SUCCESS(result, result);
+  }
+
+  return NS_OK;
+}
+
+// Resumes parsing after an interruption. Returns NS_OK when resuming is not
+// currently allowed or the resume succeeded; otherwise returns the parser's
+// internal state.
+NS_IMETHODIMP
+nsParser::ContinueInterruptedParsing()
+{
+  // If there are scripts executing, then the content sink is jumping the gun
+  // (probably due to a synchronous XMLHttpRequest) and will re-enable us
+  // later, see bug 460706.
+  if (!IsOkToProcessNetworkData()) {
+    return NS_OK;
+  }
+
+  // Resuming may start tearing things down if the stream already finished,
+  // so keep both ourselves and the sink alive for the duration.
+  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+  nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+
+#ifdef DEBUG
+  if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
+    NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
+  }
+#endif
+
+  const bool finalChunk = mParserContext &&
+                          mParserContext->mStreamListenerState == eOnStop;
+
+  mProcessingNetworkData = true;
+  if (sinkDeathGrip) {
+    sinkDeathGrip->WillParse();
+  }
+  nsresult resumeResult = ResumeParse(true, finalChunk); // Ref. bug 57999
+  mProcessingNetworkData = false;
+
+  // Anything other than a plain NS_OK is reported as the internal state.
+  return resumeResult == NS_OK ? resumeResult : mInternalState;
+}
+
+/**
+ * Stops parsing temporarily by clearing NS_PARSER_FLAG_PARSER_ENABLED; that
+ * is, it prevents the parser from building up the content model until
+ * UnblockParser() is called.
+ */
+NS_IMETHODIMP_(void)
+nsParser::BlockParser()
+{
+ mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
+}
+
+/**
+ * Open up the parser for tokenization, building up content
+ * model..etc. However, this method does not resume parsing
+ * automatically. It's the callers' responsibility to restart
+ * the parsing engine.
+ */
+NS_IMETHODIMP_(void)
+nsParser::UnblockParser()
+{
+  if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
+    // Already enabled: the caller's block/unblock calls are unbalanced.
+    NS_WARNING("Trying to unblock an unblocked parser.");
+    return;
+  }
+  mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
+}
+
+/**
+ * Asks the content sink to resume interrupted parsing asynchronously.
+ * Does nothing when no sink is attached.
+ */
+NS_IMETHODIMP_(void)
+nsParser::ContinueInterruptedParsingAsync()
+{
+  // mSink can be null (never set, or cleared by cycle-collection unlink);
+  // other methods in this file guard against that, so do the same here
+  // instead of dereferencing unconditionally.
+  if (mSink) {
+    mSink->ContinueInterruptedParsingAsync();
+  }
+}
+
+/**
+ * Call this to query whether the parser is enabled or not.
+ *
+ * @return true when NS_PARSER_FLAG_PARSER_ENABLED is set, i.e. the parser is
+ *         not currently blocked via BlockParser()
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsParserEnabled()
+{
+ return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
+}
+
+/**
+ * Call this to query whether the parser thinks it's done with parsing.
+ *
+ * The answer is derived solely from the pending-continue-event flag.
+ *
+ * @return true when no nsParserContinueEvent is pending
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsComplete()
+{
+ return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
+}
+
+
+// Invoked by nsParserContinueEvent::Run(). Resumes parsing unless the event
+// has been revoked in the meantime.
+void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
+{
+  // A revoked event no longer matches mContinueEvent; ignore it.
+  if (mContinueEvent != ev) {
+    return;
+  }
+
+  // The event has fired, so it is no longer pending.
+  mContinueEvent = nullptr;
+  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+
+  NS_ASSERTION(IsOkToProcessNetworkData(),
+               "Interrupted in the middle of a script?");
+  ContinueInterruptedParsing();
+}
+
+// The following nsIParser methods are stubs in this parser implementation:
+// the queries always answer false and the mutators do nothing.
+
+// Always reports that no insertion point is defined.
+bool
+nsParser::IsInsertionPointDefined()
+{
+ return false;
+}
+
+// No-op.
+void
+nsParser::PushDefinedInsertionPoint()
+{
+}
+
+// No-op.
+void
+nsParser::PopDefinedInsertionPoint()
+{
+}
+
+// No-op; aCommand is ignored.
+void
+nsParser::MarkAsNotScriptCreated(const char* aCommand)
+{
+}
+
+// Always false: this parser never reports itself as script-created.
+bool
+nsParser::IsScriptCreated()
+{
+ return false;
+}
+
+/**
+ * This is the main controlling routine in the parsing process.
+ * Note that it may get called multiple times for the same scanner,
+ * since this is a pushed based system, and all the tokens may
+ * not have been consumed by the scanner during a given invocation
+ * of this method.
+ */
+NS_IMETHODIMP
+nsParser::Parse(nsIURI* aURL,
+                nsIRequestObserver* aListener,
+                void* aKey,
+                nsDTDMode aMode)
+{
+  NS_PRECONDITION(aURL, "Error: Null URL given");
+
+  // The observer is recorded even when the URL turns out to be unusable.
+  mObserver = aListener;
+
+  if (!aURL) {
+    return kBadURL;
+  }
+
+  nsAutoCString spec;
+  nsresult rv = aURL->GetSpec(spec);
+  if (rv != NS_OK) {
+    return rv;
+  }
+  NS_ConvertUTF8toUTF16 theName(spec);
+
+  // Build a URL-type context on top of the current one; data is appended to
+  // its scanner later.
+  nsScanner* theScanner = new nsScanner(theName, false);
+  CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
+                                          mCommand, aListener);
+  if (!pc || !theScanner) {
+    mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
+    return NS_ERROR_HTMLPARSER_BADCONTEXT;
+  }
+
+  pc->mMultipart = true;
+  pc->mContextType = CParserContext::eCTURL;
+  pc->mDTDMode = aMode;
+  PushContext(*pc);
+
+  return NS_OK;
+}
+
+/**
+ * Used by XML fragment parsing below.
+ *
+ * @param aSourceBuffer contains a string-full of real content
+ */
+// Appends aSourceBuffer to the scanner of the context identified by aKey
+// (creating and pushing a new string context when no context has that key)
+// and resumes parsing.
+//
+// @param aSourceBuffer the text to parse
+// @param aKey          identifies the parser context to feed
+// @param aLastCall     true when no further data will follow for this key
+// @return NS_OK in the early-out cases, the ResumeParse() result when a new
+//         context was created, or an out-of-memory error
+nsresult
+nsParser::Parse(const nsAString& aSourceBuffer,
+ void* aKey,
+ bool aLastCall)
+{
+ nsresult result = NS_OK;
+
+ // Don't bother if we're never going to parse this.
+ if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+ return result;
+ }
+
+ if (!aLastCall && aSourceBuffer.IsEmpty()) {
+ // Nothing is being passed to the parser so return
+ // immediately. mUnusedInput will get processed when
+ // some data is actually passed in.
+ // But if this is the last call, make sure to finish up
+ // stuff correctly.
+ return result;
+ }
+
+ // Maintain a reference to ourselves so we don't go away
+ // till we're completely done.
+ nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+
+ // NOTE(review): because of the early return above, either aLastCall is true
+ // or aSourceBuffer is non-empty at this point, so this condition appears to
+ // always hold.
+ if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
+ // Note: The following code will always find the parser context associated
+ // with the given key, even if that context has been suspended (e.g., for
+ // another document.write call). This doesn't appear to be exactly what IE
+ // does in the case where this happens, but this makes more sense.
+ CParserContext* pc = mParserContext;
+ while (pc && pc->mKey != aKey) {
+ pc = pc->mPrevContext;
+ }
+
+ if (!pc) {
+ // Only make a new context if we don't have one, OR if we do, but has a
+ // different context key.
+ // The new scanner is seeded with whatever unused input was stored.
+ nsScanner* theScanner = new nsScanner(mUnusedInput);
+ NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
+
+ eAutoDetectResult theStatus = eUnknownDetect;
+
+ if (mParserContext &&
+ mParserContext->mMimeType.EqualsLiteral("application/xml")) {
+ // Ref. Bug 90379
+ NS_ASSERTION(mDTD, "How come the DTD is null?");
+
+ // NOTE(review): this inner null check is redundant; mParserContext was
+ // already tested by the enclosing condition.
+ if (mParserContext) {
+ theStatus = mParserContext->mAutoDetectStatus;
+ // Added this to fix bug 32022.
+ }
+ }
+
+ pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
+ 0, theStatus, aLastCall);
+ NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
+
+ PushContext(*pc);
+
+ // A nested context is multipart whenever its parent is.
+ pc->mMultipart = !aLastCall; // By default
+ if (pc->mPrevContext) {
+ pc->mMultipart |= pc->mPrevContext->mMultipart;
+ }
+
+ // Start fix bug 40143
+ if (pc->mMultipart) {
+ pc->mStreamListenerState = eOnDataAvail;
+ if (pc->mScanner) {
+ pc->mScanner->SetIncremental(true);
+ }
+ } else {
+ pc->mStreamListenerState = eOnStop;
+ if (pc->mScanner) {
+ pc->mScanner->SetIncremental(false);
+ }
+ }
+ // end fix for 40143
+
+ // String contexts created here are always treated as standards-mode XML.
+ pc->mContextType=CParserContext::eCTString;
+ pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
+ pc->mDTDMode = eDTDMode_full_standards;
+
+ mUnusedInput.Truncate();
+
+ pc->mScanner->Append(aSourceBuffer);
+ // Do not interrupt document.write() - bug 95487
+ result = ResumeParse(false, false, false);
+ } else {
+ pc->mScanner->Append(aSourceBuffer);
+ if (!pc->mPrevContext) {
+ // Set stream listener state to eOnStop, on the final context - Fix 68160,
+ // to guarantee DidBuildModel() call - Fix 36148
+ if (aLastCall) {
+ pc->mStreamListenerState = eOnStop;
+ pc->mScanner->SetIncremental(false);
+ }
+
+ if (pc == mParserContext) {
+ // If pc is not mParserContext, then this call to ResumeParse would
+ // do the wrong thing and try to continue parsing using
+ // mParserContext. We need to wait to actually resume parsing on pc.
+ // NOTE(review): unlike the new-context path above, the ResumeParse
+ // result is discarded here, so this path returns NS_OK regardless --
+ // confirm this is intentional.
+ ResumeParse(false, false, false);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+/**
+ * Parses aSourceBuffer as a fragment inside the element context described by
+ * aTagStack.
+ *
+ * The work is done with up to three string-based Parse() calls that share
+ * one context key: first the opening tags rebuilt from aTagStack, then the
+ * fragment itself, and finally (only when aTagStack is non-empty) the
+ * matching end tags. Observers are disabled for the duration of the call and
+ * re-enabled on every exit path.
+ *
+ * @param aSourceBuffer the fragment markup to parse
+ * @param aTagStack tag strings for the enclosing elements, innermost first;
+ *                  an entry may carry attributes after the tag name
+ *                  (separated by a space), which are dropped when the end
+ *                  tag is generated
+ * @return the result of the last Parse() call made,
+ *         NS_ERROR_HTMLPARSER_STOPPARSING if the sink was dropped while
+ *         parsing the context, or NS_ERROR_UNEXPECTED if the sink is not an
+ *         nsIFragmentContentSink
+ */
+NS_IMETHODIMP
+nsParser::ParseFragment(const nsAString& aSourceBuffer,
+                        nsTArray<nsString>& aTagStack)
+{
+  nsresult result = NS_OK;
+  nsAutoString theContext;
+  uint32_t theCount = aTagStack.Length();
+  uint32_t theIndex = 0;
+
+  // Disable observers for fragments
+  mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+  // The stack is stored innermost first, so walk it backwards to emit the
+  // outermost opening tag first.
+  for (theIndex = 0; theIndex < theCount; theIndex++) {
+    theContext.Append('<');
+    theContext.Append(aTagStack[theCount - theIndex - 1]);
+    theContext.Append('>');
+  }
+
+  if (theCount == 0) {
+    // Ensure that the buffer is not empty. Because none of the DTDs care
+    // about leading whitespace, this doesn't change the result.
+    theContext.Assign(' ');
+  }
+
+  // First, parse the context to build up the DTD's tag stack. Note that we
+  // pass false for the aLastCall parameter.
+  result = Parse(theContext,
+                 (void*)&theContext,
+                 false);
+  if (NS_FAILED(result)) {
+    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+    return result;
+  }
+
+  if (!mSink) {
+    // Parse must have failed in the XML case and so the sink was killed.
+    // Re-enable observers here as well, like every other exit path does.
+    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+    return NS_ERROR_HTMLPARSER_STOPPARSING;
+  }
+
+  nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
+  NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
+  if (!fragSink) {
+    // The assertion compiles away in release builds; fail cleanly instead of
+    // dereferencing a null sink below.
+    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  fragSink->WillBuildContent();
+  // Now, parse the actual content. Note that this is the last call
+  // for HTML content, but for XML, we will want to build and parse
+  // the end tags. However, if tagStack is empty, it's the last call
+  // for XML as well.
+  if (theCount == 0) {
+    result = Parse(aSourceBuffer,
+                   &theContext,
+                   true);
+    fragSink->DidBuildContent();
+  } else {
+    // Add an end tag chunk, so expat will read the whole source buffer,
+    // and not worry about ']]' etc.
+    result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
+                   &theContext,
+                   false);
+    fragSink->DidBuildContent();
+
+    if (NS_SUCCEEDED(result)) {
+      nsAutoString endContext;
+      for (theIndex = 0; theIndex < theCount; theIndex++) {
+        // we already added an end tag chunk above
+        if (theIndex > 0) {
+          endContext.AppendLiteral("</");
+        }
+
+        nsString& thisTag = aTagStack[theIndex];
+        // was there an xmlns=? If so, only the name before the first space
+        // belongs in the end tag.
+        int32_t endOfTag = thisTag.FindChar(char16_t(' '));
+        if (endOfTag == -1) {
+          endContext.Append(thisTag);
+        } else {
+          endContext.Append(Substring(thisTag,0,endOfTag));
+        }
+
+        endContext.Append('>');
+      }
+
+      result = Parse(endContext,
+                     &theContext,
+                     true);
+    }
+  }
+
+  mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+  return result;
+}
+
+/**
+ * This routine is called to cause the parser to continue parsing its
+ * underlying stream. This call allows the parse process to happen in
+ * chunks, such as when the content is push based, and we need to parse in
+ * pieces.
+ *
+ * An interesting change in how the parser gets used has led us to add extra
+ * processing to this method. The case occurs when the parser is blocked in
+ * one context, and gets a parse(string) call in another context. In this
+ * case, the parserContexts are linked. No problem.
+ *
+ * The problem is that Parse(string) assumes that it can proceed unabated,
+ * but if the parser is already blocked that assumption is false. So we
+ * needed to add a mechanism here to allow the parser to continue to process
+ * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
+ * out of contexts.
+ *
+ * Nothing is done (and NS_OK is returned) when the parser is disabled or
+ * mInternalState is already NS_ERROR_HTMLPARSER_STOPPARSING.
+ *
+ * @param allowIteration : set to true if non-script resumption is requested
+ * @param aIsFinalChunk : tells us when the last chunk of data is provided.
+ * @param aCanInterrupt : not referenced anywhere in this function's body.
+ * @return error code -- 0 if ok, non-zero if error. A block, stop-parsing or
+ * interrupted result from BuildModel() is reported to the caller as NS_OK;
+ * NS_ERROR_HTMLPARSER_UNRESOLVEDDTD is returned when there is no DTD.
+ */
+nsresult
+nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
+ bool aCanInterrupt)
+{
+ nsresult result = NS_OK;
+
+ if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
+ mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+
+ result = WillBuildModel(mParserContext->mScanner->GetFilename());
+ if (NS_FAILED(result)) {
+ mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
+ return result;
+ }
+
+ if (mDTD) {
+ mSink->WillResume();
+ bool theIterationIsOk = true;
+
+ while (result == NS_OK && theIterationIsOk) {
+ if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
+ // -- Ref: Bug# 22485 --
+ // Insert the unused input into the source buffer
+ // as if it was read from the input stream.
+ // Adding UngetReadable() per vidur!!
+ mParserContext->mScanner->UngetReadable(mUnusedInput);
+ mUnusedInput.Truncate(0);
+ }
+
+ // Only allow parsing to be interrupted in the subsequent call to
+ // build model.
+ nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
+ ? Tokenize(aIsFinalChunk)
+ : NS_OK;
+ result = BuildModel();
+
+ if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
+ PostContinueEvent();
+ }
+
+ theIterationIsOk = theTokenizerResult != kEOF &&
+ result != NS_ERROR_HTMLPARSER_INTERRUPTED;
+
+ // Make sure not to stop parsing too early. Therefore, before shutting
+ // down the parser, it's important to check whether the input buffer
+ // has been scanned to completion (theTokenizerResult should be kEOF).
+ // kEOF -> End of buffer.
+
+ // If we're told to block the parser, we disable all further parsing
+ // (and cache any data coming in) until the parser is re-enabled.
+ if (NS_ERROR_HTMLPARSER_BLOCK == result) {
+ mSink->WillInterrupt();
+ if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
+ // If we were blocked by a recursive invocation, don't re-block.
+ BlockParser();
+ }
+ return NS_OK; // blocking is not reported to the caller as an error
+ }
+ if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+ // Note: Parser Terminate() calls DidBuildModel.
+ if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+ DidBuildModel(mStreamStatus);
+ mInternalState = result;
+ }
+
+ return NS_OK; // a stop request is not reported to the caller as an error
+ }
+ if ((NS_OK == result && theTokenizerResult == kEOF) ||
+ result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+ bool theContextIsStringBased =
+ CParserContext::eCTString == mParserContext->mContextType;
+
+ if (mParserContext->mStreamListenerState == eOnStop ||
+ !mParserContext->mMultipart || theContextIsStringBased) {
+ if (!mParserContext->mPrevContext) {
+ if (mParserContext->mStreamListenerState == eOnStop) {
+ DidBuildModel(mStreamStatus);
+ return NS_OK;
+ }
+ } else {
+ CParserContext* theContext = PopContext();
+ if (theContext) {
+ theIterationIsOk = allowIteration && theContextIsStringBased;
+ if (theContext->mCopyUnused) {
+ if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
+ mInternalState = NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ delete theContext;
+ }
+
+ result = mInternalState;
+ aIsFinalChunk = mParserContext &&
+ mParserContext->mStreamListenerState == eOnStop;
+ // ...then intentionally fall through to mSink->WillInterrupt()...
+ }
+ }
+ }
+
+ if (theTokenizerResult == kEOF ||
+ result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+ result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+ mSink->WillInterrupt();
+ }
+ }
+ } else {
+ mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
+ }
+ }
+
+ return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+}
+
+/**
+ * Hands the tokens produced by the tokenization phase to the DTD so it can
+ * build the content model through the sink.
+ *
+ * @return NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in mInternalState)
+ *         when the current context cannot supply a tokenizer; otherwise the
+ *         DTD's BuildModel() result, or the tokenizer lookup result when
+ *         there is no DTD.
+ */
+nsresult
+nsParser::BuildModel()
+{
+  nsITokenizer* tokenizer = nullptr;
+
+  // Without a parser context there is nothing to look up; the DTD is then
+  // handed a null tokenizer.
+  nsresult rv = mParserContext
+              ? mParserContext->GetTokenizer(mDTD, mSink, tokenizer)
+              : NS_OK;
+
+  if (NS_FAILED(rv)) {
+    mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+    return NS_ERROR_HTMLPARSER_BADTOKENIZER;
+  }
+
+  if (!mDTD) {
+    return rv;
+  }
+
+  return mDTD->BuildModel(tokenizer, mSink);
+}
+
+/*******************************************************************
+ These methods are used to talk to the netlib system...
+ *******************************************************************/
+
+/**
+ * Called when a request starts. Forwards the notification to the observer
+ * (if any), resets the current parser context's stream and detection state,
+ * drops the current DTD and records the channel's content type as the
+ * context's MIME type.
+ *
+ * @param request  the request that is starting; stored on the context
+ * @param aContext passed through to the observer
+ * @return always NS_OK
+ */
+nsresult
+nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
+{
+  NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
+                  "Parser's nsIStreamListener API was not setup "
+                  "correctly in constructor.");
+
+  if (mObserver) {
+    mObserver->OnStartRequest(request, aContext);
+  }
+
+  mParserContext->mStreamListenerState = eOnStart;
+  mParserContext->mAutoDetectStatus = eUnknownDetect;
+  mParserContext->mRequest = request;
+
+  NS_ASSERTION(!mParserContext->mPrevContext,
+               "Clobbering DTD for non-root parser context!");
+  mDTD = nullptr;
+
+  // A request that is not a channel, or a channel that cannot report its
+  // type, simply leaves the MIME type untouched.
+  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
+  if (channel) {
+    nsAutoCString contentType;
+    if (NS_SUCCEEDED(channel->GetContentType(contentType))) {
+      mParserContext->SetMimeType(contentType);
+    }
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Scans the start of a byte buffer for an XML declaration and extracts the
+ * value of its encoding pseudo-attribute.
+ *
+ * The buffer must begin with "<?xml". A quoted version value has to be seen
+ * before an encoding is looked for, and scanning stops at "?>". An encoding
+ * value that is exactly "UTF-16" (compared case-insensitively) is rejected.
+ *
+ * @param aBytes   start of the buffer to inspect
+ * @param aLen     number of bytes available at aBytes
+ * @param oCharset receives the encoding value; always truncated first
+ * @return true if oCharset was set to a non-empty value
+ */
+static bool
+ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
+                                 nsCString& oCharset)
+{
+  // This code is rather pointless to have. Might as well reuse expat as
+  // seen in nsHtml5StreamParser. -- hsivonen
+  oCharset.Truncate();
+
+  // View the buffer as plain chars once, instead of casting away const at
+  // every access.
+  const char* bytes = reinterpret_cast<const char*>(aBytes);
+
+  if (aLen < 5 || 0 != PL_strncmp("<?xml", bytes, 5)) {
+    return false;
+  }
+
+  bool versionFound = false;
+  bool encodingFound = false;
+  for (int32_t i = 6; i < aLen && !encodingFound; ++i) {
+    // end of XML declaration?
+    if (bytes[i] == '?' && (i + 1) < aLen && bytes[i + 1] == '>') {
+      break;
+    }
+
+    if (!versionFound) {
+      // Version is required.
+      // Want to avoid string comparisons, hence looking for 'n'
+      // and only if found check the string leading to it. Not
+      // foolproof, but fast.
+      // The shortest string allowed before this is (strlen==13):
+      // <?xml version
+      if (bytes[i] == 'n' && i >= 12 &&
+          0 == PL_strncmp("versio", bytes + i - 6, 6)) {
+        // Fast forward through version
+        char q = 0;
+        for (++i; i < aLen; ++i) {
+          char qi = bytes[i];
+          if (qi == '\'' || qi == '"') {
+            if (q && q == qi) {
+              // ending quote
+              versionFound = true;
+              break;
+            } else {
+              // Starting quote
+              q = qi;
+            }
+          }
+        }
+      }
+    } else {
+      // encoding must follow version
+      // Want to avoid string comparisons, hence looking for 'g'
+      // and only if found check the string leading to it. Not
+      // foolproof, but fast.
+      // The shortest allowed string before this (strlen==26):
+      // <?xml version="1" encoding
+      if (bytes[i] == 'g' && i >= 25 &&
+          0 == PL_strncmp("encodin", bytes + i - 7, 7)) {
+        int32_t encStart = 0;
+        char q = 0;
+        for (++i; i < aLen; ++i) {
+          char qi = bytes[i];
+          if (qi == '\'' || qi == '"') {
+            if (q && q == qi) {
+              int32_t count = i - encStart;
+              // encoding value is invalid if it is UTF-16. Require the
+              // length to match as well, so that a mere prefix of "UTF-16"
+              // (e.g. "UTF") is not mistaken for it.
+              const bool isUtf16 =
+                count == 6 &&
+                0 == PL_strncasecmp("UTF-16", bytes + encStart, 6);
+              if (count > 0 && !isUtf16) {
+                oCharset.Assign(bytes + encStart, count);
+              }
+              encodingFound = true;
+              break;
+            } else {
+              encStart = i + 1;
+              q = qi;
+            }
+          }
+        }
+      }
+    } // if (!versionFound)
+  } // for
+
+  return !oCharset.IsEmpty();
+}
+
+/**
+ * Advances aStart by one position and returns the character found there.
+ *
+ * @param aStart iterator to advance; must not already equal aEnd
+ * @param aEnd   end-of-buffer iterator
+ * @return the character at the new position, or '\0' when the advance
+ *         reached aEnd
+ */
+inline char
+GetNextChar(nsACString::const_iterator& aStart,
+            nsACString::const_iterator& aEnd)
+{
+  NS_ASSERTION(aStart != aEnd, "end of buffer");
+
+  ++aStart;
+  if (aStart == aEnd) {
+    return '\0';
+  }
+  return *aStart;
+}
+
+static nsresult
+NoOpParserWriteFunc(nsIInputStream* in,
+ void* closure,
+ const char* fromRawSegment,
+ uint32_t toOffset,
+ uint32_t count,
+ uint32_t *writeCount)
+{ // ReadSegments() callback that stores nothing; OnDataAvailable uses it to drop data sent for about:blank
+ *writeCount = count; // report the whole segment as written even though it is discarded
+ return NS_OK;
+}
+
+typedef struct { // closure handed to ParserWriteFunc through ReadSegments()
+ bool mNeedCharsetCheck; // while true, ParserWriteFunc sniffs the charset from the segment, then clears this
+ nsParser* mParser; // parser whose document and sink charset ParserWriteFunc reads and updates
+ nsScanner* mScanner; // scanner that ParserWriteFunc appends each segment to
+ nsIRequest* mRequest; // request given to OnDataAvailable; ParserWriteFunc does not read it
+} ParserWriteStruct;
+
+/*
+ * ReadSegments() callback: invoked once for each contiguous buffer of data
+ * in the underlying stream or pipe, which lets the data be appended to the
+ * scanner without an intermediate copy.
+ *
+ * While the closure's mNeedCharsetCheck flag is set, the segment is first
+ * inspected for a byte order mark or (if the current charset source ranks
+ * below kCharsetFromChannel) an XML declaration, and the parser's document
+ * and sink charset are updated accordingly.
+ *
+ * Returns NS_ERROR_FAILURE when no closure is supplied, otherwise the result
+ * of appending to the scanner; *writeCount is only set on success.
+ */
+static nsresult
+ParserWriteFunc(nsIInputStream* in,
+                void* closure,
+                const char* fromRawSegment,
+                uint32_t toOffset,
+                uint32_t count,
+                uint32_t *writeCount)
+{
+  ParserWriteStruct* state = static_cast<ParserWriteStruct*>(closure);
+  if (!state) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (state->mNeedCharsetCheck) {
+    state->mNeedCharsetCheck = false;
+
+    const unsigned char* rawBytes =
+      reinterpret_cast<const unsigned char*>(fromRawSegment);
+
+    int32_t charsetSource;
+    nsAutoCString charset;
+    nsAutoCString candidate;
+    state->mParser->GetDocumentCharset(charset, charsetSource);
+
+    // This code was bogus when I found it. It expects the BOM or the XML
+    // declaration to be entirely in the first network buffer. -- hsivonen
+    if (nsContentUtils::CheckForBOM(rawBytes, count, candidate)) {
+      // The decoder will swallow the BOM. The UTF-16 will re-sniff for
+      // endianness. The value of charset is now either "UTF-8" or "UTF-16".
+      charset.Assign(candidate);
+      charsetSource = kCharsetFromByteOrderMark;
+    } else if (charsetSource < kCharsetFromChannel) {
+      nsAutoCString declared;
+
+      // Only a declared encoding that maps to a known label is adopted.
+      if (ExtractCharsetFromXmlDeclaration(rawBytes, count, declared) &&
+          EncodingUtils::FindEncodingForLabel(declared, candidate)) {
+        charset.Assign(candidate);
+        charsetSource = kCharsetFromMetaTag;
+      }
+    }
+
+    state->mParser->SetDocumentCharset(charset, charsetSource);
+    state->mParser->SetSinkCharset(charset);
+  }
+
+  nsresult rv = state->mScanner->Append(fromRawSegment, count);
+  if (NS_SUCCEEDED(rv)) {
+    *writeCount = count;
+  }
+
+  return rv;
+}
+
+/**
+ * Called when data is available for a request. Appends the data to the
+ * scanner of the parser context that owns the request and, if it is
+ * currently OK to process network data, resumes parsing.
+ *
+ * @param request      the request the data belongs to
+ * @param aContext     unused here
+ * @param pIStream     stream to read the data from
+ * @param sourceOffset unused here
+ * @param aLength      number of bytes to read from pIStream
+ * @return NS_ERROR_UNEXPECTED when no parser context owns the request, a
+ *         ReadSegments() failure, or otherwise the ResumeParse() result
+ *         (NS_OK when parsing was not resumed)
+ */
+nsresult
+nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
+                          nsIInputStream *pIStream, uint64_t sourceOffset,
+                          uint32_t aLength)
+{
+  NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
+                   eOnDataAvail == mParserContext->mStreamListenerState),
+            "Error: OnStartRequest() must be called before OnDataAvailable()");
+  NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
+                  "Must have a buffered input stream");
+
+  if (mIsAboutBlank) {
+    MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
+    // ... but if an extension tries to feed us data for about:blank in a
+    // release build, silently ignore the data.
+    uint32_t discarded;
+    return pIStream->ReadSegments(NoOpParserWriteFunc,
+                                  nullptr,
+                                  aLength,
+                                  &discarded);
+  }
+
+  // Locate the context, possibly a suspended one, that owns this request.
+  CParserContext* owner = mParserContext;
+  while (owner && owner->mRequest != request) {
+    owner = owner->mPrevContext;
+  }
+  if (!owner) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  owner->mStreamListenerState = eOnDataAvail;
+
+  if (eInvalidDetect == owner->mAutoDetectStatus && owner->mScanner) {
+    nsScannerIterator endOfData;
+    owner->mScanner->EndReading(endOfData);
+    owner->mScanner->SetPosition(endOfData, true);
+  }
+
+  ParserWriteStruct pws;
+  pws.mNeedCharsetCheck = true;
+  pws.mParser = this;
+  pws.mScanner = owner->mScanner;
+  pws.mRequest = request;
+
+  uint32_t totalRead;
+  nsresult rv =
+    pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  if (IsOkToProcessNetworkData()) {
+    // Keep the parser and its sink alive while parsing runs.
+    nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+    nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+    mProcessingNetworkData = true;
+    if (sinkDeathGrip) {
+      sinkDeathGrip->WillParse();
+    }
+    rv = ResumeParse();
+    mProcessingNetworkData = false;
+  }
+
+  return rv;
+}
+
+/**
+ * This is called by the networking library once the last block of data
+ * has been collected from the net.
+ *
+ * Marks the parser context that owns the request as stopped, records the
+ * stream status, resumes parsing for the final chunk when it is OK to
+ * process network data, and finally notifies the observer.
+ *
+ * @param request  the request that finished
+ * @param aContext passed through to the observer
+ * @param status   final status of the request; stored in mStreamStatus
+ * @return the ResumeParse() result, or NS_OK when parsing was not resumed
+ */
+nsresult
+nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
+                        nsresult status)
+{
+  for (CParserContext* ctx = mParserContext; ctx; ctx = ctx->mPrevContext) {
+    if (ctx->mRequest == request) {
+      ctx->mStreamListenerState = eOnStop;
+      ctx->mScanner->SetIncremental(false);
+      break;
+    }
+  }
+
+  mStreamStatus = status;
+
+  nsresult rv = NS_OK;
+  if (IsOkToProcessNetworkData()) {
+    mProcessingNetworkData = true;
+    if (mSink) {
+      mSink->WillParse();
+    }
+    rv = ResumeParse(true, true);
+    mProcessingNetworkData = false;
+  }
+
+  // If the parser isn't enabled, we don't finish parsing till
+  // it is reenabled.
+
+  // XXX Should we wait to notify our observers as well if the
+  // parser isn't yet enabled?
+  if (mObserver) {
+    mObserver->OnStopRequest(request, aContext, status);
+  }
+
+  return rv;
+}
+
+
+/*******************************************************************
+ Here come the tokenization methods...
+ *******************************************************************/
+
+
+/**
+ * Part of the code sandwich: called right before tokenization begins so the
+ * tokenizer of the current context can do its initialization.
+ *
+ * @param aIsFinalChunk forwarded to the tokenizer's WillTokenize()
+ * @return true when there is no parser context or the tokenizer's
+ *         WillTokenize() succeeded; false when the tokenizer could not be
+ *         obtained or its WillTokenize() failed
+ */
+bool
+nsParser::WillTokenize(bool aIsFinalChunk)
+{
+  if (mParserContext) {
+    nsITokenizer* theTokenizer = nullptr;
+    nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
+    NS_ENSURE_SUCCESS(result, false);
+    return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
+  }
+
+  // Nothing to prepare without a context.
+  return true;
+}
+
+
+/**
+ * This is the primary control routine to consume tokens.
+ * It iteratively consumes tokens until an error occurs or
+ * you run out of data.
+ *
+ * @param aIsFinalChunk passed through to WillTokenize().
+ * @return kEOF when the tokenizer reports the end of the buffer, the result
+ * of Terminate() when the tokenizer asks to stop parsing,
+ * NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in mInternalState) when
+ * there is no parser context or GetTokenizer() fails, and otherwise the
+ * result of the last ConsumeToken() call.
+ */
+nsresult nsParser::Tokenize(bool aIsFinalChunk)
+{
+ nsITokenizer* theTokenizer;
+
+ nsresult result = NS_ERROR_NOT_AVAILABLE; // stays a failure when there is no parser context
+ if (mParserContext) {
+ result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
+ }
+
+ if (NS_SUCCEEDED(result)) {
+ bool flushTokens = false;
+
+ bool killSink = false;
+
+ WillTokenize(aIsFinalChunk); // its return value is ignored
+ while (NS_SUCCEEDED(result)) { // any failure not handled below ends the loop here
+ mParserContext->mScanner->Mark();
+ result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
+ flushTokens);
+ if (NS_FAILED(result)) {
+ mParserContext->mScanner->RewindToMark(); // every failure rewinds to the Mark() taken before this token
+ if (kEOF == result){
+ break;
+ }
+ if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+ killSink = true;
+ result = Terminate();
+ break;
+ }
+ } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
+ // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
+ // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
+ // Also remember to update the marked position.
+ mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
+ mParserContext->mScanner->Mark();
+ break;
+ }
+ }
+
+ if (killSink) {
+ mSink = nullptr; // the sink is released only after Terminate() has run
+ }
+ } else {
+ result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+ }
+
+ return result;
+}
+
+/**
+ * Get the channel associated with this parser, i.e. the request of the
+ * current parser context queried for nsIChannel.
+ *
+ * @param aChannel out param that will contain the result
+ * @return NS_ERROR_NOT_AVAILABLE when there is no parser context or the
+ *         context has no request; otherwise the result of the interface
+ *         query (NS_OK if successful)
+ */
+NS_IMETHODIMP
+nsParser::GetChannel(nsIChannel** aChannel)
+{
+  if (!mParserContext || !mParserContext->mRequest) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  return CallQueryInterface(mParserContext->mRequest, aChannel);
+}
+
+/**
+ * Get the DTD associated with this parser.
+ *
+ * @param aDTD out param that receives an AddRef'ed pointer to the DTD, or
+ *             nullptr when there is no parser context or no DTD
+ * @return always NS_OK
+ */
+NS_IMETHODIMP
+nsParser::GetDTD(nsIDTD** aDTD)
+{
+  // Give the out parameter a defined value on every path: NS_OK is returned
+  // even when there is no parser context, so the caller must never be left
+  // reading an unassigned pointer.
+  *aDTD = nullptr;
+
+  if (mParserContext) {
+    NS_IF_ADDREF(*aDTD = mDTD);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Get this as nsIStreamListener
+ *
+ * @return this parser object itself, converted to nsIStreamListener*; no
+ * reference is added by this function.
+ */
+nsIStreamListener*
+nsParser::GetStreamListener()
+{
+ return this;
+}
diff --git a/components/htmlparser/src/nsParser.h b/components/htmlparser/src/nsParser.h
new file mode 100644
index 000000000..39bfe03b8
--- /dev/null
+++ b/components/htmlparser/src/nsParser.h
@@ -0,0 +1,398 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * MODULE NOTES:
+ *
+ * This class does two primary jobs:
+ * 1) It iterates the tokens provided during the
+ * tokenization process, identifying where elements
+ * begin and end (doing validation and normalization).
+ * 2) It controls and coordinates with an instance of
+ * the IContentSink interface, to coordinate
+ * the production of the content model.
+ *
+ * The basic operation of this class assumes that an HTML
+ * document is non-normalized. Therefore, we don't process
+ * the document in a normalized way. Don't bother to look
+ * for methods like: doHead() or doBody().
+ *
+ * Instead, in order to be backward compatible, we must
+ * scan the set of tokens and perform this basic set of
+ * operations:
+ * 1) Determine the token type (easy, since the tokens know)
+ * 2) Determine the appropriate section of the HTML document
+ * each token belongs in (HTML,HEAD,BODY,FRAMESET).
+ * 3) Insert content into our document (via the sink) into
+ * the correct section.
+ * 4) In the case of tags that belong in the BODY, we must
+ * ensure that our underlying document state reflects
+ * the appropriate context for our tag.
+ *
+ * For example, if we see a <TR>, we must ensure our
+ * document contains a table into which the row can
+ * be placed. This may result in "implicit containers"
+ * being created to ensure a well-formed document.
+ *
+ */
+
+#ifndef NS_PARSER__
+#define NS_PARSER__
+
+#include "nsIParser.h"
+#include "nsDeque.h"
+#include "nsIURL.h"
+#include "CParserContext.h"
+#include "nsParserCIID.h"
+#include "nsITokenizer.h"
+#include "nsHTMLTags.h"
+#include "nsIContentSink.h"
+#include "nsCOMArray.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWeakReference.h"
+
+class nsIDTD;
+class nsIRunnable;
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4275 )
+#endif
+
+
+class nsParser final : public nsIParser,
+ public nsIStreamListener,
+ public nsSupportsWeakReference
+{
+ /**
+ * Destructor
+ * @update gess5/11/98
+ */
+ virtual ~nsParser();
+
+ public:
+ /**
+ * Called on module init
+ */
+ static nsresult Init();
+
+ /**
+ * Called on module shutdown
+ */
+ static void Shutdown();
+
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
+
+ /**
+ * default constructor
+ * @update gess5/11/98
+ */
+ nsParser();
+
+ /**
+ * Select given content sink into parser for parser output
+ * @update gess5/11/98
+ * @param aSink is the new sink to be used by parser
+ * @return nothing
+ */
+ NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
+
+ /**
+ * Retrieve the sink set into the parser
+ * @update gess5/11/98
+ * @param none
+ * @return the parser's current content sink
+ */
+ NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override;
+
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about the command which caused the parser to be constructed. For example,
+ * this allows us to select a DTD which can do, say, view-source.
+ *
+ * @update gess 3/25/98
+ * @param aCommand -- ptrs to string that contains command
+ * @return nada
+ */
+ NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
+ NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
+ NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
+
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of a document
+ * @param aSource- the source of the charset
+ * @return nada
+ */
+ NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource) override;
+
+ NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) override
+ {
+ aCharset = mCharset; // copies the stored charset name into the caller's string
+ aSource = mCharsetSource;
+ }
+
+ /**
+ * Cause parser to parse input from given URL
+ * @update gess5/11/98
+ * @param aURL is a descriptor for source document
+ * @param aListener is a listener to forward notifications to
+ * @return nsresult status code
+ */
+ NS_IMETHOD Parse(nsIURI* aURL,
+ nsIRequestObserver* aListener = nullptr,
+ void* aKey = 0,
+ nsDTDMode aMode = eDTDMode_autodetect) override;
+
+ /**
+ * Parses aSourceBuffer as a fragment, using the tags in aTagStack to
+ * rebuild the enclosing element context first.
+ */
+ NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
+ nsTArray<nsString>& aTagStack) override;
+
+ /**
+ * This method gets called when the tokens have been consumed, and it's time
+ * to build the model via the content sink.
+ * @update gess5/11/98
+ * @return nsresult status code.
+ */
+ NS_IMETHOD BuildModel(void) override;
+
+ NS_IMETHOD ContinueInterruptedParsing() override;
+ NS_IMETHOD_(void) BlockParser() override;
+ NS_IMETHOD_(void) UnblockParser() override;
+ NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
+ NS_IMETHOD Terminate(void) override;
+
+ /**
+ * Call this to query whether the parser is enabled or not.
+ *
+ * @update vidur 4/12/99
+ * @return current state
+ */
+ NS_IMETHOD_(bool) IsParserEnabled() override;
+
+ /**
+ * Call this to query whether the parser thinks it's done with parsing.
+ *
+ * @update rickg 5/12/01
+ * @return complete state
+ */
+ NS_IMETHOD_(bool) IsComplete() override;
+
+ /**
+ * This rather arcane method (hack) is used as a signal between the
+ * DTD and the parser. It allows the DTD to tell the parser that content
+ * that comes through a string-based parse call but is not consumed should
+ * propagate into the next string based parse call.
+ *
+ * @update gess 9/1/98
+ * @param aBuffer the unconsumed string content (NOTE(review): confirm
+ * against the implementation, which is not visible here).
+ * @return nothing
+ */
+ void SetUnusedInput(nsString& aBuffer);
+
+ /**
+ * This method gets called (automatically) during incremental parsing
+ * @update gess5/11/98
+ * @return nsresult status code
+ */
+ virtual nsresult ResumeParse(bool allowIteration = true,
+ bool aIsFinalChunk = false,
+ bool aCanInterrupt = true);
+
+ //*********************************************
+ // These methods are callback methods used by
+ // net lib to let us know about our inputstream.
+ //*********************************************
+ // nsIRequestObserver methods:
+ NS_DECL_NSIREQUESTOBSERVER
+
+ // nsIStreamListener methods:
+ NS_DECL_NSISTREAMLISTENER
+
+ void PushContext(CParserContext& aContext);
+ CParserContext* PopContext();
+ CParserContext* PeekContext() {return mParserContext;}
+
+ /**
+ * Get the channel associated with this parser
+ * @update harishd,gagan 07/17/01
+ * @param aChannel out param that will contain the result
+ * @return NS_OK if successful
+ */
+ NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
+
+ /**
+ * Get the DTD associated with this parser
+ * @update vidur 9/29/99
+ * @param aDTD out param that will contain the result
+ * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
+ */
+ NS_IMETHOD GetDTD(nsIDTD** aDTD) override;
+
+ /**
+ * Get the nsIStreamListener for this parser
+ */
+ virtual nsIStreamListener* GetStreamListener() override;
+
+ void SetSinkCharset(nsACString& aCharset);
+
+ /**
+ * Removes continue parsing events
+ * @update kmcclusk 5/18/98
+ */
+
+ NS_IMETHOD CancelParsingEvents() override;
+
+ /**
+ * Return true.
+ */
+ virtual bool IsInsertionPointDefined() override;
+
+ /**
+ * No-op.
+ */
+ virtual void PushDefinedInsertionPoint() override;
+
+ /**
+ * No-op.
+ */
+ virtual void PopDefinedInsertionPoint() override;
+
+ /**
+ * No-op.
+ */
+ virtual void MarkAsNotScriptCreated(const char* aCommand) override;
+
+ /**
+ * Always false.
+ */
+ virtual bool IsScriptCreated() override;
+
+ /**
+ * Set the parser state to indicate whether parsing tokens can be interrupted
+ * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
+ * @update kmcclusk 5/18/98
+ */
+ void SetCanInterrupt(bool aCanInterrupt);
+
+ /**
+ * This is called when the final chunk has been
+ * passed to the parser and the content sink has
+ * interrupted token processing. It schedules
+ * a ParserContinue PL_Event which will ask the parser
+ * to HandleParserContinueEvent when it is handled.
+ * @update kmcclusk6/1/2001
+ */
+ nsresult PostContinueEvent();
+
+ /**
+ * Fired when the continue parse event is triggered.
+ * @update kmcclusk 5/18/98
+ */
+ void HandleParserContinueEvent(class nsParserContinueEvent *);
+
+ virtual void Reset() override {
+ Cleanup();
+ Initialize();
+ }
+
+ bool IsScriptExecuting() {
+ return mSink && mSink->IsScriptExecuting(); // false when there is no sink
+ }
+
+ bool IsOkToProcessNetworkData() {
+ return !IsScriptExecuting() && !mProcessingNetworkData; // no script running and not already processing network data
+ }
+
+ protected:
+
+ void Initialize(bool aConstructor = false);
+ void Cleanup();
+
+ /**
+ * Called by ResumeParse() before any tokenizing or model building.
+ * @update gess5/18/98
+ * @param aFilename ResumeParse() passes the current scanner's filename
+ * @return nsresult status code; ResumeParse() gives up on failure
+ */
+ nsresult WillBuildModel(nsString& aFilename);
+
+ /**
+ * Called by ResumeParse() when parsing stops or the final context is done.
+ * @update gess5/18/98
+ * @param anErrorCode ResumeParse() passes mStreamStatus
+ * @return nsresult status code
+ */
+ nsresult DidBuildModel(nsresult anErrorCode);
+
+private:
+
+ /*******************************************
+ These are the tokenization methods...
+ *******************************************/
+
+ /**
+ * Part of the code sandwich, this gets called right before
+ * the tokenization process begins. The main reason for
+ * this call is to allow the delegate to do initialization.
+ *
+ * @update gess 3/25/98
+ * @param aIsFinalChunk forwarded to the tokenizer's WillTokenize()
+ * @return TRUE if it's ok to proceed
+ */
+ bool WillTokenize(bool aIsFinalChunk = false);
+
+
+ /**
+ * This is the primary control routine. It iteratively
+ * consumes tokens until an error occurs or you run out
+ * of data.
+ *
+ * @update gess 3/25/98
+ * @return error code
+ */
+ nsresult Tokenize(bool aIsFinalChunk = false);
+
+ /**
+ * Pushes XML fragment parsing data to expat without an input stream.
+ */
+ nsresult Parse(const nsAString& aSourceBuffer,
+ void* aKey,
+ bool aLastCall);
+
+protected:
+ //*********************************************
+ // And now, some data members...
+ //*********************************************
+
+
+ CParserContext* mParserContext; // current context; earlier ones are reached through mPrevContext
+ nsCOMPtr<nsIDTD> mDTD;
+ nsCOMPtr<nsIRequestObserver> mObserver; // receives forwarded OnStartRequest/OnStopRequest calls
+ nsCOMPtr<nsIContentSink> mSink;
+ nsIRunnable* mContinueEvent; // weak ref
+
+ eParserCommands mCommand;
+ nsresult mInternalState;
+ nsresult mStreamStatus; // status handed to OnStopRequest()
+ int32_t mCharsetSource;
+
+ uint16_t mFlags; // NS_PARSER_FLAG_* bits
+
+ nsString mUnusedInput; // pushed back into the scanner by ResumeParse()
+ nsCString mCharset;
+ nsCString mCommandStr;
+
+ bool mProcessingNetworkData; // set while OnDataAvailable()/OnStopRequest() run ResumeParse()
+ bool mIsAboutBlank; // when set, OnDataAvailable() discards incoming data
+};
+
+#endif
+
diff --git a/components/htmlparser/src/nsParserBase.h b/components/htmlparser/src/nsParserBase.h
new file mode 100644
index 000000000..83b68c554
--- /dev/null
+++ b/components/htmlparser/src/nsParserBase.h
@@ -0,0 +1,20 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserBase_h_
+#define nsParserBase_h_
+
+#include "nsIChannel.h"
+
+class nsParserBase : public nsISupports
+{
+ public:
+ NS_IMETHOD_(bool) IsParserEnabled() { return true; } // base implementation always reports true
+ NS_IMETHOD GetChannel(nsIChannel** aChannel) {
+ *aChannel = nullptr; // base implementation hands out no channel, yet still returns NS_OK
+ return NS_OK;
+ }
+};
+
+#endif // nsParserBase_h_
diff --git a/components/htmlparser/src/nsParserCIID.h b/components/htmlparser/src/nsParserCIID.h
new file mode 100644
index 000000000..4a2b7b1ad
--- /dev/null
+++ b/components/htmlparser/src/nsParserCIID.h
@@ -0,0 +1,39 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserCIID_h__
+#define nsParserCIID_h__
+
+#include "nsISupports.h"
+#include "nsIFactory.h"
+#include "nsIComponentManager.h"
+
+// {2ce606b0-bee6-11d1-aad9-00805f8a3e14}
+#define NS_PARSER_CID \
+{ 0x2ce606b0, 0xbee6, 0x11d1, { 0xaa, 0xd9, 0x0, 0x80, 0x5f, 0x8a, 0x3e, 0x14 } }
+
+// XXX: This object should not be exposed outside of the parser.
+// Remove when CNavDTD subclasses do not need access. {9039c670-2717-11d2-9246-00805f8a7ab6}
+#define NS_PARSER_NODE_IID \
+ {0x9039c670, 0x2717, 0x11d2, \
+ {0x92, 0x46, 0x00, 0x80, 0x5f, 0x8a, 0x7a, 0xb6}}
+
+// {a6cf9107-15b3-11d2-932e-00805f8add32}
+#define NS_CNAVDTD_CID \
+{ 0xa6cf9107, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+// {fff4fbe9-528a-4b37-819d-fc18f3a401a7}
+#define NS_EXPAT_DRIVER_CID \
+{ 0xfff4fbe9, 0x528a, 0x4b37, { 0x81, 0x9d, 0xfc, 0x18, 0xf3, 0xa4, 0x1, 0xa7 } }
+
+// {a6cf910f-15b3-11d2-932e-00805f8add32}
+#define NS_HTMLCONTENTSINKSTREAM_CID \
+{ 0xa6cf910f, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+// {a6cf9112-15b3-11d2-932e-00805f8add32}
+#define NS_PARSERSERVICE_CID \
+{ 0xa6cf9112, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
+
+#endif
diff --git a/components/htmlparser/src/nsParserConstants.h b/components/htmlparser/src/nsParserConstants.h
new file mode 100644
index 000000000..2f2373c7f
--- /dev/null
+++ b/components/htmlparser/src/nsParserConstants.h
@@ -0,0 +1,38 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserConstants_h_
+#define nsParserConstants_h_
+const char16_t kNewLine = '\n';
+const char16_t kCR = '\r';
+const char16_t kLF = '\n';
+const char16_t kTab = '\t';
+const char16_t kSpace = ' ';
+const char16_t kQuote = '"';
+const char16_t kApostrophe = '\'';
+const char16_t kLessThan = '<';
+const char16_t kGreaterThan = '>';
+const char16_t kAmpersand = '&';
+const char16_t kForwardSlash = '/';
+const char16_t kBackSlash = '\\';
+const char16_t kEqual = '=';
+const char16_t kMinus = '-';
+const char16_t kPlus = '+';
+const char16_t kExclamation = '!';
+const char16_t kSemicolon = ';';
+const char16_t kHashsign = '#';
+const char16_t kAsterisk = '*';
+const char16_t kUnderbar = '_';
+const char16_t kComma = ',';
+const char16_t kLeftParen = '(';
+const char16_t kRightParen = ')';
+const char16_t kLeftBrace = '{';
+const char16_t kRightBrace = '}';
+const char16_t kQuestionMark = '?';
+const char16_t kLeftSquareBracket = '[';
+const char16_t kRightSquareBracket = ']';
+const char16_t kNullCh = '\0';
+
+#endif // nsParserConstants_h_
diff --git a/components/htmlparser/src/nsParserModule.cpp b/components/htmlparser/src/nsParserModule.cpp
new file mode 100644
index 000000000..00c2d6c56
--- /dev/null
+++ b/components/htmlparser/src/nsParserModule.cpp
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIAtom.h"
+#include "nsString.h"
+#include "nspr.h"
+#include "nsCOMPtr.h"
+#include "mozilla/ModuleUtils.h"
+#include "nsParserCIID.h"
+#include "nsParser.h"
+#include "CNavDTD.h"
+#include "nsHTMLEntities.h"
+#include "nsHTMLTokenizer.h"
+//#include "nsTextTokenizer.h"
+#include "nsElementTable.h"
+#include "nsParserService.h"
+#include "nsSAXAttributes.h"
+#include "nsSAXLocator.h"
+#include "nsSAXXMLReader.h"
+
+#if defined(DEBUG)
+#include "nsExpatDriver.h"
+#endif
+
+//----------------------------------------------------------------------
+
+#if defined(DEBUG)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsExpatDriver)
+#endif
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsParser)
+NS_GENERIC_FACTORY_CONSTRUCTOR(CNavDTD)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsParserService)
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsSAXAttributes)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsSAXXMLReader)
+
+#if defined(DEBUG)
+NS_DEFINE_NAMED_CID(NS_EXPAT_DRIVER_CID);
+#endif
+NS_DEFINE_NAMED_CID(NS_PARSER_CID);
+NS_DEFINE_NAMED_CID(NS_CNAVDTD_CID);
+NS_DEFINE_NAMED_CID(NS_PARSERSERVICE_CID);
+NS_DEFINE_NAMED_CID(NS_SAXATTRIBUTES_CID);
+NS_DEFINE_NAMED_CID(NS_SAXXMLREADER_CID);
+
+static const mozilla::Module::CIDEntry kParserCIDs[] = {
+#if defined(DEBUG)
+ { &kNS_EXPAT_DRIVER_CID, false, nullptr, nsExpatDriverConstructor },
+#endif
+ { &kNS_PARSER_CID, false, nullptr, nsParserConstructor },
+ { &kNS_CNAVDTD_CID, false, nullptr, CNavDTDConstructor },
+ { &kNS_PARSERSERVICE_CID, false, nullptr, nsParserServiceConstructor },
+ { &kNS_SAXATTRIBUTES_CID, false, nullptr, nsSAXAttributesConstructor },
+ { &kNS_SAXXMLREADER_CID, false, nullptr, nsSAXXMLReaderConstructor },
+ { nullptr }
+};
+
+static const mozilla::Module::ContractIDEntry kParserContracts[] = {
+ { NS_PARSERSERVICE_CONTRACTID, &kNS_PARSERSERVICE_CID },
+ { NS_SAXATTRIBUTES_CONTRACTID, &kNS_SAXATTRIBUTES_CID },
+ { NS_SAXXMLREADER_CONTRACTID, &kNS_SAXXMLREADER_CID },
+ { nullptr }
+};
+
+static nsresult
+Initialize()
+{
+ nsresult rv = nsHTMLTags::AddRefTable();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = nsHTMLEntities::AddRefTable();
+ if (NS_FAILED(rv)) {
+ nsHTMLTags::ReleaseTable();
+ return rv;
+ }
+#ifdef DEBUG
+ CheckElementTable();
+#endif
+
+#ifdef DEBUG
+ nsHTMLTags::TestTagTable();
+#endif
+
+ return rv;
+}
+
+static void
+Shutdown()
+{
+ nsHTMLTags::ReleaseTable();
+ nsHTMLEntities::ReleaseTable();
+}
+
+static mozilla::Module kParserModule = {
+ mozilla::Module::kVersion,
+ kParserCIDs,
+ kParserContracts,
+ nullptr,
+ nullptr,
+ Initialize,
+ Shutdown
+};
+
+NSMODULE_DEFN(nsParserModule) = &kParserModule;
diff --git a/components/htmlparser/src/nsParserMsgUtils.cpp b/components/htmlparser/src/nsParserMsgUtils.cpp
new file mode 100644
index 000000000..627f57a0e
--- /dev/null
+++ b/components/htmlparser/src/nsParserMsgUtils.cpp
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIServiceManager.h"
+#include "nsIStringBundle.h"
+#include "nsXPIDLString.h"
+#include "nsParserMsgUtils.h"
+#include "nsNetCID.h"
+#include "mozilla/Services.h"
+
+static nsresult GetBundle(const char * aPropFileName, nsIStringBundle **aBundle)
+{
+ NS_ENSURE_ARG_POINTER(aPropFileName);
+ NS_ENSURE_ARG_POINTER(aBundle);
+
+ // Create a bundle for the localization
+
+ nsCOMPtr<nsIStringBundleService> stringService =
+ mozilla::services::GetStringBundleService();
+ if (!stringService)
+ return NS_ERROR_FAILURE;
+
+ return stringService->CreateBundle(aPropFileName, aBundle);
+}
+
+nsresult
+nsParserMsgUtils::GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& oVal)
+{
+ oVal.Truncate();
+
+ NS_ENSURE_ARG_POINTER(aKey);
+
+ nsCOMPtr<nsIStringBundle> bundle;
+ nsresult rv = GetBundle(aPropFileName,getter_AddRefs(bundle));
+ if (NS_SUCCEEDED(rv) && bundle) {
+ nsXPIDLString valUni;
+ nsAutoString key; key.AssignWithConversion(aKey);
+ rv = bundle->GetStringFromName(key.get(), getter_Copies(valUni));
+ if (NS_SUCCEEDED(rv) && valUni) {
+ oVal.Assign(valUni);
+ }
+ }
+
+ return rv;
+}
+
+nsresult
+nsParserMsgUtils::GetLocalizedStringByID(const char * aPropFileName, uint32_t aID, nsString& oVal)
+{
+ oVal.Truncate();
+
+ nsCOMPtr<nsIStringBundle> bundle;
+ nsresult rv = GetBundle(aPropFileName,getter_AddRefs(bundle));
+ if (NS_SUCCEEDED(rv) && bundle) {
+ nsXPIDLString valUni;
+ rv = bundle->GetStringFromID(aID, getter_Copies(valUni));
+ if (NS_SUCCEEDED(rv) && valUni) {
+ oVal.Assign(valUni);
+ }
+ }
+
+ return rv;
+}
diff --git a/components/htmlparser/src/nsParserMsgUtils.h b/components/htmlparser/src/nsParserMsgUtils.h
new file mode 100644
index 000000000..adf3fda8a
--- /dev/null
+++ b/components/htmlparser/src/nsParserMsgUtils.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsParserMsgUtils_h
+#define nsParserMsgUtils_h
+
+#include "nsString.h"
+
+#define XMLPARSER_PROPERTIES "chrome://global/locale/layout/xmlparser.properties"
+
+class nsParserMsgUtils {
+ nsParserMsgUtils(); // Currently this is not meant to be created, use the static methods
+ ~nsParserMsgUtils(); // If perf required, change this to cache values etc.
+public:
+ static nsresult GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& aVal);
+ static nsresult GetLocalizedStringByID(const char * aPropFileName, uint32_t aID, nsString& aVal);
+};
+
+#endif
diff --git a/components/htmlparser/src/nsParserService.cpp b/components/htmlparser/src/nsParserService.cpp
new file mode 100644
index 000000000..5893f19a9
--- /dev/null
+++ b/components/htmlparser/src/nsParserService.cpp
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsError.h"
+#include "nsIAtom.h"
+#include "nsParserService.h"
+#include "nsHTMLEntities.h"
+#include "nsElementTable.h"
+#include "nsICategoryManager.h"
+#include "nsCategoryManagerUtils.h"
+
+nsParserService::nsParserService()
+{
+}
+
+nsParserService::~nsParserService()
+{
+}
+
+NS_IMPL_ISUPPORTS(nsParserService, nsIParserService)
+
+int32_t
+nsParserService::HTMLAtomTagToId(nsIAtom* aAtom) const
+{
+ return nsHTMLTags::StringTagToId(nsDependentAtomString(aAtom));
+}
+
+int32_t
+nsParserService::HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const
+{
+ return nsHTMLTags::CaseSensitiveAtomTagToId(aAtom);
+}
+
+int32_t
+nsParserService::HTMLStringTagToId(const nsAString& aTag) const
+{
+ return nsHTMLTags::StringTagToId(aTag);
+}
+
+const char16_t*
+nsParserService::HTMLIdToStringTag(int32_t aId) const
+{
+ return nsHTMLTags::GetStringValue((nsHTMLTag)aId);
+}
+
+nsIAtom*
+nsParserService::HTMLIdToAtomTag(int32_t aId) const
+{
+ return nsHTMLTags::GetAtom((nsHTMLTag)aId);
+}
+
+NS_IMETHODIMP
+nsParserService::HTMLConvertEntityToUnicode(const nsAString& aEntity,
+ int32_t* aUnicode) const
+{
+ *aUnicode = nsHTMLEntities::EntityToUnicode(aEntity);
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsParserService::HTMLConvertUnicodeToEntity(int32_t aUnicode,
+ nsCString& aEntity) const
+{
+ const char* str = nsHTMLEntities::UnicodeToEntity(aUnicode);
+ if (str) {
+ aEntity.Assign(str);
+ }
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsParserService::IsContainer(int32_t aId, bool& aIsContainer) const
+{
+ aIsContainer = nsHTMLElement::IsContainer((nsHTMLTag)aId);
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsParserService::IsBlock(int32_t aId, bool& aIsBlock) const
+{
+ aIsBlock = nsHTMLElement::IsBlock((nsHTMLTag)aId);
+
+ return NS_OK;
+}
diff --git a/components/htmlparser/src/nsParserService.h b/components/htmlparser/src/nsParserService.h
new file mode 100644
index 000000000..0ea7ec98c
--- /dev/null
+++ b/components/htmlparser/src/nsParserService.h
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef NS_PARSERSERVICE_H__
+#define NS_PARSERSERVICE_H__
+
+#include "nsIParserService.h"
+
+extern "C" int MOZ_XMLIsLetter(const char* ptr);
+extern "C" int MOZ_XMLIsNCNameChar(const char* ptr);
+/**
+ * Decodes an entity into the UTF-16 encoding of a Unicode character. If a ';'
+ * is found between `ptr` and `end` it will try to decode the entity and set
+ * `*next` to point to the character after the ;. The resulting UTF-16 code
+ * units will be written in `*result`, so if the entity is a valid numeric
+ * entity there needs to be space for at least two char16_t at the location
+ * `result` points to.
+ *
+ * @param ptr pointer to the ampersand.
+ * @param end pointer to the position after the last character of the
+ * string.
+ * @param next [out] will be set to the character after the ';' or null if
+ * the decoding was unsuccessful.
+ * @param result the buffer to write the resulting UTF-16 character in.
+ * @return the number of char16_t written to `*result`.
+ */
+extern "C" int MOZ_XMLTranslateEntity(const char* ptr, const char* end,
+ const char** next, char16_t* result);
+
+class nsParserService : public nsIParserService {
+ virtual ~nsParserService();
+
+public:
+ nsParserService();
+
+ NS_DECL_ISUPPORTS
+
+ int32_t HTMLAtomTagToId(nsIAtom* aAtom) const override;
+
+ int32_t HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom) const override;
+
+ int32_t HTMLStringTagToId(const nsAString& aTag) const override;
+
+ const char16_t *HTMLIdToStringTag(int32_t aId) const override;
+
+ nsIAtom *HTMLIdToAtomTag(int32_t aId) const override;
+
+ NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity,
+ int32_t* aUnicode) const override;
+ NS_IMETHOD HTMLConvertUnicodeToEntity(int32_t aUnicode,
+ nsCString& aEntity) const override;
+ NS_IMETHOD IsContainer(int32_t aId, bool& aIsContainer) const override;
+ NS_IMETHOD IsBlock(int32_t aId, bool& aIsBlock) const override;
+};
+
+#endif
diff --git a/components/htmlparser/src/nsScanner.cpp b/components/htmlparser/src/nsScanner.cpp
new file mode 100644
index 000000000..0fa8e43c6
--- /dev/null
+++ b/components/htmlparser/src/nsScanner.cpp
@@ -0,0 +1,408 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//#define __INCREMENTAL 1
+
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+
+#include "nsScanner.h"
+#include "nsDebug.h"
+#include "nsReadableUtils.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "nsUTF8Utils.h" // for LossyConvertEncoding
+#include "nsCRT.h"
+#include "nsParser.h"
+#include "nsCharsetSource.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+
+using mozilla::dom::EncodingUtils;
+
+nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :
+ mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set
+{
+ // Build a bit mask that can be used to filter out characters which
+ // cannot be terminators: a bit stays set in mFilter only if no terminal
+ // char has it set, so any char c with (c & mFilter) != 0 is not one.
+ // This works very well because terminal chars often have only the last
+ // 4-6 bits set while letters have bit 7 or even higher bits set.
+
+ // Clear every bit that occurs in some terminal char (0-terminated list)
+ const char16_t *current = aTerminateChars;
+ char16_t terminalChar = *current;
+ while (terminalChar) {
+ mFilter &= ~terminalChar;
+ ++current;
+ terminalChar = *current;
+ }
+}
+
+/**
+ * Construct a non-incremental scanner whose input is the single string
+ * handed in here; the string is copied into a newly allocated buffer.
+ * If that allocation fails, the iterators are zeroed so that
+ * mCurrentPosition == mEndPosition and the scanner reads as being at EOF.
+ *
+ * @update gess 5/12/98
+ * @param anHTMLString the complete text to scan
+ */
+nsScanner::nsScanner(const nsAString& anHTMLString)
+{
+ MOZ_COUNT_CTOR(nsScanner);
+
+ mSlidingBuffer = nullptr;
+ if (AppendToBuffer(anHTMLString)) {
+ mSlidingBuffer->BeginReading(mCurrentPosition);
+ } else {
+ /* XXX zeroed iterators fake EOF; see the other ctor, re: bug 182067 */
+ memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+ mEndPosition = mCurrentPosition;
+ }
+ mMarkPosition = mCurrentPosition;
+ mIncremental = false;
+ mUnicodeDecoder = nullptr;
+ mCharsetSource = kCharsetUninitialized;
+}
+
+/**
+ * Use this constructor for incremental scanning: the scanner starts out
+ * empty (at EOF) and receives its data later via Append(). aFilename is
+ * stored and returned by GetFilename(); aCreateStream must be false.
+ */
+nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
+ : mFilename(aFilename)
+{
+ MOZ_COUNT_CTOR(nsScanner);
+ NS_ASSERTION(!aCreateStream, "This is always true.");
+
+ mSlidingBuffer = nullptr;
+
+ // XXX This is a big hack. We need to initialize the iterators to something.
+ // What matters is that mCurrentPosition == mEndPosition, so that our methods
+ // believe that we are at EOF (see bug 182067). We null out mCurrentPosition
+ // so that we have some hope of catching null pointer dereferences associated
+ // with this hack. --darin
+ memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+ mMarkPosition = mCurrentPosition;
+ mEndPosition = mCurrentPosition;
+
+ mIncremental = true;
+
+ mUnicodeDecoder = nullptr;
+ mCharsetSource = kCharsetUninitialized;
+ // XML defaults to UTF-8 and about:blank is UTF-8, too.
+ SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
+}
+
+nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
+{
+ if (aSource < mCharsetSource) // priority is lower than the current one
+ return NS_OK;
+
+ mCharsetSource = aSource;
+
+ nsCString charsetName;
+ mozilla::DebugOnly<bool> valid =
+ EncodingUtils::FindEncodingForLabel(aCharset, charsetName);
+ MOZ_ASSERT(valid, "Should never call with a bogus aCharset.");
+
+ if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
+ return NS_OK; // no difference, don't change it
+ }
+
+ // different, need to change it
+
+ mCharset.Assign(charsetName);
+
+ mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
+ mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+
+ return NS_OK;
+}
+
+
+/**
+ * Destructor.
+ *
+ * Deletes the sliding buffer (a no-op if none was ever allocated).
+ *
+ * @update gess 3/25/98
+ */
+nsScanner::~nsScanner() {
+
+ delete mSlidingBuffer;
+
+ MOZ_COUNT_DTOR(nsScanner);
+}
+
+/**
+ * Resets current offset position of input stream to marked position.
+ * This allows us to back up to this point if the need should arise,
+ * such as when tokenization gets interrupted.
+ * NOTE: it is really bad form to rewind without calling Mark() first!
+ * Does nothing when no sliding buffer has been allocated.
+ *
+ * @update gess 5/12/98
+ * @return nothing
+ */
+void nsScanner::RewindToMark(void){
+ if (mSlidingBuffer) {
+ mCurrentPosition = mMarkPosition;
+ }
+}
+
+
+/**
+ * Records current offset position in input stream. This allows us
+ * to back up to this point if the need should arise, such as when
+ * tokenization gets interrupted. The buffer prefix ending at the current
+ * position is passed to DiscardPrefix(), so the mark is the buffer start.
+ *
+ * @update gess 7/29/98
+ * @return distance from the old buffer start to the mark (0 if no buffer)
+ */
+int32_t nsScanner::Mark() {
+ int32_t distance = 0;
+ if (mSlidingBuffer) {
+ nsScannerIterator oldStart;
+ mSlidingBuffer->BeginReading(oldStart);
+
+ distance = Distance(oldStart, mCurrentPosition);
+
+ mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+ mSlidingBuffer->BeginReading(mCurrentPosition);
+ mMarkPosition = mCurrentPosition;
+ }
+
+ return distance;
+}
+
+/**
+ * Insert data to our underlying input buffer as
+ * if it were read from an input stream.
+ *
+ * @update harishd 01/12/99
+ * @return false if there is no sliding buffer yet, true otherwise
+ */
+bool nsScanner::UngetReadable(const nsAString& aBuffer) {
+ if (!mSlidingBuffer) {
+ return false;
+ }
+
+ mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
+ mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
+ mSlidingBuffer->EndReading(mEndPosition);
+
+ return true;
+}
+
+/**
+ * Append data to our underlying input buffer as
+ * if it were read from an input stream.
+ *
+ * @update gess4/3/98
+ * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if AppendToBuffer() fails
+ */
+nsresult nsScanner::Append(const nsAString& aBuffer) {
+ if (!AppendToBuffer(aBuffer))
+ return NS_ERROR_OUT_OF_MEMORY;
+ return NS_OK;
+}
+
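+/**
+ * Decode aLen raw bytes with the current unicode decoder and append the
+ * resulting UTF-16 text to the scanner buffer; each undecodable byte is
+ * replaced by U+FFFF so that expat flags the error.
+ * @update gess 5/21/98
+ * @return NS_OK, or a failure code (no decoder, GetMaxLength failed, OOM)
+ */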
+nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen)
+{
+ nsresult res = NS_OK;
+ if (mUnicodeDecoder) {
+ int32_t unicharBufLen = 0;
+
+ nsresult rv = mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
+ NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
+ char16_t *unichars = buffer->DataStart();
+
+ int32_t totalChars = 0;
+ int32_t unicharLength = unicharBufLen;
+
+ do {
+ int32_t srcLength = aLen;
+ res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
+
+ totalChars += unicharLength;
+ // Continuation of failure case
+ if(NS_FAILED(res)) {
+ // if we failed, we consume one byte, replace it with the replacement
+ // character and try the conversion again.
+
+ // This is only needed because some decoders don't follow the
+ // nsIUnicodeDecoder contract: they return a failure when *aDestLength
+ // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177
+ if ((unichars + unicharLength) >= buffer->DataEnd()) {
+ NS_ERROR("Unexpected end of destination buffer");
+ break;
+ }
+
+ // Since about:blank is empty, this line runs only for XML. Use a
+ // character that's illegal in XML instead of U+FFFD in order to make
+ // expat flag the error.
+ unichars[unicharLength++] = 0xFFFF;
+
+ unichars = unichars + unicharLength;
+ unicharLength = unicharBufLen - (++totalChars);
+
+ mUnicodeDecoder->Reset();
+
+ if(((uint32_t) (srcLength + 1)) > aLen) {
+ srcLength = aLen;
+ }
+ else {
+ ++srcLength;
+ }
+
+ aBuffer += srcLength;
+ aLen -= srcLength;
+ }
+ } while (NS_FAILED(res) && (aLen > 0));
+
+ buffer->SetDataLength(totalChars);
+ // Don't propagate return code of unicode decoder
+ // since it doesn't reflect on our success or failure
+ // - Ref. bug 87110
+ res = NS_OK;
+ if (!AppendToBuffer(buffer))
+ res = NS_ERROR_OUT_OF_MEMORY;
+ }
+ else {
+ NS_WARNING("No decoder found.");
+ res = NS_ERROR_FAILURE;
+ }
+
+ return res;
+}
+
+/**
+ * Retrieve the next char from the scanner's internal buffer and advance.
+ *
+ * @update gess 3/25/98
+ * @param aChar receives the char read, or 0 when no data is available
+ * @return NS_OK on success, kEOF when there is nothing left to read
+ */
+nsresult nsScanner::GetChar(char16_t& aChar) {
+ if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
+ aChar = 0;
+ return kEOF;
+ }
+
+ aChar = *mCurrentPosition++;
+
+ return NS_OK;
+}
+
+
+void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
+{
+ aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
+}
+
+void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
+{
+ aPosition = mCurrentPosition;
+}
+
+void nsScanner::EndReading(nsScannerIterator& aPosition)
+{
+ aPosition = mEndPosition;
+}
+
+void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate)
+{
+ if (mSlidingBuffer) {
+ mCurrentPosition = aPosition;
+ if (aTerminate && (mCurrentPosition == mEndPosition)) {
+ mMarkPosition = mCurrentPosition;
+ mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+ }
+ }
+}
+
+bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf)
+{
+ if (!mSlidingBuffer) {
+ mSlidingBuffer = new nsScannerString(aBuf);
+ if (!mSlidingBuffer)
+ return false;
+ mSlidingBuffer->BeginReading(mCurrentPosition);
+ mMarkPosition = mCurrentPosition;
+ mSlidingBuffer->EndReading(mEndPosition);
+ }
+ else {
+ mSlidingBuffer->AppendBuffer(aBuf);
+ if (mCurrentPosition == mEndPosition) {
+ mSlidingBuffer->BeginReading(mCurrentPosition);
+ }
+ mSlidingBuffer->EndReading(mEndPosition);
+ }
+
+ return true;
+}
+
+/**
+ * call this to copy bytes out of the scanner that have not yet been consumed
+ * by the tokenization process.
+ *
+ * @update gess 5/12/98
+ * @param aCopyBuffer is where the scanner buffer will be copied to
+ * @return true if OK or false on OOM
+ */
+bool nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
+ if (!mSlidingBuffer) {
+ aCopyBuffer.Truncate();
+ return true;
+ }
+
+ nsScannerIterator start, end;
+ start = mCurrentPosition;
+ end = mEndPosition;
+
+ return CopyUnicodeTo(start, end, aCopyBuffer);
+}
+
+/**
+ * Retrieve the name of the file that the scanner is reading from.
+ * In some cases, it's just a given name, because the scanner isn't
+ * really reading from a file.
+ *
+ * @update gess 5/12/98
+ * @return
+ */
+nsString& nsScanner::GetFilename(void) {
+ return mFilename;
+}
+
+/**
+ * Conduct self test. Actually, selftesting for this class
+ * occurs in the parser selftest.
+ *
+ * @update gess 3/25/98
+ * @param
+ * @return
+ */
+
+void nsScanner::SelfTest(void) {
+#ifdef _DEBUG
+#endif
+}
diff --git a/components/htmlparser/src/nsScanner.h b/components/htmlparser/src/nsScanner.h
new file mode 100644
index 000000000..88edcf74e
--- /dev/null
+++ b/components/htmlparser/src/nsScanner.h
@@ -0,0 +1,190 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/1/98
+ *
+ * The scanner is a low-level service class that knows
+ * how to consume characters out of an (internal) buffer.
+ * This class also offers a series of utility methods
+ * that tokenizers want, such as Mark(), RewindToMark()
+ * and SetPosition().
+ */
+
+
+#ifndef SCANNER
+#define SCANNER
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIParser.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
+
+class nsReadEndCondition {
+public:
+ const char16_t *mChars;
+ char16_t mFilter;
+ explicit nsReadEndCondition(const char16_t* aTerminateChars);
+private:
+ nsReadEndCondition(const nsReadEndCondition& aOther); // No copying
+ void operator=(const nsReadEndCondition& aOther); // No assigning
+};
+
+class nsScanner {
+ public:
+
+ /**
+ * Use this constructor for the XML fragment parsing case
+ */
+ explicit nsScanner(const nsAString& anHTMLString);
+
+ /**
+ * Use this constructor if you want i/o to be based on
+ * a file (therefore a stream) or just data you provide via Append().
+ */
+ nsScanner(nsString& aFilename, bool aCreateStream);
+
+ ~nsScanner();
+
+ /**
+ * retrieve next char from internal input stream
+ *
+ * @update gess 3/25/98
+ * @param ch is the char to accept new value
+ * @return error code reflecting read status
+ */
+ nsresult GetChar(char16_t& ch);
+
+ /**
+ * Records current offset position in input stream. This allows us
+ * to back up to this point if the need should arise, such as when
+ * tokenization gets interrupted.
+ *
+ * @update gess 5/12/98
+ * @return distance from the old buffer start to the marked position
+ * (0 if there is no buffer)
+ */
+ int32_t Mark(void);
+
+ /**
+ * Resets current offset position of input stream to marked position.
+ * This allows us to back up to this point if the need should arise,
+ * such as when tokenization gets interrupted.
+ * NOTE: it is really bad form to rewind without calling Mark() first!
+ * Does nothing when no sliding buffer has been allocated.
+ *
+ * @update gess 5/12/98
+ * @return nothing
+ */
+ void RewindToMark(void);
+
+
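+ /**
+ * Insert data into the underlying input buffer as if it
+ * were read from an input stream.
+ * @update harishd 01/12/99
+ * @param aBuffer the text to insert
+ * @return false if there is no sliding buffer yet, true otherwise
+ */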
+ bool UngetReadable(const nsAString& aBuffer);
+
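+ /**
+ * Append UTF-16 text to the underlying input buffer as if
+ * it were read from an input stream.
+ * @update gess 5/13/98
+ * @param aBuffer the text to append
+ * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY on allocation failure
+ */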
+ nsresult Append(const nsAString& aBuffer);
+
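+ /**
+ * Decode raw bytes with the current unicode decoder and append the
+ * result to the underlying input buffer.
+ * @update gess 5/21/98
+ * @param aBuffer the bytes to decode; aLen is their count
+ * @return NS_OK, or a failure code (e.g. when no decoder is set)
+ */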
+ nsresult Append(const char* aBuffer, uint32_t aLen);
+
+ /**
+ * Call this to copy bytes out of the scanner that have not yet been consumed
+ * by the tokenization process.
+ *
+ * @update gess 5/12/98
+ * @param aCopyBuffer is where the scanner buffer will be copied to
+ * @return true if OK or false on OOM
+ */
+ bool CopyUnusedData(nsString& aCopyBuffer);
+
+ /**
+ * Retrieve the name of the file that the scanner is reading from.
+ * In some cases, it's just a given name, because the scanner isn't
+ * really reading from a file.
+ *
+ * @update gess 5/12/98
+ * @return
+ */
+ nsString& GetFilename(void);
+
+ static void SelfTest();
+
+ /**
+ * Use this setter to change the scanner's unicode decoder
+ *
+ * @update ftang 3/02/99
+ * @param aCharset a normalized (alias resolved) charset name
+ * @param aCharsetSource- where the charset info came from
+ * @return
+ */
+ nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
+
+ void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
+ void CurrentPosition(nsScannerIterator& aPosition);
+ void EndReading(nsScannerIterator& aPosition);
+ void SetPosition(nsScannerIterator& aPosition,
+ bool aTruncate = false);
+
+ /**
+ * Accessors for the incremental flag, which the Append()-driven
+ * constructor sets to true and the single-string constructor to false.
+ *
+ * @update gess4/3/98
+ */
+ bool IsIncremental(void) {return mIncremental;}
+ void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
+
+ protected:
+
+ bool AppendToBuffer(nsScannerString::Buffer* aBuffer);
+ bool AppendToBuffer(const nsAString& aStr)
+ {
+ nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
+ if (!buf)
+ return false;
+ AppendToBuffer(buf);
+ return true;
+ }
+
+ nsScannerString* mSlidingBuffer;
+ nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
+ nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
+ nsScannerIterator mEndPosition; // The current end of the scanner buffer
+ nsString mFilename;
+ bool mIncremental;
+ int32_t mCharsetSource;
+ nsCString mCharset;
+ nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
+
+ private:
+ nsScanner &operator =(const nsScanner &); // Not implemented.
+};
+
+#endif
+
+
diff --git a/components/htmlparser/src/nsScannerString.cpp b/components/htmlparser/src/nsScannerString.cpp
new file mode 100644
index 000000000..53ac117f1
--- /dev/null
+++ b/components/htmlparser/src/nsScannerString.cpp
@@ -0,0 +1,650 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdlib.h>
+#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
+
+
+ /**
+ * nsScannerBufferList
+ */
+
+#define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
+ (sizeof(Buffer) + sizeof(char16_t)))
+
+/**
+ * Allocates a buffer with room for every character of aString and copies
+ * the string's characters into it.
+ *
+ * @return the new buffer, or nullptr when AllocBuffer() returns nullptr
+ */
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
+  {
+    const uint32_t length = aString.Length();
+
+    Buffer* result = AllocBuffer(length);
+    if (!result)
+      return result;
+
+    nsAString::const_iterator reader;
+    aString.BeginReading(reader);
+    nsCharTraits<char16_t>::copy(result->DataStart(), reader.get(), length);
+    return result;
+  }
+
+/**
+ * Allocates one malloc block holding a Buffer header followed by
+ * |capacity| characters plus one extra terminating character.
+ *
+ * @param capacity number of characters the data segment must hold
+ * @return the new buffer with a usage count of 0, or nullptr when
+ *         |capacity| exceeds MAX_CAPACITY or malloc fails
+ */
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBuffer( uint32_t capacity )
+  {
+    if (capacity > MAX_CAPACITY)
+      return nullptr;
+
+    // header + data + one extra char16_t for the terminator written below
+    const size_t byteCount = sizeof(Buffer) + (capacity + 1) * sizeof(char16_t);
+    void* storage = malloc(byteCount);
+    if (!storage)
+      return nullptr;
+
+    Buffer* result = new (storage) Buffer();
+    result->mUsageCount = 0;
+    result->mDataEnd = result->DataStart() + capacity;
+
+    // XXX null terminate.  this shouldn't be required, but we do it because
+    // nsScanner erroneously thinks it can dereference DataEnd :-(
+    *result->mDataEnd = char16_t(0);
+    return result;
+  }
+
+/**
+ * Removes every buffer from the list, front to back, and frees each one.
+ */
+void
+nsScannerBufferList::ReleaseAll()
+  {
+    for (;;)
+      {
+        if (mBuffers.isEmpty())
+          break;
+
+        Buffer* front = mBuffers.popFirst();
+        free(front);
+      }
+  }
+
+/**
+ * Splits the buffer referenced by |pos| at |pos.mPosition|: the characters
+ * from the split point onward are copied into a new buffer inserted right
+ * after the original, and the original is shortened to end at the split
+ * point.  If the new buffer cannot be allocated nothing is changed.
+ *
+ * Splitting to the right keeps the work string and any extant token
+ * pointing to and holding a reference count on the same buffer.
+ */
+void
+nsScannerBufferList::SplitBuffer( const Position& pos )
+  {
+    Buffer* original = pos.mBuffer;
+    NS_ASSERTION(original, "null pointer");
+
+    uint32_t headLength = pos.mPosition - original->DataStart();
+    NS_ASSERTION(pos.mPosition >= original->DataStart() &&
+                 headLength <= original->DataLength(),
+                 "split offset is outside buffer");
+
+    uint32_t tailLength = original->DataLength() - headLength;
+    Buffer* tail = AllocBuffer(tailLength);
+    if (!tail)
+      return;
+
+    nsCharTraits<char16_t>::copy(tail->DataStart(),
+                                 original->DataStart() + headLength,
+                                 tailLength);
+    InsertAfter(tail, original);
+    original->SetDataLength(headLength);
+  }
+
+/**
+ * If |aBuf| is the first buffer in the list, removes and frees buffers
+ * from the front of the list until the list is empty or the front buffer
+ * is still in use.  Does nothing when |aBuf| is not the head.
+ */
+void
+nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
+  {
+    if (aBuf != Head())
+      return;
+
+    while (!mBuffers.isEmpty())
+      {
+        Buffer* front = Head();
+        if (front->IsInUse())
+          break;
+
+        front->remove();
+        free(front);
+      }
+  }
+
+/**
+ * Returns the number of characters between |aStart| and |aEnd|.  When
+ * both positions are in the same buffer this is a pointer difference;
+ * otherwise it is the remainder of the start buffer, plus the full length
+ * of every buffer in between, plus the used part of the end buffer.
+ */
+size_t
+nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
+  {
+    if (aStart.mBuffer == aEnd.mBuffer)
+      return aEnd.mPosition - aStart.mPosition;
+
+    size_t total = aStart.mBuffer->DataEnd() - aStart.mPosition;
+
+    Buffer* cursor = aStart.mBuffer->Next();
+    while (cursor != aEnd.mBuffer)
+      {
+        total += cursor->DataLength();
+        cursor = cursor->Next();
+      }
+
+    total += aEnd.mPosition - aEnd.mBuffer->DataStart();
+    return total;
+  }
+
+
+/**
+ * nsScannerSubstring
+ */
+
+// Default constructor: null start and end positions, no buffer list, zero
+// length, and the flattened representation marked dirty.
+nsScannerSubstring::nsScannerSubstring()
+ : mStart(nullptr, nullptr)
+ , mEnd(nullptr, nullptr)
+ , mBufferList(nullptr)
+ , mLength(0)
+ , mIsDirty(true)
+ {
+ }
+
+// Constructs a substring bound to a copy of |s| by delegating to Rebind().
+// mBufferList is initialized to null first because Rebind() starts by
+// calling release_ownership_of_buffer_list(), which tests that pointer.
+nsScannerSubstring::nsScannerSubstring( const nsAString& s )
+ : mBufferList(nullptr)
+ , mIsDirty(true)
+ {
+ Rebind(s);
+ }
+
+// Destructor: drops this substring's reference on its buffer list (a no-op
+// when no buffer list is bound).
+nsScannerSubstring::~nsScannerSubstring()
+ {
+ release_ownership_of_buffer_list();
+ }
+
+/**
+ * Counts how many times |c| occurs in this substring, walking it one
+ * fragment at a time.
+ */
+int32_t
+nsScannerSubstring::CountChar( char16_t c ) const
+  {
+      /*
+        re-write this to use a counting sink
+       */
+
+    size_type matches = 0;
+    size_type remaining = Length();
+
+    nsScannerIterator cursor;
+    BeginReading(cursor);
+    for (;;)
+      {
+        int32_t fragmentLength = cursor.size_forward();
+        const char16_t* fragmentStart = cursor.get();
+        matches += size_type(NS_COUNT(fragmentStart, fragmentStart+fragmentLength, c));
+
+        remaining -= fragmentLength;
+        if ( !remaining )
+          return matches;
+
+        cursor.advance(fragmentLength);
+      }
+      // never reached; quiets warnings
+    return 0;
+  }
+
+/**
+ * Rebinds this substring to the range [aStart, aEnd) of |aString|, sharing
+ * |aString|'s buffer list.  The length is recomputed from the two
+ * iterators and the flattened representation is marked dirty.
+ */
+void
+nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
+ const nsScannerIterator& aStart,
+ const nsScannerIterator& aEnd )
+ {
+ // allow for the case where &aString == this
+ // (the new reference is taken before the old one is dropped)
+
+ aString.acquire_ownership_of_buffer_list();
+ release_ownership_of_buffer_list();
+
+ // NOTE(review): the acquire above bumps the usage count of aString's own
+ // start buffer, while a later release will decrement the buffer that
+ // mStart (i.e. aStart) points at; confirm the counts pair up when aStart
+ // is not in aString's first buffer.
+ mStart = aStart;
+ mEnd = aEnd;
+ mBufferList = aString.mBufferList;
+ mLength = Distance(aStart, aEnd);
+ mIsDirty = true;
+ }
+
+/**
+ * Rebinds this substring to a private copy of |aString|: the old buffer
+ * list reference is dropped, a new single-buffer list is created from the
+ * string, and the range is reset to cover that whole list.
+ */
+void
+nsScannerSubstring::Rebind( const nsAString& aString )
+ {
+ release_ownership_of_buffer_list();
+
+ // NOTE(review): AllocBufferFromString() can return nullptr and the result
+ // is passed to the list constructor unchecked; confirm callers never hit
+ // that case.
+ mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
+ mIsDirty = true;
+
+ init_range_from_buffer_list();
+ acquire_ownership_of_buffer_list();
+ }
+
+/**
+ * Returns this substring as a single-fragment string.  The flattened
+ * representation is cached in mFlattenedRep and only rebuilt while
+ * mIsDirty is set.
+ */
+const nsSubstring&
+nsScannerSubstring::AsString() const
+  {
+    if (!mIsDirty)
+      return mFlattenedRep;
+
+    nsScannerSubstring* self = const_cast<nsScannerSubstring*>(this);
+
+    if (mStart.mBuffer != mEnd.mBuffer)
+      {
+        // The range spans several buffers, so the data has to be copied
+        // into a flattened buffer.
+        nsScannerIterator first, last;
+        CopyUnicodeTo(BeginReading(first), EndReading(last), self->mFlattenedRep);
+      }
+    else
+      {
+        // A single fragment can simply be referenced as a substring.
+        self->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
+      }
+
+    self->mIsDirty = false;
+    return mFlattenedRep;
+  }
+
+/**
+ * Points |iter| at the first character of this substring and returns it.
+ * The iterator's fragment covers the start buffer from mStart up to either
+ * mEnd (single-buffer range) or the end of that buffer's data.
+ */
+nsScannerIterator&
+nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
+  {
+    const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);
+
+    iter.mOwner = this;
+    iter.mPosition = mStart.mPosition;
+
+    iter.mFragment.mBuffer = mStart.mBuffer;
+    iter.mFragment.mFragmentStart = mStart.mPosition;
+    iter.mFragment.mFragmentEnd = singleBuffer ? mEnd.mPosition
+                                               : mStart.mBuffer->DataEnd();
+
+    iter.normalize_forward();
+    return iter;
+  }
+
+/**
+ * Points |iter| one past the last character of this substring and returns
+ * it.  The iterator's fragment covers the end buffer from either mStart
+ * (single-buffer range) or the start of that buffer's data up to mEnd.
+ */
+nsScannerIterator&
+nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
+  {
+    const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);
+
+    iter.mOwner = this;
+    iter.mPosition = mEnd.mPosition;
+
+    iter.mFragment.mBuffer = mEnd.mBuffer;
+    iter.mFragment.mFragmentEnd = mEnd.mPosition;
+    iter.mFragment.mFragmentStart = singleBuffer ? mStart.mPosition
+                                                 : mEnd.mBuffer->DataStart();
+
+    // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
+    return iter;
+  }
+
+/**
+ * Advances |frag| to the fragment in the next buffer, clipped to this
+ * substring's start and end positions.
+ *
+ * @return false (leaving |frag| untouched) when |frag| is already in the
+ *         substring's last buffer; true otherwise
+ */
+bool
+nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
+  {
+    // nothing follows the fragment that lives in the end buffer
+    if (frag.mBuffer == mEnd.mBuffer)
+      return false;
+
+    const Buffer* next = frag.mBuffer->getNext();
+    frag.mBuffer = next;
+
+    frag.mFragmentStart = (next == mStart.mBuffer) ? mStart.mPosition
+                                                   : next->DataStart();
+    frag.mFragmentEnd = (next == mEnd.mBuffer) ? mEnd.mPosition
+                                               : next->DataEnd();
+    return true;
+  }
+
+/**
+ * Moves |frag| back to the fragment in the previous buffer, clipped to
+ * this substring's start and end positions.
+ *
+ * @return false (leaving |frag| untouched) when |frag| is already in the
+ *         substring's first buffer; true otherwise
+ */
+bool
+nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
+  {
+    // nothing precedes the fragment that lives in the start buffer
+    if (frag.mBuffer == mStart.mBuffer)
+      return false;
+
+    const Buffer* previous = frag.mBuffer->getPrevious();
+    frag.mBuffer = previous;
+
+    frag.mFragmentStart = (previous == mStart.mBuffer) ? mStart.mPosition
+                                                       : previous->DataStart();
+    frag.mFragmentEnd = (previous == mEnd.mBuffer) ? mEnd.mPosition
+                                                   : previous->DataEnd();
+    return true;
+  }
+
+
+ /**
+ * nsScannerString
+ */
+
+// Constructs a scanner string whose buffer list initially holds only
+// |aBuf|; the range is set to cover the whole list and a reference on the
+// list is taken.
+nsScannerString::nsScannerString( Buffer* aBuf )
+ {
+ mBufferList = new nsScannerBufferList(aBuf);
+
+ init_range_from_buffer_list();
+ acquire_ownership_of_buffer_list();
+ }
+
+/**
+ * Appends |aBuf| to the end of the buffer list, grows the string's length
+ * by the buffer's data length, and moves the end position to the end of
+ * the new buffer.
+ */
+void
+nsScannerString::AppendBuffer( Buffer* aBuf )
+  {
+    mBufferList->Append(aBuf);
+
+    // the string now ends where the appended buffer's data ends
+    mEnd.mBuffer = aBuf;
+    mEnd.mPosition = aBuf->DataEnd();
+    mLength += aBuf->DataLength();
+
+    mIsDirty = true;
+  }
+
+/**
+ * Moves the start of the string forward to |aIter|, shrinking the length
+ * by the distance skipped, transferring the usage count from the old start
+ * buffer to the new one, and letting the list discard any leading buffers
+ * that are no longer in use.
+ */
+void
+nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
+  {
+    Position previousStart(mStart);
+
+    mStart = aIter;
+    mLength -= Position::Distance(previousStart, mStart);
+
+    // take the new reference before dropping the old one
+    mStart.mBuffer->IncrementUsageCount();
+    previousStart.mBuffer->DecrementUsageCount();
+    mBufferList->DiscardUnreferencedPrefix(previousStart.mBuffer);
+
+    mIsDirty = true;
+  }
+
+/**
+ * Inserts a copy of |aReadable| into the buffer list at |aInsertPoint| by
+ * splitting the buffer at that point and linking a new buffer holding the
+ * text right after the split-off head.  The length, end position and dirty
+ * flag are updated accordingly.
+ *
+ * If the buffer for |aReadable| cannot be allocated the string is left
+ * completely unchanged.
+ */
+void
+nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
+    /*
+     * Warning: this routine manipulates the shared buffer list in an unexpected way.
+     *  The original design did not really allow for insertions, but this call promises
+     *  that if called for a point after the end of all extant token strings, that no token string
+     *  or the work string will be invalidated.
+     *
+     *  This routine is protected because it is the responsibility of the derived class to keep those promises.
+     */
+  {
+      // make a new buffer with all the data to insert...
+      // Allocate before touching the list: AllocBufferFromString() can return
+      // nullptr, and linking a null buffer into the list would dereference it.
+    Buffer* new_buffer = AllocBufferFromString(aReadable);
+    if (!new_buffer)
+      {
+        NS_WARNING("UngetReadable: out of memory, text not inserted");
+        return;
+      }
+
+    Position insertPos(aInsertPoint);
+
+    mBufferList->SplitBuffer(insertPos);
+      // splitting to the right keeps the work string and any extant token pointing to and
+      //  holding a reference count on the same buffer
+
+      // NOTE: we may have empty space to re-use in the split buffer, measure the cost
+      //  of this and decide if we should do the work to fill it
+
+    Buffer* buffer_to_split = insertPos.mBuffer;
+    mBufferList->InsertAfter(new_buffer, buffer_to_split);
+    mLength += aReadable.Length();
+
+    mEnd.mBuffer = mBufferList->Tail();
+    mEnd.mPosition = mEnd.mBuffer->DataEnd();
+
+    mIsDirty = true;
+  }
+
+ /**
+ * nsScannerSharedSubstring
+ */
+
+/**
+ * Rebinds this shared substring to [aStart, aEnd).
+ *
+ * When both iterators are inside the same buffer the string simply points
+ * into that buffer, so a reference on the buffer and on its list must be
+ * held.  Otherwise the characters are copied and nothing is held.
+ */
+void
+nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
+                                 const nsScannerIterator &aEnd)
+{
+  Buffer *startBuffer = const_cast<Buffer*>(aStart.buffer());
+
+  if (startBuffer != aEnd.buffer()) {
+    // Range spans buffers: release whatever we held and take a copy.
+    if (mBufferList)
+      ReleaseBuffer();
+
+    mBuffer = nullptr;
+    mBufferList = nullptr;
+    CopyUnicodeTo(aStart, aEnd, mString);
+    return;
+  }
+
+  // Take the new references before releasing the old ones.
+  nsScannerBufferList *owningList = aStart.mOwner->mBufferList;
+  owningList->AddRef();
+  startBuffer->IncrementUsageCount();
+
+  if (mBufferList)
+    ReleaseBuffer();
+
+  mBuffer = startBuffer;
+  mBufferList = owningList;
+  mString.Rebind(aStart.mPosition, aEnd.mPosition);
+}
+
+/**
+ * Drops the references this object holds: decrements the usage count of
+ * mBuffer, lets the list discard unreferenced leading buffers, and
+ * releases the list itself.  mBuffer and mBufferList are not reset here;
+ * callers overwrite them afterwards where needed.
+ */
+void
+nsScannerSharedSubstring::ReleaseBuffer()
+{
+ NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
+ mBuffer->DecrementUsageCount();
+ mBufferList->DiscardUnreferencedPrefix(mBuffer);
+ mBufferList->Release();
+}
+
+/**
+ * Detaches mString from the scanner buffer: the characters are copied into
+ * storage owned by the string, then the buffer and list references are
+ * released and cleared.
+ */
+void
+nsScannerSharedSubstring::MakeMutable()
+{
+ nsString temp(mString); // this will force a copy of the data
+ mString.Assign(temp); // mString will now share the just-allocated buffer
+
+ ReleaseBuffer();
+
+ mBuffer = nullptr;
+ mBufferList = nullptr;
+}
+
+ /**
+ * utils -- based on code from nsReadableUtils.cpp
+ */
+
+// private helper function
+// private helper function
+// Writes the characters in [first, last) to |result| one readable chunk at
+// a time, advancing |first| as it goes, and returns |result|.
+static inline
+nsAString::iterator&
+copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
+  {
+    typedef nsCharSourceTraits<nsScannerIterator> source_traits;
+    typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
+
+    for ( ; first != last; )
+      {
+        uint32_t chunk = source_traits::readable_distance(first, last);
+        sink_traits::write(result, source_traits::read(first), chunk);
+        NS_ASSERTION(chunk > 0, "|copy_multifragment_string| will never terminate");
+        source_traits::advance(first, chunk);
+      }
+
+    return result;
+  }
+
+/**
+ * Replaces the contents of |aDest| with the characters in
+ * [aSrcStart, aSrcEnd).
+ *
+ * @return false if the distance does not fit the string's size type or if
+ *         |aDest| cannot be resized (in which case it is truncated);
+ *         true on success
+ */
+bool
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+               const nsScannerIterator& aSrcEnd,
+               nsAString& aDest )
+  {
+    mozilla::CheckedInt<nsAString::size_type> needed(Distance(aSrcStart, aSrcEnd));
+    if (!needed.isValid())
+      return false; // overflow detected
+
+    if (!aDest.SetLength(needed.value(), mozilla::fallible))
+      {
+        aDest.Truncate();
+        return false; // out of memory
+      }
+
+    nsAString::iterator sink;
+    aDest.BeginWriting(sink);
+
+    nsScannerIterator source(aSrcStart);
+    copy_multifragment_string(source, aSrcEnd, sink);
+    return true;
+  }
+
+/**
+ * Appends [aSrcStart, aSrcEnd) to the shared substring |aDest|.  An empty
+ * destination is simply rebound to the source range; otherwise the
+ * characters are appended to the destination's writable string.
+ */
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsScannerSharedSubstring& aDest )
+  {
+    // A non-empty dest string can't be a dependent substring, so append.
+    if (!aDest.str().IsEmpty())
+      return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
+
+    // We can just make |aDest| point to the buffer.
+    // This will take care of copying if the buffer spans fragments.
+    aDest.Rebind(aSrcStart, aSrcEnd);
+    return true;
+  }
+
+/**
+ * Appends the characters in [aSrcStart, aSrcEnd) to |aDest|.
+ *
+ * @return false if the combined length overflows the string's size type or
+ *         if |aDest| cannot be grown; true on success
+ */
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsAString& aDest )
+  {
+    const nsAString::size_type existing = aDest.Length();
+
+    mozilla::CheckedInt<nsAString::size_type> total(Distance(aSrcStart, aSrcEnd));
+    total += existing;
+    if (!total.isValid())
+      return false; // overflow detected
+
+    if (!aDest.SetLength(total.value(), mozilla::fallible))
+      return false; // out of memory
+
+    // start writing right after the characters that were already there
+    nsAString::iterator sink;
+    aDest.BeginWriting(sink).advance(existing);
+
+    nsScannerIterator source(aSrcStart);
+    copy_multifragment_string(source, aSrcEnd, sink);
+    return true;
+  }
+
+/**
+ * Searches [aSearchStart, aSearchEnd) for |aChar|, one fragment at a time.
+ *
+ * @return true with |aSearchStart| positioned on the first match, or false
+ *         with |aSearchStart| advanced to |aSearchEnd|
+ */
+bool
+FindCharInReadable( char16_t aChar,
+                    nsScannerIterator& aSearchStart,
+                    const nsScannerIterator& aSearchEnd )
+  {
+    for ( ; aSearchStart != aSearchEnd; )
+      {
+        // how much of the current fragment belongs to the search range
+        int32_t span = SameFragment(aSearchStart, aSearchEnd)
+                         ? aSearchEnd.get() - aSearchStart.get()
+                         : aSearchStart.size_forward();
+
+        const char16_t* hit = nsCharTraits<char16_t>::find(aSearchStart.get(), span, aChar);
+        if ( !hit )
+          {
+            aSearchStart.advance(span);
+            continue;
+          }
+
+        aSearchStart.advance( hit - aSearchStart.get() );
+        return true;
+      }
+
+    return false;
+  }
+
+/**
+ * Searches [aSearchStart, aSearchEnd) for the first occurrence of
+ * |aPattern|, comparing one character at a time with |compare| (which
+ * returns non-zero on mismatch).
+ *
+ * @return true with [aSearchStart, aSearchEnd) narrowed to the match;
+ *         false when there is no match.  An empty search range is left
+ *         untouched; otherwise |aSearchStart| is set to |aSearchEnd| on
+ *         failure.  An empty pattern never matches.
+ */
+bool
+FindInReadable( const nsAString& aPattern,
+                nsScannerIterator& aSearchStart,
+                nsScannerIterator& aSearchEnd,
+                const nsStringComparator& compare )
+  {
+    bool found_it = false;
+
+      // only bother searching at all if we're given a non-empty range to search
+    if ( aSearchStart != aSearchEnd )
+      {
+        nsAString::const_iterator patternStart, patternEnd;
+        aPattern.BeginReading(patternStart);
+        aPattern.EndReading(patternEnd);
+
+          // An empty pattern has no first character to compare, and the
+          //  verification loop below advances before testing for the pattern
+          //  end, so it would walk past the end of the pattern.  Report "not
+          //  found" the same way an exhausted search does.
+        if ( patternStart == patternEnd )
+          {
+            aSearchStart = aSearchEnd;
+            return false;
+          }
+
+          // outer loop keeps searching till we find it or run out of string to search
+        while ( !found_it )
+          {
+              // fast inner loop (that's what it's called, not what it is) looks for a potential match
+            while ( aSearchStart != aSearchEnd &&
+                    compare(patternStart.get(), aSearchStart.get(), 1, 1) )
+              ++aSearchStart;
+
+              // if we broke out of the `fast' loop because we're out of string ... we're done: no match
+            if ( aSearchStart == aSearchEnd )
+              break;
+
+              // otherwise, we're at a potential match, let's see if we really hit one
+            nsAString::const_iterator testPattern(patternStart);
+            nsScannerIterator testSearch(aSearchStart);
+
+              // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
+            for(;;)
+              {
+                  // we already compared the first character in the outer loop,
+                  //  so we'll advance before the next comparison
+                ++testPattern;
+                ++testSearch;
+
+                  // if we verified all the way to the end of the pattern, then we found it!
+                if ( testPattern == patternEnd )
+                  {
+                    found_it = true;
+                    aSearchEnd = testSearch; // return the exact found range through the parameters
+                    break;
+                  }
+
+                  // if we got to end of the string we're searching before we hit the end of the
+                  //  pattern, we'll never find what we're looking for
+                if ( testSearch == aSearchEnd )
+                  {
+                    aSearchStart = aSearchEnd;
+                    break;
+                  }
+
+                  // else if we mismatched ... it's time to advance to the next search position
+                  //  and get back into the `fast' loop
+                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
+                  {
+                    ++aSearchStart;
+                    break;
+                  }
+              }
+          }
+      }
+
+    return found_it;
+  }
+
+ /**
+  * Finds the last occurrence of |aPattern| in [aSearchStart, aSearchEnd).
+  *
+  * This implementation is simple, but does too much work.
+  * It searches the entire string from left to right, and returns the last match found, if any.
+  * This implementation will be replaced when I get |reverse_iterator|s working.
+  *
+  * On success the range is narrowed to the last match; on failure
+  * |aSearchStart| is set to |aSearchEnd| (an empty range).
+  */
+bool
+RFindInReadable( const nsAString& aPattern,
+                 nsScannerIterator& aSearchStart,
+                 nsScannerIterator& aSearchEnd,
+                 const nsStringComparator& aComparator )
+  {
+    const nsScannerIterator originalEnd(aSearchEnd);
+    nsScannerIterator probeStart(aSearchStart);
+    nsScannerIterator probeEnd(aSearchEnd);
+
+    bool matched = false;
+    while ( probeStart != probeEnd )
+      {
+        if ( !FindInReadable(aPattern, probeStart, probeEnd, aComparator) )
+          continue;
+
+          // this is the best match so far, so remember it
+        matched = true;
+        aSearchStart = probeStart;
+        aSearchEnd = probeEnd;
+
+          // ...and get ready to search some more
+          //  (it's tempting to set |probeStart=probeEnd| ... but that misses overlapping patterns)
+        ++probeStart;
+        probeEnd = originalEnd;
+      }
+
+      // if we never found it, return an empty range
+    if ( !matched )
+      aSearchStart = aSearchEnd;
+
+    return matched;
+  }
diff --git a/components/htmlparser/src/nsScannerString.h b/components/htmlparser/src/nsScannerString.h
new file mode 100644
index 000000000..247c04c04
--- /dev/null
+++ b/components/htmlparser/src/nsScannerString.h
@@ -0,0 +1,604 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsScannerString_h___
+#define nsScannerString_h___
+
+#include "nsString.h"
+#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
+#include "mozilla/LinkedList.h"
+#include <algorithm>
+
+
+ /**
+ * NOTE: nsScannerString (and the other classes defined in this file) are
+ * not related to nsAString or any of the other xpcom/string classes.
+ *
+ * nsScannerString is based on the nsSlidingString implementation that used
+ * to live in xpcom/string. Now that nsAString is limited to representing
+ * only single fragment strings, nsSlidingString can no longer be used.
+ *
+ * An advantage to this design is that it does not employ any virtual
+ * functions.
+ *
+ * This file uses SCC-style indenting in deference to the nsSlidingString
+ * code from which this code is derived ;-)
+ */
+
+class nsScannerIterator;
+class nsScannerSubstring;
+class nsScannerString;
+
+
+ /**
+ * nsScannerBufferList
+ *
+ * This class maintains a list of heap-allocated Buffer objects. The buffers
+ * are maintained in a circular linked list. Each buffer has a usage count
+ * that is decremented by the owning nsScannerSubstring.
+ *
+ * The buffer list itself is reference counted. This allows the buffer list
+ * to be shared by multiple nsScannerSubstring objects. The reference
+ * counting is not threadsafe, which is not at all a requirement.
+ *
+ * When a nsScannerSubstring releases its reference to a buffer list, it
+ * decrements the usage count of the first buffer in the buffer list that it
+ * was referencing. It informs the buffer list that it can discard buffers
+ * starting at that prefix. The buffer list will do so if the usage count of
+ * that buffer is 0 and if it is the first buffer in the list. It will
+ * continue to prune buffers starting from the front of the buffer list until
+ * it finds a buffer that has a usage count that is non-zero.
+ */
+class nsScannerBufferList
+ {
+ public:
+
+ /**
+ * Buffer objects are directly followed by a data segment. The start
+ * of the data segment is determined by incrementing the |this| pointer
+ * by 1 unit (see DataStart()). mDataEnd marks the end of the used part
+ * of that segment.
+ */
+ class Buffer : public mozilla::LinkedListElement<Buffer>
+ {
+ public:
+
+ void IncrementUsageCount() { ++mUsageCount; }
+ void DecrementUsageCount() { --mUsageCount; }
+
+ // A buffer counts as in use whenever its usage count is non-zero.
+ bool IsInUse() const { return mUsageCount != 0; }
+
+ const char16_t* DataStart() const { return (const char16_t*) (this+1); }
+ char16_t* DataStart() { return ( char16_t*) (this+1); }
+
+ const char16_t* DataEnd() const { return mDataEnd; }
+ char16_t* DataEnd() { return mDataEnd; }
+
+ const Buffer* Next() const { return getNext(); }
+ Buffer* Next() { return getNext(); }
+
+ const Buffer* Prev() const { return getPrevious(); }
+ Buffer* Prev() { return getPrevious(); }
+
+ // Length is measured in characters, from DataStart() to mDataEnd.
+ uint32_t DataLength() const { return mDataEnd - DataStart(); }
+ void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
+
+ private:
+
+ friend class nsScannerBufferList;
+
+ int32_t mUsageCount;
+ char16_t* mDataEnd;
+ };
+
+ /**
+ * Position objects serve as lightweight pointers into a buffer list.
+ * The mPosition member must be contained within mBuffer->DataStart()
+ * and mBuffer->DataEnd().
+ */
+ class Position
+ {
+ public:
+
+ // Leaves both members uninitialized.
+ Position() {}
+
+ Position( Buffer* buffer, char16_t* position )
+ : mBuffer(buffer)
+ , mPosition(position)
+ {}
+
+ inline
+ explicit Position( const nsScannerIterator& aIter );
+
+ inline
+ Position& operator=( const nsScannerIterator& aIter );
+
+ // Number of characters between p1 and p2, following the buffer chain
+ // from p1's buffer to p2's buffer when they differ.
+ static size_t Distance( const Position& p1, const Position& p2 );
+
+ Buffer* mBuffer;
+ char16_t* mPosition;
+ };
+
+ // Both allocators return nullptr on failure.
+ static Buffer* AllocBufferFromString( const nsAString& );
+ static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
+
+ // Creates a list holding |buf| as its only buffer; the reference count
+ // starts at 0, so the creator is expected to AddRef().
+ explicit nsScannerBufferList( Buffer* buf )
+ : mRefCnt(0)
+ {
+ mBuffers.insertBack(buf);
+ }
+
+ void AddRef() { ++mRefCnt; }
+ // Deletes the list (freeing all buffers) when the count reaches 0.
+ void Release() { if (--mRefCnt == 0) delete this; }
+
+ void Append( Buffer* buf ) { mBuffers.insertBack(buf); }
+ // Links |buf| into the list directly after |prev|.
+ void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
+ void SplitBuffer( const Position& );
+ void DiscardUnreferencedPrefix( Buffer* );
+
+ Buffer* Head() { return mBuffers.getFirst(); }
+ const Buffer* Head() const { return mBuffers.getFirst(); }
+
+ Buffer* Tail() { return mBuffers.getLast(); }
+ const Buffer* Tail() const { return mBuffers.getLast(); }
+
+ private:
+
+ friend class nsScannerSubstring;
+
+ // Private: the list is destroyed only through Release().
+ ~nsScannerBufferList() { ReleaseAll(); }
+ void ReleaseAll();
+
+ int32_t mRefCnt;
+ mozilla::LinkedList<Buffer> mBuffers;
+ };
+
+
+ /**
+ * nsScannerFragment represents a "slice" of a Buffer object.
+ */
+struct nsScannerFragment
+ {
+ typedef nsScannerBufferList::Buffer Buffer;
+
+ const Buffer* mBuffer; // buffer this fragment is a slice of
+ const char16_t* mFragmentStart; // first character of the slice
+ const char16_t* mFragmentEnd; // one past the last character of the slice
+ };
+
+
+ /**
+ * nsScannerSubstring is the base class for nsScannerString. It provides
+ * access to iterators and methods to bind the substring to another
+ * substring or nsAString instance.
+ *
+ * This class owns the buffer list.
+ */
+class nsScannerSubstring
+ {
+ public:
+ typedef nsScannerBufferList::Buffer Buffer;
+ typedef nsScannerBufferList::Position Position;
+ typedef uint32_t size_type;
+
+ nsScannerSubstring();
+ explicit nsScannerSubstring( const nsAString& s );
+
+ ~nsScannerSubstring();
+
+ // Position |iter| at the start / one past the end of this substring
+ // and return it.
+ nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
+ nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
+
+ size_type Length() const { return mLength; }
+
+ // Number of occurrences of the given character in this substring.
+ int32_t CountChar( char16_t ) const;
+
+ void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
+ void Rebind( const nsAString& );
+
+ // Single-fragment view of this substring; cached until marked dirty.
+ const nsSubstring& AsString() const;
+
+ // Step a fragment to the next / previous buffer within this substring;
+ // return false when there is none.
+ bool GetNextFragment( nsScannerFragment& ) const;
+ bool GetPrevFragment( nsScannerFragment& ) const;
+
+ static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
+ static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); }
+
+ protected:
+
+ // Takes a reference on the buffer list and bumps the usage count of the
+ // start buffer. Requires a non-null mBufferList and start buffer.
+ void acquire_ownership_of_buffer_list() const
+ {
+ mBufferList->AddRef();
+ mStart.mBuffer->IncrementUsageCount();
+ }
+
+ // Undoes acquire_ownership_of_buffer_list(); a no-op when no list is
+ // bound. mBufferList itself is not cleared.
+ void release_ownership_of_buffer_list()
+ {
+ if (mBufferList)
+ {
+ mStart.mBuffer->DecrementUsageCount();
+ mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
+ mBufferList->Release();
+ }
+ }
+
+ // Sets the range to span the whole buffer list, from the start of the
+ // head buffer's data to the end of the tail buffer's data.
+ void init_range_from_buffer_list()
+ {
+ mStart.mBuffer = mBufferList->Head();
+ mStart.mPosition = mStart.mBuffer->DataStart();
+
+ mEnd.mBuffer = mBufferList->Tail();
+ mEnd.mPosition = mEnd.mBuffer->DataEnd();
+
+ mLength = Position::Distance(mStart, mEnd);
+ }
+
+ Position mStart;
+ Position mEnd;
+ nsScannerBufferList *mBufferList;
+ size_type mLength;
+
+ // these fields are used to implement AsString
+ nsDependentSubstring mFlattenedRep;
+ bool mIsDirty;
+
+ friend class nsScannerSharedSubstring;
+ };
+
+
+ /**
+ * nsScannerString provides methods to grow and modify a buffer list.
+ */
+class nsScannerString : public nsScannerSubstring
+ {
+ public:
+
+ // Creates a string whose buffer list initially contains only the given
+ // buffer.
+ explicit nsScannerString( Buffer* );
+
+ // you are giving ownership to the string, it takes and keeps your
+ // buffer, deleting it when done.
+ // Use AllocBuffer or AllocBufferFromString to create a Buffer object
+ // for use with this function.
+ void AppendBuffer( Buffer* );
+
+ // Moves the start of the string forward to the given iterator.
+ void DiscardPrefix( const nsScannerIterator& );
+ // any other way you want to do this?
+
+ // Inserts a copy of aReadable into the buffer list at aCurrentPosition.
+ void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
+ };
+
+
+ /**
+ * nsScannerSharedSubstring implements copy-on-write semantics for
+ * nsScannerSubstring. When you call .writable(), it will copy the data
+ * and return a mutable string object. This class also manages releasing
+ * the reference to the scanner buffer when it is no longer needed.
+ */
+
+class nsScannerSharedSubstring
+ {
+ public:
+ // Starts out holding no buffer and no buffer list.
+ nsScannerSharedSubstring()
+ : mBuffer(nullptr), mBufferList(nullptr) { }
+
+ // Releases the held buffer/list references, if any.
+ ~nsScannerSharedSubstring()
+ {
+ if (mBufferList)
+ ReleaseBuffer();
+ }
+
+ // Acquire a copy-on-write reference to the given substring.
+ void Rebind(const nsScannerIterator& aStart,
+ const nsScannerIterator& aEnd);
+
+ // Get a mutable reference to this string
+ // (detaches from the scanner buffer first when one is held)
+ nsSubstring& writable()
+ {
+ if (mBufferList)
+ MakeMutable();
+
+ return mString;
+ }
+
+ // Get a const reference to this string
+ const nsSubstring& str() const { return mString; }
+
+ private:
+ typedef nsScannerBufferList::Buffer Buffer;
+
+ void ReleaseBuffer();
+ void MakeMutable();
+
+ nsDependentSubstring mString;
+ // Non-null only while mString points into a scanner buffer.
+ Buffer *mBuffer;
+ nsScannerBufferList *mBufferList;
+ };
+
+ /**
+ * nsScannerIterator works just like nsReadingIterator<CharT> except that
+ * it knows how to iterate over a list of scanner buffers.
+ */
+class nsScannerIterator
+  {
+    public:
+      typedef nsScannerIterator             self_type;
+      typedef ptrdiff_t                     difference_type;
+      typedef char16_t                      value_type;
+      typedef const char16_t*               pointer;
+      typedef const char16_t&               reference;
+      typedef nsScannerSubstring::Buffer    Buffer;
+
+    protected:
+
+      nsScannerFragment         mFragment;  // slice of the buffer currently being read
+      const char16_t*           mPosition;  // current character within mFragment
+      const nsScannerSubstring* mOwner;     // substring used to step between fragments
+
+      friend class nsScannerSubstring;
+      friend class nsScannerSharedSubstring;
+
+    public:
+      // nsScannerIterator();                                       // auto-generate default constructor is OK
+      // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
+      // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
+
+        // When sitting on a fragment boundary, hop to the next / previous
+        // fragment (if the owner has one).
+      inline void normalize_forward();
+      inline void normalize_backward();
+
+        // Pointer to the current character.
+      pointer get() const
+        {
+          return mPosition;
+        }
+
+      char16_t operator*() const
+        {
+          return *get();
+        }
+
+      const nsScannerFragment& fragment() const
+        {
+          return mFragment;
+        }
+
+      const Buffer* buffer() const
+        {
+          return mFragment.mBuffer;
+        }
+
+      self_type& operator++()
+        {
+          ++mPosition;
+          normalize_forward();
+          return *this;
+        }
+
+      self_type operator++( int )
+        {
+          self_type result(*this);
+          ++mPosition;
+          normalize_forward();
+          return result;
+        }
+
+        // Decrementing normalizes first so that stepping back from the
+        // start of a fragment lands in the previous fragment.
+      self_type& operator--()
+        {
+          normalize_backward();
+          --mPosition;
+          return *this;
+        }
+
+      self_type operator--( int )
+        {
+          self_type result(*this);
+          normalize_backward();
+          --mPosition;
+          return result;
+        }
+
+        // Characters between the current position and the fragment's end.
+      difference_type size_forward() const
+        {
+          return mFragment.mFragmentEnd - mPosition;
+        }
+
+        // Characters between the fragment's start and the current position.
+      difference_type size_backward() const
+        {
+          return mPosition - mFragment.mFragmentStart;
+        }
+
+        /**
+         * Moves the iterator |n| characters forward (n > 0) or backward
+         * (n < 0), crossing fragment boundaries as needed.  If the string
+         * ends before |n| characters have been covered the iterator stops
+         * there instead of looping forever.
+         */
+      self_type& advance( difference_type n )
+        {
+          while ( n > 0 )
+            {
+              difference_type one_hop = std::min(n, size_forward());
+
+              NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
+                // No progress is possible at the end of the string; without
+                //  this check the loop never terminates when assertions are
+                //  compiled out.
+              if ( one_hop <= 0 )
+                break;
+
+              mPosition += one_hop;
+              normalize_forward();
+              n -= one_hop;
+            }
+
+          while ( n < 0 )
+            {
+              normalize_backward();
+              difference_type one_hop = std::max(n, -size_backward());
+
+              NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
+                // Same guard for the backward direction.
+              if ( one_hop >= 0 )
+                break;
+
+              mPosition += one_hop;
+              n -= one_hop;
+            }
+
+          return *this;
+        }
+  };
+
+
+// Two iterators are in the same fragment when their fragments begin at
+// the same address.
+inline
+bool
+SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
+  {
+    const char16_t* startOfA = a.fragment().mFragmentStart;
+    const char16_t* startOfB = b.fragment().mFragmentStart;
+    return startOfA == startOfB;
+  }
+
+
+ /**
+ * this class is needed in order to make use of the methods in nsAlgorithm.h
+ */
+template <>
+struct nsCharSourceTraits<nsScannerIterator>
+  {
+    typedef nsScannerIterator::difference_type difference_type;
+
+      // Number of characters that can be read contiguously starting at
+      // |first|: up to |last| when both share a fragment, otherwise up to
+      // the end of |first|'s fragment.
+    static
+    uint32_t
+    readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
+      {
+        if ( SameFragment(first, last) )
+          return uint32_t(last.get() - first.get());
+
+        return uint32_t(first.size_forward());
+      }
+
+      // Pointer to the character |iter| currently refers to.
+    static
+    const nsScannerIterator::value_type*
+    read( const nsScannerIterator& iter )
+      {
+        return iter.get();
+      }
+
+      // Moves |s| by |n| characters.
+    static
+    void
+    advance( nsScannerIterator& s, difference_type n )
+      {
+        s.advance(n);
+      }
+  };
+
+
+ /**
+ * inline methods follow
+ */
+
+// While the iterator sits at the end of its fragment, move it to the start
+// of the next fragment; stops when the owner has no further fragment.
+inline
+void
+nsScannerIterator::normalize_forward()
+  {
+    for (;;)
+      {
+        if (mPosition != mFragment.mFragmentEnd)
+          break;
+        if (!mOwner->GetNextFragment(mFragment))
+          break;
+
+        mPosition = mFragment.mFragmentStart;
+      }
+  }
+
+// While the iterator sits at the start of its fragment, move it to the end
+// of the previous fragment; stops when the owner has no earlier fragment.
+inline
+void
+nsScannerIterator::normalize_backward()
+  {
+    for (;;)
+      {
+        if (mPosition != mFragment.mFragmentStart)
+          break;
+        if (!mOwner->GetPrevFragment(mFragment))
+          break;
+
+        mPosition = mFragment.mFragmentEnd;
+      }
+  }
+
+// Iterators compare equal when they point at the same character address;
+// fragment and owner are not compared.
+inline
+bool
+operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+ {
+ return lhs.get() == rhs.get();
+ }
+
+// Negation of operator==: true when the two iterators point at different
+// character addresses.
+inline
+bool
+operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+  {
+    return !(lhs.get() == rhs.get());
+  }
+
+
+// Builds a Position from an iterator's buffer and current character
+// pointer, casting away the constness the iterator exposes.
+inline
+nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
+ : mBuffer(const_cast<Buffer*>(aIter.buffer()))
+ , mPosition(const_cast<char16_t*>(aIter.get()))
+ {}
+
+// Assigns this Position from an iterator's buffer and current character
+// pointer, casting away the constness the iterator exposes.
+inline
+nsScannerBufferList::Position&
+nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
+ {
+ mBuffer = const_cast<Buffer*>(aIter.buffer());
+ mPosition = const_cast<char16_t*>(aIter.get());
+ return *this;
+ }
+
+
+ /**
+ * scanner string utils
+ *
+ * These methods mimic the API provided by nsReadableUtils in xpcom/string.
+ * Here we provide only the methods that the htmlparser module needs.
+ */
+
+// Number of characters between two iterators, computed on their Positions.
+inline
+size_t
+Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
+  {
+    const nsScannerBufferList::Position from(aStart);
+    const nsScannerBufferList::Position to(aEnd);
+    return nsScannerBufferList::Position::Distance(from, to);
+  }
+
+bool
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsAString& aDest );
+
+// Convenience overload: copies the whole of |aSrc| into |aDest|.
+inline
+bool
+CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+  {
+    nsScannerIterator first, last;
+    aSrc.BeginReading(first);
+    aSrc.EndReading(last);
+    return CopyUnicodeTo(first, last, aDest);
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsAString& aDest );
+
+// Convenience overload: appends the whole of |aSrc| to |aDest|.
+inline
+bool
+AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+  {
+    nsScannerIterator first, last;
+    aSrc.BeginReading(first);
+    aSrc.EndReading(last);
+    return AppendUnicodeTo(first, last, aDest);
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsScannerSharedSubstring& aDest );
+
+bool
+FindCharInReadable( char16_t aChar,
+ nsScannerIterator& aStart,
+ const nsScannerIterator& aEnd );
+
+bool
+FindInReadable( const nsAString& aPattern,
+ nsScannerIterator& aStart,
+ nsScannerIterator& aEnd,
+ const nsStringComparator& = nsDefaultStringComparator() );
+
+bool
+RFindInReadable( const nsAString& aPattern,
+ nsScannerIterator& aStart,
+ nsScannerIterator& aEnd,
+ const nsStringComparator& = nsDefaultStringComparator() );
+
+// FindInReadable() using nsCaseInsensitiveStringComparator for the
+// character comparisons.
+inline
+bool
+CaseInsensitiveFindInReadable( const nsAString& aPattern,
+                               nsScannerIterator& aStart,
+                               nsScannerIterator& aEnd )
+  {
+    nsCaseInsensitiveStringComparator ignoreCase;
+    return FindInReadable(aPattern, aStart, aEnd, ignoreCase);
+  }
+
+#endif // !defined(nsScannerString_h___)
diff --git a/components/htmlparser/src/nsToken.h b/components/htmlparser/src/nsToken.h
new file mode 100644
index 000000000..6221aca57
--- /dev/null
+++ b/components/htmlparser/src/nsToken.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTOKEN__
+#define CTOKEN__
+
+// Identifiers for the kinds of HTML token.  Values are consecutive starting
+// at 0; eToken_last is a sentinel and must remain the final enumerator.
+enum eHTMLTokenTypes {
+  eToken_unknown = 0,
+  eToken_start = 1,
+  eToken_end,
+  eToken_comment,
+  eToken_entity,
+  eToken_whitespace,
+  eToken_newline,
+  eToken_text,
+  eToken_attribute,
+  eToken_instruction,
+  eToken_cdatasection,
+  eToken_doctypeDecl,
+  eToken_markupDecl,
+  eToken_last //make sure this stays the last token...
+};
+
+#endif
+
+
diff --git a/components/moz.build b/components/moz.build
index b854260ef..22bfd42b7 100644
--- a/components/moz.build
+++ b/components/moz.build
@@ -32,6 +32,7 @@ DIRS += [
'finalizationwitness',
'formautofill',
'find',
+ 'htmlparser',
'gfx',
'global',
'handling',