summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matt A. Tobin <email@mattatobin.com> 2020-01-15 18:10:14 -0500
committer Matt A. Tobin <email@mattatobin.com> 2020-01-15 18:10:14 -0500
commit 3da18fda029a038784525e840c831ffd73b25c33 (patch)
tree 9371014cdb7ae79cddd53be1dd8c13c2da40adde
parent ee1d953749603d9b9cc125541b89235268585756 (diff)
download uxp-3da18fda029a038784525e840c831ffd73b25c33.tar.gz
Bug 1562033
Adjust tokenization of U+0000 (java htmlparser)
-rw-r--r-- parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java 12
1 file changed, 1 insertion, 11 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
index 70e1df75c1..75ba2e1e4f 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -2457,8 +2457,6 @@ public class Tokenizer implements Locator {
}
c = checkChar(buf, pos);
switch (c) {
- case '\u0000':
- break stateloop;
case '-':
clearStrBufAfterOneHyphen();
state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
@@ -2963,9 +2961,6 @@ public class Tokenizer implements Locator {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
/*
* Unlike the definition is the spec, this state does not
* return a value and never requires the caller to
@@ -2991,6 +2986,7 @@ public class Tokenizer implements Locator {
case '\u000C':
case '<':
case '&':
+ case '\u0000':
emitOrAppendCharRefBuf(returnState);
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
@@ -3044,9 +3040,6 @@ public class Tokenizer implements Locator {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
/*
* The data structure is as follows:
*
@@ -3122,9 +3115,6 @@ public class Tokenizer implements Locator {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
entCol++;
/*
* Consume the maximum number of characters possible,