Skip to main content

[jsonp~git:736b83ae] Improving escaped unicode char parsing using a lookup table.

  • From:
  • To:
  • Subject: [jsonp~git:736b83ae] Improving escaped unicode char parsing using a lookup table.
  • Date: Tue, 29 Oct 2013 03:03:53 +0000

Project:    jsonp
Repository: git
Revision:   736b83aefadfcb4428f3fbffdf56191bae55f5b0
Author:     jitu
Date:       2013-10-29 03:02:34 UTC
Link:       

Log Message:
------------
Improving escaped unicode char parsing using a lookup table.



Revisions:
----------
736b83aefadfcb4428f3fbffdf56191bae55f5b0


Modified Paths:
---------------
impl/src/main/java/org/glassfish/json/JsonTokenizer.java
tests/src/test/java/org/glassfish/json/tests/JsonParsingExceptionTest.java


Diffs:
------
--- a/impl/src/main/java/org/glassfish/json/JsonTokenizer.java
+++ b/impl/src/main/java/org/glassfish/json/JsonTokenizer.java
@@ -58,6 +58,22 @@ import javax.json.stream.JsonParser.Event;
  * @author Jitendra Kotamraju
  */
 final class JsonTokenizer implements Closeable {
+    // Table to look up hex ch -> value (for e.g HEX['F'] = 15, HEX['5'] = 5)
+    private final static int[] HEX = new int[128];
+    static {
+        Arrays.fill(HEX, -1);
+        for (int i='0'; i <= '9'; i++) {
+            HEX[i] = i-'0';
+        }
+        for (int i='A'; i <= 'F'; i++) {
+            HEX[i] = 10+i-'A';
+        }
+        for (int i='a'; i <= 'f'; i++) {
+            HEX[i] = 10+i-'a';
+        }
+    }
+    private final static int HEX_LENGTH = HEX.length;
+
     private final BufferPool bufferPool;
 
     private final Reader reader;
@@ -203,21 +219,16 @@ final class JsonTokenizer implements Closeable {
                 buf[storeEnd++] = (char)ch;
                 break;
             case 'u': {
-                char unicode = 0;
+                int unicode = 0;
                 for (int i = 0; i < 4; i++) {
                     int ch3 = read();
-                    unicode <<= 4;
-                    if (ch3 >= '0' && ch3 <= '9') {
-                        unicode |= ((char) ch3) - '0';
-                    } else if (ch3 >= 'a' && ch3 <= 'f') {
-                        unicode |= (((char) ch3) - 'a') + 0xA;
-                    } else if (ch3 >= 'A' && ch3 <= 'F') {
-                        unicode |= (((char) ch3) - 'A') + 0xA;
-                    } else {
+                    int digit = (ch3 >= 0 && ch3 < HEX_LENGTH) ? HEX[ch3] : -1;
+                    if (digit < 0) {
                         throw new JsonParsingException(JsonMessages.TOKENIZER_UNEXPECTED_CHAR(ch3), getLastCharLocation());
                     }
+                    unicode = (unicode << 4)|digit;
                 }
-                buf[storeEnd++] = (char) (unicode & 0xffff);
+                buf[storeEnd++] = (char)unicode;
                 break;
             }
             default:
--- a/tests/src/test/java/org/glassfish/json/tests/JsonParsingExceptionTest.java
+++ b/tests/src/test/java/org/glassfish/json/tests/JsonParsingExceptionTest.java
@@ -95,6 +95,25 @@ public class JsonParsingExceptionTest extends TestCase {
         testMalformedJson("{ \"a\" : {}, \"b\": [] ]", null);
     }
 
+    public void testWrongUnicode() {
+        testMalformedJson("[ \"\\uX00F\" ]", null);
+        testMalformedJson("[ \"\\u000Z\" ]", null);
+        testMalformedJson("[ \"\\u000\" ]", null);
+        testMalformedJson("[ \"\\u00\" ]", null);
+        testMalformedJson("[ \"\\u0\" ]", null);
+        testMalformedJson("[ \"\\u\" ]", null);
+        testMalformedJson("[ \"\\u\"", null);
+        testMalformedJson("[ \"\\", null);
+    }
+
+    public void testControlChar() {
+        testMalformedJson("[ \"\u0000\" ]", null);
+        testMalformedJson("[ \"\u000c\" ]", null);
+        testMalformedJson("[ \"\u000f\" ]", null);
+        testMalformedJson("[ \"\u001F\" ]", null);
+        testMalformedJson("[ \"\u001f\" ]", null);
+    }
+
     public void testLocation1() {
         testMalformedJson("x", new MyLocation(1, 1, 0));
         testMalformedJson("{]", new MyLocation(1, 2, 1));





[jsonp~git:736b83ae] Improving escaped unicode char parsing using a lookup table.

jitu 10/29/2013
 
 
Close
loading
Please Confirm
Close