You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
913 lines
23 KiB
C#
913 lines
23 KiB
C#
#region Header
|
|
/**
 * Lexer.cs
 *   JSON lexer implementation based on a finite state machine.
 *
 * The authors disclaim copyright to this source code. For more details, see
 * the COPYING file included with this distribution.
 **/
|
|
#endregion
|
|
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Text;
|
|
|
|
|
|
namespace LitJson
|
|
{
|
|
// Scratch state exchanged between Lexer.NextToken and the state handlers.
internal class FsmContext
{
    // Lexer instance the handlers read characters from and write into.
    public Lexer L;

    // State number the handler asks NextToken to switch to next.
    public int NextState;

    // Set to true by a handler when a token is complete; NextToken
    // clears it before it starts running handlers.
    public bool Return;

    // String state (19 or 23) to resume once an escape sequence handled
    // by states 21/22 has been consumed.
    public int StateStack;
}
|
|
|
|
|
|
internal class Lexer
|
|
{
|
|
#region Fields
|
|
// Signature of a state handler: returns false when the character it
// read is not acceptable in that state.
private delegate bool StateHandler (FsmContext ctx);

// Handler for state N lives at index N - 1.
private static StateHandler[] fsm_handler_table;
// Token kind produced when state N (index N - 1) returns a token.
private static int[] fsm_return_table;

// Input source and the one-character push-back slot (0 means empty).
private TextReader reader;
private int input_buffer;
private int input_char;
private bool end_of_input;

// Lexer options.
private bool allow_comments;
private bool allow_single_quoted_strings;

// State machine bookkeeping.
private FsmContext fsm_context;
private int state;

// Token output.
private StringBuilder string_buffer;
private string string_value;
private int token;

// Accumulator for the code unit of a \uXXXX escape.
private int unichar;
|
|
#endregion
|
|
|
|
|
|
#region Properties
|
|
/// <summary>
/// Gets or sets whether a '/' in the initial state may start a comment.
/// When false, State1 rejects the '/' character.
/// </summary>
public bool AllowComments
{
    get {
        return this.allow_comments;
    }
    set {
        this.allow_comments = value;
    }
}
|
|
|
|
/// <summary>
/// Gets or sets whether strings delimited by single quotes are accepted.
/// When false, State1 rejects the '\'' character.
/// </summary>
public bool AllowSingleQuotedStrings
{
    get {
        return this.allow_single_quoted_strings;
    }
    set {
        this.allow_single_quoted_strings = value;
    }
}
|
|
|
|
/// <summary>
/// Gets whether a read has hit the end of the underlying reader
/// (set by GetChar when NextChar returns -1).
/// </summary>
public bool EndOfInput
{
    get {
        return this.end_of_input;
    }
}
|
|
|
|
/// <summary>
/// Gets the token produced by the last successful NextToken call: either
/// an entry of the return table or, for single-character tokens, the
/// character itself.
/// </summary>
public int Token
{
    get {
        return this.token;
    }
}
|
|
|
|
/// <summary>
/// Gets the text that was accumulated for the last token returned by
/// NextToken.
/// </summary>
public string StringValue
{
    get {
        return this.string_value;
    }
}
|
|
#endregion
|
|
|
|
|
|
#region Constructors
|
|
// Type initializer: fills the handler and return tables shared by all
// Lexer instances.
static Lexer ()
{
    Lexer.PopulateFsmTables ();
}
|
|
|
|
/// <summary>
/// Creates a lexer that reads characters from <paramref name="reader"/>.
/// Comments and single-quoted strings are enabled by default and the
/// state machine starts in state 1.
/// </summary>
/// <param name="reader">Source of the text to tokenize.</param>
public Lexer (TextReader reader)
{
    // Input side.
    this.reader = reader;
    this.input_buffer = 0;
    this.end_of_input = false;

    // Options.
    this.allow_comments = true;
    this.allow_single_quoted_strings = true;

    // State machine and output buffer.
    this.state = 1;
    this.string_buffer = new StringBuilder (128);

    FsmContext context = new FsmContext ();
    context.L = this;
    this.fsm_context = context;
}
|
|
#endregion
|
|
|
|
|
|
#region Static Methods
|
|
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="digit">Character code of the digit.</param>
/// <returns>
/// 10-15 for 'a'-'f' / 'A'-'F'; for anything else, the distance of the
/// character from '0' (callers are expected to pass only hex digits).
/// </returns>
private static int HexValue (int digit)
{
    if (digit >= 'a' && digit <= 'f')
        return 10 + (digit - 'a');

    if (digit >= 'A' && digit <= 'F')
        return 10 + (digit - 'A');

    return digit - '0';
}
|
|
|
|
/// <summary>
/// Builds the two static lookup tables of the state machine. Both are
/// indexed by state number minus one.
/// </summary>
private static void PopulateFsmTables ()
{
    // See section A.1. of the manual for details of the finite
    // state machine.
    fsm_handler_table = new StateHandler[] {
        State1,  State2,  State3,  State4,  State5,  State6,  State7,
        State8,  State9,  State10, State11, State12, State13, State14,
        State15, State16, State17, State18, State19, State20, State21,
        State22, State23, State24, State25, State26, State27, State28
    };

    // Entries left at 0 belong to states that never return a token.
    int[] returns = new int[28];

    returns[0]  = (int) ParserToken.Char;     // state 1
    returns[2]  = (int) ParserToken.Number;   // state 3
    returns[3]  = (int) ParserToken.Number;   // state 4
    returns[5]  = (int) ParserToken.Number;   // state 6
    returns[7]  = (int) ParserToken.Number;   // state 8
    returns[10] = (int) ParserToken.True;     // state 11
    returns[14] = (int) ParserToken.False;    // state 15
    returns[17] = (int) ParserToken.Null;     // state 18
    returns[18] = (int) ParserToken.CharSeq;  // state 19
    returns[19] = (int) ParserToken.Char;     // state 20
    returns[22] = (int) ParserToken.CharSeq;  // state 23
    returns[23] = (int) ParserToken.Char;     // state 24

    fsm_return_table = returns;
}
|
|
|
|
/// <summary>
/// Maps the character following a backslash to the character it stands
/// for.
/// </summary>
/// <param name="esc_char">Character code read after the backslash.</param>
/// <returns>
/// The control character for n, t, r, b and f; the character itself for
/// '"', '\'', '\\' and '/'; '?' for anything else.
/// </returns>
private static char ProcessEscChar (int esc_char)
{
    if (esc_char == 'n')
        return '\n';
    if (esc_char == 't')
        return '\t';
    if (esc_char == 'r')
        return '\r';
    if (esc_char == 'b')
        return '\b';
    if (esc_char == 'f')
        return '\f';

    bool stands_for_itself = esc_char == '"'  || esc_char == '\'' ||
                             esc_char == '\\' || esc_char == '/';
    if (stands_for_itself)
        return Convert.ToChar (esc_char);

    // Unreachable
    return '?';
}
|
|
|
|
/// <summary>
/// Initial state: skips whitespace and dispatches on the first
/// significant character of the next token.
/// </summary>
/// <returns>
/// false when the character cannot start a token (or starts a disabled
/// feature); true otherwise, including when the input runs out.
/// </returns>
private static bool State1 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        // Space, or anything in the range tab..carriage return.
        if (c == ' ' || (c >= '\t' && c <= '\r'))
            continue;

        switch (c) {
        // Structural characters are returned as single-char tokens.
        case '{':
        case '}':
        case '[':
        case ']':
        case ':':
        case ',':
            ctx.Return = true;
            ctx.NextState = 1;
            return true;

        case '"':
            ctx.Return = true;
            ctx.NextState = 19;
            return true;

        case '\'':
            if (! lexer.allow_single_quoted_strings)
                return false;

            // Reported to the caller as a double quote.
            lexer.input_char = '"';
            ctx.Return = true;
            ctx.NextState = 23;
            return true;

        case '-':
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 2;
            return true;

        case '0':
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 4;
            return true;

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 3;
            return true;

        case 't':
            ctx.NextState = 9;
            return true;

        case 'f':
            ctx.NextState = 12;
            return true;

        case 'n':
            ctx.NextState = 16;
            return true;

        case '/':
            if (! lexer.allow_comments)
                return false;

            ctx.NextState = 25;
            return true;

        default:
            return false;
        }
    }

    return true;
}
|
|
|
|
/// <summary>
/// Reached after a leading '-': requires a digit. '0' leads to state 4,
/// '1'-'9' to state 3; anything else is rejected.
/// </summary>
private static bool State2 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c == '0') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 4;
        return true;
    }

    if (c >= '1' && c <= '9') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 3;
        return true;
    }

    return false;
}
|
|
|
|
/// <summary>
/// Collects the digits that follow a leading '1'-'9'. Whitespace or one
/// of ',', ']', '}' ends the number; '.' moves to state 5 and 'e'/'E' to
/// state 7.
/// </summary>
private static bool State3 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        // Whitespace terminates the number and is consumed.
        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        // A delimiter terminates the number but is pushed back so that
        // state 1 sees it again.
        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        if (c == '.') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 5;
            return true;
        }

        if (c == 'e' || c == 'E') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 7;
            return true;
        }

        return false;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Reached after a leading '0': reads exactly one character. Whitespace
/// or ',', ']', '}' ends the number; '.' moves to state 5 and 'e'/'E' to
/// state 7. Any other character (another digit included) is rejected.
/// </summary>
private static bool State4 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    // Whitespace terminates the number and is consumed.
    if (c == ' ' || (c >= '\t' && c <= '\r')) {
        ctx.Return = true;
        ctx.NextState = 1;
        return true;
    }

    // Delimiters are pushed back for state 1.
    if (c == ',' || c == ']' || c == '}') {
        lexer.UngetChar ();
        ctx.Return = true;
        ctx.NextState = 1;
        return true;
    }

    if (c == '.') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 5;
        return true;
    }

    if (c == 'e' || c == 'E') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 7;
        return true;
    }

    return false;
}
|
|
|
|
/// <summary>
/// Reached after a decimal point: the next character must be a digit,
/// which is appended before moving to state 6.
/// </summary>
private static bool State5 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c < '0' || c > '9')
        return false;

    lexer.string_buffer.Append ((char) c);
    ctx.NextState = 6;
    return true;
}
|
|
|
|
/// <summary>
/// Collects the digits after the first fractional digit. Whitespace or
/// ',', ']', '}' ends the number; 'e'/'E' moves to state 7.
/// </summary>
private static bool State6 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        // Whitespace terminates the number and is consumed.
        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        // Delimiters are pushed back for state 1.
        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        if (c == 'e' || c == 'E') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 7;
            return true;
        }

        return false;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Reached after 'e'/'E': accepts one digit, '+' or '-', appends it and
/// moves to state 8. Anything else is rejected.
/// </summary>
private static bool State7 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    bool is_digit = c >= '0' && c <= '9';
    bool is_sign = c == '+' || c == '-';

    if (! is_digit && ! is_sign)
        return false;

    lexer.string_buffer.Append ((char) c);
    ctx.NextState = 8;
    return true;
}
|
|
|
|
/// <summary>
/// Collects exponent digits. Whitespace or ',', ']', '}' ends the
/// number; any other non-digit is rejected.
/// </summary>
private static bool State8 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        // Whitespace terminates the number and is consumed.
        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        // Delimiters are pushed back for state 1.
        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        return false;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Entered from state 1 on 't': expects 'r' and moves to state 10.
/// </summary>
private static bool State9 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'r')
        return false;

    ctx.NextState = 10;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 9: expects 'u' and moves to state 11.
/// </summary>
private static bool State10 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'u')
        return false;

    ctx.NextState = 11;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 10: expects the final 'e', then requests a token
/// return and goes back to state 1.
/// </summary>
private static bool State11 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'e')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
|
|
|
|
/// <summary>
/// Entered from state 1 on 'f': expects 'a' and moves to state 13.
/// </summary>
private static bool State12 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'a')
        return false;

    ctx.NextState = 13;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 12: expects 'l' and moves to state 14.
/// </summary>
private static bool State13 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 14;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 13: expects 's' and moves to state 15.
/// </summary>
private static bool State14 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 's')
        return false;

    ctx.NextState = 15;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 14: expects the final 'e', then requests a token
/// return and goes back to state 1.
/// </summary>
private static bool State15 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'e')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
|
|
|
|
/// <summary>
/// Entered from state 1 on 'n': expects 'u' and moves to state 17.
/// </summary>
private static bool State16 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'u')
        return false;

    ctx.NextState = 17;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 16: expects 'l' and moves to state 18.
/// </summary>
private static bool State17 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 18;
    return true;
}
|
|
|
|
/// <summary>
/// Follows state 17: expects the final 'l', then requests a token
/// return and goes back to state 1.
/// </summary>
private static bool State18 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
|
|
|
|
/// <summary>
/// Body of a double-quoted string: appends characters until the closing
/// quote (pushed back for state 20) or a backslash (handled by state 21,
/// which comes back here through StateStack).
/// </summary>
private static bool State19 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '"') {
            // Leave the quote for state 20 and hand back the text.
            lexer.UngetChar ();
            ctx.Return = true;
            ctx.NextState = 20;
            return true;
        }

        if (c == '\\') {
            ctx.StateStack = 19;
            ctx.NextState = 21;
            return true;
        }

        lexer.string_buffer.Append ((char) c);
    }

    return true;
}
|
|
|
|
/// <summary>
/// Consumes the closing double quote pushed back by state 19 and
/// returns it as a token before going back to state 1.
/// </summary>
private static bool State20 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != '"')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
|
|
|
|
/// <summary>
/// Reached after a backslash inside a string. 'u' moves to state 22;
/// a simple escape is translated, appended, and the string state saved
/// in StateStack is resumed. Unknown escapes are rejected.
/// </summary>
private static bool State21 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c == 'u') {
        ctx.NextState = 22;
        return true;
    }

    bool simple_escape =
        c == '"' || c == '\'' || c == '/' || c == '\\' ||
        c == 'b' || c == 'f'  || c == 'n' || c == 'r'  || c == 't';

    if (! simple_escape)
        return false;

    lexer.string_buffer.Append (ProcessEscChar (c));
    ctx.NextState = ctx.StateStack;
    return true;
}
|
|
|
|
/// <summary>
/// Reads the four hexadecimal digits of a \uXXXX escape, appends the
/// resulting character and resumes the string state saved in
/// StateStack. A non-hex character is rejected.
/// </summary>
private static bool State22 (FsmContext ctx)
{
    Lexer lexer = ctx.L;
    int digits_read = 0;
    // Place value of the digit being read: 16^3 for the first one.
    int weight = 4096;

    lexer.unichar = 0;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        bool is_hex = (c >= '0' && c <= '9') ||
                      (c >= 'A' && c <= 'F') ||
                      (c >= 'a' && c <= 'f');
        if (! is_hex)
            return false;

        lexer.unichar += HexValue (c) * weight;
        weight /= 16;
        digits_read++;

        if (digits_read == 4) {
            lexer.string_buffer.Append (Convert.ToChar (lexer.unichar));
            ctx.NextState = ctx.StateStack;
            return true;
        }
    }

    return true;
}
|
|
|
|
/// <summary>
/// Body of a single-quoted string: appends characters until the closing
/// quote (pushed back for state 24) or a backslash (handled by state 21,
/// which comes back here through StateStack).
/// </summary>
private static bool State23 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '\'') {
            // Leave the quote for state 24 and hand back the text.
            lexer.UngetChar ();
            ctx.Return = true;
            ctx.NextState = 24;
            return true;
        }

        if (c == '\\') {
            ctx.StateStack = 23;
            ctx.NextState = 21;
            return true;
        }

        lexer.string_buffer.Append ((char) c);
    }

    return true;
}
|
|
|
|
/// <summary>
/// Consumes the closing single quote pushed back by state 23. The
/// character is rewritten to '"' before it is returned as a token, then
/// the machine goes back to state 1.
/// </summary>
private static bool State24 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();

    if (lexer.input_char != '\'')
        return false;

    lexer.input_char = '"';
    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
|
|
|
|
/// <summary>
/// Reached after a '/' in state 1: a second '/' moves to state 26, a
/// '*' moves to state 27, anything else is rejected.
/// </summary>
private static bool State25 (FsmContext ctx)
{
    ctx.L.GetChar ();
    int c = ctx.L.input_char;

    if (c == '/') {
        ctx.NextState = 26;
        return true;
    }

    if (c == '*') {
        ctx.NextState = 27;
        return true;
    }

    return false;
}
|
|
|
|
/// <summary>
/// Discards characters up to and including the next '\n', then goes
/// back to state 1. Never rejects a character.
/// </summary>
private static bool State26 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        if (lexer.input_char != '\n')
            continue;

        ctx.NextState = 1;
        return true;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Discards characters until a '*' is read, then moves to state 28.
/// Never rejects a character.
/// </summary>
private static bool State27 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        if (lexer.input_char != '*')
            continue;

        ctx.NextState = 28;
        return true;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Reached after a '*' in state 27: skips further '*' characters, goes
/// back to state 1 on '/', and returns to state 27 on anything else.
/// </summary>
private static bool State28 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '*')
            continue;

        // '/' goes back to state 1; any other character resumes state 27.
        ctx.NextState = (c == '/') ? 1 : 27;
        return true;
    }

    return true;
}
|
|
#endregion
|
|
|
|
|
|
/// <summary>
/// Reads the next character into input_char.
/// </summary>
/// <returns>
/// true when a character was read; false when NextChar returned -1, in
/// which case end_of_input is also set.
/// </returns>
private bool GetChar ()
{
    input_char = NextChar ();

    if (input_char == -1) {
        end_of_input = true;
        return false;
    }

    return true;
}
|
|
|
|
/// <summary>
/// Returns the pushed-back character if there is one (clearing the
/// slot), otherwise reads a character from the underlying reader.
/// </summary>
private int NextChar ()
{
    // 0 marks an empty push-back slot.
    if (input_buffer == 0)
        return reader.Read ();

    int pending = input_buffer;
    input_buffer = 0;
    return pending;
}
|
|
|
|
/// <summary>
/// Runs state handlers until one of them completes a token.
/// </summary>
/// <returns>
/// true when a token is available through Token and StringValue; false
/// when the end of the input was reached first.
/// </returns>
/// <exception cref="JsonException">
/// A handler rejected the character it read.
/// </exception>
public bool NextToken ()
{
    fsm_context.Return = false;

    for (;;) {
        StateHandler handler = fsm_handler_table[state - 1];

        if (! handler (fsm_context))
            throw new JsonException (input_char);

        if (end_of_input)
            return false;

        if (! fsm_context.Return) {
            // No token yet: just follow the transition.
            state = fsm_context.NextState;
            continue;
        }

        // Publish the accumulated text and reset the buffer.
        string_value = string_buffer.ToString ();
        string_buffer.Length = 0;

        // The token kind belongs to the state that produced it, so look
        // it up before switching states. Single-character tokens are
        // reported as the character itself.
        int kind = fsm_return_table[state - 1];
        token = (kind == (int) ParserToken.Char) ? input_char : kind;

        state = fsm_context.NextState;
        return true;
    }
}
|
|
|
|
/// <summary>
/// Pushes the current character back so that the next NextChar call
/// returns it again. Only one character can be held at a time.
/// </summary>
private void UngetChar ()
{
    this.input_buffer = this.input_char;
}
|
|
}
|
|
}
|