1 /++
2 JSON Parsing API
3 
4 Copyright: Tamedia Digital, 2016-2017
5 
6 Authors: Ilia Ki
7 
8 License: MIT
9 
10 Macros:
11 SUBMODULE = $(LINK2 asdf_$1.html, asdf.$1)
12 SUBREF = $(LINK2 asdf_$1.html#.$2, $(TT $2))$(NBSP)
13 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
14 T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4))
15 +/
16 module asdf.jsonparser;
17 
18 import asdf.asdf;
19 import asdf.outputarray;
20 import std.experimental.allocator.gc_allocator;
21 import std.meta;
22 import std.range.primitives;
23 import std.traits;
24 import std.typecons;
25 import mir.serde: SerdeException;
26 
// Compiler-specific setup.
// On LDC, `minsize` is a real optimisation-strategy attribute and, when the
// target supports SSE4.2, the SIMD intrinsics are imported and the `SSE42`
// version identifier is set (it selects the vectorised scanning loops below).
version(LDC)
{
    import ldc.attributes: optStrategy;
    enum minsize = optStrategy("minsize");

    static if (__traits(targetHasFeature, "sse4.2"))
    {
        import core.simd;
        import ldc.simd;
        import ldc.gccbuiltins_x86;
        version = SSE42;
    }
}
else
{
    // Placeholder symbol so that `@minsize` still compiles on other compilers.
    enum minsize;
}
44 
// `X86_Any` is set for both 64-bit and 32-bit x86 targets. The parser checks it
// before reading or writing a `uint` through a cast byte pointer.
version(X86_64)
    version = X86_Any;
else
version(X86)
    version = X86_Any;

// Type of the shared GC allocator instance used by the convenience overloads.
private alias ASDFGCAllocator = typeof(GCAllocator.instance);
52 
53 /++
54 Parses json value
55 Params:
56     chunks = input range composed of elements type of `const(ubyte)[]`.
57         `chunks` can use the same buffer for each chunk.
58     initLength = initial output buffer length. Minimum value is 32.
59 Returns:
60     ASDF value
61 +/
62 Asdf parseJson(
63     Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
64     Flag!"spaces" spaces = Yes.spaces,
65     Chunks)
66     (Chunks chunks, size_t initLength = 32)
67     if(is(ElementType!Chunks : const(ubyte)[]))
68 {
69     enum assumeValid = false;
70     auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(ASDFGCAllocator.instance, chunks);
71     return parseJson(parser);
72 }
73 
74 ///
75 unittest
76 {
77     import std.range: chunks;
78     auto text = cast(const ubyte[])`true `;
79     auto ch = text.chunks(3);
80     assert(ch.parseJson(32).data == [1]);
81 }
82 
83 
84 /++
85 Parses json value
86 Params:
87     str = input string
88     allocator = (optional) memory allocator
89 Returns:
90     ASDF value
91 +/
92 Asdf parseJson(
93     Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
94     Flag!"spaces" spaces = Yes.spaces,
95     Flag!"assumeValid" assumeValid = No.assumeValid,
96     Allocator,
97     )
98     (in char[] str, auto ref Allocator allocator)
99 {
100     auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(allocator, str);
101     return parseJson(parser);
102 }
103 
104 
105 ///
106 @system unittest {
107     import std.experimental.allocator.mallocator: Mallocator;
108     import std.experimental.allocator.showcase: StackFront;
109 
110     StackFront!(1024, Mallocator) allocator;
111     auto json = parseJson(`{"ak": {"sub": "subval"} }`, allocator);
112     assert(json["ak", "sub"] == "subval");
113 }
114 
/// Faulty location
pure unittest
{
    import asdf;
    bool caught = false;
    try
    {
        auto data = `[1, 2, ]`.parseJson;
    }
    catch(AsdfSerdeException e)
    {
        /// zero based index
        assert(e.location == 7);
        caught = true;
    }
    // Parsing the trailing comma must have failed.
    assert(caught);
}
132 
/// ditto
Asdf parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    )
    (in char[] str)
{
    // Same as the allocator overload, with the shared GC allocator instance.
    auto gcParser = jsonParser!(includingNewLine, spaces, assumeValid)(ASDFGCAllocator.instance, str);
    return parseJson(gcParser);
}
144 
145 ///
146 unittest
147 {
148     assert(`{"ak": {"sub": "subval"} }`.parseJson["ak", "sub"] == "subval");
149 }
150 
151 
// Runs `parser` to completion and wraps its output buffer in an `Asdf` value.
// Throws `AsdfSerdeException` carrying the parser's message and the error
// location when `parse` reports a non-zero status.
private Asdf parseJson(Parser)(ref Parser parser) {
    size_t errorOffset;
    if (!parser.parse(errorOffset))
        return Asdf(parser.result);
    throw new AsdfSerdeException(parser.lastError, errorOffset);
}
158 
159 
// Legacy overload: `initBufferLength` is ignored and the call is forwarded to
// the current `parseJsonByLine`, which skips invalid lines.
deprecated("please remove the initBufferLength argument (latest)")
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Input)
    (Input input, sizediff_t initBufferLength)
{
    enum skipInvalid = No.throwOnInvalidLines;
    return .parseJsonByLine!(spaces, skipInvalid, Input)(input);
}
168 
169 /++
170 Parses JSON value in each line from a Range of buffers.
171 Params:
172     spaces = adds support for spaces beetwen json tokens. Default value is Yes.
173     throwOnInvalidLines = throws an $(LREF SerdeException) on invalid lines if Yes and ignore invalid lines if No. Default value is No.
174     input = input range composed of elements type of `const(ubyte)[]` or string / const(char)[].
175         `chunks` can use the same buffer for each chunk.
176 Returns:
177     Input range composed of ASDF values. Each value uses the same internal buffer.
178 +/
179 auto parseJsonByLine(
180     Flag!"spaces" spaces = Yes.spaces,
181     Flag!"throwOnInvalidLines" throwOnInvalidLines = No.throwOnInvalidLines,
182     Input)
183     (Input input)
184 {
185     alias Parser = JsonParser!(false, cast(bool)spaces, false, ASDFGCAllocator, Input);
186     struct ByLineValue
187     {
188         Parser parser;
189         private bool _empty, _nextEmpty;
190 
191         void popFront()
192         {
193             for(;;)
194             {
195                 assert(!empty);
196                 if(_nextEmpty)
197                 {
198                     _empty = true;
199                     return;
200                 }
201                 // parser.oa.shift = 0;
202                 parser.dataLength = 0;
203                 auto error = parser.parse;
204                 if(!error)
205                 {
206                     auto t = parser.skipSpaces_;
207                     if(t != '\n' && t != 0)
208                     {
209                         error = AsdfErrorCode.unexpectedValue;
210                         parser._lastError = "expected new line or end of input";
211                     }
212                     else
213                     if(t == 0)
214                     {
215                         _nextEmpty = true;
216                         return;
217                     }
218                     else
219                     {
220                         parser.skipNewLine;
221                         _nextEmpty = !parser.skipSpaces_;
222                         return;
223                     }
224                 }
225                 static if (throwOnInvalidLines)
226                     throw new SerdeException(parser.lastError);
227                 else
228                     parser.skipLine();
229             }
230         }
231 
232         auto front() @property
233         {
234             assert(!empty);
235             return Asdf(parser.result);
236         }
237 
238         bool empty()
239         {
240             return _empty;
241         }
242     }
243     ByLineValue ret;
244     if(input.empty)
245     {
246         ret._empty = ret._nextEmpty = true;
247     }
248     else
249     {
250         ret = ByLineValue(Parser(ASDFGCAllocator.instance, input));
251         ret.popFront;
252     }
253     return ret;
254 }
255 
// Branch-prediction hint `_expect(val, expected_val)`.
// On LDC it is an alias of the `llvm_expect` intrinsic; elsewhere a portable
// fallback is provided.
version(LDC)
{
    public import ldc.intrinsics: _expect = llvm_expect;
}
else
{
    // Fallback: returns `val` unchanged; `expected_val` is ignored.
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        return val;
    }
}
267 
/// Status codes returned by `JsonParser.parse`.
enum AsdfErrorCode
{
    /// No error (the zero / default value, so it tests as `false`).
    success,
    /// NOTE(review): by its name, the input ended before the value was complete — confirm in `parse`.
    unexpectedEnd,
    /// A token appeared where it is not allowed, e.g. data after a value on the same line.
    unexpectedValue,
}
274 
// Per-byte classification table, indexed by byte value. Bits:
//   1 - plain string character (tested by `isPlainJsonCharacter`);
//       cleared for bytes 0x00-0x1F, '"' and '\\'
//   2 - whitespace including '\n' (tested by `isJsonWhitespace`)
//   4 - whitespace excluding '\n' (tested by `isJsonLineWhitespace`)
//   8 - number character: '+', '-', '.', digits, 'e', 'E' (tested by `isJsonNumber`)
// Each row covers 16 byte values; the trailing comment is the high hex digit.
private __gshared immutable ubyte[256] parseFlags = [
 // 0 1 2 3 4 5 6 7   8 9 A B C D E F
    0,0,0,0,0,0,0,0,  0,6,2,0,0,6,0,0, // 0
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, // 1
    7,1,0,1,1,1,1,1,  1,1,1,9,1,9,9,1, // 2
    9,9,9,9,9,9,9,9,  9,9,1,1,1,1,1,1, // 3

    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 4
    1,1,1,1,1,1,1,1,  1,1,1,1,0,1,1,1, // 5
    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 6
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1, // 7

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
];
297 
// Hexadecimal digit lookup, indexed by byte value: '0'-'9' map to 0-9,
// 'A'-'F' and 'a'-'f' map to 10-15, every other byte maps to -1.
// Used by `readUnicode` to decode the four hex digits it reads.
private __gshared immutable byte[256] uniFlags = [
 //  0  1  2  3  4  5  6  7    8  9  A  B  C  D  E  F
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 0
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 1
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 2
     0, 1, 2, 3, 4, 5, 6, 7,   8, 9,-1,-1,-1,-1,-1,-1, // 3

    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 4
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 5
    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 6
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 7

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
];
320 
321 
// True when byte `c` has bit 1 set in `parseFlags`, i.e. it can be copied
// verbatim while scanning a string (not a quote, backslash or control byte).
pragma(inline, true)
bool isPlainJsonCharacter()(size_t c)
{
    enum ubyte plainMask = 1;
    return (parseFlags[c] & plainMask) == plainMask;
}
327 
// True when byte `c` has bit 2 set in `parseFlags`: tab, '\n', '\r' or space.
pragma(inline, true)
bool isJsonWhitespace()(size_t c)
{
    enum ubyte whitespaceMask = 2;
    return (parseFlags[c] & whitespaceMask) == whitespaceMask;
}
333 
// True when byte `c` has bit 4 set in `parseFlags`: tab, '\r' or space.
// Unlike `isJsonWhitespace`, '\n' is not included.
pragma(inline, true)
bool isJsonLineWhitespace()(size_t c)
{
    enum ubyte lineWhitespaceMask = 4;
    return (parseFlags[c] & lineWhitespaceMask) == lineWhitespaceMask;
}
339 
// True when byte `c` has bit 8 set in `parseFlags`: a digit or one of
// '+', '-', '.', 'e', 'E'.
pragma(inline, true)
bool isJsonNumber()(size_t c)
{
    enum ubyte numberMask = 8;
    return (parseFlags[c] & numberMask) == numberMask;
}
345 
// Returns `t` re-typed as the same function pointer / delegate with the `pure`
// attribute added. No check is made that the callee really is pure.
package auto assumePure(T)(T t)
    if (isFunctionPointer!T || isDelegate!T)
{
    alias PureT = SetFunctionAttributes!(
        T,
        functionLinkage!T,
        functionAttributes!T | FunctionAttribute.pure_);
    return cast(PureT) t;
}
352 
// Calls `fn(args)` through a pointer that has been re-typed as `pure`,
// so that impure functions can be invoked from `pure` code.
package auto callPure(alias fn,T...)(T args)
{
    return assumePure(&fn)(args);
}
358 
359 /+
360 Fast picewise stack
361 +/
362 private struct Stack
363 {
364     import core.stdc.stdlib: cmalloc = malloc, cfree = free;
365     @disable this(this);
366 
367     struct Node
368     {
369         enum length = 32; // 2 power
370         Node* prev;
371         size_t* buff;
372     }
373 
374     size_t[Node.length] buffer = void;
375     size_t length = 0;
376     Node node;
377 
378 pure:
379 
380     void push()(size_t value)
381     {
382         version(LDC)
383             pragma(inline, true);
384         immutable local = length++ & (Node.length - 1);
385         if (local)
386         {
387             node.buff[local] = value;
388         }
389         else
390         if (length == 1)
391         {
392             node = Node(null, buffer.ptr);
393             buffer[0] = value;
394         }
395         else
396         {
397             auto prevNode = cast(Node*) callPure!cmalloc(Node.sizeof);
398             *prevNode = node;
399             node.prev = prevNode;
400             node.buff = cast(size_t*) callPure!cmalloc(Node.length * size_t.sizeof);
401             node.buff[0] = value;
402         }
403     }
404 
405     size_t top()()
406     {
407         version(LDC)
408             pragma(inline, true);
409         assert(length);
410         immutable local = (length - 1) & (Node.length - 1);
411         return node.buff[local];
412     }
413 
414     size_t pop()()
415     {
416         version(LDC)
417             pragma(inline, true);
418         assert(length);
419         immutable local = --length & (Node.length - 1);
420         immutable ret = node.buff[local];
421         if (local == 0)
422         {
423             if (node.buff != buffer.ptr)
424             {
425                 callPure!cfree(node.buff);
426                 node = *node.prev;
427             }
428         }
429         return ret;
430     }
431 
432     pragma(inline, false)
433     void free()()
434     {
435         version(LDC)
436             pragma(inline, true);
437         if (node.buff is null)
438             return;
439         while(node.buff !is buffer.ptr)
440         {
441             callPure!cfree(node.buff);
442             node = *node.prev;
443         }
444     }
445 }
446 
unittest
{
    // 99 values span several 32-element blocks, so both the in-situ buffer
    // and the heap blocks are exercised on the way up and on the way down.
    enum limit = 100;
    Stack s;
    assert(s.length == 0);
    for (size_t n = 1; n < limit; ++n)
    {
        s.push(n);
        assert(s.length == n);
        assert(s.top() == n);
    }
    for (size_t n = limit - 1; n >= 1; --n)
    {
        assert(s.length == n);
        assert(s.pop() == n);
    }
    assert(s.length == 0);
}
464 
465 ///
466 auto jsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])(auto ref Allocator allocator, Input input) {
467     return JsonParser!(includingNewLine, hasSpaces, assumeValid, Allocator, Input)(allocator, input);
468 }
469 
470 ///
471 struct JsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])
472 {
473 
    // Output buffer that receives the binary ASDF representation.
    ubyte[] data;
    // Allocator used to allocate and grow `data`; stored by address.
    Allocator* allocator;
    // JSON source: a character array, or a range of byte chunks.
    Input input;
    // Unconsumed part of the current chunk; for character-array input it is `input` itself.
    static if (chunked)
        ubyte[] front;
    else
        alias front = input;
    // Number of bytes of `data` occupied by the last successfully parsed value (see `result`).
    size_t dataLength;

    // Message describing the most recent error (see `lastError`).
    string _lastError;

    // True when `Input` is not convertible to `const(char)[]`, i.e. it is consumed chunk by chunk.
    enum bool chunked = !is(Input : const(char)[]);
486 
487     this(ref Allocator allocator, Input input)
488 
489     {
490         this.input = input;
491         this.allocator = &allocator;
492     }
493 
494     bool prepareInput_()()
495     {
496         static if (chunked)
497         {
498             if (front.length == 0)
499             {
500                 assert(!input.empty);
501                 input.popFront;
502                 if (input.empty)
503                     return false;
504                 front = cast(typeof(front)) input.front;
505             }
506         }
507         return front.length != 0;
508     }
509 
510     void skipNewLine()()
511     {
512         assert(front.length);
513         assert(front[0] == '\n');
514         front = front[1 .. $];
515     }
516 
517     char skipSpaces_()()
518     {
519         static if (hasSpaces)
520         for(;;)
521         {
522             if (prepareInput_ == false)
523                 return 0;
524             static if (includingNewLine)
525                 alias isWhite = isJsonWhitespace;
526             else
527                 alias isWhite = isJsonLineWhitespace;
528             if (isWhite(front[0]))
529             {
530                 front = front[1 .. $];
531                 continue;
532             }
533             return front[0];
534         }
535         else
536         {
537             if (prepareInput_ == false)
538                 return 0;
539             return front[0];
540         }
541     }
542 
543     bool skipLine()()
544     {
545         for(;;)
546         {
547             if (_expect(!prepareInput_, false))
548                 return false;
549             auto c = front[0];
550             front = front[1 .. $];
551             if (c == '\n')
552                 return true;
553         }
554     }
555 
556     auto result()()
557     {
558         return data[0 .. dataLength];
559     }
560 
561     string lastError()() @property
562     {
563         return _lastError;
564     }
565 
566 
567     AsdfErrorCode parse()
568     {
569         size_t location;
570         return parse(location);
571     }
572 
573     pragma(inline, false)
574     AsdfErrorCode parse(out size_t location)
575     {
576         version(SSE42)
577         {
578             enum byte16 str2E = [
579                 '\u0001', '\u001F',
580                 '\"', '\"',
581                 '\\', '\\',
582                 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'];
583             enum byte16 num2E = ['+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', '\0'];
584             byte16 str2 = str2E;
585             byte16 num2 = num2E;
586         }
587 
588         const(ubyte)* strPtr;
589         const(ubyte)* strEnd;
590         ubyte* dataPtr;
591         ubyte* stringAndNumberShift = void;
592         static if (chunked)
593         {
            // Loads the next chunk into `front`, resets the scan pointers and
            // makes sure the output buffer has room for it.
            // Returns false when the range is exhausted or the chunk is empty.
            bool prepareInput()()
            {
                pragma(inline, false);
                // A non-null `strPtr` means a chunk has already been loaded:
                // account for its length in `location` and advance the range.
                if(strPtr)
                {
                    location += front.length;
                    input.popFront;
                    if (input.empty)
                    {
                        return false;
                    }
                }
                front = cast(typeof(front)) input.front;
                if (front.length == 0)
                    return false;
                strPtr = front.ptr;
                strEnd = front.ptr + front.length;
                // Room is reserved for 6 output bytes per input byte.
                const dataAddLength = front.length * 6;
                const dataLength = dataPtr - data.ptr;
                const dataRequiredLength = dataLength + dataAddLength;
                if (data.length < dataRequiredLength)
                {
                    // Remember the offset so the pointer can be rebased after reallocation.
                    const valueLength = stringAndNumberShift - dataPtr;
                    import std.algorithm.comparison: max;
                    const len = max(data.length * 2, dataRequiredLength);
                    // NOTE(review): the result of `reallocate` is not checked.
                    allocator.reallocate(*cast(void[]*)&data, len);
                    dataPtr = data.ptr + dataLength;
                    stringAndNumberShift = dataPtr + valueLength;
                }
                return true;
            }
625             strPtr = front.ptr;
626             strEnd = front.ptr + front.length;
627         }
628         else
629         {
630             strPtr = cast(const(ubyte)*) input.ptr;
631             strEnd = cast(const(ubyte)*) input.ptr + input.length;
632             enum bool prepareInput = false;
633         }
634 
635         auto rl = (strEnd - strPtr) * 6;
636         if (data.ptr !is null && data.length < rl)
637         {
638             allocator.deallocate(data);
639             data = null;
640         }
641         if (data.ptr is null)
642         {
643             data = cast(ubyte[])allocator.allocate(rl);
644         }
645         dataPtr = data.ptr;
646 
        // Advances `strPtr` past whitespace (when `hasSpaces` is set), pulling
        // in more input through `prepareInput` when the current data runs out.
        // Returns true when `strPtr` points at an unread byte, false at end of input.
        bool skipSpaces()()
        {
            version(LDC)
                pragma(inline, true);
            // '\n' counts as whitespace only when `includingNewLine` is set.
            static if (includingNewLine)
                alias isWhite = isJsonWhitespace;
            else
                alias isWhite = isJsonLineWhitespace;
            // F: loop head, re-entered after each skipped whitespace byte.
            F:
            {
                if (_expect(strEnd != strPtr, true))
                {
                // L: a byte is available at `strPtr`.
                L:
                    static if (hasSpaces)
                    {
                        if (isWhite(strPtr[0]))
                        {
                            strPtr++;
                            goto F;
                        }
                    }
                    return true;
                }
                else
                {
                    // Current data exhausted: try to load more.
                    if (prepareInput)
                        goto L;
                    return false;
                }
            }

        }
679 
680         @minsize
681         int readUnicode()(ref dchar d)
682         {
683             version(LDC)
684                 pragma(inline, true);
685             uint e = 0;
686             size_t i = 4;
687             do
688             {
689                 if (strEnd == strPtr && !prepareInput)
690                     return 1;
691                 int c = uniFlags[*strPtr++];
692                 assert(c < 16);
693                 if (c == -1)
694                     return -1;
695                 assert(c >= 0);
696                 e <<= 4;
697                 e ^= c;
698             }
699             while(--i);
700             d = e;
701             return 0;
702         }
703 
704         Stack stack;
705 
706         typeof(return) retCode;
707         bool currIsKey = void;
708         size_t stackValue = void;
709         goto value;
710 
711 /////////// RETURN
712     ret:
713         front = front[cast(typeof(front.ptr)) strPtr - front.ptr .. $];
714         dataLength = dataPtr - data.ptr;
715         assert(stack.length == 0);
716     ret_final:
717         return retCode;
718 ///////////
719 
720     key:
721         if (!skipSpaces)
722             goto object_key_unexpectedEnd;
723     key_start:
724         if (*strPtr != '"')
725             goto object_key_start_unexpectedValue;
726         currIsKey = true;
727         stringAndNumberShift = dataPtr;
728         // reserve 1 byte for the length
729         dataPtr += 1;
730         goto string;
731     next:
732         if (stack.length == 0)
733             goto ret;
734         {
735             if (!skipSpaces)
736                 goto next_unexpectedEnd;
737             stackValue = stack.top;
738             const isObject = stackValue & 1;
739             auto v = *strPtr++;
740             if (isObject)
741             {
742                 if (v == ',')
743                     goto key;
744                 if (v != '}')
745                     goto next_unexpectedValue;
746             }
747             else
748             {
749                 if (v == ',')
750                     goto value;
751                 if (v != ']')
752                     goto next_unexpectedValue;
753             }
754         }
755     structure_end: {
756         stackValue = stack.pop();
757         const structureShift = stackValue >> 1;
758         const structureLengthPtr = data.ptr + structureShift;
759         const size_t structureLength = dataPtr - structureLengthPtr - 4;
760         if (structureLength > uint.max)
761             goto object_or_array_is_to_large;
762         version(X86_Any)
763             *cast(uint*) structureLengthPtr = cast(uint) structureLength;
764         else
765             *cast(ubyte[4]*) structureLengthPtr = cast(ubyte[4]) cast(uint[1]) [cast(uint) structureLength];
766         goto next;
767     }
768     value:
769         if (!skipSpaces)
770             goto value_unexpectedEnd;
771     value_start:
772         switch(*strPtr)
773         {
774             stringValue:
775             case '"':
776                 currIsKey = false;
777                 *dataPtr++ = Asdf.Kind..string;
778                 stringAndNumberShift = dataPtr;
779                 // reserve 4 byte for the length
780                 dataPtr += 4;
781                 goto string;
782             case '-':
783             case '0':
784             ..
785             case '9': {
786                 *dataPtr++ = Asdf.Kind.number;
787                 stringAndNumberShift = dataPtr;
788                 // reserve 1 byte for the length
789                 dataPtr++; // write the first character
790                 *dataPtr++ = *strPtr++;
791                 for(;;)
792                 {
793                     if (strEnd == strPtr && !prepareInput)
794                         goto number_found;
795                     version(SSE42)
796                     {
797                         while (strEnd >= strPtr + 16)
798                         {
799                             byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
800                             size_t ecx = __builtin_ia32_pcmpistri128(num2, str1, 0x10);
801                             storeUnaligned!byte16(str1, cast(byte*)dataPtr);
802                             strPtr += ecx;
803                             dataPtr += ecx;
804                             if(ecx != 16)
805                                 goto number_found;
806                         }
807                     }
808                     else
809                     {
810                         while(strEnd >= strPtr + 4)
811                         {
812                             char c0 = strPtr[0]; dataPtr += 4;     if (!isJsonNumber(c0)) goto number_found0;
813                             char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isJsonNumber(c1)) goto number_found1;
814                             char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isJsonNumber(c2)) goto number_found2;
815                             char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isJsonNumber(c3)) goto number_found3;
816                             strPtr += 4;         dataPtr[-1] = c3;
817                         }
818                     }
819                     while(strEnd > strPtr)
820                     {
821                         char c0 = strPtr[0]; if (!isJsonNumber(c0)) goto number_found; dataPtr[0] = c0;
822                         strPtr += 1;
823                         dataPtr += 1;
824                     }
825                 }
826             version(SSE42){} else
827             {
828                 number_found3: dataPtr++; strPtr++;
829                 number_found2: dataPtr++; strPtr++;
830                 number_found1: dataPtr++; strPtr++;
831                 number_found0: dataPtr -= 4;
832             }
833             number_found:
834 
835                 auto numberLength = dataPtr - stringAndNumberShift - 1;
836                 if (numberLength > ubyte.max)
837                     goto number_length_unexpectedValue;
838                 *stringAndNumberShift = cast(ubyte) numberLength;
839                 goto next;
840             }
841             case '{':
842                 strPtr++;
843                 *dataPtr++ = Asdf.Kind.object;
844                 stack.push(((dataPtr - data.ptr) << 1) ^ 1);
845                 dataPtr += 4;
846                 if (!skipSpaces)
847                     goto object_first_value_start_unexpectedEnd;
848                 if (*strPtr != '}')
849                     goto key_start;
850                 strPtr++;
851                 goto structure_end;
852             case '[':
853                 strPtr++;
854                 *dataPtr++ = Asdf.Kind.array;
855                 stack.push(((dataPtr - data.ptr) << 1) ^ 0);
856                 dataPtr += 4;
857                 if (!skipSpaces)
858                     goto array_first_value_start_unexpectedEnd;
859                 if (*strPtr != ']')
860                     goto value_start;
861                 strPtr++;
862                 goto structure_end;
863             foreach (name; AliasSeq!("false", "null", "true"))
864             {
865             case name[0]:
866                     if (_expect(strEnd - strPtr >= name.length, true))
867                     {
868                         static if (!assumeValid)
869                         {
870                             version(X86_Any)
871                             {
872                                 enum uint referenceValue =
873                                         (uint(name[$ - 4]) << 0x00) ^
874                                         (uint(name[$ - 3]) << 0x08) ^
875                                         (uint(name[$ - 2]) << 0x10) ^
876                                         (uint(name[$ - 1]) << 0x18);
877                                 if (*cast(uint*)(strPtr + bool(name.length == 5)) != referenceValue)
878                                 {
879                                     static if (name == "true")
880                                         goto true_unexpectedValue;
881                                     else
882                                     static if (name == "false")
883                                         goto false_unexpectedValue;
884                                     else
885                                         goto null_unexpectedValue;
886                                 }
887                             }
888                             else
889                             {
890                                 char[name.length - 1] c = void;
891                                 import std.range: iota;
892                                 foreach (i; aliasSeqOf!(iota(1, name.length)))
893                                     c[i - 1] = strPtr[i];
894                                 foreach (i; aliasSeqOf!(iota(1, name.length)))
895                                 {
896                                     if (c[i - 1] != name[i])
897                                     {
898 
899                                         static if (name == "true")
900                                             goto true_unexpectedValue;
901                                         else
902                                         static if (name == "false")
903                                             goto false_unexpectedValue;
904                                         else
905                                             goto null_unexpectedValue;
906                                     }
907                                 }
908                             }
909                         }
910                         static if (name == "null")
911                             *dataPtr++ = Asdf.Kind.null_;
912                         else
913                         static if (name == "false")
914                             *dataPtr++ = Asdf.Kind.false_;
915                         else
916                             *dataPtr++ = Asdf.Kind.true_;
917                         strPtr += name.length;
918                         goto next;
919                     }
920                     else
921                     {
922                         strPtr += 1;
923                         foreach (i; 1 .. name.length)
924                         {
925                             if (strEnd == strPtr && !prepareInput)
926                             {
927                                 static if (name == "true")
928                                     goto true_unexpectedEnd;
929                                 else
930                                 static if (name == "false")
931                                     goto false_unexpectedEnd;
932                                 else
933                                     goto null_unexpectedEnd;
934                             }
935                             static if (!assumeValid)
936                             {
937                                 if (_expect(strPtr[0] != name[i], false))
938                                 {
939                                     static if (name == "true")
940                                         goto true_unexpectedValue;
941                                     else
942                                     static if (name == "false")
943                                         goto false_unexpectedValue;
944                                     else
945                                         goto null_unexpectedValue;
946                                 }
947                             }
948                             strPtr++;
949                         }
950                         static if (name == "null")
951                             *dataPtr++ = Asdf.Kind.null_;
952                         else
953                         static if (name == "false")
954                             *dataPtr++ = Asdf.Kind.false_;
955                         else
956                             *dataPtr++ = Asdf.Kind.true_;
957                         goto next;
958                     }
959             }
960             default: goto value_unexpectedStart;
961         }
962 
963     string:
964         debug assert(*strPtr == '"', "Internal ASDF logic error. Please report an issue.");
965         strPtr += 1;
966 
967     StringLoop: {
968         for(;;)
969         {
970             if (strEnd == strPtr && !prepareInput)
971                 goto string_unexpectedEnd;
972             version(SSE42)
973             {
974                 while (strEnd >= strPtr + 16)
975                 {
976                     byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
977                     size_t ecx = __builtin_ia32_pcmpistri128(str2, str1, 0x04);
978                     storeUnaligned!byte16(str1, cast(byte*)dataPtr);
979                     strPtr += ecx;
980                     dataPtr += ecx;
981                     if(ecx != 16)
982                         goto string_found;
983                 }
984             }
985             else
986             {
987                 while(strEnd >= strPtr + 4)
988                 {
989                     char c0 = strPtr[0]; dataPtr += 4;     if (!isPlainJsonCharacter(c0)) goto string_found0;
990                     char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isPlainJsonCharacter(c1)) goto string_found1;
991                     char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isPlainJsonCharacter(c2)) goto string_found2;
992                     char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isPlainJsonCharacter(c3)) goto string_found3;
993                     strPtr += 4;         dataPtr[-1] = c3;
994                 }
995             }
996             while(strEnd > strPtr)
997             {
998                 char c0 = strPtr[0]; if (!isPlainJsonCharacter(c0)) goto string_found; dataPtr[0] = c0;
999                 strPtr += 1;
1000                 dataPtr += 1;
1001             }
1002         }
1003         version(SSE42) {} else
1004         {
1005             string_found3: dataPtr++; strPtr++;
1006             string_found2: dataPtr++; strPtr++;
1007             string_found1: dataPtr++; strPtr++;
1008             string_found0: dataPtr -= 4;
1009         }
1010         string_found:
1011 
1012         uint c = strPtr[0];
1013         if (c == '\"')
1014         {
1015             strPtr += 1;
1016             if (currIsKey)
1017             {
1018                 auto stringLength = dataPtr - stringAndNumberShift - 1;
1019                 if (stringLength > ubyte.max)
1020                     goto key_is_to_large;
1021                 *cast(ubyte*)stringAndNumberShift = cast(ubyte) stringLength;
1022                 if (!skipSpaces)
1023                     goto failed_to_read_after_key;
1024                 if (*strPtr != ':')
1025                     goto unexpected_character_after_key;
1026                 strPtr++;
1027                 goto value;
1028             }
1029             else
1030             {
1031                 auto stringLength = dataPtr - stringAndNumberShift - 4;
1032                 if (stringLength > uint.max)
1033                     goto string_length_is_too_large;
1034                 version(X86_Any)
1035                     *cast(uint*)stringAndNumberShift = cast(uint) stringLength;
1036                 else
1037                     *cast(ubyte[4]*)stringAndNumberShift = cast(ubyte[4]) cast(uint[1]) [cast(uint) stringLength];
1038                 goto next;
1039             }
1040         }
1041         if (c == '\\')
1042         {
1043             strPtr += 1;
1044             if (strEnd == strPtr && !prepareInput)
1045                 goto string_unexpectedEnd;
1046             c = *strPtr++;
1047             switch(c)
1048             {
1049                 case '/' :
1050                 case '\"':
1051                 case '\\':
1052                     *dataPtr++ = cast(ubyte) c;
1053                     goto StringLoop;
1054                 case 'b' : *dataPtr++ = '\b'; goto StringLoop;
1055                 case 'f' : *dataPtr++ = '\f'; goto StringLoop;
1056                 case 'n' : *dataPtr++ = '\n'; goto StringLoop;
1057                 case 'r' : *dataPtr++ = '\r'; goto StringLoop;
1058                 case 't' : *dataPtr++ = '\t'; goto StringLoop;
1059                 case 'u' :
1060                     uint wur = void;
1061                     dchar d = void;
1062                     if (auto r = (readUnicode(d)))
1063                     {
1064                         if (r == 1)
1065                             goto string_unexpectedEnd;
1066                         goto string_unexpectedValue;
1067                     }
1068                     if (_expect(0xD800 <= d && d <= 0xDFFF, false))
1069                     {
1070                         if (d >= 0xDC00)
1071                             goto string_unexpectedValue;
1072                         if (strEnd == strPtr && !prepareInput)
1073                             goto string_unexpectedEnd;
1074                         if (*strPtr++ != '\\')
1075                             goto string_unexpectedValue;
1076                         if (strEnd == strPtr && !prepareInput)
1077                             goto string_unexpectedEnd;
1078                         if (*strPtr++ != 'u')
1079                             goto string_unexpectedValue;
1080                         d = (d & 0x3FF) << 10;
1081                         dchar trailing;
1082                         if (auto r = (readUnicode(trailing)))
1083                         {
1084                             if (r == 1)
1085                                 goto string_unexpectedEnd;
1086                             goto string_unexpectedValue;
1087                         }
1088                         if (!(0xDC00 <= trailing && trailing <= 0xDFFF))
1089                             goto invalid_trail_surrogate;
1090                         {
1091                             d |= trailing & 0x3FF;
1092                             d += 0x10000;
1093                         }
1094                     }
1095                     if (!(d < 0xD800 || (d > 0xDFFF && d <= 0x10FFFF)))
1096                         goto invalid_utf_value;
1097                     encodeUTF8(d, dataPtr);
1098                     goto StringLoop;
1099                 default: goto string_unexpectedValue;
1100             }
1101         }
1102         goto string_unexpectedValue;
1103     }
1104 
1105     ret_error:
1106         location += strPtr - cast(const(ubyte)*)front.ptr;
1107         dataLength = dataPtr - data.ptr;
1108         stack.free();
1109         goto ret_final;
1110     unexpectedEnd:
1111         retCode = AsdfErrorCode.unexpectedEnd;
1112         goto ret_error;
1113     unexpectedValue:
1114         retCode = AsdfErrorCode.unexpectedValue;
1115         goto ret_error;
1116     object_key_unexpectedEnd:
1117         _lastError = "unexpected end of object key";
1118         goto unexpectedEnd;
1119     object_key_start_unexpectedValue:
1120         _lastError = "expected '\"' when start parsing object key";
1121         goto unexpectedValue;
1122     key_is_to_large:
1123         _lastError = "key length is limited to 255 characters";
1124         goto unexpectedValue;
1125     object_or_array_is_to_large:
1126         _lastError = "object or array serialized size is limited to 2^32-1";
1127         goto unexpectedValue;
1128     next_unexpectedEnd:
1129         stackValue = stack.top;
1130         _lastError = (stackValue & 1) ? "unexpected end when parsing object" : "unexpected end when parsing array";
1131         goto unexpectedEnd;
1132     next_unexpectedValue:
1133         stackValue = stack.top;
1134         _lastError = (stackValue & 1) ? "expected ',' or `}` when parsing object" : "expected ',' or `]` when parsing array";
1135         goto unexpectedValue;
1136     value_unexpectedStart:
1137         _lastError = "unexpected character when start parsing JSON value";
1138         goto unexpectedEnd;
1139     value_unexpectedEnd:
1140         _lastError = "unexpected end when start parsing JSON value";
1141         goto unexpectedEnd;
1142     number_length_unexpectedValue:
1143         _lastError = "number length is limited to 255 characters";
1144         goto unexpectedValue;
1145     object_first_value_start_unexpectedEnd:
1146         _lastError = "unexpected end of input data after '{'";
1147         goto unexpectedEnd;
1148     array_first_value_start_unexpectedEnd:
1149         _lastError = "unexpected end of input data after '['";
1150         goto unexpectedEnd;
1151     false_unexpectedEnd:
1152         _lastError = "unexpected end when parsing 'false'";
1153         goto unexpectedEnd;
1154     false_unexpectedValue:
1155         _lastError = "unexpected character when parsing 'false'";
1156         goto unexpectedValue;
1157     null_unexpectedEnd:
1158         _lastError = "unexpected end when parsing 'null'";
1159         goto unexpectedEnd;
1160     null_unexpectedValue:
1161         _lastError = "unexpected character when parsing 'null'";
1162         goto unexpectedValue;
1163     true_unexpectedEnd:
1164         _lastError = "unexpected end when parsing 'true'";
1165         goto unexpectedEnd;
1166     true_unexpectedValue:
1167         _lastError = "unexpected character when parsing 'true'";
1168         goto unexpectedValue;
1169     string_unexpectedEnd:
1170         _lastError = "unexpected end when parsing string";
1171         goto unexpectedEnd;
1172     string_unexpectedValue:
1173         _lastError = "unexpected character when parsing string";
1174         goto unexpectedValue;
1175     failed_to_read_after_key:
1176         _lastError = "unexpected end after object key";
1177         goto unexpectedEnd;
1178     unexpected_character_after_key:
1179         _lastError = "unexpected character after key";
1180         goto unexpectedValue;
1181     string_length_is_too_large:
1182         _lastError = "string size is limited to 2^32-1";
1183         goto unexpectedValue;
1184     invalid_trail_surrogate:
1185         _lastError = "invalid UTF-16 trail surrogate";
1186         goto unexpectedValue;
1187     invalid_utf_value:
1188         _lastError = "invalid UTF value";
1189         goto unexpectedValue;
1190     }
1191 }
1192 
// Parses a nested array written with extra spaces around tokens and compares
// its string conversion with the same document written without those spaces.
unittest
{
    import mir.conv;
    immutable string expected = `[true,123,[false,123.0,"123211"],"3e23e"]`;
    auto parsed = parseJson(` [ true, 123 , [ false, 123.0 , "123211" ], "3e23e" ] `);
    auto rendered = to!string(parsed);
    assert(rendered == expected);
}
1201 
/++
Writes the UTF-8 encoding of `c` to the memory at `ptr` and moves `ptr`
forward by the number of bytes written (1 to 4).

Neither the value of `c` nor the space available behind `ptr` is checked here;
a value of `0x20_0000` or above does not fit the four-byte form.

Params:
    c = code point to encode
    ptr = output cursor; on return it points just past the last byte written
+/
pragma(inline, true)
void encodeUTF8()(dchar c, ref ubyte* ptr)
{
    enum uint tailTag  = 0x80; // continuation byte marker: 10xxxxxx
    enum uint tailBits = 0x3F; // six payload bits per continuation byte
    const uint cp = c;

    if (cp <= 0x7F)
    {
        // 0xxxxxxx
        *ptr++ = cast(ubyte) cp;
    }
    else if (cp <= 0x7FF)
    {
        // 110xxxxx 10xxxxxx
        *ptr++ = cast(ubyte) ((cp >> 6) | 0xC0);
        *ptr++ = cast(ubyte) ((cp & tailBits) | tailTag);
    }
    else if (cp <= 0xFFFF)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        *ptr++ = cast(ubyte) ((cp >> 12) | 0xE0);
        *ptr++ = cast(ubyte) (((cp >> 6) & tailBits) | tailTag);
        *ptr++ = cast(ubyte) ((cp & tailBits) | tailTag);
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // No upper bound is enforced (the original kept `assert(c < 0x200000)` commented out).
        *ptr++ = cast(ubyte) ((cp >> 18) | 0xF0);
        *ptr++ = cast(ubyte) (((cp >> 12) & tailBits) | tailTag);
        *ptr++ = cast(ubyte) (((cp >> 6) & tailBits) | tailTag);
        *ptr++ = cast(ubyte) ((cp & tailBits) | tailTag);
    }
}
1235 
// The D escape `\u007F` places a raw DEL byte (0x7F) inside the JSON string.
// Nothing is asserted about the result; the test only runs the parser on it.
unittest
{
    auto parsed = parseJson("[\"\u007F\"]");
}
1240 
// A JSON `\uD801\uDC37` surrogate-pair escape must parse to the same value as
// a string that already contains the code point U+10437.
unittest
{
    auto fromEscapes = parseJson(`"\uD801\uDC37"`);
    auto fromLiteral = parseJson("\"\U00010437\"");
    assert(fromEscapes == fromLiteral);
}
1246 
// A plain string without escapes must parse to the same value when the input
// is given whole and when it is split into chunks of every length from 1 up to
// (but not including) the input length.
unittest
{
    import std.string;
    import std.range;
    static immutable json = `"1234567890qwertyuiopasdfghjklzxcvbnm"`;
    // Expected value: the text between the surrounding quotes.
    auto expected = Asdf(json[1 .. $ - 1]);
    assert(expected == parseJson(json));

    auto raw = json.representation;
    foreach (chunkLength; 1 .. json.length)
    {
        auto parsed = parseJson(chunks(raw, chunkLength));
        assert(expected == parsed);
    }
}
1260 
// The single-character JSON escapes used here (\t \r \f \b \" \\ \/) must
// decode to the corresponding characters, both for whole input and for input
// split into chunks of every length from 1 up to (but not including) the
// input length.
unittest
{
    import std.string;
    import std.range;
    static immutable json = `"\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/"`;
    auto expected = Asdf("\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/");
    assert(expected == parseJson(json));

    auto raw = json.representation;
    foreach (chunkLength; 1 .. json.length)
    {
        auto parsed = parseJson(chunks(raw, chunkLength));
        assert(expected == parsed);
    }
}
1271 
// The JSON escape `\u0026` must decode to the single character '&'.
unittest
{
    import std.string;
    import std.range;
    static immutable json = `"\u0026"`;
    auto expected = Asdf("&");
    auto parsed = parseJson(json);
    assert(expected == parsed);
}
1280 
// Sample JSON document, declared only when the module is compiled with unittests.
// It is written as a D token string (`q{ ... }`): the value is the text between
// the outer braces, i.e. the JSON object below followed by a newline.
// NOTE(review): presumably consumed by unittests elsewhere in this file — its
// users are not visible from this section; confirm before changing the content.
version(unittest) immutable string test_data =
q{{
  "coordinates": [
    {
      "x": 0.29811521136061625,
      "y": 0.47980763779335556,
      "z": 0.1704431616620138,
      "name": "tqxvsg 2780",
      "opts": {
        "1": [
          1,
          true
        ]
      }
    }
  ],
  "info": "some info"
}
};