/++
JSON Parsing API

Copyright: Tamedia Digital, 2016-2017

Authors: Ilia Ki

License: MIT

Macros:
SUBMODULE = $(LINK2 asdf_$1.html, asdf.$1)
SUBREF = $(LINK2 asdf_$1.html#.$2, $(TT $2))$(NBSP)
T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4))
+/
module asdf.jsonparser;

import asdf.asdf;
import asdf.outputarray;
import std.experimental.allocator.gc_allocator;
import std.meta;
import std.range.primitives;
import std.traits;
import std.typecons;
import mir.serde: SerdeException;

version(LDC)
{
    import ldc.attributes: optStrategy;
    enum minsize = optStrategy("minsize");

    static if (__traits(targetHasFeature, "sse4.2"))
    {
        import core.simd;
        import ldc.simd;
        import ldc.gccbuiltins_x86;
        version = SSE42;
    }
}
else
{
    // Placeholder so that `@minsize` is accepted by non-LDC compilers.
    enum minsize;
}

version(X86_64)
    version = X86_Any;
else
version(X86)
    version = X86_Any;

private alias ASDFGCAllocator = typeof(GCAllocator.instance);

/++
Parses json value
Params:
    chunks = input range composed of elements type of `const(ubyte)[]`.
        `chunks` can use the same buffer for each chunk.
    initLength = initial output buffer length. Minimum value is 32.
        NOTE: this argument is accepted for API compatibility but is not read
        by the current implementation.
Returns:
    ASDF value
Throws:
    `AsdfSerdeException` (with the zero based error location) if the input is not valid JSON.
+/
Asdf parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Chunks)
    (Chunks chunks, size_t initLength = 32)
    if(is(ElementType!Chunks : const(ubyte)[]))
{
    enum assumeValid = false;
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(ASDFGCAllocator.instance, chunks);
    return parseJson(parser);
}

///
unittest
{
    import std.range: chunks;
    auto text = cast(const ubyte[])`true `;
    auto ch = text.chunks(3);
    assert(ch.parseJson(32).data == [1]);
}


/++
Parses json value
Params:
    str = input string
    allocator = (optional) memory allocator
Returns:
    ASDF value
Throws:
    `AsdfSerdeException` (with the zero based error location) if the input is not valid JSON.
+/
Asdf parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    Allocator,
    )
    (in char[] str, auto ref Allocator allocator)
{
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(allocator, str);
    return parseJson(parser);
}


///
@system unittest {
    import std.experimental.allocator.mallocator: Mallocator;
    import std.experimental.allocator.showcase: StackFront;

    StackFront!(1024, Mallocator) allocator;
    auto json = parseJson(`{"ak": {"sub": "subval"} }`, allocator);
    assert(json["ak", "sub"] == "subval");
}

/// Faulty location
pure unittest
{
    import asdf;
    try
    {
        auto data = `[1, 2, ]`.parseJson;
    }
    catch(AsdfSerdeException e)
    {
        import std.conv;
        /// zero based index
        assert(e.location == 7);
        return;
    }
    assert(0);
}

/// ditto
Asdf parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    )
    (in char[] str)
{
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(ASDFGCAllocator.instance, str);
    return parseJson(parser);
}

///
unittest
{
    assert(`{"ak": {"sub": "subval"} }`.parseJson["ak", "sub"] == "subval");
}


/+
Runs `parser` once and wraps its output buffer into an `Asdf` value.
Throws `AsdfSerdeException` carrying the parser's last error message and
the error location when `parse` reports a non-success code.
+/
private Asdf parseJson(Parser)(ref Parser parser) {
    size_t location;
    if (parser.parse(location))
        throw new AsdfSerdeException(parser.lastError, location);
    return Asdf(parser.result);
}


deprecated("please remove the initBufferLength argument (latest)")
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Input)
    (Input input, sizediff_t initBufferLength)
{
    // `initBufferLength` is ignored; forwards to the single-argument overload.
    return .parseJsonByLine!(spaces, No.throwOnInvalidLines, Input)(input);
}

/++
Parses JSON value in each line from a Range of buffers.
Params:
    spaces = adds support for spaces between json tokens. Default value is Yes.
    throwOnInvalidLines = throws an $(LREF SerdeException) on invalid lines if Yes and ignore invalid lines if No. Default value is No.
    input = input range composed of elements type of `const(ubyte)[]` or string / const(char)[].
        `chunks` can use the same buffer for each chunk.
Returns:
    Input range composed of ASDF values. Each value uses the same internal buffer.
+/
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"throwOnInvalidLines" throwOnInvalidLines = No.throwOnInvalidLines,
    Input)
    (Input input)
{
    alias Parser = JsonParser!(false, cast(bool)spaces, false, ASDFGCAllocator, Input);
    struct ByLineValue
    {
        Parser parser;
        // `_nextEmpty` is set when the value currently exposed by `front`
        // is the last one; the following `popFront` then sets `_empty`.
        private bool _empty, _nextEmpty;

        void popFront()
        {
            for(;;)
            {
                assert(!empty);
                if(_nextEmpty)
                {
                    _empty = true;
                    return;
                }
                // parser.oa.shift = 0;
                parser.dataLength = 0;
                auto error = parser.parse;
                if(!error)
                {
                    auto t = parser.skipSpaces_;
                    if(t != '\n' && t != 0)
                    {
                        error = AsdfErrorCode.unexpectedValue;
                        parser._lastError = "expected new line or end of input";
                    }
                    else
                    if(t == 0)
                    {
                        _nextEmpty = true;
                        return;
                    }
                    else
                    {
                        parser.skipNewLine;
                        _nextEmpty = !parser.skipSpaces_;
                        return;
                    }
                }
                static if (throwOnInvalidLines)
                    throw new SerdeException(parser.lastError);
                else
                {
                    // Drop the rest of the broken line. If there was no line
                    // terminator, or nothing follows it, the input is exhausted:
                    // finish the range instead of re-parsing an empty input
                    // forever (string input) or popping an empty range
                    // (chunked input).
                    if (!parser.skipLine() || !parser.skipSpaces_)
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        auto front() @property
        {
            assert(!empty);
            return Asdf(parser.result);
        }

        bool empty()
        {
            return _empty;
        }
    }
    ByLineValue ret;
    if(input.empty)
    {
        ret._empty = ret._nextEmpty = true;
    }
    else
    {
        ret = ByLineValue(Parser(ASDFGCAllocator.instance, input));
        ret.popFront;
    }
    return ret;
}

version(LDC)
{
    public import ldc.intrinsics: _expect = llvm_expect;
}
else
{
    // Fallback for compilers without a branch-prediction hint: returns `val` unchanged.
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        return val;
    }
}

/// Result codes returned by `JsonParser.parse`.
enum AsdfErrorCode
{
    success,
    unexpectedEnd,
    unexpectedValue,
}

/+
Per-byte classification table. Bits, as read by the accessors below:
    1 - `isPlainJsonCharacter`
    2 - `isJsonWhitespace`
    4 - `isJsonLineWhitespace`
    8 - `isJsonNumber`
+/
private __gshared immutable ubyte[256] parseFlags = [
 // 0 1 2 3 4 5 6 7   8 9 A B C D E F
    0,0,0,0,0,0,0,0,  0,6,2,0,0,6,0,0, // 0
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, // 1
    7,1,0,1,1,1,1,1,  1,1,1,9,1,9,9,1, // 2
    9,9,9,9,9,9,9,9,  9,9,1,1,1,1,1,1, // 3

    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 4
    1,1,1,1,1,1,1,1,  1,1,1,1,0,1,1,1, // 5
    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 6
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1, // 7

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
];

/+
Hexadecimal digit value for each byte ('0'-'9', 'A'-'F', 'a'-'f'),
or -1 for any other byte. Used by `readUnicode`.
+/
private __gshared immutable byte[256] uniFlags = [
 //  0  1  2  3  4  5  6  7    8  9  A  B  C  D  E  F
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 0
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 1
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 2
     0, 1, 2, 3, 4, 5, 6, 7,   8, 9,-1,-1,-1,-1,-1,-1, // 3

    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 4
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 5
    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 6
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 7

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
];


/// True if bit 1 of `parseFlags[c]` is set (byte is copied verbatim inside a string).
pragma(inline, true)
bool isPlainJsonCharacter()(size_t c)
{
    return (parseFlags[c] & 1) != 0;
}

/// True if bit 2 of `parseFlags[c]` is set (space, tab, CR or LF in the table).
pragma(inline, true)
bool isJsonWhitespace()(size_t c)
{
    return (parseFlags[c] & 2) != 0;
}

/// True if bit 4 of `parseFlags[c]` is set (space, tab or CR in the table; LF is excluded).
pragma(inline, true)
bool isJsonLineWhitespace()(size_t c)
{
    return (parseFlags[c] & 4) != 0;
}

/// True if bit 8 of `parseFlags[c]` is set ('+', '-', '.', digits, 'e', 'E' in the table).
pragma(inline, true)
bool isJsonNumber()(size_t c)
{
    return (parseFlags[c] & 8) != 0;
}

/// Casts a function pointer or delegate to the same type with the `pure` attribute added.
package auto assumePure(T)(T t)
    if (isFunctionPointer!T || isDelegate!T)
{
    enum attrs = functionAttributes!T | FunctionAttribute.pure_;
    return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
}

/// Calls `fn(args)` through a pointer that has been cast to `pure` with `assumePure`.
package auto callPure(alias fn,T...)(T args)
{
    auto fp = assumePure(&fn);
    return (*fp)(args);
}

/+
Fast piecewise stack.

The first `Node.length` values live in the inline `buffer`; every further
group of `Node.length` values lives in a chunk obtained from C `malloc`.
When a new chunk is started, the current node header is copied to a
malloc'd `Node` so that it can be restored when the chunk is popped.
+/
private struct Stack
{
    import core.stdc.stdlib: cmalloc = malloc, cfree = free;
    @disable this(this);

    struct Node
    {
        enum length = 32; // 2 power
        Node* prev;
        size_t* buff;
    }

    size_t[Node.length] buffer = void;
    size_t length = 0;
    Node node;

pure:

    /// Appends `value`, starting a new chunk when the current one is full.
    void push()(size_t value)
    {
        version(LDC)
            pragma(inline, true);
        immutable local = length++ & (Node.length - 1);
        if (local)
        {
            node.buff[local] = value;
        }
        else
        if (length == 1)
        {
            node = Node(null, buffer.ptr);
            buffer[0] = value;
        }
        else
        {
            auto prevNode = cast(Node*) callPure!cmalloc(Node.sizeof);
            *prevNode = node;
            node.prev = prevNode;
            node.buff = cast(size_t*) callPure!cmalloc(Node.length * size_t.sizeof);
            node.buff[0] = value;
        }
    }

    /// Returns the most recently pushed value without removing it.
    size_t top()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = (length - 1) & (Node.length - 1);
        return node.buff[local];
    }

    /// Removes and returns the most recently pushed value.
    size_t pop()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = --length & (Node.length - 1);
        immutable ret = node.buff[local];
        if (local == 0)
        {
            if (node.buff != buffer.ptr)
            {
                // The chunk is now empty: release it together with the
                // malloc'd header that `push` created for the previous node.
                auto prevNode = node.prev;
                callPure!cfree(node.buff);
                node = *prevNode;
                callPure!cfree(prevNode);
            }
        }
        return ret;
    }

    /// Releases every malloc'd chunk and node header. Does not reset `length`.
    pragma(inline, false)
    void free()()
    {
        version(LDC)
            pragma(inline, true);
        if (node.buff is null)
            return;
        while(node.buff !is buffer.ptr)
        {
            auto prevNode = node.prev;
            callPure!cfree(node.buff);
            node = *prevNode;
            callPure!cfree(prevNode);
        }
    }
}

unittest
{
    Stack stack;
    assert(stack.length == 0);
    foreach(i; 1 .. 100)
    {
        stack.push(i);
        assert(stack.length == i);
        assert(stack.top() == i);
    }
    foreach_reverse(i; 1 .. 100)
    {
        assert(stack.length == i);
        assert(stack.pop() == i);
    }
    assert(stack.length == 0);
}

/// Convenience constructor for $(LREF JsonParser) that infers `Allocator` and `Input`.
auto jsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])(auto ref Allocator allocator, Input input) {
    return JsonParser!(includingNewLine, hasSpaces, assumeValid, Allocator, Input)(allocator, input);
}

/++
JSON to ASDF binary parser.

Params:
    includingNewLine = if true, skipped whitespace uses `isJsonWhitespace`, otherwise `isJsonLineWhitespace`.
    hasSpaces = if false, no whitespace is skipped between tokens.
    assumeValid = if true, the characters of `true` / `false` / `null` after the first one are not verified.
    Allocator = allocator type used for the output buffer; a pointer to the passed instance is stored.
    Input = either a `const(char)[]`-convertible string or a range of byte chunks.
+/
struct JsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])
{

    ubyte[] data;
    Allocator* allocator;
    Input input;
    static if (chunked)
        ubyte[] front;
    else
        alias front = input;
    size_t dataLength;

    string _lastError;

    enum bool chunked = !is(Input : const(char)[]);

    this(ref Allocator allocator, Input input)

    {
        this.input = input;
        this.allocator = &allocator;
    }

    /+
    Makes sure `front` refers to unread input.
    Returns: `true` if at least one byte is available in `front`.
    +/
    bool prepareInput_()()
    {
        static if (chunked)
        {
            if (front.length == 0)
            {
                // `parse` may already have consumed the whole range while
                // `front` still referred to the last chunk; report the end
                // of input instead of popping an empty range.
                if (input.empty)
                    return false;
                input.popFront;
                if (input.empty)
                    return false;
                front = cast(typeof(front)) input.front;
            }
        }
        return front.length != 0;
    }

    /// Consumes the `'\n'` that `front` currently starts with.
    void skipNewLine()()
    {
        assert(front.length);
        assert(front[0] == '\n');
        front = front[1 .. $];
    }

    /+
    Skips whitespace (when `hasSpaces`) and peeks the next byte.
    Returns: the next unread byte, or 0 if no input is available.
    +/
    char skipSpaces_()()
    {
        static if (hasSpaces)
        for(;;)
        {
            if (prepareInput_ == false)
                return 0;
            static if (includingNewLine)
                alias isWhite = isJsonWhitespace;
            else
                alias isWhite = isJsonLineWhitespace;
            if (isWhite(front[0]))
            {
                front = front[1 .. $];
                continue;
            }
            return front[0];
        }
        else
        {
            if (prepareInput_ == false)
                return 0;
            return front[0];
        }
    }

    /+
    Consumes input up to and including the next `'\n'`.
    Returns: `true` if a new line character was consumed, `false` if the input ended first.
    +/
    bool skipLine()()
    {
        for(;;)
        {
            if (_expect(!prepareInput_, false))
                return false;
            auto c = front[0];
            front = front[1 .. $];
            if (c == '\n')
                return true;
        }
    }

    /// The part of the output buffer written by the last `parse` call.
    auto result()()
    {
        return data[0 .. dataLength];
    }

    /// Message describing the last parse error.
    string lastError()() @property
    {
        return _lastError;
    }


    /// Same as `parse(location)`, discarding the location.
    AsdfErrorCode parse()
    {
        size_t location;
        return parse(location);
    }

    /++
    Parses a single JSON value from the input into `data`.
    Params:
        location = on error, receives the zero based offset of the failure.
    Returns:
        `AsdfErrorCode.success`, or an error code with `lastError` set.
    +/
    pragma(inline, false)
    AsdfErrorCode parse(out size_t location)
    {
        version(SSE42)
        {
            enum byte16 str2E = [
                '\u0001', '\u001F',
                '\"', '\"',
                '\\', '\\',
                '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'];
            enum byte16 num2E = ['+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', '\0'];
            byte16 str2 = str2E;
            byte16 num2 = num2E;
        }

        const(ubyte)* strPtr;
        const(ubyte)* strEnd;
        ubyte* dataPtr;
        ubyte* stringAndNumberShift = void;
        static if (chunked)
        {
            // Fetches the next chunk and grows the output buffer so that it
            // can hold 6 bytes per input byte of that chunk.
            bool prepareInput()()
            {
                pragma(inline, false);
                if(strPtr)
                {
                    location += front.length;
                    input.popFront;
                    if (input.empty)
                    {
                        return false;
                    }
                }
                front = cast(typeof(front)) input.front;
                if (front.length == 0)
                    return false;
                strPtr = front.ptr;
                strEnd = front.ptr + front.length;
                const dataAddLength = front.length * 6;
                const dataLength = dataPtr - data.ptr;
                const dataRequiredLength = dataLength + dataAddLength;
                if (data.length < dataRequiredLength)
                {
                    // Remember the offsets: the pointers are re-based after reallocation.
                    const valueLength = stringAndNumberShift - dataPtr;
                    import std.algorithm.comparison: max;
                    const len = max(data.length * 2, dataRequiredLength);
                    allocator.reallocate(*cast(void[]*)&data, len);
                    dataPtr = data.ptr + dataLength;
                    stringAndNumberShift = dataPtr + valueLength;
                }
                return true;
            }
            strPtr = front.ptr;
            strEnd = front.ptr + front.length;
        }
        else
        {
            strPtr = cast(const(ubyte)*) input.ptr;
            strEnd = cast(const(ubyte)*) input.ptr + input.length;
            enum bool prepareInput = false;
        }

        // Reuse the existing output buffer if it is large enough.
        auto rl = (strEnd - strPtr) * 6;
        if (data.ptr !is null && data.length < rl)
        {
            allocator.deallocate(data);
            data = null;
        }
        if (data.ptr is null)
        {
            data = cast(ubyte[])allocator.allocate(rl);
        }
        dataPtr = data.ptr;

        // Returns true if a non-whitespace byte is available at `strPtr`.
        bool skipSpaces()()
        {
            version(LDC)
                pragma(inline, true);
            static if (includingNewLine)
                alias isWhite = isJsonWhitespace;
            else
                alias isWhite = isJsonLineWhitespace;
            F:
            {
                if (_expect(strEnd != strPtr, true))
                {
                L:
                    static if (hasSpaces)
                    {
                        if (isWhite(strPtr[0]))
                        {
                            strPtr++;
                            goto F;
                        }
                    }
                    return true;
                }
                else
                {
                    if (prepareInput)
                        goto L;
                    return false;
                }
            }

        }

        // Reads 4 hexadecimal digits into `d`.
        // Returns 0 on success, 1 on end of input, -1 on a non-hex byte.
        @minsize
        int readUnicode()(ref dchar d)
        {
            version(LDC)
                pragma(inline, true);
            uint e = 0;
            size_t i = 4;
            do
            {
                if (strEnd == strPtr && !prepareInput)
                    return 1;
                int c = uniFlags[*strPtr++];
                assert(c < 16);
                if (c == -1)
                    return -1;
                assert(c >= 0);
                e <<= 4;
                e ^= c;
            }
            while(--i);
            d = e;
            return 0;
        }

        // Each entry is (offset of the structure's length field << 1) | isObject.
        Stack stack;

        typeof(return) retCode;
        bool currIsKey = void;
        size_t stackValue = void;
        goto value;

    /////////// RETURN
    ret:
        front = front[cast(typeof(front.ptr)) strPtr - front.ptr .. $];
        dataLength = dataPtr - data.ptr;
        assert(stack.length == 0);
    ret_final:
        return retCode;
    ///////////

    key:
        if (!skipSpaces)
            goto object_key_unexpectedEnd;
    key_start:
        if (*strPtr != '"')
            goto object_key_start_unexpectedValue;
        currIsKey = true;
        stringAndNumberShift = dataPtr;
        // reserve 1 byte for the length
        dataPtr += 1;
        goto string;
    next:
        if (stack.length == 0)
            goto ret;
        {
            if (!skipSpaces)
                goto next_unexpectedEnd;
            stackValue = stack.top;
            const isObject = stackValue & 1;
            auto v = *strPtr++;
            if (isObject)
            {
                if (v == ',')
                    goto key;
                if (v != '}')
                    goto next_unexpectedValue;
            }
            else
            {
                if (v == ',')
                    goto value;
                if (v != ']')
                    goto next_unexpectedValue;
            }
        }
    structure_end: {
        stackValue = stack.pop();
        const structureShift = stackValue >> 1;
        const structureLengthPtr = data.ptr + structureShift;
        const size_t structureLength = dataPtr - structureLengthPtr - 4;
        if (structureLength > uint.max)
            goto object_or_array_is_to_large;
        version(X86_Any)
            *cast(uint*) structureLengthPtr = cast(uint) structureLength;
        else
            *cast(ubyte[4]*) structureLengthPtr = cast(ubyte[4]) cast(uint[1]) [cast(uint) structureLength];
        goto next;
    }
    value:
        if (!skipSpaces)
            goto value_unexpectedEnd;
    value_start:
        switch(*strPtr)
        {
            stringValue:
            case '"':
                currIsKey = false;
                *dataPtr++ = Asdf.Kind.string;
                stringAndNumberShift = dataPtr;
                // reserve 4 byte for the length
                dataPtr += 4;
                goto string;
            case '-':
            case '0':
            ..
            case '9': {
                *dataPtr++ = Asdf.Kind.number;
                stringAndNumberShift = dataPtr;
                // reserve 1 byte for the length
                dataPtr++;
                // write the first character
                *dataPtr++ = *strPtr++;
                for(;;)
                {
                    if (strEnd == strPtr && !prepareInput)
                        goto number_found;
                    version(SSE42)
                    {
                        while (strEnd >= strPtr + 16)
                        {
                            byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
                            size_t ecx = __builtin_ia32_pcmpistri128(num2, str1, 0x10);
                            storeUnaligned!byte16(str1, cast(byte*)dataPtr);
                            strPtr += ecx;
                            dataPtr += ecx;
                            if(ecx != 16)
                                goto number_found;
                        }
                    }
                    else
                    {
                        while(strEnd >= strPtr + 4)
                        {
                            char c0 = strPtr[0]; dataPtr += 4;     if (!isJsonNumber(c0)) goto number_found0;
                            char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isJsonNumber(c1)) goto number_found1;
                            char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isJsonNumber(c2)) goto number_found2;
                            char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isJsonNumber(c3)) goto number_found3;
                            strPtr += 4;         dataPtr[-1] = c3;
                        }
                    }
                    while(strEnd > strPtr)
                    {
                        char c0 = strPtr[0]; if (!isJsonNumber(c0)) goto number_found; dataPtr[0] = c0;
                        strPtr += 1;
                        dataPtr += 1;
                    }
                }
                version(SSE42){} else
                {
                    // Undo the speculative `dataPtr += 4` of the unrolled loop.
                    number_found3: dataPtr++; strPtr++;
                    number_found2: dataPtr++; strPtr++;
                    number_found1: dataPtr++; strPtr++;
                    number_found0: dataPtr -= 4;
                }
                number_found:

                auto numberLength = dataPtr - stringAndNumberShift - 1;
                if (numberLength > ubyte.max)
                    goto number_length_unexpectedValue;
                *stringAndNumberShift = cast(ubyte) numberLength;
                goto next;
            }
            case '{':
                strPtr++;
                *dataPtr++ = Asdf.Kind.object;
                stack.push(((dataPtr - data.ptr) << 1) ^ 1);
                dataPtr += 4;
                if (!skipSpaces)
                    goto object_first_value_start_unexpectedEnd;
                if (*strPtr != '}')
                    goto key_start;
                strPtr++;
                goto structure_end;
            case '[':
                strPtr++;
                *dataPtr++ = Asdf.Kind.array;
                stack.push(((dataPtr - data.ptr) << 1) ^ 0);
                dataPtr += 4;
                if (!skipSpaces)
                    goto array_first_value_start_unexpectedEnd;
                if (*strPtr != ']')
                    goto value_start;
                strPtr++;
                goto structure_end;
            foreach (name; AliasSeq!("false", "null", "true"))
            {
            case name[0]:
                if (_expect(strEnd - strPtr >= name.length, true))
                {
                    static if (!assumeValid)
                    {
                        version(X86_Any)
                        {
                            // Compare the last 4 characters of the keyword with one 32-bit load.
                            enum uint referenceValue =
                                (uint(name[$ - 4]) << 0x00) ^
                                (uint(name[$ - 3]) << 0x08) ^
                                (uint(name[$ - 2]) << 0x10) ^
                                (uint(name[$ - 1]) << 0x18);
                            if (*cast(uint*)(strPtr + bool(name.length == 5)) != referenceValue)
                            {
                                static if (name == "true")
                                    goto true_unexpectedValue;
                                else
                                static if (name == "false")
                                    goto false_unexpectedValue;
                                else
                                    goto null_unexpectedValue;
                            }
                        }
                        else
                        {
                            char[name.length - 1] c = void;
                            import std.range: iota;
                            foreach (i; aliasSeqOf!(iota(1, name.length)))
                                c[i - 1] = strPtr[i];
                            foreach (i; aliasSeqOf!(iota(1, name.length)))
                            {
                                if (c[i - 1] != name[i])
                                {

                                    static if (name == "true")
                                        goto true_unexpectedValue;
                                    else
                                    static if (name == "false")
                                        goto false_unexpectedValue;
                                    else
                                        goto null_unexpectedValue;
                                }
                            }
                        }
                    }
                    static if (name == "null")
                        *dataPtr++ = Asdf.Kind.null_;
                    else
                    static if (name == "false")
                        *dataPtr++ = Asdf.Kind.false_;
                    else
                        *dataPtr++ = Asdf.Kind.true_;
                    strPtr += name.length;
                    goto next;
                }
                else
                {
                    // Slow path: the keyword may be split across chunks.
                    strPtr += 1;
                    foreach (i; 1 .. name.length)
                    {
                        if (strEnd == strPtr && !prepareInput)
                        {
                            static if (name == "true")
                                goto true_unexpectedEnd;
                            else
                            static if (name == "false")
                                goto false_unexpectedEnd;
                            else
                                goto null_unexpectedEnd;
                        }
                        static if (!assumeValid)
                        {
                            if (_expect(strPtr[0] != name[i], false))
                            {
                                static if (name == "true")
                                    goto true_unexpectedValue;
                                else
                                static if (name == "false")
                                    goto false_unexpectedValue;
                                else
                                    goto null_unexpectedValue;
                            }
                        }
                        strPtr++;
                    }
                    static if (name == "null")
                        *dataPtr++ = Asdf.Kind.null_;
                    else
                    static if (name == "false")
                        *dataPtr++ = Asdf.Kind.false_;
                    else
                        *dataPtr++ = Asdf.Kind.true_;
                    goto next;
                }
            }
            default: goto value_unexpectedStart;
        }

    string:
        debug assert(*strPtr == '"', "Internal ASDF logic error. Please report an issue.");
        strPtr += 1;

    StringLoop: {
        for(;;)
        {
            if (strEnd == strPtr && !prepareInput)
                goto string_unexpectedEnd;
            version(SSE42)
            {
                while (strEnd >= strPtr + 16)
                {
                    byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
                    size_t ecx = __builtin_ia32_pcmpistri128(str2, str1, 0x04);
                    storeUnaligned!byte16(str1, cast(byte*)dataPtr);
                    strPtr += ecx;
                    dataPtr += ecx;
                    if(ecx != 16)
                        goto string_found;
                }
            }
            else
            {
                while(strEnd >= strPtr + 4)
                {
                    char c0 = strPtr[0]; dataPtr += 4;     if (!isPlainJsonCharacter(c0)) goto string_found0;
                    char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isPlainJsonCharacter(c1)) goto string_found1;
                    char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isPlainJsonCharacter(c2)) goto string_found2;
                    char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isPlainJsonCharacter(c3)) goto string_found3;
                    strPtr += 4;         dataPtr[-1] = c3;
                }
            }
            while(strEnd > strPtr)
            {
                char c0 = strPtr[0]; if (!isPlainJsonCharacter(c0)) goto string_found; dataPtr[0] = c0;
                strPtr += 1;
                dataPtr += 1;
            }
        }
        version(SSE42) {} else
        {
            // Undo the speculative `dataPtr += 4` of the unrolled loop.
            string_found3: dataPtr++; strPtr++;
            string_found2: dataPtr++; strPtr++;
            string_found1: dataPtr++; strPtr++;
            string_found0: dataPtr -= 4;
        }
        string_found:

        uint c = strPtr[0];
        if (c == '\"')
        {
            strPtr += 1;
            if (currIsKey)
            {
                auto stringLength = dataPtr - stringAndNumberShift - 1;
                if (stringLength > ubyte.max)
                    goto key_is_to_large;
                *cast(ubyte*)stringAndNumberShift = cast(ubyte) stringLength;
                if (!skipSpaces)
                    goto failed_to_read_after_key;
                if (*strPtr != ':')
                    goto unexpected_character_after_key;
                strPtr++;
                goto value;
            }
            else
            {
                auto stringLength = dataPtr - stringAndNumberShift - 4;
                if (stringLength > uint.max)
                    goto string_length_is_too_large;
                version(X86_Any)
                    *cast(uint*)stringAndNumberShift = cast(uint) stringLength;
                else
                    *cast(ubyte[4]*)stringAndNumberShift = cast(ubyte[4]) cast(uint[1]) [cast(uint) stringLength];
                goto next;
            }
        }
        if (c == '\\')
        {
            strPtr += 1;
            if (strEnd == strPtr && !prepareInput)
                goto string_unexpectedEnd;
            c = *strPtr++;
            switch(c)
            {
                case '/' :
                case '\"':
                case '\\':
                    *dataPtr++ = cast(ubyte) c;
                    goto StringLoop;
                case 'b' : *dataPtr++ = '\b'; goto StringLoop;
                case 'f' : *dataPtr++ = '\f'; goto StringLoop;
                case 'n' : *dataPtr++ = '\n'; goto StringLoop;
                case 'r' : *dataPtr++ = '\r'; goto StringLoop;
                case 't' : *dataPtr++ = '\t'; goto StringLoop;
                case 'u' :
                    dchar d = void;
                    if (auto r = (readUnicode(d)))
                    {
                        if (r == 1)
                            goto string_unexpectedEnd;
                        goto string_unexpectedValue;
                    }
                    if (_expect(0xD800 <= d && d <= 0xDFFF, false))
                    {
                        // A lead surrogate must be followed by `\uXXXX` holding a trail surrogate.
                        if (d >= 0xDC00)
                            goto string_unexpectedValue;
                        if (strEnd == strPtr && !prepareInput)
                            goto string_unexpectedEnd;
                        if (*strPtr++ != '\\')
                            goto string_unexpectedValue;
                        if (strEnd == strPtr && !prepareInput)
                            goto string_unexpectedEnd;
                        if (*strPtr++ != 'u')
                            goto string_unexpectedValue;
                        d = (d & 0x3FF) << 10;
                        dchar trailing;
                        if (auto r = (readUnicode(trailing)))
                        {
                            if (r == 1)
                                goto string_unexpectedEnd;
                            goto string_unexpectedValue;
                        }
                        if (!(0xDC00 <= trailing && trailing <= 0xDFFF))
                            goto invalid_trail_surrogate;
                        {
                            d |= trailing & 0x3FF;
                            d += 0x10000;
                        }
                    }
                    if (!(d < 0xD800 || (d > 0xDFFF && d <= 0x10FFFF)))
                        goto invalid_utf_value;
                    encodeUTF8(d, dataPtr);
                    goto StringLoop;
                default: goto string_unexpectedValue;
            }
        }
        goto string_unexpectedValue;
    }

    ret_error:
        // `front` is intentionally left untouched here; only the location,
        // the output length and the nesting stack are finalized.
        location += strPtr - cast(const(ubyte)*)front.ptr;
        dataLength = dataPtr - data.ptr;
        stack.free();
        goto ret_final;
    unexpectedEnd:
        retCode = AsdfErrorCode.unexpectedEnd;
        goto ret_error;
    unexpectedValue:
        retCode = AsdfErrorCode.unexpectedValue;
        goto ret_error;
    object_key_unexpectedEnd:
        _lastError = "unexpected end of object key";
        goto unexpectedEnd;
    object_key_start_unexpectedValue:
        _lastError = "expected '\"' when start parsing object key";
        goto unexpectedValue;
    key_is_to_large:
        _lastError = "key length is limited to 255 characters";
        goto unexpectedValue;
    object_or_array_is_to_large:
        _lastError = "object or array serialized size is limited to 2^32-1";
        goto unexpectedValue;
    next_unexpectedEnd:
        stackValue = stack.top;
        _lastError = (stackValue & 1) ? "unexpected end when parsing object" : "unexpected end when parsing array";
        goto unexpectedEnd;
    next_unexpectedValue:
        stackValue = stack.top;
        _lastError = (stackValue & 1) ? "expected ',' or `}` when parsing object" : "expected ',' or `]` when parsing array";
        goto unexpectedValue;
    value_unexpectedStart:
        _lastError = "unexpected character when start parsing JSON value";
        // An unexpected character is a value error, not an end-of-input error.
        goto unexpectedValue;
    value_unexpectedEnd:
        _lastError = "unexpected end when start parsing JSON value";
        goto unexpectedEnd;
    number_length_unexpectedValue:
        _lastError = "number length is limited to 255 characters";
        goto unexpectedValue;
    object_first_value_start_unexpectedEnd:
        _lastError = "unexpected end of input data after '{'";
        goto unexpectedEnd;
    array_first_value_start_unexpectedEnd:
        _lastError = "unexpected end of input data after '['";
        goto unexpectedEnd;
    false_unexpectedEnd:
        _lastError = "unexpected end when parsing 'false'";
        goto unexpectedEnd;
    false_unexpectedValue:
        _lastError = "unexpected character when parsing 'false'";
        goto unexpectedValue;
    null_unexpectedEnd:
        _lastError = "unexpected end when parsing 'null'";
        goto unexpectedEnd;
    null_unexpectedValue:
        _lastError = "unexpected character when parsing 'null'";
        goto unexpectedValue;
    true_unexpectedEnd:
        _lastError = "unexpected end when parsing 'true'";
        goto unexpectedEnd;
    true_unexpectedValue:
        _lastError = "unexpected character when parsing 'true'";
        goto unexpectedValue;
    string_unexpectedEnd:
        _lastError = "unexpected end when parsing string";
        goto unexpectedEnd;
    string_unexpectedValue:
        _lastError = "unexpected character when parsing string";
        goto unexpectedValue;
    failed_to_read_after_key:
        _lastError = "unexpected end after object key";
        goto unexpectedEnd;
    unexpected_character_after_key:
        _lastError = "unexpected character after key";
        goto unexpectedValue;
    string_length_is_too_large:
        _lastError = "string size is limited to 2^32-1";
        goto unexpectedValue;
    invalid_trail_surrogate:
        _lastError = "invalid UTF-16 trail surrogate";
        goto unexpectedValue;
    invalid_utf_value:
        _lastError = "invalid UTF value";
        goto unexpectedValue;
    }
}

unittest
{
    import mir.conv;
    auto asdf_data = parseJson(` [ true, 123 , [ false, 123.0 , "123211" ], "3e23e" ] `);
    auto str = asdf_data.to!string;
    auto str2 = `[true,123,[false,123.0,"123211"],"3e23e"]`;
    assert( str == str2);
}

/+
Writes the UTF-8 encoding of `c` at `ptr` and advances `ptr` by the number
of bytes written (1 to 4). The caller must pass a valid code point.
+/
pragma(inline, true)
void encodeUTF8()(dchar c, ref ubyte* ptr)
{
    if (c < 0x80)
    {
        ptr[0] = cast(ubyte) (c);
        ptr += 1;
    }
    else
    if (c < 0x800)
    {
        ptr[0] = cast(ubyte) (0xC0 | (c >> 6));
        ptr[1] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 2;
    }
    else
    if (c < 0x10000)
    {
        ptr[0] = cast(ubyte) (0xE0 | (c >> 12));
        ptr[1] = cast(ubyte) (0x80 | ((c >> 6) & 0x3F));
        ptr[2] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 3;
    }
    else
    {
        // assert(c < 0x200000);
        ptr[0] = cast(ubyte) (0xF0 | (c >> 18));
        ptr[1] = cast(ubyte) (0x80 | ((c >> 12) & 0x3F));
        ptr[2] = cast(ubyte) (0x80 | ((c >> 6) & 0x3F));
        ptr[3] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 4;
    }
}

unittest
{
    auto asdf = "[\"\u007F\"]".parseJson;
}

unittest
{
    auto f = `"\uD801\uDC37"`.parseJson;
    assert(f == "\"\U00010437\"".parseJson);
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"1234567890qwertyuiopasdfghjklzxcvbnm"`;
    auto data = Asdf(str[1..$-1]);
    assert(data == parseJson(str));
    foreach(i; 1 .. str.length)
    {
        auto s = parseJson(str.representation.chunks(i));
        assert(data == s);
    }
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/"`;
    auto data = Asdf("\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/");
    assert(data == parseJson(str));
    foreach(i; 1 .. str.length)
        assert(data == parseJson(str.representation.chunks(i)));
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"\u0026"`;
    auto data = Asdf("&");
    assert(data == parseJson(str));
}

version(unittest) immutable string test_data =
q{{
  "coordinates": [
    {
      "x": 0.29811521136061625,
      "y": 0.47980763779335556,
      "z": 0.1704431616620138,
      "name": "tqxvsg 2780",
      "opts": {
        "1": [
          1,
          true
        ]
      }
    }
  ],
  "info": "some info"
}
};