mirror of
https://github.com/Mauler125/r5sdk.git
synced 2025-02-09 19:15:03 +01:00
996 lines
32 KiB
C++
996 lines
32 KiB
C++
|
// Protocol Buffers - Google's data interchange format
|
||
|
// Copyright 2008 Google Inc. All rights reserved.
|
||
|
// https://developers.google.com/protocol-buffers/
|
||
|
//
|
||
|
// Redistribution and use in source and binary forms, with or without
|
||
|
// modification, are permitted provided that the following conditions are
|
||
|
// met:
|
||
|
//
|
||
|
// * Redistributions of source code must retain the above copyright
|
||
|
// notice, this list of conditions and the following disclaimer.
|
||
|
// * Redistributions in binary form must reproduce the above
|
||
|
// copyright notice, this list of conditions and the following disclaimer
|
||
|
// in the documentation and/or other materials provided with the
|
||
|
// distribution.
|
||
|
// * Neither the name of Google Inc. nor the names of its
|
||
|
// contributors may be used to endorse or promote products derived from
|
||
|
// this software without specific prior written permission.
|
||
|
//
|
||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
#include <thirdparty/protobuf/util/internal/json_stream_parser.h>
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <cctype>
|
||
|
#include <cmath>
|
||
|
#include <memory>
|
||
|
#include <stack>
|
||
|
#include <string>
|
||
|
|
||
|
#include <thirdparty/protobuf/stubs/common.h>
|
||
|
#include <thirdparty/protobuf/stubs/logging.h>
|
||
|
#include <thirdparty/protobuf/stubs/strutil.h>
|
||
|
#include <thirdparty/protobuf/stubs/status.h>
|
||
|
#include <thirdparty/protobuf/util/internal/object_writer.h>
|
||
|
#include <thirdparty/protobuf/util/internal/json_escaping.h>
|
||
|
|
||
|
|
||
|
namespace google {
|
||
|
namespace protobuf {
|
||
|
namespace util {
|
||
|
|
||
|
namespace converter {
|
||
|
|
||
|
// Number of characters in an escaped UTF-16 code unit ('\\' 'u' X X X X)
|
||
|
static const int kUnicodeEscapedLength = 6;
|
||
|
|
||
|
// Default nesting limit; the constructor copies it into max_recursion_depth_,
// which IncrementRecursionDepth() checks each time an object is opened.
static const int kDefaultMaxRecursionDepth = 100;
|
||
|
|
||
|
// Spellings of the JSON keyword literals. These cannot be constexpr for
// portability with VS2015.
|
||
|
static const StringPiece kKeywordTrue = "true";
|
||
|
static const StringPiece kKeywordFalse = "false";
|
||
|
static const StringPiece kKeywordNull = "null";
|
||
|
|
||
|
// Returns true if `c` may start an unquoted key: an ASCII letter, '_' or '$'.
inline bool IsLetter(char c) {
  if (c == '_' || c == '$') {
    return true;
  }
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
|
||
|
|
||
|
inline bool IsAlphanumeric(char c) {
|
||
|
return IsLetter(c) || ('0' <= c && c <= '9');
|
||
|
}
|
||
|
|
||
|
// Indicates a character may not be part of an unquoted key.
|
||
|
inline bool IsKeySeparator(char c) {
|
||
|
return (ascii_isspace(c) || c == '"' || c == '\'' || c == '{' ||
|
||
|
c == '}' || c == '[' || c == ']' || c == ':' || c == ',');
|
||
|
}
|
||
|
|
||
|
// Appends `str` to `*dst`, substituting `replacement` for every byte at which
// the structurally valid UTF-8 prefix (per UTF8SpnStructurallyValid) stops.
inline void ReplaceInvalidCodePoints(StringPiece str,
                                     const std::string& replacement,
                                     std::string* dst) {
  while (!str.empty()) {
    // Copy the longest structurally valid prefix unchanged.
    int valid_len = internal::UTF8SpnStructurallyValid(str);
    StrAppend(dst, str.substr(0, valid_len));

    if (valid_len != str.size()) {
      // One offending byte follows the valid run: emit the replacement and
      // step over the valid bytes plus that single byte.
      StrAppend(dst, replacement);
      str.remove_prefix(valid_len + 1);
      continue;
    }
    // Everything that remained was valid; nothing left to process.
    return;
  }
}
|
||
|
|
||
|
// Consumes a bare (unquoted) key from the front of `*input`.
// The key must start with an IsLetter() character and extends over the
// following IsAlphanumeric() characters. On success `*key` views the key,
// `*input` is advanced past it and true is returned; otherwise both are left
// untouched and false is returned.
static bool ConsumeKey(StringPiece* input, StringPiece* key) {
  if (input->empty()) return false;
  if (!IsLetter((*input)[0])) return false;

  int len = 1;
  while (len < input->size() && IsAlphanumeric((*input)[len])) {
    ++len;
  }
  *key = StringPiece(input->data(), len);
  input->remove_prefix(len);
  return true;
}
|
||
|
|
||
|
// Same as 'ConsumeKey', but allows a widened set of key characters.
|
||
|
static bool ConsumeKeyPermissive(StringPiece* input,
|
||
|
StringPiece* key) {
|
||
|
if (input->empty() || !IsLetter((*input)[0])) return false;
|
||
|
int len = 1;
|
||
|
for (; len < input->size(); ++len) {
|
||
|
if (IsKeySeparator((*input)[len])) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
*key = StringPiece(input->data(), len);
|
||
|
*input = StringPiece(input->data() + len, input->size() - len);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Returns true if `input` begins with a character that can start a bare key.
static bool MatchKey(StringPiece input) {
  if (input.empty()) {
    return false;
  }
  return IsLetter(input[0]);
}
|
||
|
|
||
|
JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
|
||
|
: ow_(ow),
|
||
|
stack_(),
|
||
|
leftover_(),
|
||
|
json_(),
|
||
|
p_(),
|
||
|
key_(),
|
||
|
key_storage_(),
|
||
|
finishing_(false),
|
||
|
seen_non_whitespace_(false),
|
||
|
allow_no_root_element_(false),
|
||
|
parsed_(),
|
||
|
parsed_storage_(),
|
||
|
string_open_(0),
|
||
|
chunk_storage_(),
|
||
|
coerce_to_utf8_(false),
|
||
|
utf8_replacement_character_(" "),
|
||
|
allow_empty_null_(false),
|
||
|
allow_permissive_key_naming_(false),
|
||
|
loose_float_number_conversion_(false),
|
||
|
recursion_depth_(0),
|
||
|
max_recursion_depth_(kDefaultMaxRecursionDepth) {
|
||
|
// Initialize the stack with a single value to be parsed.
|
||
|
stack_.push(VALUE);
|
||
|
}
|
||
|
|
||
|
// Nothing to release explicitly; members clean up after themselves.
JsonStreamParser::~JsonStreamParser() = default;
|
||
|
|
||
|
|
||
|
// Feeds the next piece of JSON text to the parser.
// Only the structurally valid UTF-8 prefix of (leftover + json) is parsed now;
// whatever follows it is kept in leftover_ for the next Parse()/FinishParse().
// Returns the status produced by ParseChunk(), or OK if nothing could be
// parsed yet.
util::Status JsonStreamParser::Parse(StringPiece json) {
  StringPiece chunk = json;
  if (!leftover_.empty()) {
    // Prepend what the previous call could not consume. The combined text
    // lives in chunk_storage_ rather than leftover_, because ParseChunk()
    // rewrites leftover_ while `chunk` is still being read. This may copy a
    // lot, but chunks are expected to be small (fragments of a Cord).
    chunk_storage_.swap(leftover_);
    StrAppend(&chunk_storage_, json);
    chunk = StringPiece(chunk_storage_);
  }

  int valid_len = internal::UTF8SpnStructurallyValid(chunk);
  if (valid_len <= 0) {
    // No valid prefix at all: keep everything until more data arrives.
    leftover_.assign(chunk.data(), chunk.size());
    return util::Status();
  }

  util::Status status = ParseChunk(chunk.substr(0, valid_len));
  // Bytes past the valid prefix are stashed for later, once more data is
  // available.
  StrAppend(&leftover_, chunk.substr(valid_len));
  return status;
}
|
||
|
|
||
|
// Signals end of input and parses whatever is still buffered in leftover_.
// In this mode conditions that would otherwise wait for more data (such as an
// unterminated string) are reported as errors, as is any non-whitespace text
// remaining after the parse completes.
util::Status JsonStreamParser::FinishParse() {
  // Nothing expected and nothing buffered: we are already done.
  if (stack_.empty() && leftover_.empty()) {
    return util::Status();
  }

  // Owns the coerced copy of the input. Declared at function scope because
  // json_/p_ point into it until RunParser() has returned.
  std::unique_ptr<std::string> scratch;

  const bool valid_utf8 = internal::IsStructurallyValidUTF8(leftover_);
  if (valid_utf8) {
    p_ = json_ = leftover_;
  } else if (coerce_to_utf8_) {
    scratch.reset(new std::string);
    scratch->reserve(leftover_.size() * utf8_replacement_character_.size());
    ReplaceInvalidCodePoints(leftover_, utf8_replacement_character_,
                             scratch.get());
    p_ = json_ = *scratch;
  } else {
    // Point the cursor at the input first so the error carries its context.
    p_ = json_ = leftover_;
    return ReportFailure("Encountered non UTF-8 code points.",
                         ParseErrorType::NON_UTF_8);
  }

  // Parse the remainder in finishing mode, which reports errors for things
  // like unterminated strings or unknown tokens that would normally be
  // retried.
  finishing_ = true;
  util::Status result = RunParser();
  if (!result.ok()) {
    return result;
  }
  SkipWhitespace();
  if (p_.empty()) {
    return result;
  }
  return ReportFailure("Parsing terminated before end of input.",
                       ParseErrorType::PARSING_TERMINATED_BEFORE_END_OF_INPUT);
}
|
||
|
|
||
|
// Parses one chunk in non-finishing mode and records in leftover_ whatever
// text the parser could not consume yet.
util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
  // An empty chunk requires no work.
  if (chunk.empty()) return util::Status();

  p_ = json_ = chunk;
  finishing_ = false;

  util::Status result = RunParser();
  if (!result.ok()) return result;

  SkipWhitespace();
  if (p_.empty()) {
    // Everything was consumed, so nothing carries over.
    leftover_.clear();
    return util::Status();
  }
  if (stack_.empty()) {
    // The document is complete, yet there is more non-whitespace text.
    return ReportFailure(
        "Parsing terminated before end of input.",
        ParseErrorType::PARSING_TERMINATED_BEFORE_END_OF_INPUT);
  }
  // More input is expected; keep the unparsed tail for the next round.
  leftover_ = std::string(p_);
  return util::Status();
}
|
||
|
|
||
|
// Returns true when the whole input consisted of whitespace, i.e. when:
//  - the parser is in the finishing stage,
//  - the input is exhausted and the pending token is UNKNOWN, and
//  - no non-whitespace character has been seen so far.
bool JsonStreamParser::IsInputAllWhiteSpaces(TokenType type) {
  return finishing_ && p_.empty() && type == UNKNOWN && !seen_non_whitespace_;
}
|
||
|
|
||
|
// Main loop: pops the next expected parse state off stack_ and dispatches to
// its handler until the stack is empty or a handler fails. In non-finishing
// mode a cancelled handler is not an error: its state is pushed back so the
// work is retried when more input arrives, and OK is returned.
util::Status JsonStreamParser::RunParser() {
  while (!stack_.empty()) {
    const ParseType type = stack_.top();
    // While a string is open we are resuming in the middle of it, so the
    // token is known without inspecting the input.
    const TokenType token =
        (string_open_ != 0) ? BEGIN_STRING : GetNextTokenType();
    stack_.pop();

    util::Status result;
    switch (type) {
      case VALUE:
        if (allow_no_root_element_ && IsInputAllWhiteSpaces(token)) {
          return util::Status();
        }
        result = ParseValue(token);
        break;

      case OBJ_MID:
        result = ParseObjectMid(token);
        break;

      case ENTRY:
        result = ParseEntry(token);
        break;

      case ENTRY_MID:
        result = ParseEntryMid(token);
        break;

      case ARRAY_VALUE:
        result = ParseArrayValue(token);
        break;

      case ARRAY_MID:
        result = ParseArrayMid(token);
        break;

      default:
        result =
            util::InternalError(StrCat("Unknown parse type: ", type));
        break;
    }

    if (result.ok()) continue;

    // Genuine failures, and any failure while finishing, go to the caller.
    if (finishing_ || !util::IsCancelled(result)) {
      return result;
    }

    // Cancelled: save our state and try again later.
    stack_.push(type);
    // A key that still has to be rendered may point into the current chunk;
    // copy it into our own storage so it outlives that chunk.
    if (!key_.empty() && key_storage_.empty()) {
      StrAppend(&key_storage_, key_);
      key_ = StringPiece(key_storage_);
    }
    return util::Status();
  }
  return util::Status();
}
|
||
|
|
||
|
// Parses one JSON value whose first token is `type`, dispatching to the
// handler for that kind of value.
util::Status JsonStreamParser::ParseValue(TokenType type) {
  switch (type) {
    case BEGIN_OBJECT:
      return HandleBeginObject();
    case BEGIN_ARRAY:
      return HandleBeginArray();
    case BEGIN_STRING:
      return ParseString();
    case BEGIN_NUMBER:
      return ParseNumber();
    case BEGIN_TRUE:
      return ParseTrue();
    case BEGIN_FALSE:
      return ParseFalse();
    case BEGIN_NULL:
      return ParseNull();
    case UNKNOWN:
      return ReportUnknown("Expected a value.", ParseErrorType::EXPECTED_VALUE);
    default:
      break;
  }

  // Any other token cannot start a value. We may simply have been cut off
  // mid-keyword, though: with 'fals' at the end of the chunk we cannot know
  // whether the next char would be 'e', completing it, or something else,
  // making it invalid. Wait for more data in that case.
  if (!finishing_ && p_.length() < kKeywordFalse.length()) {
    return util::CancelledError("");
  }

  if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
    return ParseEmptyNull();
  }
  return ReportFailure("Unexpected token.",
                       ParseErrorType::UNEXPECTED_TOKEN);
}
|
||
|
|
||
|
// Parses a string value and, on success, renders it under the pending key and
// resets the key and the parsed-string state.
util::Status JsonStreamParser::ParseString() {
  util::Status result = ParseStringHelper();
  if (!result.ok()) {
    return result;
  }
  ow_->RenderString(key_, parsed_);
  key_ = StringPiece();
  parsed_ = StringPiece();
  parsed_storage_.clear();
  return result;
}
|
||
|
|
||
|
// Parses a quoted string (single or double quotes) at p_ and leaves the
// unescaped contents in parsed_. If the string contains no escapes and was
// seen in one piece, parsed_ views the input buffer directly; otherwise it
// views parsed_storage_. When the input ends inside the string and more data
// is expected, the text read so far is kept in parsed_storage_, string_open_
// stays set, and a cancelled status is returned so parsing can resume.
util::Status JsonStreamParser::ParseStringHelper() {
  // If we haven't seen the opening quote yet, remember which one it is so the
  // matching closing quote can be recognised, then step over it.
  if (string_open_ == 0) {
    string_open_ = *p_.data();
    GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\'');
    Advance();
  }

  // Start of the run of input bytes that has not been copied into
  // parsed_storage_ yet; tracking it lets us minimise copying.
  const char* pending = p_.data();
  while (!p_.empty()) {
    const char* cur = p_.data();

    if (*cur == '\\') {
      // An escape is coming up: flush the literal bytes seen before it.
      if (pending < cur) {
        parsed_storage_.append(pending, cur - pending);
      }
      // The backslash is the last byte we have. Either wait for more data or,
      // when finishing, report the string as unterminated.
      if (p_.length() == 1) {
        if (finishing_) {
          return ReportFailure("Closing quote expected in string.",
                               ParseErrorType::EXPECTED_CLOSING_QUOTE);
        }
        return util::CancelledError("");
      }

      const char escaped = cur[1];
      if (escaped == 'u') {
        // \uXXXX escape: decoded (and consumed) by ParseUnicodeEscape().
        util::Status result = ParseUnicodeEscape();
        if (!result.ok()) {
          return result;
        }
        pending = p_.data();
        continue;
      }

      // Standard single-character escapes; any other escaped character
      // stands for itself.
      char decoded = escaped;
      switch (escaped) {
        case 'b':
          decoded = '\b';
          break;
        case 'f':
          decoded = '\f';
          break;
        case 'n':
          decoded = '\n';
          break;
        case 'r':
          decoded = '\r';
          break;
        case 't':
          decoded = '\t';
          break;
        case 'v':
          decoded = '\v';
          break;
        default:
          break;
      }
      parsed_storage_.push_back(decoded);
      // The backslash and the escaped character have both been handled.
      p_.remove_prefix(2);
      pending = p_.data();
      continue;
    }

    if (*cur != string_open_) {
      // Ordinary character: it stays part of the pending run.
      Advance();
      continue;
    }

    // Closing quote found.
    if (parsed_storage_.empty()) {
      // Nothing had to be copied, so reuse the input buffer.
      parsed_ = StringPiece(pending, cur - pending);
    } else {
      if (pending < cur) {
        parsed_storage_.append(pending, cur - pending);
      }
      parsed_ = StringPiece(parsed_storage_);
    }
    // Clear the quote char so the next string starts fresh.
    string_open_ = 0;
    Advance();
    return util::Status();
  }

  // Ran out of input inside the string: keep what we have so far.
  if (pending < p_.data()) {
    parsed_storage_.append(pending, p_.data() - pending);
  }
  // Without a closing quote, but with more data expected, cancel for now.
  if (!finishing_) {
    return util::CancelledError("");
  }
  // End of input reached without a closing quote.
  string_open_ = 0;
  return ReportFailure("Closing quote expected in string.",
                       ParseErrorType::EXPECTED_CLOSING_QUOTE);
}
|
||
|
|
||
|
// Converts a unicode escaped character to a decimal value stored in a char32
|
||
|
// for use in UTF8 encoding utility. We assume that str begins with \uhhhh and
|
||
|
// convert that from the hex number to a decimal value.
|
||
|
//
|
||
|
// There are some security exploits with UTF-8 that we should be careful of:
|
||
|
// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
|
||
|
// - http://sites/intl-eng/design-guide/core-application
|
||
|
util::Status JsonStreamParser::ParseUnicodeEscape() {
|
||
|
if (p_.length() < kUnicodeEscapedLength) {
|
||
|
if (!finishing_) {
|
||
|
return util::CancelledError("");
|
||
|
}
|
||
|
return ReportFailure("Illegal hex string.",
|
||
|
ParseErrorType::ILLEGAL_HEX_STRING);
|
||
|
}
|
||
|
GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
|
||
|
GOOGLE_DCHECK_EQ('u', p_.data()[1]);
|
||
|
uint32 code = 0;
|
||
|
for (int i = 2; i < kUnicodeEscapedLength; ++i) {
|
||
|
if (!isxdigit(p_.data()[i])) {
|
||
|
return ReportFailure("Invalid escape sequence.",
|
||
|
ParseErrorType::INVALID_ESCAPE_SEQUENCE);
|
||
|
}
|
||
|
code = (code << 4) + hex_digit_to_int(p_.data()[i]);
|
||
|
}
|
||
|
if (code >= JsonEscaping::kMinHighSurrogate &&
|
||
|
code <= JsonEscaping::kMaxHighSurrogate) {
|
||
|
if (p_.length() < 2 * kUnicodeEscapedLength) {
|
||
|
if (!finishing_) {
|
||
|
return util::CancelledError("");
|
||
|
}
|
||
|
if (!coerce_to_utf8_) {
|
||
|
return ReportFailure("Missing low surrogate.",
|
||
|
ParseErrorType::MISSING_LOW_SURROGATE);
|
||
|
}
|
||
|
} else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
|
||
|
p_.data()[kUnicodeEscapedLength + 1] == 'u') {
|
||
|
uint32 low_code = 0;
|
||
|
for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength;
|
||
|
++i) {
|
||
|
if (!isxdigit(p_.data()[i])) {
|
||
|
return ReportFailure("Invalid escape sequence.",
|
||
|
ParseErrorType::INVALID_ESCAPE_SEQUENCE);
|
||
|
}
|
||
|
low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]);
|
||
|
}
|
||
|
if (low_code >= JsonEscaping::kMinLowSurrogate &&
|
||
|
low_code <= JsonEscaping::kMaxLowSurrogate) {
|
||
|
// Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
|
||
|
code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
|
||
|
JsonEscaping::kMinSupplementaryCodePoint;
|
||
|
// Advance past the first code unit escape.
|
||
|
p_.remove_prefix(kUnicodeEscapedLength);
|
||
|
} else if (!coerce_to_utf8_) {
|
||
|
return ReportFailure("Invalid low surrogate.",
|
||
|
ParseErrorType::INVALID_LOW_SURROGATE);
|
||
|
}
|
||
|
} else if (!coerce_to_utf8_) {
|
||
|
return ReportFailure("Missing low surrogate.",
|
||
|
ParseErrorType::MISSING_LOW_SURROGATE);
|
||
|
}
|
||
|
}
|
||
|
if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
|
||
|
return ReportFailure("Invalid unicode code point.",
|
||
|
ParseErrorType::INVALID_UNICODE);
|
||
|
}
|
||
|
char buf[UTFmax];
|
||
|
int len = EncodeAsUTF8Char(code, buf);
|
||
|
// Advance past the [final] code unit escape.
|
||
|
p_.remove_prefix(kUnicodeEscapedLength);
|
||
|
parsed_storage_.append(buf, len);
|
||
|
return util::Status();
|
||
|
}
|
||
|
|
||
|
// Parses a number and renders it under the pending key using the render call
// that matches the representation chosen by ParseNumberHelper().
util::Status JsonStreamParser::ParseNumber() {
  NumberResult number;
  util::Status result = ParseNumberHelper(&number);
  if (!result.ok()) {
    return result;
  }

  switch (number.type) {
    case NumberResult::DOUBLE:
      ow_->RenderDouble(key_, number.double_val);
      break;

    case NumberResult::INT:
      ow_->RenderInt64(key_, number.int_val);
      break;

    case NumberResult::UINT:
      ow_->RenderUint64(key_, number.uint_val);
      break;

    default:
      return ReportFailure("Unable to parse number.",
                           ParseErrorType::UNABLE_TO_PARSE_NUMBER);
  }
  // The key has been used up by the render call above.
  key_ = StringPiece();
  return result;
}
|
||
|
|
||
|
// Converts `number` to a double and stores it in `*result`.
// Fails if the text cannot be converted or, unless
// loose_float_number_conversion_ is set, if the converted value is not
// finite.
util::Status JsonStreamParser::ParseDoubleHelper(const std::string& number,
                                                 NumberResult* result) {
  const bool converted = safe_strtod(number, &result->double_val);
  if (!converted) {
    return ReportFailure("Unable to parse number.",
                         ParseErrorType::UNABLE_TO_PARSE_NUMBER);
  }
  if (!loose_float_number_conversion_) {
    if (!std::isfinite(result->double_val)) {
      return ReportFailure("Number exceeds the range of double.",
                           ParseErrorType::NUMBER_EXCEEDS_RANGE_DOUBLE);
    }
  }
  result->type = NumberResult::DOUBLE;
  return util::Status();
}
|
||
|
|
||
|
// Scans the numeric token at the start of p_, converts it and stores the
// value in `*result`:
//   - tokens containing '.', 'e' or 'E' become a DOUBLE;
//   - other non-negative tokens become a UINT, negative ones an INT;
//   - integers that do not convert as uint64/int64 are retried as a DOUBLE.
// p_ is advanced past the token only on success. Returns a cancelled status
// if the token runs to the end of the available input and more input is
// expected, since the number may continue in the next chunk.
util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
  const char* data = p_.data();
  int length = p_.length();

  // Look for the first non-numeric character, or the end of the string.
  int index = 0;
  bool floating = false;
  bool negative = data[index] == '-';
  // Find the first character that cannot be part of the number. Along the way
  // detect if the number needs to be parsed as a double.
  // Note that this restricts numbers to the JSON specification, so for example
  // we do not support hex or octal notations.
  for (; index < length; ++index) {
    char c = data[index];
    // Explicit ASCII range test rather than isdigit(): isdigit() is undefined
    // for negative char values (bytes >= 0x80 when char is signed) and may be
    // locale dependent. This matches the digit test in GetNextTokenType().
    if ('0' <= c && c <= '9') continue;
    if (c == '.' || c == 'e' || c == 'E') {
      floating = true;
      continue;
    }
    // 'x' is kept in the token so that input such as 0x1F reaches the
    // conversion and octal/hex checks below as a whole.
    if (c == '+' || c == '-' || c == 'x') continue;
    // Not a valid number character, break out.
    break;
  }

  // If the entire input is a valid number, and we may have more content in the
  // future, we abort for now and resume when we know more.
  if (index == length && !finishing_) {
    return util::CancelledError("");
  }

  // Create a string containing just the number, so we can use safe_strtoX
  std::string number = std::string(p_.substr(0, index));

  // Floating point number, parse as a double.
  if (floating) {
    util::Status status = ParseDoubleHelper(number, result);
    if (status.ok()) {
      p_.remove_prefix(index);
    }
    return status;
  }

  // Positive non-floating point number, parse as a uint64.
  if (!negative) {
    // Octal/Hex numbers are not valid JSON values.
    if (number.length() >= 2 && number[0] == '0') {
      return ReportFailure(
          "Octal/hex numbers are not valid JSON values.",
          ParseErrorType::OCTAL_OR_HEX_ARE_NOT_VALID_JSON_VALUES);
    }
    if (safe_strtou64(number, &result->uint_val)) {
      result->type = NumberResult::UINT;
      p_.remove_prefix(index);
      return util::Status();
    } else {
      // If the value is too large, parse it as double.
      util::Status status = ParseDoubleHelper(number, result);
      if (status.ok()) {
        p_.remove_prefix(index);
      }
      return status;
    }
  }

  // Octal/Hex numbers are not valid JSON values. For negative numbers the
  // leading zero sits after the '-' sign.
  if (number.length() >= 3 && number[1] == '0') {
    return ReportFailure(
        "Octal/hex numbers are not valid JSON values.",
        ParseErrorType::OCTAL_OR_HEX_ARE_NOT_VALID_JSON_VALUES);
  }
  // Negative non-floating point number, parse as an int64.
  if (safe_strto64(number, &result->int_val)) {
    result->type = NumberResult::INT;
    p_.remove_prefix(index);
    return util::Status();
  } else {
    // If the value is too large, parse it as double.
    util::Status status = ParseDoubleHelper(number, result);
    if (status.ok()) {
      p_.remove_prefix(index);
    }
    return status;
  }
}
|
||
|
|
||
|
// Handles '{': starts an object under the pending key, accounts for the extra
// nesting level and schedules parsing of the first entry.
util::Status JsonStreamParser::HandleBeginObject() {
  GOOGLE_DCHECK_EQ('{', *p_.data());
  Advance();
  ow_->StartObject(key_);

  // Fails once nesting exceeds max_recursion_depth_.
  util::Status depth_status = IncrementRecursionDepth(key_);
  if (depth_status.ok()) {
    key_ = StringPiece();
    stack_.push(ENTRY);
    return util::Status();
  }
  return depth_status;
}
|
||
|
|
||
|
// Handles the token that follows a key:value pair inside an object: '}' ends
// the object, ',' introduces another entry, anything else is an error.
util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
  switch (type) {
    case UNKNOWN:
      return ReportUnknown("Expected , or } after key:value pair.",
                           ParseErrorType::EXPECTED_COMMA_OR_BRACES);

    case END_OBJECT:
      // Object is complete: step over the '}' and render the EndObject.
      Advance();
      ow_->EndObject();
      --recursion_depth_;
      return util::Status();

    case VALUE_SEPARATOR:
      // Found a comma: step over it and get ready for an entry.
      Advance();
      stack_.push(ENTRY);
      return util::Status();

    default:
      // Illegal token after key:value pair.
      return ReportFailure("Expected , or } after key:value pair.",
                           ParseErrorType::EXPECTED_COMMA_OR_BRACES);
  }
}
|
||
|
|
||
|
// Parses the start of an object entry: either the closing '}' or a key. The
// key may be a quoted string or a bare key. On success the key is left in
// key_ and the states for ':' and for what follows the value are scheduled.
util::Status JsonStreamParser::ParseEntry(TokenType type) {
  util::Status result;
  switch (type) {
    case UNKNOWN:
      return ReportUnknown("Expected an object key or }.",
                           ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);

    case END_OBJECT:
      // Close the object and return. This allows for trailing commas.
      ow_->EndObject();
      Advance();
      --recursion_depth_;
      return util::Status();

    case BEGIN_STRING:
      // Key is a string (standard JSON), parse it and store the string.
      result = ParseStringHelper();
      if (result.ok()) {
        key_storage_.clear();
        if (parsed_storage_.empty()) {
          // The string was read straight from the input buffer.
          key_ = parsed_;
        } else {
          // Take ownership of the unescaped text.
          parsed_storage_.swap(key_storage_);
          key_ = StringPiece(key_storage_);
        }
        parsed_ = StringPiece();
      }
      break;

    case BEGIN_KEY:
      // Key is a bare key (back compat), create a StringPiece pointing to it.
      result = ParseKey();
      break;

    case BEGIN_NULL:
    case BEGIN_TRUE:
    case BEGIN_FALSE:
      // Key may be a bare key that begins with a reserved word; the reserved
      // words themselves are not accepted as keys.
      result = ParseKey();
      if (result.ok()) {
        const bool is_reserved = key_ == kKeywordNull ||
                                 key_ == kKeywordTrue ||
                                 key_ == kKeywordFalse;
        if (is_reserved) {
          result = ReportFailure("Expected an object key or }.",
                                 ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);
        }
      }
      break;

    default:
      // Unknown key type, report an error.
      result = ReportFailure("Expected an object key or }.",
                             ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);
      break;
  }

  if (!result.ok()) {
    return result;
  }
  // On success we next expect an entry mid ':' then an object mid ',' or '}'.
  stack_.push(OBJ_MID);
  stack_.push(ENTRY_MID);
  return result;
}
|
||
|
|
||
|
// Handles the token between a key and its value, which must be ':'. On
// success the value is scheduled to be parsed next.
util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
  switch (type) {
    case UNKNOWN:
      return ReportUnknown("Expected : between key:value pair.",
                           ParseErrorType::EXPECTED_COLON);

    case ENTRY_SEPARATOR:
      Advance();
      stack_.push(VALUE);
      return util::Status();

    default:
      return ReportFailure("Expected : between key:value pair.",
                           ParseErrorType::EXPECTED_COLON);
  }
}
|
||
|
|
||
|
// Handles '[': starts a list under the pending key and schedules parsing of
// the first array value.
util::Status JsonStreamParser::HandleBeginArray() {
  GOOGLE_DCHECK_EQ('[', *p_.data());
  Advance();

  ow_->StartList(key_);
  key_ = StringPiece();

  stack_.push(ARRAY_VALUE);
  return util::Status();
}
|
||
|
|
||
|
// Parses the next element of an array, or the ']' that closes it.
util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
  switch (type) {
    case UNKNOWN:
      return ReportUnknown("Expected a value or ] within an array.",
                           ParseErrorType::EXPECTED_VALUE_OR_BRACKET);

    case END_ARRAY:
      ow_->EndList();
      Advance();
      return util::Status();

    default:
      break;
  }

  // The ParseValue call may push something onto the stack so we need to make
  // sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of
  // empty-null array value is relying on this ARRAY_MID token.
  stack_.push(ARRAY_MID);
  util::Status result = ParseValue(type);
  if (util::IsCancelled(result)) {
    // We will be retried, and the retry pushes ARRAY_MID again, so take this
    // one back off.
    stack_.pop();
  }
  return result;
}
|
||
|
|
||
|
// Handles the token that follows an array element: ']' ends the list, ','
// introduces another value, anything else is an error.
util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
  switch (type) {
    case UNKNOWN:
      return ReportUnknown("Expected , or ] after array value.",
                           ParseErrorType::EXPECTED_COMMA_OR_BRACKET);

    case END_ARRAY:
      ow_->EndList();
      Advance();
      return util::Status();

    case VALUE_SEPARATOR:
      // Found a comma: step over it and expect an array value next.
      Advance();
      stack_.push(ARRAY_VALUE);
      return util::Status();

    default:
      // Illegal token after array value.
      return ReportFailure("Expected , or ] after array value.",
                           ParseErrorType::EXPECTED_COMMA_OR_BRACKET);
  }
}
|
||
|
|
||
|
// Renders the literal `true` under the pending key and consumes the keyword.
util::Status JsonStreamParser::ParseTrue() {
  const bool value = true;
  ow_->RenderBool(key_, value);
  key_ = StringPiece();
  // The caller has already matched the keyword at p_; skip over it.
  p_.remove_prefix(kKeywordTrue.length());
  return util::Status();
}
|
||
|
|
||
|
// Renders the literal `false` under the pending key and consumes the keyword.
util::Status JsonStreamParser::ParseFalse() {
  const bool value = false;
  ow_->RenderBool(key_, value);
  key_ = StringPiece();
  // The caller has already matched the keyword at p_; skip over it.
  p_.remove_prefix(kKeywordFalse.length());
  return util::Status();
}
|
||
|
|
||
|
// Renders a null under the pending key and consumes the `null` keyword.
util::Status JsonStreamParser::ParseNull() {
  ow_->RenderNull(key_);
  key_ = StringPiece();

  // The caller has already matched the keyword at p_; skip over it.
  p_.remove_prefix(kKeywordNull.length());
  return util::Status();
}
|
||
|
|
||
|
// Renders a null under the pending key for a value that is absent from the
// input. Unlike ParseNull() it consumes no input.
util::Status JsonStreamParser::ParseEmptyNull() {
  ow_->RenderNull(key_);
  key_ = StringPiece();

  return util::Status();
}
|
||
|
|
||
|
// Returns true if a missing value may be treated as null at this point:
// either the next expected state is OBJ_MID, or it is ARRAY_MID and the
// current token is a value separator.
bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
  if (stack_.empty()) {
    return false;
  }
  const ParseType next_state = stack_.top();
  if (next_state == OBJ_MID) {
    return true;
  }
  return next_state == ARRAY_MID && type == VALUE_SEPARATOR;
}
|
||
|
|
||
|
// Builds an InvalidArgument status for a parse error at the current position.
// The status text is `message`, followed by up to kContextLength bytes of
// input on either side of the position, followed by a line with a '^' placed
// under the position.
//
// `parse_code` is unused here.
util::Status JsonStreamParser::ReportFailure(StringPiece message,
                                             ParseErrorType parse_code) {
  (void)parse_code;  // Parameter is used in Google-internal code.
  static const int kContextLength = 20;
  const char* p_start = p_.data();
  const char* json_start = json_.data();

  // Clamp the context window using offsets into json_. Forming
  // `p_start - kContextLength` or `p_start + kContextLength` directly could
  // create a pointer outside the buffer, which is undefined even if the
  // result is clamped afterwards.
  const auto offset = p_start - json_start;
  const auto total = static_cast<decltype(offset)>(json_.size());
  const auto begin_offset =
      offset > kContextLength ? offset - kContextLength : 0;
  const auto end_offset =
      total - offset > kContextLength ? offset + kContextLength : total;

  StringPiece segment(json_start + begin_offset, end_offset - begin_offset);
  // Pad with spaces so the caret lines up with the failing position.
  std::string location(offset - begin_offset, ' ');
  location.push_back('^');
  auto status = util::InvalidArgumentError(
      StrCat(message, "\n", segment, "\n", location));
  return status;
}
|
||
|
|
||
|
// Handles an UNKNOWN token. While more input may still arrive this only
// cancels so the parse is retried later; in finishing mode it reports
// `message`, prefixed with a note when the input simply ran out.
util::Status JsonStreamParser::ReportUnknown(StringPiece message,
                                             ParseErrorType parse_code) {
  // If we aren't finishing the parse, cancel parsing and try later.
  if (!finishing_) {
    return util::CancelledError("");
  }
  if (!p_.empty()) {
    return ReportFailure(message, parse_code);
  }
  return ReportFailure(StrCat("Unexpected end of string. ", message),
                       parse_code);
}
|
||
|
|
||
|
// Increments recursion_depth_ and fails if it now exceeds
// max_recursion_depth_. `key` is only used in the error message.
util::Status JsonStreamParser::IncrementRecursionDepth(
    StringPiece key) const {
  ++recursion_depth_;
  if (recursion_depth_ <= max_recursion_depth_) {
    return util::Status();
  }
  return util::InvalidArgumentError(StrCat(
      "Message too deep. Max recursion depth reached for key '", key, "'"));
}
|
||
|
|
||
|
void JsonStreamParser::SkipWhitespace() {
|
||
|
while (!p_.empty() && ascii_isspace(*p_.data())) {
|
||
|
Advance();
|
||
|
}
|
||
|
if (!p_.empty() && !ascii_isspace(*p_.data())) {
|
||
|
seen_non_whitespace_ = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void JsonStreamParser::Advance() {
|
||
|
// Advance by moving one UTF8 character while making sure we don't go beyond
|
||
|
// the length of StringPiece.
|
||
|
p_.remove_prefix(std::min<int>(
|
||
|
p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
|
||
|
}
|
||
|
|
||
|
// Parses a bare (unquoted) object key at p_ into key_, using the permissive
// key alphabet when allow_permissive_key_naming_ is set.
util::Status JsonStreamParser::ParseKey() {
  // Kept so the cursor can be rewound if we turn out to need more input.
  StringPiece original = p_;

  const bool consumed = allow_permissive_key_naming_
                            ? ConsumeKeyPermissive(&p_, &key_)
                            : ConsumeKey(&p_, &key_);
  if (!consumed) {
    return ReportFailure("Invalid key or variable name.",
                         ParseErrorType::INVALID_KEY_OR_VARIABLE_NAME);
  }

  // If we consumed everything but expect more data, reset p_ and cancel since
  // we can't know if the key was complete or not.
  if (!finishing_ && p_.empty()) {
    p_ = original;
    return util::CancelledError("");
  }
  // key_ points into the input, so the key storage is not in use; clear it.
  key_storage_.clear();
  return util::Status();
}
|
||
|
|
||
|
// Skips whitespace and classifies the token at p_ by its first characters.
// Nothing beyond the whitespace is consumed. Returns UNKNOWN when the input
// is exhausted or the text matches no known token.
JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
  SkipWhitespace();

  int size = p_.size();
  if (size == 0) {
    // If we ran out of data, report unknown and we'll place the previous parse
    // type onto the stack and try again when we have more data.
    return UNKNOWN;
  }
  // TODO(sven): Split this method based on context since different contexts
  // support different tokens. Would slightly speed up processing?
  const char* data = p_.data();
  StringPiece data_view = StringPiece(data, size);
  const char first = *data;

  // Tokens that are identified by their first character alone.
  switch (first) {
    case '\"':
    case '\'':
      return BEGIN_STRING;
    case '-':
      return BEGIN_NUMBER;
    case '{':
      return BEGIN_OBJECT;
    case '}':
      return END_OBJECT;
    case '[':
      return BEGIN_ARRAY;
    case ']':
      return END_ARRAY;
    case ':':
      return ENTRY_SEPARATOR;
    case ',':
      return VALUE_SEPARATOR;
    default:
      break;
  }
  if ('0' <= first && first <= '9') {
    return BEGIN_NUMBER;
  }

  // Keywords are tested before bare keys, as they also start with a letter.
  if (size >= kKeywordTrue.length() &&
      HasPrefixString(data_view, kKeywordTrue)) {
    return BEGIN_TRUE;
  }
  if (size >= kKeywordFalse.length() &&
      HasPrefixString(data_view, kKeywordFalse)) {
    return BEGIN_FALSE;
  }
  if (size >= kKeywordNull.length() &&
      HasPrefixString(data_view, kKeywordNull)) {
    return BEGIN_NULL;
  }
  if (MatchKey(p_)) {
    return BEGIN_KEY;
  }

  // We don't know that we necessarily have an invalid token here, just that we
  // can't parse what we have so far. So we don't report an error and just
  // return UNKNOWN so we can try again later when we have more data, or if we
  // finish and we have leftovers.
  return UNKNOWN;
}
|
||
|
|
||
|
} // namespace converter
|
||
|
} // namespace util
|
||
|
} // namespace protobuf
|
||
|
} // namespace google
|