// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include #include #include #include namespace google { namespace protobuf { namespace compiler { namespace cpp { namespace { using google::protobuf::internal::WireFormat; using google::protobuf::internal::WireFormatLite; std::vector GetOrderedFields( const Descriptor* descriptor, const Options& options) { std::vector ordered_fields; for (auto field : FieldRange(descriptor)) { if (!IsFieldStripped(field, options)) { ordered_fields.push_back(field); } } std::sort(ordered_fields.begin(), ordered_fields.end(), [](const FieldDescriptor* a, const FieldDescriptor* b) { return a->number() < b->number(); }); return ordered_fields; } bool HasInternalAccessors(const FieldOptions::CType ctype) { return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD; } int TagSize(uint32_t field_number) { if (field_number < 16) return 1; GOOGLE_CHECK_LT(field_number, (1 << 14)) << "coded tag for " << field_number << " too big for uint16_t"; return 2; } std::string FieldParseFunctionName( const TailCallTableInfo::FieldEntryInfo& entry, const Options& options); bool IsFieldEligibleForFastParsing( const TailCallTableInfo::FieldEntryInfo& entry, const Options& options, MessageSCCAnalyzer* scc_analyzer) { const auto* field = entry.field; // Map, oneof, weak, and lazy fields are not handled on the fast path. if (field->is_map() || field->real_containing_oneof() || field->options().weak() || IsImplicitWeakField(field, options, scc_analyzer) || IsLazy(field, options, scc_analyzer)) { return false; } // We will check for a valid auxiliary index range later. However, we might // want to change the value we check for inlined string fields. int aux_idx = entry.aux_idx; switch (field->type()) { case FieldDescriptor::TYPE_ENUM: // If enum values are not validated at parse time, then this field can be // handled on the fast path like an int32. if (HasPreservingUnknownEnumSemantics(field)) { break; } if (field->is_repeated() && field->is_packed()) { return false; } break; // Some bytes fields can be handled on fast path. case FieldDescriptor::TYPE_STRING: case FieldDescriptor::TYPE_BYTES: if (field->options().ctype() != FieldOptions::STRING) { return false; } if (IsStringInlined(field, options)) { GOOGLE_CHECK(!field->is_repeated()); // For inlined strings, the donation state index is stored in the // `aux_idx` field of the fast parsing info. We need to check the range // of that value instead of the auxiliary index. aux_idx = entry.inlined_string_idx; } break; default: break; } if (HasHasbit(field)) { // The tailcall parser can only update the first 32 hasbits. Fields with // has-bits beyond the first 32 are handled by mini parsing/fallback. GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString(); if (entry.hasbit_idx >= 32) return false; } // If the field needs auxiliary data, then the aux index is needed. This // must fit in a uint8_t. if (aux_idx > std::numeric_limits::max()) { return false; } // The largest tag that can be read by the tailcall parser is two bytes // when varint-coded. This allows 14 bits for the numeric tag value: // byte 0 byte 1 // 1nnnnttt 0nnnnnnn // ^^^^^^^ ^^^^^^^ if (field->number() >= 1 << 11) return false; return true; } std::vector SplitFastFieldsForSize( const std::vector& field_entries, int table_size_log2, const Options& options, MessageSCCAnalyzer* scc_analyzer) { std::vector result(1 << table_size_log2); const uint32_t idx_mask = result.size() - 1; for (const auto& entry : field_entries) { if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) { continue; } const auto* field = entry.field; uint32_t tag = WireFormat::MakeTag(field); // Construct the varint-coded tag. If it is more than 7 bits, we need to // shift the high bits and add a continue bit. if (uint32_t hibits = tag & 0xFFFFFF80) { tag = tag + hibits + 128; // tag = lobits + 2*hibits + 128 } // The field index is determined by the low bits of the field number, where // the table size determines the width of the mask. The largest table // supported is 32 entries. The parse loop uses these bits directly, so that // the dispatch does not require arithmetic: // byte 0 byte 1 // tag: 1nnnnttt 0nnnnnnn // ^^^^^ // idx (table_size_log2=5) // This means that any field number that does not fit in the lower 4 bits // will always have the top bit of its table index asserted. const uint32_t fast_idx = (tag >> 3) & idx_mask; TailCallTableInfo::FastFieldInfo& info = result[fast_idx]; if (info.field != nullptr) { // This field entry is already filled. continue; } // Fill in this field's entry: GOOGLE_CHECK(info.func_name.empty()) << info.func_name; info.func_name = FieldParseFunctionName(entry, options); info.field = field; info.coded_tag = tag; // If this field does not have presence, then it can set an out-of-bounds // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32). info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63; if (IsStringInlined(field, options)) { GOOGLE_CHECK(!field->is_repeated()); info.aux_idx = static_cast(entry.inlined_string_idx); } else { info.aux_idx = static_cast(entry.aux_idx); } } return result; } // Filter out fields that will be handled by mini parsing. std::vector FilterMiniParsedFields( const std::vector& fields, const Options& options, MessageSCCAnalyzer* scc_analyzer) { std::vector generated_fallback_fields; for (const auto* field : fields) { bool handled = false; switch (field->type()) { case FieldDescriptor::TYPE_DOUBLE: case FieldDescriptor::TYPE_FLOAT: case FieldDescriptor::TYPE_FIXED32: case FieldDescriptor::TYPE_SFIXED32: case FieldDescriptor::TYPE_FIXED64: case FieldDescriptor::TYPE_SFIXED64: case FieldDescriptor::TYPE_BOOL: case FieldDescriptor::TYPE_UINT32: case FieldDescriptor::TYPE_SINT32: case FieldDescriptor::TYPE_INT32: case FieldDescriptor::TYPE_UINT64: case FieldDescriptor::TYPE_SINT64: case FieldDescriptor::TYPE_INT64: // These are handled by MiniParse, so we don't need any generated // fallback code. handled = true; break; case FieldDescriptor::TYPE_ENUM: if (field->is_repeated() && !HasPreservingUnknownEnumSemantics(field)) { // TODO(b/206890171): handle packed repeated closed enums // Non-packed repeated can be handled using tables, but we still // need to generate fallback code for all repeated enums in order to // handle packed encoding. This is because of the lite/full split // when handling invalid enum values in a packed field. handled = false; } else { handled = true; } break; case FieldDescriptor::TYPE_BYTES: case FieldDescriptor::TYPE_STRING: if (IsStringInlined(field, options)) { // TODO(b/198211897): support InilnedStringField. handled = false; } else { handled = true; } break; case FieldDescriptor::TYPE_MESSAGE: case FieldDescriptor::TYPE_GROUP: // TODO(b/210762816): support remaining field types. if (field->is_map() || IsWeak(field, options) || IsImplicitWeakField(field, options, scc_analyzer) || IsLazy(field, options, scc_analyzer)) { handled = false; } else { handled = true; } break; default: handled = false; break; } if (!handled) generated_fallback_fields.push_back(field); } return generated_fallback_fields; } } // namespace TailCallTableInfo::TailCallTableInfo( const Descriptor* descriptor, const Options& options, const std::vector& ordered_fields, const std::vector& has_bit_indices, const std::vector& inlined_string_indices, MessageSCCAnalyzer* scc_analyzer) { int oneof_count = descriptor->real_oneof_decl_count(); // If this message has any oneof fields, store the case offset in the first // auxiliary entry. if (oneof_count > 0) { GOOGLE_LOG_IF(DFATAL, ordered_fields.empty()) << "Invalid message: " << descriptor->full_name() << " has " << oneof_count << " oneof declarations, but no fields"; aux_entries.push_back(StrCat("_fl::Offset{offsetof(", ClassName(descriptor), ", _impl_._oneof_case_)}")); } // If this message has any inlined string fields, store the donation state // offset in the second auxiliary entry. if (!inlined_string_indices.empty()) { aux_entries.resize(2); // pad if necessary aux_entries[1] = StrCat("_fl::Offset{offsetof(", ClassName(descriptor), ", _impl_._inlined_string_donated_)}"); } // Fill in mini table entries. for (const FieldDescriptor* field : ordered_fields) { field_entries.push_back( {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)}); auto& entry = field_entries.back(); if (field->type() == FieldDescriptor::TYPE_MESSAGE || field->type() == FieldDescriptor::TYPE_GROUP) { // Message-typed fields have a FieldAux with the default instance pointer. if (field->is_map()) { // TODO(b/205904770): generate aux entries for maps } else if (IsWeak(field, options)) { // Don't generate anything for weak fields. They are handled by the // generated fallback. } else if (IsImplicitWeakField(field, options, scc_analyzer)) { // Implicit weak fields don't need to store a default instance pointer. } else if (IsLazy(field, options, scc_analyzer)) { // Lazy fields are handled by the generated fallback function. } else { field_entries.back().aux_idx = aux_entries.size(); const Descriptor* field_type = field->message_type(); aux_entries.push_back(StrCat( "reinterpret_cast(&", QualifiedDefaultInstanceName(field_type, options), ")")); } } else if (field->type() == FieldDescriptor::TYPE_ENUM && !HasPreservingUnknownEnumSemantics(field)) { // Enum fields which preserve unknown values (proto3 behavior) are // effectively int32 fields with respect to parsing -- i.e., the value // does not need to be validated at parse time. // // Enum fields which do not preserve unknown values (proto2 behavior) use // a FieldAux to store validation information. If the enum values are // sequential (and within a range we can represent), then the FieldAux // entry represents the range using the minimum value (which must fit in // an int16_t) and count (a uint16_t). Otherwise, the entry holds a // pointer to the generated Name_IsValid function. entry.aux_idx = aux_entries.size(); const EnumDescriptor* enum_type = field->enum_type(); GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString(); // Check if the enum values are a single, contiguous range. std::vector enum_values; for (int i = 0, N = enum_type->value_count(); i < N; ++i) { enum_values.push_back(enum_type->value(i)->number()); } auto values_begin = enum_values.begin(); auto values_end = enum_values.end(); std::sort(values_begin, values_end); enum_values.erase(std::unique(values_begin, values_end), values_end); if (enum_values.back() - enum_values[0] == enum_values.size() - 1 && enum_values[0] >= std::numeric_limits::min() && enum_values[0] <= std::numeric_limits::max() && enum_values.size() <= std::numeric_limits::max()) { entry.is_enum_range = true; aux_entries.push_back( StrCat(enum_values[0], ", ", enum_values.size())); } else { entry.is_enum_range = false; aux_entries.push_back( StrCat(QualifiedClassName(enum_type, options), "_IsValid")); } } else if ((field->type() == FieldDescriptor::TYPE_STRING || field->type() == FieldDescriptor::TYPE_BYTES) && IsStringInlined(field, options)) { GOOGLE_CHECK(!field->is_repeated()); // Inlined strings have an extra marker to represent their donation state. int idx = inlined_string_indices[field->index()]; // For mini parsing, the donation state index is stored as an `offset` // auxiliary entry. entry.aux_idx = aux_entries.size(); aux_entries.push_back(StrCat("_fl::Offset{", idx, "}")); // For fast table parsing, the donation state index is stored instead of // the aux_idx (this will limit the range to 8 bits). entry.inlined_string_idx = idx; } } // Choose the smallest fast table that covers the maximum number of fields. table_size_log2 = 0; // fallback value int num_fast_fields = -1; for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) { size_t try_size = 1 << try_size_log2; auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2, options, scc_analyzer); GOOGLE_CHECK_EQ(split_fields.size(), try_size); int try_num_fast_fields = 0; for (const auto& info : split_fields) { if (info.field != nullptr) ++try_num_fast_fields; } // Use this size if (and only if) it covers more fields. if (try_num_fast_fields > num_fast_fields) { fast_path_fields = std::move(split_fields); table_size_log2 = try_size_log2; num_fast_fields = try_num_fast_fields; } // The largest table we allow has the same number of entries as the message // has fields, rounded up to the next power of 2 (e.g., a message with 5 // fields can have a fast table of size 8). A larger table *might* cover // more fields in certain cases, but a larger table in that case would have // mostly empty entries; so, we cap the size to avoid pathologically sparse // tables. if (try_size > ordered_fields.size()) { break; } } // Filter out fields that are handled by MiniParse. We don't need to generate // a fallback for these, which saves code size. fallback_fields = FilterMiniParsedFields(ordered_fields, options, scc_analyzer); // If there are no fallback fields, and at most one extension range, the // parser can use a generic fallback function. Otherwise, a message-specific // fallback routine is needed. use_generated_fallback = !fallback_fields.empty() || descriptor->extension_range_count() > 1; } ParseFunctionGenerator::ParseFunctionGenerator( const Descriptor* descriptor, int max_has_bit_index, const std::vector& has_bit_indices, const std::vector& inlined_string_indices, const Options& options, MessageSCCAnalyzer* scc_analyzer, const std::map& vars) : descriptor_(descriptor), scc_analyzer_(scc_analyzer), options_(options), variables_(vars), inlined_string_indices_(inlined_string_indices), ordered_fields_(GetOrderedFields(descriptor_, options_)), num_hasbits_(max_has_bit_index) { if (should_generate_tctable()) { tc_table_info_.reset(new TailCallTableInfo( descriptor_, options_, ordered_fields_, has_bit_indices, inlined_string_indices, scc_analyzer)); } SetCommonVars(options_, &variables_); SetCommonMessageDataVariables(descriptor_, &variables_); SetUnknownFieldsVariable(descriptor_, options_, &variables_); variables_["classname"] = ClassName(descriptor, false); } void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) { Formatter format(printer, variables_); if (should_generate_tctable()) { format.Outdent(); if (should_generate_guarded_tctable()) { format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); } format( " private:\n" " static const char* Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL);\n" " public:\n"); if (should_generate_guarded_tctable()) { format("#endif\n"); } format.Indent(); } format( "const char* _InternalParse(const char* ptr, " "::$proto_ns$::internal::ParseContext* ctx) final;\n"); } void ParseFunctionGenerator::GenerateMethodImpls(io::Printer* printer) { Formatter format(printer, variables_); bool need_parse_function = true; if (descriptor_->options().message_set_wire_format()) { // Special-case MessageSet. need_parse_function = false; format( "const char* $classname$::_InternalParse(const char* ptr,\n" " ::_pbi::ParseContext* ctx) {\n" "$annotate_deserialize$"); if (!options_.unverified_lazy_message_sets && ShouldVerify(descriptor_, options_, scc_analyzer_)) { format( " ctx->set_lazy_eager_verify_func(&$classname$::InternalVerify);\n"); } format( " return $extensions$.ParseMessageSet(ptr, \n" " internal_default_instance(), &_internal_metadata_, ctx);\n" "}\n"); } if (!should_generate_tctable()) { if (need_parse_function) { GenerateLoopingParseFunction(format); } return; } if (should_generate_guarded_tctable()) { format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n"); } if (need_parse_function) { GenerateTailcallParseFunction(format); } if (tc_table_info_->use_generated_fallback) { GenerateTailcallFallbackFunction(format); } if (should_generate_guarded_tctable()) { if (need_parse_function) { format("\n#else // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n"); GenerateLoopingParseFunction(format); } format("\n#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); } } bool ParseFunctionGenerator::should_generate_tctable() const { if (options_.tctable_mode == Options::kTCTableNever) { return false; } return true; } void ParseFunctionGenerator::GenerateTailcallParseFunction(Formatter& format) { GOOGLE_CHECK(should_generate_tctable()); // Generate an `_InternalParse` that starts the tail-calling loop. format( "const char* $classname$::_InternalParse(\n" " const char* ptr, ::_pbi::ParseContext* ctx) {\n" "$annotate_deserialize$" " ptr = ::_pbi::TcParser::ParseLoop(this, ptr, ctx, " "&_table_.header);\n"); format( " return ptr;\n" "}\n\n"); } void ParseFunctionGenerator::GenerateTailcallFallbackFunction( Formatter& format) { GOOGLE_CHECK(should_generate_tctable()); format( "const char* $classname$::Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL) {\n" "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) return nullptr\n"); format.Indent(); format("auto* typed_msg = static_cast<$classname$*>(msg);\n"); if (num_hasbits_ > 0) { // Sync hasbits format("typed_msg->_impl_._has_bits_[0] = hasbits;\n"); } format("uint32_t tag = data.tag();\n"); format.Set("msg", "typed_msg->"); format.Set("this", "typed_msg"); format.Set("has_bits", "typed_msg->_impl_._has_bits_"); format.Set("next_tag", "goto next_tag"); GenerateParseIterationBody(format, descriptor_, tc_table_info_->fallback_fields); format.Outdent(); format( "next_tag:\n" "message_done:\n" " return ptr;\n" "#undef CHK_\n" "}\n"); } struct SkipEntry16 { uint16_t skipmap; uint16_t field_entry_offset; }; struct SkipEntryBlock { uint32_t first_fnum; std::vector entries; }; struct NumToEntryTable { uint32_t skipmap32; // for fields #1 - #32 std::vector blocks; // Compute the number of uint16_t required to represent this table. int size16() const { int size = 2; // for the termination field# for (const auto& block : blocks) { // 2 for the field#, 1 for a count of skip entries, 2 for each entry. size += 3 + block.entries.size() * 2; } return size; } }; static NumToEntryTable MakeNumToEntryTable( const std::vector& field_descriptors); void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) { if (!should_generate_tctable()) { return; } Formatter format(printer, variables_); if (should_generate_guarded_tctable()) { format.Outdent(); format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); format.Indent(); } auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_); format( "static const ::$proto_ns$::internal::" "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n", tc_table_info_->table_size_log2, ordered_fields_.size(), tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), field_num_to_entry_table.size16()); if (should_generate_guarded_tctable()) { format.Outdent(); format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); format.Indent(); } } void ParseFunctionGenerator::GenerateDataDefinitions(io::Printer* printer) { if (!should_generate_tctable()) { return; } Formatter format(printer, variables_); if (should_generate_guarded_tctable()) { format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); } GenerateTailCallTable(format); if (should_generate_guarded_tctable()) { format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); } } void ParseFunctionGenerator::GenerateLoopingParseFunction(Formatter& format) { format( "const char* $classname$::_InternalParse(const char* ptr, " "::_pbi::ParseContext* ctx) {\n" "$annotate_deserialize$" "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n"); format.Indent(); format.Set("msg", ""); format.Set("this", "this"); int hasbits_size = 0; if (num_hasbits_ > 0) { hasbits_size = (num_hasbits_ + 31) / 32; } // For now only optimize small hasbits. if (hasbits_size != 1) hasbits_size = 0; if (hasbits_size) { format("_Internal::HasBits has_bits{};\n"); format.Set("has_bits", "has_bits"); } else { format.Set("has_bits", "_impl_._has_bits_"); } format.Set("next_tag", "continue"); format("while (!ctx->Done(&ptr)) {\n"); format.Indent(); format( "uint32_t tag;\n" "ptr = ::_pbi::ReadTag(ptr, &tag);\n"); GenerateParseIterationBody(format, descriptor_, ordered_fields_); format.Outdent(); format("} // while\n"); format.Outdent(); format("message_done:\n"); if (hasbits_size) format(" _impl_._has_bits_.Or(has_bits);\n"); format( " return ptr;\n" "failure:\n" " ptr = nullptr;\n" " goto message_done;\n" "#undef CHK_\n" "}\n"); } static NumToEntryTable MakeNumToEntryTable( const std::vector& field_descriptors) { NumToEntryTable num_to_entry_table; num_to_entry_table.skipmap32 = static_cast(-1); // skip_entry_block is the current block of SkipEntries that we're // appending to. cur_block_first_fnum is the number of the first // field represented by the block. uint16_t field_entry_index = 0; uint16_t N = field_descriptors.size(); // First, handle field numbers 1-32, which affect only the initial // skipmap32 and don't generate additional skip-entry blocks. for (; field_entry_index != N; ++field_entry_index) { auto* field_descriptor = field_descriptors[field_entry_index]; if (field_descriptor->number() > 32) break; auto skipmap32_index = field_descriptor->number() - 1; num_to_entry_table.skipmap32 -= 1 << skipmap32_index; } // If all the field numbers were less than or equal to 32, we will have // no further entries to process, and we are already done. if (field_entry_index == N) return num_to_entry_table; SkipEntryBlock* block = nullptr; bool start_new_block = true; // To determine sparseness, track the field number corresponding to // the start of the most recent skip entry. uint32_t last_skip_entry_start = 0; for (; field_entry_index != N; ++field_entry_index) { auto* field_descriptor = field_descriptors[field_entry_index]; uint32_t fnum = field_descriptor->number(); GOOGLE_CHECK_GT(fnum, last_skip_entry_start); if (start_new_block == false) { // If the next field number is within 15 of the last_skip_entry_start, we // continue writing just to that entry. If it's between 16 and 31 more, // then we just extend the current block by one. If it's more than 31 // more, we have to add empty skip entries in order to continue using the // existing block. Obviously it's just 32 more, it doesn't make sense to // start a whole new block, since new blocks mean having to write out // their starting field number, which is 32 bits, as well as the size of // the additional block, which is 16... while an empty SkipEntry16 only // costs 32 bits. So if it was 48 more, it's a slight space win; we save // 16 bits, but probably at the cost of slower run time. We're choosing // 96 for now. if (fnum - last_skip_entry_start > 96) start_new_block = true; } if (start_new_block) { num_to_entry_table.blocks.push_back(SkipEntryBlock{fnum}); block = &num_to_entry_table.blocks.back(); start_new_block = false; } auto skip_entry_num = (fnum - block->first_fnum) / 16; auto skip_entry_index = (fnum - block->first_fnum) % 16; while (skip_entry_num >= block->entries.size()) block->entries.push_back({0xFFFF, field_entry_index}); block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index); last_skip_entry_start = fnum - skip_entry_index; } return num_to_entry_table; } void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) { GOOGLE_CHECK(should_generate_tctable()); // All entries without a fast-path parsing function need a fallback. std::string fallback; if (tc_table_info_->use_generated_fallback) { fallback = ClassName(descriptor_) + "::Tct_ParseFallback"; } else { fallback = "::_pbi::TcParser::GenericFallback"; if (GetOptimizeFor(descriptor_->file(), options_) == FileOptions::LITE_RUNTIME) { fallback += "Lite"; } } // For simplicity and speed, the table is not covering all proto // configurations. This model uses a fallback to cover all situations that // the table can't accommodate, together with unknown fields or extensions. // These are number of fields over 32, fields with 3 or more tag bytes, // maps, weak fields, lazy, more than 1 extension range. In the cases // the table is sufficient we can use a generic routine, that just handles // unknown fields and potentially an extension range. auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_); format( "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1\n" "const ::_pbi::TcParseTable<$1$, $2$, $3$, $4$, $5$> " "$classname$::_table_ = " "{\n", tc_table_info_->table_size_log2, ordered_fields_.size(), tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), field_num_to_entry_table.size16()); { auto table_scope = format.ScopedIndent(); format("{\n"); { auto header_scope = format.ScopedIndent(); if (num_hasbits_ > 0 || IsMapEntryMessage(descriptor_)) { format("PROTOBUF_FIELD_OFFSET($classname$, _impl_._has_bits_),\n"); } else { format("0, // no _has_bits_\n"); } if (descriptor_->extension_range_count() == 1) { format( "PROTOBUF_FIELD_OFFSET($classname$, $extensions$),\n" "$1$, $2$, // extension_range_{low,high}\n", descriptor_->extension_range(0)->start, descriptor_->extension_range(0)->end); } else { format("0, 0, 0, // no _extensions_\n"); } format("$1$, $2$, // max_field_number, fast_idx_mask\n", (ordered_fields_.empty() ? 0 : ordered_fields_.back()->number()), (((1 << tc_table_info_->table_size_log2) - 1) << 3)); format( "offsetof(decltype(_table_), field_lookup_table),\n" "$1$, // skipmap\n", field_num_to_entry_table.skipmap32); if (ordered_fields_.empty()) { format( "offsetof(decltype(_table_), field_names), // no field_entries\n"); } else { format("offsetof(decltype(_table_), field_entries),\n"); } format( "$1$, // num_field_entries\n" "$2$, // num_aux_entries\n", ordered_fields_.size(), tc_table_info_->aux_entries.size()); if (tc_table_info_->aux_entries.empty()) { format( "offsetof(decltype(_table_), field_names), // no aux_entries\n"); } else { format("offsetof(decltype(_table_), aux_entries),\n"); } format( "&$1$._instance,\n" "$2$, // fallback\n" "", DefaultInstanceName(descriptor_, options_), fallback); } format("}, {{\n"); { // fast_entries[] auto fast_scope = format.ScopedIndent(); GenerateFastFieldEntries(format); } format("}}, {{\n"); { // field_lookup_table[] auto field_lookup_scope = format.ScopedIndent(); int line_entries = 0; for (int i = 0, N = field_num_to_entry_table.blocks.size(); i < N; ++i) { SkipEntryBlock& entry_block = field_num_to_entry_table.blocks[i]; format("$1$, $2$, $3$,\n", entry_block.first_fnum & 65535, entry_block.first_fnum / 65536, entry_block.entries.size()); for (auto se16 : entry_block.entries) { if (line_entries == 0) { format("$1$, $2$,", se16.skipmap, se16.field_entry_offset); ++line_entries; } else if (line_entries < 5) { format(" $1$, $2$,", se16.skipmap, se16.field_entry_offset); ++line_entries; } else { format(" $1$, $2$,\n", se16.skipmap, se16.field_entry_offset); line_entries = 0; } } } if (line_entries) format("\n"); format("65535, 65535\n"); } if (ordered_fields_.empty()) { GOOGLE_LOG_IF(DFATAL, !tc_table_info_->aux_entries.empty()) << "Invalid message: " << descriptor_->full_name() << " has " << tc_table_info_->aux_entries.size() << " auxiliary field entries, but no fields"; format( "}},\n" "// no field_entries, or aux_entries\n" "{{\n"); } else { format("}}, {{\n"); { // field_entries[] auto field_scope = format.ScopedIndent(); GenerateFieldEntries(format); } if (tc_table_info_->aux_entries.empty()) { format( "}},\n" "// no aux_entries\n" "{{\n"); } else { format("}}, {{\n"); { // aux_entries[] auto aux_scope = format.ScopedIndent(); for (const std::string& aux_entry : tc_table_info_->aux_entries) { format("{$1$},\n", aux_entry); } } format("}}, {{\n"); } } // ordered_fields_.empty() { // field_names[] auto field_name_scope = format.ScopedIndent(); GenerateFieldNames(format); } format("}},\n"); } format("};\n\n"); // _table_ } void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) { for (const auto& info : tc_table_info_->fast_path_fields) { if (info.field != nullptr) { PrintFieldComment(format, info.field); } if (info.func_name.empty()) { format("{::_pbi::TcParser::MiniParse, {}},\n"); } else { bool cold = ShouldSplit(info.field, options_); format( "{$1$,\n" " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$$5$, $6$)}},\n", info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx, cold ? "::Impl_::Split" : "", cold ? FieldName(info.field) + "_" : FieldMemberName(info.field, /*cold=*/false)); } } } static void FormatFieldKind(Formatter& format, const TailCallTableInfo::FieldEntryInfo& entry, const Options& options, MessageSCCAnalyzer* scc_analyzer) { const FieldDescriptor* field = entry.field; // Spell the field kind in proto language declaration order, starting with // cardinality: format("(::_fl::kFc"); if (HasHasbit(field)) { format("Optional"); } else if (field->is_repeated()) { format("Repeated"); } else if (field->real_containing_oneof()) { format("Oneof"); } else { format("Singular"); } // The rest of the type uses convenience aliases: format(" | ::_fl::k"); if (field->is_repeated() && field->is_packed()) { format("Packed"); } switch (field->type()) { case FieldDescriptor::TYPE_DOUBLE: format("Double"); break; case FieldDescriptor::TYPE_FLOAT: format("Float"); break; case FieldDescriptor::TYPE_FIXED32: format("Fixed32"); break; case FieldDescriptor::TYPE_SFIXED32: format("SFixed32"); break; case FieldDescriptor::TYPE_FIXED64: format("Fixed64"); break; case FieldDescriptor::TYPE_SFIXED64: format("SFixed64"); break; case FieldDescriptor::TYPE_BOOL: format("Bool"); break; case FieldDescriptor::TYPE_ENUM: if (HasPreservingUnknownEnumSemantics(field)) { // No validation is required. format("OpenEnum"); } else if (entry.is_enum_range) { // Validation is done by range check (start/length in FieldAux). format("EnumRange"); } else { // Validation uses the generated _IsValid function. format("Enum"); } break; case FieldDescriptor::TYPE_UINT32: format("UInt32"); break; case FieldDescriptor::TYPE_SINT32: format("SInt32"); break; case FieldDescriptor::TYPE_INT32: format("Int32"); break; case FieldDescriptor::TYPE_UINT64: format("UInt64"); break; case FieldDescriptor::TYPE_SINT64: format("SInt64"); break; case FieldDescriptor::TYPE_INT64: format("Int64"); break; case FieldDescriptor::TYPE_BYTES: format("Bytes"); break; case FieldDescriptor::TYPE_STRING: { auto mode = GetUtf8CheckMode(field, options); switch (mode) { case Utf8CheckMode::kStrict: format("Utf8String"); break; case Utf8CheckMode::kVerify: format("RawString"); break; case Utf8CheckMode::kNone: // Treat LITE_RUNTIME strings as bytes. format("Bytes"); break; default: GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast(mode) << ") for " << field->DebugString(); } break; } case FieldDescriptor::TYPE_GROUP: format("Message | ::_fl::kRepGroup"); break; case FieldDescriptor::TYPE_MESSAGE: if (field->is_map()) { format("Map"); } else { format("Message"); if (IsLazy(field, options, scc_analyzer)) { format(" | ::_fl::kRepLazy"); } else if (IsImplicitWeakField(field, options, scc_analyzer)) { format(" | ::_fl::kRepIWeak"); } } break; } // Fill in extra information about string and bytes field representations. if (field->type() == FieldDescriptor::TYPE_BYTES || field->type() == FieldDescriptor::TYPE_STRING) { if (field->is_repeated()) { format(" | ::_fl::kRepSString"); } else { format(" | ::_fl::kRepAString"); } } format(")"); } void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) { for (const auto& entry : tc_table_info_->field_entries) { const FieldDescriptor* field = entry.field; PrintFieldComment(format, field); format("{"); if (IsWeak(field, options_)) { // Weak fields are handled by the generated fallback function. // (These are handled by legacy Google-internal logic.) format("/* weak */ 0, 0, 0, 0"); } else { const OneofDescriptor* oneof = field->real_containing_oneof(); bool cold = ShouldSplit(field, options_); format("PROTOBUF_FIELD_OFFSET($classname$$1$, $2$), $3$, $4$,\n ", cold ? "::Impl_::Split" : "", cold ? FieldName(field) + "_" : FieldMemberName(field, /*cold=*/false), (oneof ? oneof->index() : entry.hasbit_idx), entry.aux_idx); FormatFieldKind(format, entry, options_, scc_analyzer_); } format("},\n"); } } static constexpr int kMaxNameLength = 255; int ParseFunctionGenerator::CalculateFieldNamesSize() const { // The full name of the message appears first. int size = std::min(static_cast(descriptor_->full_name().size()), kMaxNameLength); int lengths_size = 1; for (const auto& entry : tc_table_info_->field_entries) { const FieldDescriptor* field = entry.field; GOOGLE_CHECK_LE(field->name().size(), kMaxNameLength); size += field->name().size(); lengths_size += 1; } // align to an 8-byte boundary lengths_size = (lengths_size + 7) & -8; return size + lengths_size + 1; } static void FormatOctal(Formatter& format, int size) { int octal_size = ((size >> 6) & 3) * 100 + // ((size >> 3) & 7) * 10 + // ((size >> 0) & 7); format("\\$1$", octal_size); } void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) { // First, we output the size of each string, as an unsigned byte. The first // string is the message name. int count = 1; format("\""); FormatOctal(format, std::min(static_cast(descriptor_->full_name().size()), 255)); for (const auto& entry : tc_table_info_->field_entries) { FormatOctal(format, entry.field->name().size()); ++count; } while (count & 7) { // align to an 8-byte boundary format("\\0"); ++count; } format("\"\n"); // The message name is stored at the beginning of the string std::string message_name = descriptor_->full_name(); if (message_name.size() > kMaxNameLength) { static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2; message_name = StrCat( message_name.substr(0, kNameHalfLength), "...", message_name.substr(message_name.size() - kNameHalfLength)); } format("\"$1$\"\n", message_name); // Then we output the actual field names for (const auto& entry : tc_table_info_->field_entries) { const FieldDescriptor* field = entry.field; format("\"$1$\"\n", field->name()); } } void ParseFunctionGenerator::GenerateArenaString(Formatter& format, const FieldDescriptor* field) { if (HasHasbit(field)) { format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field)); } format( "if (arena != nullptr) {\n" " ptr = ctx->ReadArenaString(ptr, &$msg$$field$, arena"); if (IsStringInlined(field, options_)) { GOOGLE_DCHECK(!inlined_string_indices_.empty()); int inlined_string_index = inlined_string_indices_[field->index()]; GOOGLE_DCHECK_GT(inlined_string_index, 0); format(", &$msg$$inlined_string_donated_array$[0], $1$, $this$", inlined_string_index); } else { GOOGLE_DCHECK(field->default_value_string().empty()); } format( ");\n" "} else {\n" " ptr = ::_pbi::InlineGreedyStringParser(" "$msg$$field$.MutableNoCopy(nullptr), ptr, ctx);\n" "}\n" "const std::string* str = &$msg$$field$.Get(); (void)str;\n"); } void ParseFunctionGenerator::GenerateStrings(Formatter& format, const FieldDescriptor* field, bool check_utf8) { FieldOptions::CType ctype = FieldOptions::STRING; if (!options_.opensource_runtime) { // Open source doesn't support other ctypes; ctype = field->options().ctype(); } if (!field->is_repeated() && !options_.opensource_runtime && GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME && // For now only use arena string for strings with empty defaults. field->default_value_string().empty() && !field->real_containing_oneof() && ctype == FieldOptions::STRING) { GenerateArenaString(format, field); } else { std::string parser_name; switch (ctype) { case FieldOptions::STRING: parser_name = "GreedyStringParser"; break; case FieldOptions::CORD: parser_name = "CordParser"; break; case FieldOptions::STRING_PIECE: parser_name = "StringPieceParser"; break; } format( "auto str = $msg$$1$$2$_$name$();\n" "ptr = ::_pbi::Inline$3$(str, ptr, ctx);\n", HasInternalAccessors(ctype) ? "_internal_" : "", field->is_repeated() && !field->is_packable() ? "add" : "mutable", parser_name); } // It is intentionally placed before VerifyUTF8 because it doesn't make sense // to verify UTF8 when we already know parsing failed. format("CHK_(ptr);\n"); if (!check_utf8) return; // return if this is a bytes field auto level = GetUtf8CheckMode(field, options_); switch (level) { case Utf8CheckMode::kNone: return; case Utf8CheckMode::kVerify: format("#ifndef NDEBUG\n"); break; case Utf8CheckMode::kStrict: format("CHK_("); break; } std::string field_name; field_name = "nullptr"; if (HasDescriptorMethods(field->file(), options_)) { field_name = StrCat("\"", field->full_name(), "\""); } format("::_pbi::VerifyUTF8(str, $1$)", field_name); switch (level) { case Utf8CheckMode::kNone: return; case Utf8CheckMode::kVerify: format( ";\n" "#endif // !NDEBUG\n"); break; case Utf8CheckMode::kStrict: format(");\n"); break; } } void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, const FieldDescriptor* field) { if (field->is_packable()) { if (field->type() == FieldDescriptor::TYPE_ENUM && !HasPreservingUnknownEnumSemantics(field)) { std::string enum_type = QualifiedClassName(field->enum_type(), options_); format( "ptr = " "::$proto_ns$::internal::Packed$1$Parser<$unknown_fields_type$>(" "$msg$_internal_mutable_$name$(), ptr, ctx, $2$_IsValid, " "&$msg$_internal_metadata_, $3$);\n", DeclaredTypeMethodName(field->type()), enum_type, field->number()); } else { format( "ptr = ::$proto_ns$::internal::Packed$1$Parser(" "$msg$_internal_mutable_$name$(), ptr, ctx);\n", DeclaredTypeMethodName(field->type())); } format("CHK_(ptr);\n"); } else { auto field_type = field->type(); switch (field_type) { case FieldDescriptor::TYPE_STRING: GenerateStrings(format, field, true /* utf8 */); break; case FieldDescriptor::TYPE_BYTES: GenerateStrings(format, field, false /* utf8 */); break; case FieldDescriptor::TYPE_MESSAGE: { if (field->is_map()) { const FieldDescriptor* val = field->message_type()->map_value(); GOOGLE_CHECK(val); if (val->type() == FieldDescriptor::TYPE_ENUM && !HasPreservingUnknownEnumSemantics(field)) { format( "auto object = " "::$proto_ns$::internal::InitEnumParseWrapper<" "$unknown_fields_type$>(&$msg$$field$, $1$_IsValid, " "$2$, &$msg$_internal_metadata_);\n" "ptr = ctx->ParseMessage(&object, ptr);\n", QualifiedClassName(val->enum_type(), options_), field->number()); } else { format("ptr = ctx->ParseMessage(&$msg$$field$, ptr);\n"); } } else if (IsLazy(field, options_, scc_analyzer_)) { bool eager_verify = IsEagerlyVerifiedLazy(field, options_, scc_analyzer_); if (ShouldVerify(descriptor_, options_, scc_analyzer_)) { format( "ctx->set_lazy_eager_verify_func($1$);\n", eager_verify ? StrCat("&", ClassName(field->message_type(), true), "::InternalVerify") : "nullptr"); } if (field->real_containing_oneof()) { format( "if (!$msg$_internal_has_$name$()) {\n" " $msg$clear_$1$();\n" " $msg$$field$ = ::$proto_ns$::Arena::CreateMessage<\n" " ::$proto_ns$::internal::LazyField>(" "$msg$GetArenaForAllocation());\n" " $msg$set_has_$name$();\n" "}\n" "auto* lazy_field = $msg$$field$;\n", field->containing_oneof()->name()); } else if (HasHasbit(field)) { format( "_Internal::set_has_$name$(&$has_bits$);\n" "auto* lazy_field = &$msg$$field$;\n"); } else { format("auto* lazy_field = &$msg$$field$;\n"); } format( "::$proto_ns$::internal::LazyFieldParseHelper<\n" " ::$proto_ns$::internal::LazyField> parse_helper(\n" " $1$::default_instance(),\n" " $msg$GetArenaForAllocation(),\n" " ::google::protobuf::internal::LazyVerifyOption::$2$,\n" " lazy_field);\n" "ptr = ctx->ParseMessage(&parse_helper, ptr);\n", FieldMessageTypeName(field, options_), eager_verify ? "kEager" : "kLazy"); if (ShouldVerify(descriptor_, options_, scc_analyzer_) && eager_verify) { format("ctx->set_lazy_eager_verify_func(nullptr);\n"); } } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) { if (!field->is_repeated()) { format( "ptr = ctx->ParseMessage(_Internal::mutable_$name$($this$), " "ptr);\n"); } else { format( "ptr = ctx->ParseMessage($msg$$field$.AddWeak(" "reinterpret_cast($1$ptr_)" "), ptr);\n", QualifiedDefaultInstanceName(field->message_type(), options_)); } } else if (IsWeak(field, options_)) { format( "{\n" " auto* default_ = &reinterpret_cast($1$);\n" " ptr = ctx->ParseMessage($msg$$weak_field_map$.MutableMessage(" "$2$, default_), ptr);\n" "}\n", QualifiedDefaultInstanceName(field->message_type(), options_), field->number()); } else { format( "ptr = ctx->ParseMessage($msg$_internal_$mutable_field$(), " "ptr);\n"); } format("CHK_(ptr);\n"); break; } default: GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype " << " filed type is " << field->type(); } } } static bool ShouldRepeat(const FieldDescriptor* descriptor, WireFormatLite::WireType wiretype) { constexpr int kMaxTwoByteFieldNumber = 16 * 128; return descriptor->number() < kMaxTwoByteFieldNumber && descriptor->is_repeated() && (!descriptor->is_packable() || wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED); } void ParseFunctionGenerator::GenerateFieldBody( Formatter& format, WireFormatLite::WireType wiretype, const FieldDescriptor* field) { Formatter::SaveState formatter_state(&format); format.AddMap( {{"name", FieldName(field)}, {"primitive_type", PrimitiveTypeName(options_, field->cpp_type())}}); if (field->is_repeated()) { format.AddMap({{"put_field", StrCat("add_", FieldName(field))}, {"mutable_field", StrCat("add_", FieldName(field))}}); } else { format.AddMap( {{"put_field", StrCat("set_", FieldName(field))}, {"mutable_field", StrCat("mutable_", FieldName(field))}}); } uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype); switch (wiretype) { case WireFormatLite::WIRETYPE_VARINT: { std::string type = PrimitiveTypeName(options_, field->cpp_type()); if (field->type() == FieldDescriptor::TYPE_ENUM) { format.Set("enum_type", QualifiedClassName(field->enum_type(), options_)); format( "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n" "CHK_(ptr);\n"); if (!HasPreservingUnknownEnumSemantics(field)) { format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n"); format.Indent(); } format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n"); if (!HasPreservingUnknownEnumSemantics(field)) { format.Outdent(); format( "} else {\n" " ::$proto_ns$::internal::WriteVarint(" "$1$, val, $msg$mutable_unknown_fields());\n" "}\n", field->number()); } } else { std::string size = (field->type() == FieldDescriptor::TYPE_INT32 || field->type() == FieldDescriptor::TYPE_SINT32 || field->type() == FieldDescriptor::TYPE_UINT32) ? "32" : "64"; std::string zigzag; if ((field->type() == FieldDescriptor::TYPE_SINT32 || field->type() == FieldDescriptor::TYPE_SINT64)) { zigzag = "ZigZag"; } if (field->is_repeated() || field->real_containing_oneof()) { format( "$msg$_internal_$put_field$(" "::$proto_ns$::internal::ReadVarint$1$$2$(&ptr));\n" "CHK_(ptr);\n", zigzag, size); } else { if (HasHasbit(field)) { format("_Internal::set_has_$name$(&$has_bits$);\n"); } format( "$msg$$field$ = ::$proto_ns$::internal::ReadVarint$1$$2$(&ptr);\n" "CHK_(ptr);\n", zigzag, size); } } break; } case WireFormatLite::WIRETYPE_FIXED32: case WireFormatLite::WIRETYPE_FIXED64: { if (field->is_repeated() || field->real_containing_oneof()) { format( "$msg$_internal_$put_field$(" "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n" "ptr += sizeof($primitive_type$);\n"); } else { if (HasHasbit(field)) { format("_Internal::set_has_$name$(&$has_bits$);\n"); } format( "$msg$$field$ = " "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr);\n" "ptr += sizeof($primitive_type$);\n"); } break; } case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: { GenerateLengthDelim(format, field); break; } case WireFormatLite::WIRETYPE_START_GROUP: { format( "ptr = ctx->ParseGroup($msg$_internal_$mutable_field$(), ptr, $1$);\n" "CHK_(ptr);\n", tag); break; } case WireFormatLite::WIRETYPE_END_GROUP: { GOOGLE_LOG(FATAL) << "Can't have end group field\n"; break; } } // switch (wire_type) } // Returns the tag for this field and in case of repeated packable fields, // sets a fallback tag in fallback_tag_ptr. static uint32_t ExpectedTag(const FieldDescriptor* field, uint32_t* fallback_tag_ptr) { uint32_t expected_tag; if (field->is_packable()) { auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type()); expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype); GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED); auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED; uint32_t fallback_tag = WireFormatLite::MakeTag(field->number(), fallback_wiretype); if (field->is_packed()) std::swap(expected_tag, fallback_tag); *fallback_tag_ptr = fallback_tag; } else { auto expected_wiretype = WireFormat::WireTypeForField(field); expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype); } return expected_tag; } // These variables are used by the generated parse iteration, and must already // be defined in the generated code: // - `const char* ptr`: the input buffer. // - `ParseContext* ctx`: the associated context for `ptr`. // - implicit `this`: i.e., we must be in a non-static member function. // // The macro `CHK_(x)` must be defined. It should return an error condition if // the macro parameter is false. // // Whenever an END_GROUP tag was read, or tag 0 was read, the generated code // branches to the label `message_done`. // // These formatter variables are used: // - `next_tag`: a single statement to begin parsing the next tag. // // At the end of the generated code, the enclosing function should proceed to // parse the next tag in the stream. void ParseFunctionGenerator::GenerateParseIterationBody( Formatter& format, const Descriptor* descriptor, const std::vector& fields) { if (!fields.empty()) { GenerateFieldSwitch(format, fields); // Each field `case` only considers field number. Field numbers that are // not defined in the message, or tags with an incompatible wire type, are // considered "unusual" cases. They will be handled by the logic below. format.Outdent(); format("handle_unusual:\n"); format.Indent(); } // Unusual/extension/unknown case: format( "if ((tag == 0) || ((tag & 7) == 4)) {\n" " CHK_(ptr);\n" " ctx->SetLastTag(tag);\n" " goto message_done;\n" "}\n"); if (IsMapEntryMessage(descriptor)) { format("$next_tag$;\n"); } else { if (descriptor->extension_range_count() > 0) { format("if ("); for (int i = 0; i < descriptor->extension_range_count(); i++) { const Descriptor::ExtensionRange* range = descriptor->extension_range(i); if (i > 0) format(" ||\n "); uint32_t start_tag = WireFormatLite::MakeTag( range->start, static_cast(0)); uint32_t end_tag = WireFormatLite::MakeTag( range->end, static_cast(0)); if (range->end > FieldDescriptor::kMaxNumber) { format("($1$u <= tag)", start_tag); } else { format("($1$u <= tag && tag < $2$u)", start_tag, end_tag); } } format( ") {\n" " ptr = $msg$$extensions$.ParseField(tag, ptr, " "internal_default_instance(), &$msg$_internal_metadata_, ctx);\n" " CHK_(ptr != nullptr);\n" " $next_tag$;\n" "}\n"); } format( "ptr = UnknownFieldParse(\n" " tag,\n" " $msg$_internal_metadata_.mutable_unknown_fields<" "$unknown_fields_type$>(),\n" " ptr, ctx);\n" "CHK_(ptr != nullptr);\n"); } } void ParseFunctionGenerator::GenerateFieldSwitch( Formatter& format, const std::vector& fields) { format("switch (tag >> 3) {\n"); format.Indent(); for (const auto* field : fields) { bool cold = ShouldSplit(field, options_); format.Set("field", FieldMemberName(field, cold)); PrintFieldComment(format, field); format("case $1$:\n", field->number()); format.Indent(); uint32_t fallback_tag = 0; uint32_t expected_tag = ExpectedTag(field, &fallback_tag); format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n", expected_tag & 0xFF); format.Indent(); if (cold) { format("$msg$PrepareSplitMessageForWrite();\n"); } auto wiretype = WireFormatLite::GetTagWireType(expected_tag); uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype); int tag_size = io::CodedOutputStream::VarintSize32(tag); bool is_repeat = ShouldRepeat(field, wiretype); if (is_repeat) { format( "ptr -= $1$;\n" "do {\n" " ptr += $1$;\n", tag_size); format.Indent(); } GenerateFieldBody(format, wiretype, field); if (is_repeat) { format.Outdent(); format( " if (!ctx->DataAvailable(ptr)) break;\n" "} while (::$proto_ns$::internal::ExpectTag<$1$>(ptr));\n", tag); } format.Outdent(); if (fallback_tag) { format("} else if (static_cast<$uint8$>(tag) == $1$) {\n", fallback_tag & 0xFF); format.Indent(); GenerateFieldBody(format, WireFormatLite::GetTagWireType(fallback_tag), field); format.Outdent(); } format( "} else\n" " goto handle_unusual;\n" "$next_tag$;\n"); format.Outdent(); } // for loop over ordered fields format( "default:\n" " goto handle_unusual;\n"); format.Outdent(); format("} // switch\n"); } namespace { std::string FieldParseFunctionName( const TailCallTableInfo::FieldEntryInfo& entry, const Options& options) { const FieldDescriptor* field = entry.field; std::string name = "::_pbi::TcParser::Fast"; switch (field->type()) { case FieldDescriptor::TYPE_FIXED32: case FieldDescriptor::TYPE_SFIXED32: case FieldDescriptor::TYPE_FLOAT: name.append("F32"); break; case FieldDescriptor::TYPE_FIXED64: case FieldDescriptor::TYPE_SFIXED64: case FieldDescriptor::TYPE_DOUBLE: name.append("F64"); break; case FieldDescriptor::TYPE_BOOL: name.append("V8"); break; case FieldDescriptor::TYPE_INT32: case FieldDescriptor::TYPE_UINT32: name.append("V32"); break; case FieldDescriptor::TYPE_INT64: case FieldDescriptor::TYPE_UINT64: name.append("V64"); break; case FieldDescriptor::TYPE_ENUM: if (HasPreservingUnknownEnumSemantics(field)) { name.append("V32"); break; } if (field->is_repeated() && field->is_packed()) { GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString(); return ""; } name.append(entry.is_enum_range ? "Er" : "Ev"); break; case FieldDescriptor::TYPE_SINT32: name.append("Z32"); break; case FieldDescriptor::TYPE_SINT64: name.append("Z64"); break; case FieldDescriptor::TYPE_BYTES: name.append("B"); if (IsStringInlined(field, options)) { name.append("i"); } break; case FieldDescriptor::TYPE_STRING: switch (GetUtf8CheckMode(field, options)) { case Utf8CheckMode::kNone: name.append("B"); break; case Utf8CheckMode::kVerify: name.append("S"); break; case Utf8CheckMode::kStrict: name.append("U"); break; default: GOOGLE_LOG(DFATAL) << "Mode not handled: " << static_cast(GetUtf8CheckMode(field, options)); return ""; } if (IsStringInlined(field, options)) { name.append("i"); } break; case FieldDescriptor::TYPE_MESSAGE: name.append("M"); break; case FieldDescriptor::TYPE_GROUP: name.append("G"); break; default: GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString(); return ""; } // The field implementation functions are prefixed by cardinality: // `S` for optional or implicit fields. // `R` for non-packed repeated. // `P` for packed repeated. name.append(field->is_packed() ? "P" : field->is_repeated() ? "R" : field->real_containing_oneof() ? "O" : "S"); // Append the tag length. Fast parsing only handles 1- or 2-byte tags. name.append(TagSize(field->number()) == 1 ? "1" : "2"); return name; } } // namespace } // namespace cpp } // namespace compiler } // namespace protobuf } // namespace google