// // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // // Official repository: https://github.com/boostorg/url // #ifndef BOOST_URL_DETAIL_IMPL_PATTERN_IPP #define BOOST_URL_DETAIL_IMPL_PATTERN_IPP #include #include #include #include #include #include #include namespace boost { namespace urls { namespace detail { static constexpr auto lhost_chars = host_chars + ':'; void pattern:: apply( url_base& u, format_args const& args) const { // measure total struct sizes { std::size_t scheme = 0; std::size_t user = 0; std::size_t pass = 0; std::size_t host = 0; std::size_t port = 0; std::size_t path = 0; std::size_t query = 0; std::size_t frag = 0; }; sizes n; format_parse_context pctx(nullptr, nullptr, 0); measure_context mctx(args); if (!scheme.empty()) { pctx = {scheme, pctx.next_arg_id()}; n.scheme = pct_vmeasure( grammar::alpha_chars, pctx, mctx); mctx.advance_to(0); } if (has_authority) { if (has_user) { pctx = {user, pctx.next_arg_id()}; n.user = pct_vmeasure( user_chars, pctx, mctx); mctx.advance_to(0); if (has_pass) { pctx = {pass, pctx.next_arg_id()}; n.pass = pct_vmeasure( password_chars, pctx, mctx); mctx.advance_to(0); } } if (host.starts_with('[')) { BOOST_ASSERT(host.ends_with(']')); pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; n.host = pct_vmeasure( lhost_chars, pctx, mctx) + 2; mctx.advance_to(0); } else { pctx = {host, pctx.next_arg_id()}; n.host = pct_vmeasure( host_chars, pctx, mctx); mctx.advance_to(0); } if (has_port) { pctx = {port, pctx.next_arg_id()}; n.port = pct_vmeasure( grammar::digit_chars, pctx, mctx); mctx.advance_to(0); } } if (!path.empty()) { pctx = {path, pctx.next_arg_id()}; n.path = pct_vmeasure( path_chars, pctx, mctx); mctx.advance_to(0); } if (has_query) { pctx = {query, pctx.next_arg_id()}; n.query = pct_vmeasure( query_chars, pctx, mctx); mctx.advance_to(0); } if (has_frag) { pctx = {frag, pctx.next_arg_id()}; n.frag = pct_vmeasure( fragment_chars, pctx, mctx); mctx.advance_to(0); } std::size_t const n_total = n.scheme + (n.scheme != 0) * 1 + // ":" has_authority * 2 + // "//" n.user + has_pass * 1 + // ":" n.pass + has_user * 1 + // "@" n.host + has_port * 1 + // ":" n.port + n.path + has_query * 1 + // "?" n.query + has_frag * 1 + // "#" n.frag; u.reserve(n_total); // Apply pctx = {nullptr, nullptr, 0}; format_context fctx(nullptr, args); url_base::op_t op(u); using parts = parts_base; if (!scheme.empty()) { auto dest = u.resize_impl( parts::id_scheme, n.scheme + 1, op); pctx = {scheme, pctx.next_arg_id()}; fctx.advance_to(dest); const char* dest1 = pct_vformat( grammar::alpha_chars, pctx, fctx); dest[n.scheme] = ':'; // validate if (!grammar::parse({dest, dest1}, scheme_rule())) { throw_invalid_argument(); } } if (has_authority) { if (has_user) { auto dest = u.set_user_impl( n.user, op); pctx = {user, pctx.next_arg_id()}; fctx.advance_to(dest); char const* dest1 = pct_vformat( user_chars, pctx, fctx); u.impl_.decoded_[parts::id_user] = pct_string_view(dest, dest1 - dest) ->decoded_size(); if (has_pass) { char* destp = u.set_password_impl( n.pass, op); pctx = {pass, pctx.next_arg_id()}; fctx.advance_to(destp); dest1 = pct_vformat( password_chars, pctx, fctx); u.impl_.decoded_[parts::id_pass] = pct_string_view({destp, dest1}) ->decoded_size() + 1; } } auto dest = u.set_host_impl( n.host, op); if (host.starts_with('[')) { BOOST_ASSERT(host.ends_with(']')); pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; *dest++ = '['; fctx.advance_to(dest); char* dest1 = pct_vformat(lhost_chars, pctx, fctx); *dest1++ = ']'; u.impl_.decoded_[parts::id_host] = pct_string_view(dest - 1, dest1 - dest) ->decoded_size(); } else { pctx = {host, pctx.next_arg_id()}; fctx.advance_to(dest); char const* dest1 = pct_vformat(host_chars, pctx, fctx); u.impl_.decoded_[parts::id_host] = pct_string_view(dest, dest1 - dest) ->decoded_size(); } auto uh = u.encoded_host(); auto h = grammar::parse(uh, host_rule).value(); std::memcpy( u.impl_.ip_addr_, h.addr, sizeof(u.impl_.ip_addr_)); u.impl_.host_type_ = h.host_type; if (has_port) { dest = u.set_port_impl(n.port, op); pctx = {port, pctx.next_arg_id()}; fctx.advance_to(dest); char const* dest1 = pct_vformat( grammar::digit_chars, pctx, fctx); u.impl_.decoded_[parts::id_port] = pct_string_view(dest, dest1 - dest) ->decoded_size() + 1; string_view up = {dest - 1, dest1}; auto p = grammar::parse(up, detail::port_part_rule).value(); if (p.has_port) u.impl_.port_number_ = p.port_number; } } if (!path.empty()) { auto dest = u.resize_impl( parts::id_path, n.path, op); pctx = {path, pctx.next_arg_id()}; fctx.advance_to(dest); auto dest1 = pct_vformat( path_chars, pctx, fctx); pct_string_view npath(dest, dest1 - dest); u.impl_.decoded_[parts::id_path] += npath.decoded_size(); if (!npath.empty()) { u.impl_.nseg_ = std::count( npath.begin() + 1, npath.end(), '/') + 1; } // handle edge cases // 1) path is first component and the // first segment contains an unencoded ':' // This is impossible because the template // "{}" would be a host. if (u.scheme().empty() && !u.has_authority()) { auto fseg = u.encoded_segments().front(); std::size_t nc = std::count( fseg.begin(), fseg.end(), ':'); if (nc) { std::size_t diff = nc * 2; u.reserve(n_total + diff); dest = u.resize_impl( parts::id_path, n.path + diff, op); char* dest0 = dest + diff; std::memmove(dest0, dest, n.path); while (dest0 != dest) { if (*dest0 != ':') { *dest++ = *dest0++; } else { *dest++ = '%'; *dest++ = '3'; *dest++ = 'A'; dest0++; } } } } // 2) url has no authority and path // starts with "//" if (!u.has_authority() && u.encoded_path().starts_with("//")) { u.reserve(n_total + 2); dest = u.resize_impl( parts::id_path, n.path + 2, op); std::memmove(dest + 2, dest, n.path); *dest++ = '/'; *dest = '.'; } } if (has_query) { auto dest = u.resize_impl( parts::id_query, n.query + 1, op); *dest++ = '?'; pctx = {query, pctx.next_arg_id()}; fctx.advance_to(dest); auto dest1 = pct_vformat( query_chars, pctx, fctx); pct_string_view nquery(dest, dest1 - dest); u.impl_.decoded_[parts::id_query] += nquery.decoded_size() + 1; if (!nquery.empty()) { u.impl_.nparam_ = std::count( nquery.begin(), nquery.end(), '&') + 1; } } if (has_frag) { auto dest = u.resize_impl( parts::id_frag, n.frag + 1, op); *dest++ = '#'; pctx = {frag, pctx.next_arg_id()}; fctx.advance_to(dest); auto dest1 = pct_vformat( fragment_chars, pctx, fctx); u.impl_.decoded_[parts::id_frag] += make_pct_string_view( string_view(dest, dest1 - dest)) ->decoded_size() + 1; } } // This rule represents a pct-encoded string // that contains an arbitrary number of // replacement ids in it template struct pct_encoded_fmt_string_rule_t { using value_type = pct_string_view; constexpr pct_encoded_fmt_string_rule_t( CharSet const& cs) noexcept : cs_(cs) { } template friend constexpr auto pct_encoded_fmt_string_rule( CharSet_ const& cs) noexcept -> pct_encoded_fmt_string_rule_t; result parse( char const*& it, char const* end) const noexcept { auto const start = it; if(it == end) { // this might be empty return {}; } // consume some with literal rule // this might be an empty literal auto literal_rule = pct_encoded_rule(cs_); auto rv = literal_rule.parse(it, end); while (rv) { auto it0 = it; // consume some with replacement id // rule if (!replacement_field_rule.parse(it, end)) { it = it0; break; } rv = literal_rule.parse(it, end); } return string_view(start, it - start); } private: CharSet cs_; }; template constexpr auto pct_encoded_fmt_string_rule( CharSet const& cs) noexcept -> pct_encoded_fmt_string_rule_t { // If an error occurs here it means that // the value of your type does not meet // the requirements. Please check the // documentation! static_assert( grammar::is_charset::value, "CharSet requirements not met"); return pct_encoded_fmt_string_rule_t(cs); } // This rule represents a regular string with // only chars from the specified charset and // an arbitrary number of replacement ids in it template struct fmt_token_rule_t { using value_type = pct_string_view; constexpr fmt_token_rule_t( CharSet const& cs) noexcept : cs_(cs) { } template friend constexpr auto fmt_token_rule( CharSet_ const& cs) noexcept -> fmt_token_rule_t; result parse( char const*& it, char const* end) const noexcept { auto const start = it; BOOST_ASSERT(it != end); /* // This should never happen because // all tokens are optional and will // already return `none`: if(it == end) { BOOST_URL_RETURN_EC( grammar::error::need_more); } */ // consume some with literal rule // this might be an empty literal auto partial_token_rule = grammar::optional_rule( grammar::token_rule(cs_)); auto rv = partial_token_rule.parse(it, end); while (rv) { auto it0 = it; // consume some with replacement id if (!replacement_field_rule.parse(it, end)) { // no replacement and no more cs // before: nothing else to consume it = it0; break; } // after {...}, consume any more chars // in the charset rv = partial_token_rule.parse(it, end); } if(it == start) { // it != end but we consumed nothing BOOST_URL_RETURN_EC( grammar::error::need_more); } return string_view(start, it - start); } private: CharSet cs_; }; template constexpr auto fmt_token_rule( CharSet const& cs) noexcept -> fmt_token_rule_t { // If an error occurs here it means that // the value of your type does not meet // the requirements. Please check the // documentation! static_assert( grammar::is_charset::value, "CharSet requirements not met"); return fmt_token_rule_t(cs); } struct userinfo_template_rule_t { struct value_type { string_view user; string_view password; bool has_password = false; }; auto parse( char const*& it, char const* end ) const noexcept -> result { static constexpr auto uchars = unreserved_chars + sub_delim_chars; static constexpr auto pwchars = uchars + ':'; value_type t; // user static constexpr auto user_fmt_rule = pct_encoded_fmt_string_rule(uchars); auto rv = grammar::parse( it, end, user_fmt_rule); BOOST_ASSERT(rv); t.user = *rv; // ':' if( it == end || *it != ':') { t.has_password = false; t.password = {}; return t; } ++it; // pass static constexpr auto pass_fmt_rule = pct_encoded_fmt_string_rule(grammar::ref(pwchars)); rv = grammar::parse( it, end, pass_fmt_rule); BOOST_ASSERT(rv); t.has_password = true; t.password = *rv; return t; } }; constexpr userinfo_template_rule_t userinfo_template_rule{}; struct host_template_rule_t { using value_type = string_view; auto parse( char const*& it, char const* end ) const noexcept -> result { if(it == end) { // empty host return {}; } // the host type will be ultimately // validated when applying the replacement // strings. Any chars allowed in hosts // are allowed here. if (*it != '[') { // IPv4address and reg-name have the // same char sets. constexpr auto any_host_template_rule = pct_encoded_fmt_string_rule(host_chars); auto rv = grammar::parse( it, end, any_host_template_rule); // any_host_template_rule can always // be empty, so it's never invalid BOOST_ASSERT(rv); return detail::to_sv(*rv); } // IP-literals need to be enclosed in // "[]" if using ':' in the template // string, because the ':' would be // ambiguous with the port in fmt string. // The "[]:" can be used in replacement // strings without the "[]" though. constexpr auto ip_literal_template_rule = pct_encoded_fmt_string_rule(lhost_chars); auto it0 = it; auto rv = grammar::parse( it, end, grammar::optional_rule( grammar::tuple_rule( grammar::squelch( grammar::delim_rule('[')), ip_literal_template_rule, grammar::squelch( grammar::delim_rule(']'))))); // ip_literal_template_rule can always // be empty, so it's never invalid, but // the rule might fail to match the // closing "]" BOOST_ASSERT(rv); return string_view{it0, it}; } }; constexpr host_template_rule_t host_template_rule{}; struct authority_template_rule_t { using value_type = pattern; result parse( char const*& it, char const* end ) const noexcept { pattern u; // [ userinfo "@" ] { auto rv = grammar::parse( it, end, grammar::optional_rule( grammar::tuple_rule( userinfo_template_rule, grammar::squelch( grammar::delim_rule('@'))))); BOOST_ASSERT(rv); if(rv->has_value()) { auto& r = **rv; u.has_user = true; u.user = r.user; u.has_pass = r.has_password; u.pass = r.password; } } // host { auto rv = grammar::parse( it, end, host_template_rule); // host is allowed to be empty BOOST_ASSERT(rv); u.host = *rv; } // [ ":" port ] { constexpr auto port_template_rule = grammar::optional_rule( fmt_token_rule(grammar::digit_chars)); auto it0 = it; auto rv = grammar::parse( it, end, grammar::tuple_rule( grammar::squelch( grammar::delim_rule(':')), port_template_rule)); if (!rv) { it = it0; } else { u.has_port = true; if (rv->has_value()) { u.port = **rv; } } } return u; } }; constexpr authority_template_rule_t authority_template_rule{}; struct scheme_template_rule_t { using value_type = string_view; result parse( char const*& it, char const* end) const noexcept { auto const start = it; if(it == end) { // scheme can't be empty BOOST_URL_RETURN_EC( grammar::error::mismatch); } if(!grammar::alpha_chars(*it) && *it != '{') { // expected alpha BOOST_URL_RETURN_EC( grammar::error::mismatch); } // it starts with replacement id or alpha char if (!grammar::alpha_chars(*it)) { if (!replacement_field_rule.parse(it, end)) { // replacement_field_rule is invalid BOOST_URL_RETURN_EC( grammar::error::mismatch); } } else { // skip first ++it; } static constexpr grammar::lut_chars scheme_chars( "0123456789" "+-." "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"); // non-scheme chars might be a new // replacement-id or just an invalid char it = grammar::find_if_not( it, end, scheme_chars); while (it != end) { auto it0 = it; if (!replacement_field_rule.parse(it, end)) { it = it0; break; } it = grammar::find_if_not( it, end, scheme_chars); } return string_view(start, it - start); } }; constexpr scheme_template_rule_t scheme_template_rule{}; // This rule should consider all url types at the // same time according to the format string // - relative urls with no scheme/authority // - absolute urls have no fragment struct pattern_rule_t { using value_type = pattern; result parse( char const*& it, char const* const end ) const noexcept { pattern u; // optional scheme { auto it0 = it; auto rv = grammar::parse( it, end, grammar::tuple_rule( scheme_template_rule, grammar::squelch( grammar::delim_rule(':')))); if(rv) u.scheme = *rv; else it = it0; } // hier_part (authority + path) // if there are less than 2 chars left, // we are parsing the path if (it == end) { // this is over, so we can consider // that a "path-empty" return u; } if(end - it == 1) { // only one char left // it can be a single separator "/", // representing an empty absolute path, // or a single-char segment if(*it == '/') { // path-absolute u.path = {it, 1}; ++it; return u; } // this can be a: // - path-noscheme if there's no scheme, or // - path-rootless with a single char, or // - path-empty (and consume nothing) if (!u.scheme.empty() || *it != ':') { // path-rootless with a single char // this needs to be a segment because // the authority needs two slashes // "//" // path-noscheme also matches here // because we already validated the // first char auto rv = grammar::parse( it, end, urls::detail::segment_rule); if(! rv) return rv.error(); u.path = *rv; } return u; } // authority if( it[0] == '/' && it[1] == '/') { // "//" always indicates authority it += 2; auto rv = grammar::parse( it, end, authority_template_rule); // authority is allowed to be empty BOOST_ASSERT(rv); u.has_authority = true; u.has_user = rv->has_user; u.user = rv->user; u.has_pass = rv->has_pass; u.pass = rv->pass; u.host = rv->host; u.has_port = rv->has_port; u.port = rv->port; } // the authority requires an absolute path // or an empty path if (it == end || (u.has_authority && (*it != '/' && *it != '?' && *it != '#'))) { // path-empty return u; } // path-abempty // consume the whole path at once because // we're going to count number of segments // later after the replacements happen static constexpr auto segment_fmt_rule = pct_encoded_fmt_string_rule(path_chars); auto rp = grammar::parse( it, end, segment_fmt_rule); // path-abempty is allowed to be empty BOOST_ASSERT(rp); u.path = *rp; // [ "?" query ] { static constexpr auto query_fmt_rule = pct_encoded_fmt_string_rule(query_chars); auto rv = grammar::parse( it, end, grammar::tuple_rule( grammar::squelch( grammar::delim_rule('?')), query_fmt_rule)); // query is allowed to be empty but // delim rule is not if (rv) { u.has_query = true; u.query = *rv; } } // [ "#" fragment ] { static constexpr auto frag_fmt_rule = pct_encoded_fmt_string_rule(fragment_chars); auto rv = grammar::parse( it, end, grammar::tuple_rule( grammar::squelch( grammar::delim_rule('#')), frag_fmt_rule)); // frag is allowed to be empty but // delim rule is not if (rv) { u.has_frag = true; u.frag = *rv; } } return u; } }; constexpr pattern_rule_t pattern_rule{}; result parse_pattern( string_view s) { return grammar::parse( s, pattern_rule); } } // detail } // urls } // boost #endif