ext-boost/boost/url/detail/impl/pattern.ipp
2023-05-22 18:45:02 +10:00

946 lines
25 KiB
C++

//
// Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
#ifndef BOOST_URL_DETAIL_IMPL_PATTERN_IPP
#define BOOST_URL_DETAIL_IMPL_PATTERN_IPP
#include <boost/url/detail/pattern.hpp>
#include <boost/url/detail/pct_format.hpp>
#include <boost/url/detail/replacement_field_rule.hpp>
#include <boost/url/rfc/detail/host_rule.hpp>
#include <boost/url/rfc/detail/path_rules.hpp>
#include <boost/url/rfc/detail/port_rule.hpp>
#include <boost/url/rfc/detail/scheme_rule.hpp>
namespace boost {
namespace urls {
namespace detail {
// Character set used for the text between the brackets of a "[...]"
// host template: every character in host_chars plus ':'.
static constexpr auto lhost_chars = host_chars + ':';
// Format this pattern into `u`, substituting replacement fields with
// the values in `args`.
//
// The work is done in two passes that visit the components in the same
// order (scheme, user, password, host, port, path, query, fragment) so
// that automatic argument ids line up between the passes:
//
//  1. measure: pct_vmeasure computes the encoded size of each component
//     and the total is reserved in `u` up front;
//  2. apply: each part of `u` is resized and pct_vformat writes the
//     formatted text in place; the cached decoded sizes, host type and
//     address, port number, segment count and param count in `u.impl_`
//     are updated as each part is written.
//
// The formatted scheme is checked against scheme_rule and
// throw_invalid_argument() is called when it does not parse. The
// formatted host and port are re-parsed and accessed through
// `.value()`.
void
pattern::
apply(
    url_base& u,
    format_args const& args) const
{
    // measure total
    struct sizes
    {
        std::size_t scheme = 0;
        std::size_t user = 0;
        std::size_t pass = 0;
        std::size_t host = 0;
        std::size_t port = 0;
        std::size_t path = 0;
        std::size_t query = 0;
        std::size_t frag = 0;
    };
    sizes n;
    format_parse_context pctx(nullptr, nullptr, 0);
    measure_context mctx(args);
    if (!scheme.empty())
    {
        pctx = {scheme, pctx.next_arg_id()};
        n.scheme = pct_vmeasure(
            grammar::alpha_chars, pctx, mctx);
        mctx.advance_to(0);
    }
    if (has_authority)
    {
        if (has_user)
        {
            pctx = {user, pctx.next_arg_id()};
            n.user = pct_vmeasure(
                user_chars, pctx, mctx);
            mctx.advance_to(0);
            if (has_pass)
            {
                pctx = {pass, pctx.next_arg_id()};
                n.pass = pct_vmeasure(
                    password_chars, pctx, mctx);
                mctx.advance_to(0);
            }
        }
        if (host.starts_with('['))
        {
            // only the text between the brackets is formatted;
            // the "+ 2" accounts for '[' and ']'
            BOOST_ASSERT(host.ends_with(']'));
            pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()};
            n.host = pct_vmeasure(
                lhost_chars, pctx, mctx) + 2;
            mctx.advance_to(0);
        }
        else
        {
            pctx = {host, pctx.next_arg_id()};
            n.host = pct_vmeasure(
                host_chars, pctx, mctx);
            mctx.advance_to(0);
        }
        if (has_port)
        {
            pctx = {port, pctx.next_arg_id()};
            n.port = pct_vmeasure(
                grammar::digit_chars, pctx, mctx);
            mctx.advance_to(0);
        }
    }
    if (!path.empty())
    {
        pctx = {path, pctx.next_arg_id()};
        n.path = pct_vmeasure(
            path_chars, pctx, mctx);
        mctx.advance_to(0);
    }
    if (has_query)
    {
        pctx = {query, pctx.next_arg_id()};
        n.query = pct_vmeasure(
            query_chars, pctx, mctx);
        mctx.advance_to(0);
    }
    if (has_frag)
    {
        pctx = {frag, pctx.next_arg_id()};
        n.frag = pct_vmeasure(
            fragment_chars, pctx, mctx);
        mctx.advance_to(0);
    }
    // component sizes plus one character for every delimiter present
    std::size_t const n_total =
        n.scheme +
        (n.scheme != 0) * 1 + // ":"
        has_authority * 2 +   // "//"
        n.user +
        has_pass * 1 +        // ":"
        n.pass +
        has_user * 1 +        // "@"
        n.host +
        has_port * 1 +        // ":"
        n.port +
        n.path +
        has_query * 1 +       // "?"
        n.query +
        has_frag * 1 +        // "#"
        n.frag;
    u.reserve(n_total);

    // Apply
    // restart the parse context so automatic argument ids are handed
    // out in the same sequence as in the measuring pass
    pctx = {nullptr, nullptr, 0};
    format_context fctx(nullptr, args);
    url_base::op_t op(u);
    using parts = parts_base;
    if (!scheme.empty())
    {
        auto dest = u.resize_impl(
            parts::id_scheme,
            n.scheme + 1, op);
        pctx = {scheme, pctx.next_arg_id()};
        fctx.advance_to(dest);
        const char* dest1 = pct_vformat(
            grammar::alpha_chars, pctx, fctx);
        dest[n.scheme] = ':';
        // validate
        if (!grammar::parse({dest, dest1}, scheme_rule()))
        {
            throw_invalid_argument();
        }
    }
    if (has_authority)
    {
        if (has_user)
        {
            auto dest = u.set_user_impl(
                n.user, op);
            pctx = {user, pctx.next_arg_id()};
            fctx.advance_to(dest);
            char const* dest1 = pct_vformat(
                user_chars, pctx, fctx);
            u.impl_.decoded_[parts::id_user] =
                pct_string_view(dest, dest1 - dest)
                    ->decoded_size();
            if (has_pass)
            {
                char* destp = u.set_password_impl(
                    n.pass, op);
                pctx = {pass, pctx.next_arg_id()};
                fctx.advance_to(destp);
                dest1 = pct_vformat(
                    password_chars, pctx, fctx);
                // "+ 1" for the ':' separator
                u.impl_.decoded_[parts::id_pass] =
                    pct_string_view({destp, dest1})
                        ->decoded_size() + 1;
            }
        }
        auto dest = u.set_host_impl(
            n.host, op);
        if (host.starts_with('['))
        {
            BOOST_ASSERT(host.ends_with(']'));
            pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()};
            *dest++ = '[';
            fctx.advance_to(dest);
            char* dest1 =
                pct_vformat(lhost_chars, pctx, fctx);
            *dest1++ = ']';
            // The host occupies [dest - 1, dest1): `dest` is already
            // past '[' and `dest1` is past ']'. The length therefore
            // needs "+ 1" so that the closing bracket is counted in
            // the decoded size as well.
            u.impl_.decoded_[parts::id_host] =
                pct_string_view(dest - 1, dest1 - dest + 1)
                    ->decoded_size();
        }
        else
        {
            pctx = {host, pctx.next_arg_id()};
            fctx.advance_to(dest);
            char const* dest1 =
                pct_vformat(host_chars, pctx, fctx);
            u.impl_.decoded_[parts::id_host] =
                pct_string_view(dest, dest1 - dest)
                    ->decoded_size();
        }
        // re-parse the formatted host to learn its type and address
        auto uh = u.encoded_host();
        auto h = grammar::parse(uh, host_rule).value();
        std::memcpy(
            u.impl_.ip_addr_,
            h.addr,
            sizeof(u.impl_.ip_addr_));
        u.impl_.host_type_ = h.host_type;
        if (has_port)
        {
            dest = u.set_port_impl(n.port, op);
            pctx = {port, pctx.next_arg_id()};
            fctx.advance_to(dest);
            char const* dest1 = pct_vformat(
                grammar::digit_chars, pctx, fctx);
            // "+ 1" for the ':' separator
            u.impl_.decoded_[parts::id_port] =
                pct_string_view(dest, dest1 - dest)
                    ->decoded_size() + 1;
            // `dest - 1` is the character just before the digits,
            // so `up` is the port part including that character
            string_view up = {dest - 1, dest1};
            auto p = grammar::parse(up, detail::port_part_rule).value();
            if (p.has_port)
                u.impl_.port_number_ = p.port_number;
        }
    }
    if (!path.empty())
    {
        auto dest = u.resize_impl(
            parts::id_path,
            n.path, op);
        pctx = {path, pctx.next_arg_id()};
        fctx.advance_to(dest);
        auto dest1 = pct_vformat(
            path_chars, pctx, fctx);
        pct_string_view npath(dest, dest1 - dest);
        u.impl_.decoded_[parts::id_path] +=
            npath.decoded_size();
        if (!npath.empty())
        {
            // the first character is skipped so that a leading '/'
            // does not count as a segment separator
            u.impl_.nseg_ = std::count(
                npath.begin() + 1,
                npath.end(), '/') + 1;
        }
        // handle edge cases
        // 1) path is first component and the
        // first segment contains an unencoded ':'
        // This is impossible because the template
        // "{}" would be a host.
        // NOTE(review): front() is called without checking that the
        // formatted path has any segment - confirm an empty formatted
        // path cannot reach this point.
        if (u.scheme().empty() &&
            !u.has_authority())
        {
            auto fseg = u.encoded_segments().front();
            std::size_t nc = std::count(
                fseg.begin(), fseg.end(), ':');
            if (nc)
            {
                // every ':' grows by two characters when rewritten
                // as "%3A"
                std::size_t diff = nc * 2;
                u.reserve(n_total + diff);
                dest = u.resize_impl(
                    parts::id_path,
                    n.path + diff, op);
                // shift the path to the end of the enlarged part and
                // copy it back to the front, expanding each ':' on the
                // way; the read and write cursors meet once `nc`
                // colons have been expanded
                char* dest0 = dest + diff;
                std::memmove(dest0, dest, n.path);
                while (dest0 != dest)
                {
                    if (*dest0 != ':')
                    {
                        *dest++ = *dest0++;
                    }
                    else
                    {
                        *dest++ = '%';
                        *dest++ = '3';
                        *dest++ = 'A';
                        dest0++;
                    }
                }
            }
        }
        // 2) url has no authority and path
        // starts with "//"
        if (!u.has_authority() &&
            u.encoded_path().starts_with("//"))
        {
            // prefix the path with "/." so that the leading "//" is
            // not read as the start of an authority
            u.reserve(n_total + 2);
            dest = u.resize_impl(
                parts::id_path,
                n.path + 2, op);
            std::memmove(dest + 2, dest, n.path);
            *dest++ = '/';
            *dest = '.';
        }
    }
    if (has_query)
    {
        auto dest = u.resize_impl(
            parts::id_query,
            n.query + 1, op);
        *dest++ = '?';
        pctx = {query, pctx.next_arg_id()};
        fctx.advance_to(dest);
        auto dest1 = pct_vformat(
            query_chars, pctx, fctx);
        pct_string_view nquery(dest, dest1 - dest);
        // "+ 1" for the '?' delimiter
        u.impl_.decoded_[parts::id_query] +=
            nquery.decoded_size() + 1;
        if (!nquery.empty())
        {
            u.impl_.nparam_ = std::count(
                nquery.begin(),
                nquery.end(), '&') + 1;
        }
    }
    if (has_frag)
    {
        auto dest = u.resize_impl(
            parts::id_frag,
            n.frag + 1, op);
        *dest++ = '#';
        pctx = {frag, pctx.next_arg_id()};
        fctx.advance_to(dest);
        auto dest1 = pct_vformat(
            fragment_chars, pctx, fctx);
        // "+ 1" for the '#' delimiter
        u.impl_.decoded_[parts::id_frag] +=
            make_pct_string_view(
                string_view(dest, dest1 - dest))
                ->decoded_size() + 1;
    }
}
// Rule for a percent-encoded string whose literal runs (characters of
// `CharSet` and percent-escapes, as accepted by pct_encoded_rule) may
// be interleaved with any number of replacement fields.
template<class CharSet>
struct pct_encoded_fmt_string_rule_t
{
    using value_type = pct_string_view;

    constexpr
    pct_encoded_fmt_string_rule_t(
        CharSet const& cs) noexcept
        : cs_(cs)
    {
    }

    template<class CharSet_>
    friend
    constexpr
    auto
    pct_encoded_fmt_string_rule(
        CharSet_ const& cs) noexcept ->
            pct_encoded_fmt_string_rule_t<CharSet_>;

    // Consume alternating literal runs and replacement fields from
    // [it, end) and return everything consumed. An empty match is
    // accepted, so this never reports an error itself.
    //
    // NOTE(review): when a literal parse fails, `it` is left wherever
    // pct_encoded_rule stopped - confirm it cannot stop inside an
    // invalid escape, since the consumed range is converted to a
    // pct_string_view on return.
    result<value_type>
    parse(
        char const*& it,
        char const* end) const noexcept
    {
        char const* const first = it;
        if (first == end)
        {
            // nothing to read: the match is empty
            return {};
        }
        // a literal run may itself be empty
        auto literal = pct_encoded_rule(cs_);
        for (auto matched = literal.parse(it, end);
             matched;
             matched = literal.parse(it, end))
        {
            // each further literal run must be introduced by a
            // replacement field; without one we are done
            char const* const mark = it;
            if (!replacement_field_rule.parse(it, end))
            {
                it = mark;
                break;
            }
        }
        return string_view(first, it - first);
    }

private:
    CharSet cs_;
};
// Build a pct_encoded_fmt_string_rule_t for the character set `cs`.
template<class CharSet>
constexpr
auto
pct_encoded_fmt_string_rule(
    CharSet const& cs) noexcept ->
        pct_encoded_fmt_string_rule_t<CharSet>
{
    // A failure here means the type passed as `cs` does not satisfy
    // the CharSet requirements; see the documentation.
    static_assert(
        grammar::is_charset<CharSet>::value,
        "CharSet requirements not met");
    using rule_type =
        pct_encoded_fmt_string_rule_t<CharSet>;
    return rule_type(cs);
}
// Rule for a plain (not percent-encoded) string made of characters
// from `CharSet`, interleaved with any number of replacement fields.
// Unlike pct_encoded_fmt_string_rule_t, at least one character must
// be consumed.
template<class CharSet>
struct fmt_token_rule_t
{
    using value_type = pct_string_view;

    constexpr
    fmt_token_rule_t(
        CharSet const& cs) noexcept
        : cs_(cs)
    {
    }

    template<class CharSet_>
    friend
    constexpr
    auto
    fmt_token_rule(
        CharSet_ const& cs) noexcept ->
            fmt_token_rule_t<CharSet_>;

    // Consume alternating token runs and replacement fields from
    // [it, end). Fails with grammar::error::need_more when nothing
    // at all was consumed.
    result<value_type>
    parse(
        char const*& it,
        char const* end) const noexcept
    {
        char const* const first = it;
        // Empty input is not expected here: every use of this token
        // is optional and yields `none` before reaching this rule,
        // so no need_more error is produced for `it == end`.
        BOOST_ASSERT(it != end);
        // a run of charset characters, which may be empty
        auto run_rule =
            grammar::optional_rule(
                grammar::token_rule(cs_));
        for (auto run = run_rule.parse(it, end);
             run;
             run = run_rule.parse(it, end))
        {
            // a further run is only read after a replacement field;
            // if none follows there is nothing else to consume
            char const* const mark = it;
            if (!replacement_field_rule.parse(it, end))
            {
                it = mark;
                break;
            }
        }
        if (it == first)
        {
            // input was available but none of it matched
            BOOST_URL_RETURN_EC(
                grammar::error::need_more);
        }
        return string_view(first, it - first);
    }

private:
    CharSet cs_;
};
// Build a fmt_token_rule_t for the character set `cs`.
template<class CharSet>
constexpr
auto
fmt_token_rule(
    CharSet const& cs) noexcept ->
        fmt_token_rule_t<CharSet>
{
    // A failure here means the type passed as `cs` does not satisfy
    // the CharSet requirements; see the documentation.
    static_assert(
        grammar::is_charset<CharSet>::value,
        "CharSet requirements not met");
    using rule_type =
        fmt_token_rule_t<CharSet>;
    return rule_type(cs);
}
// Rule for the userinfo part of an authority template:
//     user [ ":" password ]
// Both user and password may be empty and may contain replacement
// fields.
struct userinfo_template_rule_t
{
    struct value_type
    {
        string_view user;
        string_view password;
        bool has_password = false;
    };

    auto
    parse(
        char const*& it,
        char const* end
            ) const noexcept ->
        result<value_type>
    {
        // the password additionally allows ':'
        static constexpr auto uchars =
            unreserved_chars +
            sub_delim_chars;
        static constexpr auto pwchars =
            uchars + ':';
        static constexpr auto user_fmt_rule =
            pct_encoded_fmt_string_rule(uchars);
        static constexpr auto pass_fmt_rule =
            pct_encoded_fmt_string_rule(grammar::ref(pwchars));

        // starts out as "empty user, no password"
        value_type info;

        // user
        auto user_rv = grammar::parse(
            it, end, user_fmt_rule);
        BOOST_ASSERT(user_rv);
        info.user = *user_rv;

        // without a ':' there is no password component
        bool const at_colon =
            it != end && *it == ':';
        if (!at_colon)
            return info;
        ++it;

        // password
        auto pass_rv = grammar::parse(
            it, end, pass_fmt_rule);
        BOOST_ASSERT(pass_rv);
        info.has_password = true;
        info.password = *pass_rv;
        return info;
    }
};
constexpr userinfo_template_rule_t userinfo_template_rule{};
// Rule for the host part of an authority template. The host type is
// only validated once the replacement strings have been applied, so
// any character allowed in some kind of host is accepted here.
struct host_template_rule_t
{
    using value_type = string_view;

    auto
    parse(
        char const*& it,
        char const* end
            ) const noexcept ->
        result<value_type>
    {
        if (it == end)
        {
            // an empty host is valid
            return {};
        }

        if (*it == '[')
        {
            // A template that spells out ':' in an IP-literal must
            // wrap it in "[]", otherwise the ':' would be ambiguous
            // with the port separator. Replacement strings may still
            // supply "[", "]" and ":" themselves.
            constexpr auto ip_literal_template_rule =
                pct_encoded_fmt_string_rule(lhost_chars);
            char const* const first = it;
            auto bracketed = grammar::parse(
                it, end,
                grammar::optional_rule(
                    grammar::tuple_rule(
                        grammar::squelch(
                            grammar::delim_rule('[')),
                        ip_literal_template_rule,
                        grammar::squelch(
                            grammar::delim_rule(']')))));
            // The inner rule accepts empty input and the whole thing
            // is optional, so parsing itself never fails; a missing
            // "]" simply leaves nothing consumed.
            BOOST_ASSERT(bracketed);
            return string_view{first, it};
        }

        // IPv4address and reg-name draw from the same character set
        constexpr auto any_host_template_rule =
            pct_encoded_fmt_string_rule(host_chars);
        auto plain = grammar::parse(
            it, end, any_host_template_rule);
        // this rule accepts empty input, so it cannot fail
        BOOST_ASSERT(plain);
        return detail::to_sv(*plain);
    }
};
constexpr host_template_rule_t host_template_rule{};
// Rule for an authority template:
//     [ userinfo "@" ] host [ ":" port ]
// The result is a `pattern` with only the authority-related fields
// filled in.
struct authority_template_rule_t
{
    using value_type = pattern;

    result<value_type>
    parse(
        char const*& it,
        char const* end
    ) const noexcept
    {
        pattern out;

        // [ userinfo "@" ]
        auto userinfo_rv = grammar::parse(
            it, end,
            grammar::optional_rule(
                grammar::tuple_rule(
                    userinfo_template_rule,
                    grammar::squelch(
                        grammar::delim_rule('@')))));
        BOOST_ASSERT(userinfo_rv);
        if (userinfo_rv->has_value())
        {
            auto& info = **userinfo_rv;
            out.has_user = true;
            out.user = info.user;
            out.has_pass = info.has_password;
            out.pass = info.password;
        }

        // host (may be empty)
        auto host_rv = grammar::parse(
            it, end,
            host_template_rule);
        BOOST_ASSERT(host_rv);
        out.host = *host_rv;

        // [ ":" port ]
        constexpr auto port_template_rule =
            grammar::optional_rule(
                fmt_token_rule(grammar::digit_chars));
        char const* const before_port = it;
        auto port_rv = grammar::parse(
            it, end,
            grammar::tuple_rule(
                grammar::squelch(
                    grammar::delim_rule(':')),
                port_template_rule));
        if (port_rv)
        {
            // a ':' alone still counts as having a port
            out.has_port = true;
            if (port_rv->has_value())
            {
                out.port = **port_rv;
            }
        }
        else
        {
            // no port: undo whatever was consumed
            it = before_port;
        }
        return out;
    }
};
constexpr authority_template_rule_t authority_template_rule{};
// Rule for a scheme template: it must begin with an alpha character
// or a replacement field and continues with scheme characters and
// further replacement fields in any order.
struct scheme_template_rule_t
{
    using value_type = string_view;

    result<value_type>
    parse(
        char const*& it,
        char const* end) const noexcept
    {
        char const* const first = it;
        if (first == end)
        {
            // a scheme is never empty
            BOOST_URL_RETURN_EC(
                grammar::error::mismatch);
        }

        // leading element: one alpha char or one replacement field
        if (grammar::alpha_chars(*it))
        {
            ++it;
        }
        else if (*it != '{')
        {
            // neither alpha nor the start of a replacement field
            BOOST_URL_RETURN_EC(
                grammar::error::mismatch);
        }
        else if (!replacement_field_rule.parse(it, end))
        {
            // looked like a replacement field but is malformed
            BOOST_URL_RETURN_EC(
                grammar::error::mismatch);
        }

        static
        constexpr
        grammar::lut_chars scheme_chars(
            "0123456789" "+-."
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz");

        // Alternate between runs of scheme characters and replacement
        // fields. The first character that is neither ends the scheme.
        for (;;)
        {
            it = grammar::find_if_not(
                it, end, scheme_chars);
            if (it == end)
                break;
            char const* const mark = it;
            if (!replacement_field_rule.parse(it, end))
            {
                it = mark;
                break;
            }
        }
        return string_view(first, it - first);
    }
};
constexpr scheme_template_rule_t scheme_template_rule{};
// Rule for a complete URL template. It has to cover every URL form
// at once, as dictated by the format string:
// - relative urls with no scheme/authority
// - absolute urls have no fragment
struct pattern_rule_t
{
    using value_type = pattern;

    result<value_type>
    parse(
        char const*& it,
        char const* const end
    ) const noexcept
    {
        pattern out;

        // [ scheme ":" ]
        {
            char const* const before_scheme = it;
            auto scheme_rv = grammar::parse(
                it, end,
                grammar::tuple_rule(
                    scheme_template_rule,
                    grammar::squelch(
                        grammar::delim_rule(':'))));
            if (!scheme_rv)
                it = before_scheme;
            else
                out.scheme = *scheme_rv;
        }

        // hier_part (authority + path)
        // An authority needs "//", so with fewer than two characters
        // left only a path can follow.
        auto const remaining = end - it;
        if (remaining == 0)
        {
            // nothing left: path-empty
            return out;
        }
        if (remaining == 1)
        {
            if (*it == '/')
            {
                // path-absolute made of the lone separator
                out.path = {it, 1};
                ++it;
                return out;
            }
            // Without a scheme a lone ':' cannot start the path:
            // treat it as path-empty and consume nothing.
            if (out.scheme.empty() &&
                *it == ':')
            {
                return out;
            }
            // Otherwise this is a one-character path-rootless or
            // path-noscheme, i.e. a single segment.
            auto segment_rv = grammar::parse(
                it, end, urls::detail::segment_rule);
            if (!segment_rv)
                return segment_rv.error();
            out.path = *segment_rv;
            return out;
        }

        // authority: "//" always introduces one
        if (it[0] == '/' &&
            it[1] == '/')
        {
            it += 2;
            auto authority_rv = grammar::parse(
                it, end,
                authority_template_rule);
            // an authority may be empty, so this cannot fail
            BOOST_ASSERT(authority_rv);
            auto const& auth = *authority_rv;
            out.has_authority = true;
            out.has_user = auth.has_user;
            out.user = auth.user;
            out.has_pass = auth.has_pass;
            out.pass = auth.pass;
            out.host = auth.host;
            out.has_port = auth.has_port;
            out.port = auth.port;
        }

        // After an authority the path must be absolute or empty, so
        // only '/', '?' or '#' may follow it. Anything else is left
        // unconsumed (path-empty).
        bool const more_to_parse =
            it != end &&
            (!out.has_authority ||
             *it == '/' ||
             *it == '?' ||
             *it == '#');
        if (!more_to_parse)
        {
            return out;
        }

        // path-abempty
        // The whole path is taken in one go; segments are counted
        // later, once the replacements have been applied.
        static constexpr auto segment_fmt_rule =
            pct_encoded_fmt_string_rule(path_chars);
        auto path_rv = grammar::parse(
            it, end, segment_fmt_rule);
        // path-abempty may be empty, so this cannot fail
        BOOST_ASSERT(path_rv);
        out.path = *path_rv;

        // [ "?" query ]
        // the query text may be empty, the '?' is mandatory
        static constexpr auto query_fmt_rule =
            pct_encoded_fmt_string_rule(query_chars);
        auto query_rv = grammar::parse(
            it, end,
            grammar::tuple_rule(
                grammar::squelch(
                    grammar::delim_rule('?')),
                query_fmt_rule));
        if (query_rv)
        {
            out.has_query = true;
            out.query = *query_rv;
        }

        // [ "#" fragment ]
        // the fragment text may be empty, the '#' is mandatory
        static constexpr auto frag_fmt_rule =
            pct_encoded_fmt_string_rule(fragment_chars);
        auto frag_rv = grammar::parse(
            it, end,
            grammar::tuple_rule(
                grammar::squelch(
                    grammar::delim_rule('#')),
                frag_fmt_rule));
        if (frag_rv)
        {
            out.has_frag = true;
            out.frag = *frag_rv;
        }
        return out;
    }
};
constexpr pattern_rule_t pattern_rule{};
// Parse the format string `s` as a URL template using pattern_rule
// and return the resulting pattern or the parse error.
result<pattern>
parse_pattern(
    string_view s)
{
    auto parsed = grammar::parse(
        s, pattern_rule);
    return parsed;
}
} // detail
} // urls
} // boost
#endif