ext-boost/boost/url/grammar/lut_chars.hpp
2023-05-22 18:45:02 +10:00

403 lines
8.7 KiB
C++

//
// Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
#ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
#define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
#include <boost/url/detail/config.hpp>
#include <boost/url/grammar/detail/charset.hpp>
#include <cstdint>
#include <type_traits>
// Credit to Peter Dimov for ideas regarding
// SIMD constexpr, and character set masks.
namespace boost {
namespace urls {
namespace grammar {
#ifndef BOOST_URL_DOCS
namespace detail {
template<class T, class = void>
struct is_pred : std::false_type {};
template<class T>
struct is_pred<T, void_t<
decltype(
std::declval<bool&>() =
std::declval<T const&>().operator()(
std::declval<char>())
) > > : std::true_type
{
};
} // detail
#endif
/** A set of characters
The characters defined by instances of
this set are provided upon construction.
The `constexpr` implementation allows
these to become compile-time constants.
@par Example
Character sets are used with rules and the
functions @ref find_if and @ref find_if_not.
@code
constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
result< string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
@endcode
@see
@ref find_if,
@ref find_if_not,
@ref parse,
@ref token_rule.
*/
class lut_chars
{
std::uint64_t mask_[4] = {};
constexpr
static
std::uint64_t
lo(char c) noexcept
{
return static_cast<
unsigned char>(c) & 3;
}
constexpr
static
std::uint64_t
hi(char c) noexcept
{
return 1ULL << (static_cast<
unsigned char>(c) >> 2);
}
constexpr
static
lut_chars
construct(
char const* s) noexcept
{
return *s
? lut_chars(*s) +
construct(s+1)
: lut_chars();
}
constexpr
static
lut_chars
construct(
unsigned char ch,
bool b) noexcept
{
return b
? lut_chars(ch)
: lut_chars();
}
template<class Pred>
constexpr
static
lut_chars
construct(
Pred pred,
unsigned char ch) noexcept
{
return ch == 255
? construct(ch, pred(ch))
: construct(ch, pred(ch)) +
construct(pred, ch + 1);
}
constexpr
lut_chars() = default;
constexpr
lut_chars(
std::uint64_t m0,
std::uint64_t m1,
std::uint64_t m2,
std::uint64_t m3) noexcept
: mask_{ m0, m1, m2, m3 }
{
}
public:
/** Constructor
This function constructs a character
set which has as a single member,
the character `ch`.
@par Example
@code
constexpr lut_chars asterisk( '*' );
@endcode
@par Complexity
Constant.
@par Exception Safety
Throws nothing.
@param ch A character.
*/
constexpr
lut_chars(char ch) noexcept
: mask_ {
lo(ch) == 0 ? hi(ch) : 0,
lo(ch) == 1 ? hi(ch) : 0,
lo(ch) == 2 ? hi(ch) : 0,
lo(ch) == 3 ? hi(ch) : 0 }
{
}
/** Constructor
This function constructs a character
set which has as members, all of the
characters present in the null-terminated
string `s`.
@par Example
@code
constexpr lut_chars digits = "0123456789";
@endcode
@par Complexity
Linear in `::strlen(s)`, or constant
if `s` is a constant expression.
@par Exception Safety
Throws nothing.
@param s A null-terminated string.
*/
constexpr
lut_chars(
char const* s) noexcept
: lut_chars(construct(s))
{
}
/** Constructor.
This function constructs a character
set which has as members, every value
of `char ch` for which the expression
`pred(ch)` returns `true`.
@par Example
@code
struct is_digit
{
constexpr bool
operator()(char c ) const noexcept
{
return c >= '0' && c <= '9';
}
};
constexpr lut_chars digits( is_digit{} );
@endcode
@par Complexity
Linear in `pred`, or constant if
`pred(ch)` is a constant expression.
@par Exception Safety
Throws nothing.
@param pred The function object to
use for determining membership in
the character set.
*/
template<class Pred
#ifndef BOOST_URL_DOCS
,class = typename std::enable_if<
detail::is_pred<Pred>::value &&
! std::is_base_of<
lut_chars, Pred>::value>::type
#endif
>
constexpr
lut_chars(Pred const& pred) noexcept
: lut_chars(
construct(pred, 0))
{
}
/** Return true if ch is in the character set.
This function returns true if the
character `ch` is in the set, otherwise
it returns false.
@par Complexity
Constant.
@par Exception Safety
Throws nothing.
@param ch The character to test.
*/
constexpr
bool
operator()(
unsigned char ch) const noexcept
{
return mask_[lo(ch)] & hi(ch);
}
/** Return the union of two character sets.
This function returns a new character
set which contains all of the characters
in `cs0` as well as all of the characters
in `cs`.
@par Example
This creates a character set which
includes all letters and numbers
@code
constexpr lut_chars alpha_chars(
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz");
constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
@endcode
@par Complexity
Constant.
@return The new character set.
@param cs0 A character to join
@param cs1 A character to join
*/
friend
constexpr
lut_chars
operator+(
lut_chars const& cs0,
lut_chars const& cs1) noexcept
{
return lut_chars(
cs0.mask_[0] | cs1.mask_[0],
cs0.mask_[1] | cs1.mask_[1],
cs0.mask_[2] | cs1.mask_[2],
cs0.mask_[3] | cs1.mask_[3]);
}
/** Return a new character set by subtracting
This function returns a new character
set which is formed from all of the
characters in `cs0` which are not in `cs`.
@par Example
This statement declares a character set
containing all the lowercase letters
which are not vowels:
@code
constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
@endcode
@par Complexity
Constant.
@return The new character set.
@param cs0 A character set to join.
@param cs1 A character set to join.
*/
friend
constexpr
lut_chars
operator-(
lut_chars const& cs0,
lut_chars const& cs1) noexcept
{
return lut_chars(
cs0.mask_[0] & ~cs1.mask_[0],
cs0.mask_[1] & ~cs1.mask_[1],
cs0.mask_[2] & ~cs1.mask_[2],
cs0.mask_[3] & ~cs1.mask_[3]);
}
/** Return a new character set which is the complement of another character set.
This function returns a new character
set which contains all of the characters
that are not in `*this`.
@par Example
This statement declares a character set
containing everything but vowels:
@code
constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
@endcode
@par Complexity
Constant.
@par Exception Safety
Throws nothing.
@return The new character set.
*/
constexpr
lut_chars
operator~() const noexcept
{
return lut_chars(
~mask_[0],
~mask_[1],
~mask_[2],
~mask_[3]
);
}
#ifndef BOOST_URL_DOCS
#ifdef BOOST_URL_USE_SSE2
char const*
find_if(
char const* first,
char const* last) const noexcept
{
return detail::find_if_pred(
*this, first, last);
}
char const*
find_if_not(
char const* first,
char const* last) const noexcept
{
return detail::find_if_not_pred(
*this, first, last);
}
#endif
#endif
};
} // grammar
} // urls
} // boost
#endif