#uuid parser
29 messages · Page 1 of 1 (latest)
// A parsed UUID as two 64-bit halves: .first holds the 16 hex digits that
// precede the third dash, .second holds the remaining 16.
using uuid_t = std::pair<std::uint64_t, std::uint64_t>;

/// Parses the 36-character textual form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.
///
/// Hex digits may be upper- or lowercase. Digits are packed in order of
/// appearance starting at the LEAST significant nibble: the first hex digit
/// of each half occupies bits 0-3 of that half, the next one bits 4-7, etc.
///
/// @param arr exactly 36 characters (no terminator expected)
/// @return the two packed 64-bit halves
/// @throws std::runtime_error if a hex position holds a non-hex character or
///         a dash position (8, 13, 18, 23) holds something other than '-';
///         characters are checked left to right, so the leftmost offending
///         position determines which error is reported
inline uuid_t parse_uuid(const std::array<char, 36>& arr)
{
    // The letter branches below rely on a-f / A-F having consecutive codes.
    static_assert('a' + 1 == 'b' && 'b' + 1 == 'c' && 'c' + 1 == 'd' && 'd' + 1 == 'e' && 'e' + 1 == 'f', "lowercase a-f characters not in order, panic!!!");
    static_assert('A' + 1 == 'B' && 'B' + 1 == 'C' && 'C' + 1 == 'D' && 'D' + 1 == 'E' && 'E' + 1 == 'F', "uppercase a-f characters not in order, panic!!!");

    std::uint64_t halves[2] = {0, 0};
    std::size_t nibble = 0; // number of hex digits consumed so far (0..32)

    for (std::size_t pos = 0; pos < arr.size(); ++pos)
    {
        const char ch = arr[pos];

        // Separator slots carry no data; they only have to be a dash.
        const bool is_separator_slot = (pos == 8 || pos == 13 || pos == 18 || pos == 23);
        if (is_separator_slot)
        {
            if (ch != '-')
            {
                throw std::runtime_error("UUID parsing failed: expected dash (-) not found");
            }
            continue;
        }

        std::uint64_t value = 0;
        if (ch >= '0' && ch <= '9')
        {
            value = static_cast<std::uint64_t>(ch - '0');
        }
        else if (ch >= 'a' && ch <= 'f')
        {
            value = static_cast<std::uint64_t>(ch - 'a' + 10);
        }
        else if (ch >= 'A' && ch <= 'F')
        {
            value = static_cast<std::uint64_t>(ch - 'A' + 10);
        }
        else
        {
            throw std::runtime_error("UUID parsing failed: invalid hex character");
        }

        // Digits 0-15 fill the first half, 16-31 the second; within a half
        // each digit goes one nibble higher than the previous one.
        halves[nibble / 16] |= value << ((nibble % 16) * 4);
        ++nibble;
    }

    return uuid_t{halves[0], halves[1]};
}
(the code was marginally too long to fit into the post)
lgtm I guess. there are optimizations possible if you need this to be really fast ofc, but I guess it's not performance critical?
but if you wanna go that route: I'd probably copy only the digits into a local array and then translate each character to a digit value in there. if you write that well, it should vectorize pretty neatly.
one thing you might want to consider is returning an std::expected instead of throwing directly.
mmm but even with the additional copying it could be faster?
almost definitely
I was originally going to use optional, but I didn't want to add an if statement a billion times
i guess it's just 4
im going to benchmark this tomorrow if i remember
I can also recommend editing this code of yours I pasted into Compiler Explorer such that it compiles, and then checking whether the Assembly output view on the right-hand side shows that it got vectorized:
https://godbolt.org/z/jhbvja789
using uuid_t = std::pair<std::uint64_t, std::uint64_t>;
// @throws std::runtime_error if uuid has invalid format
inline uuid_t parse_uuid(const std::array<char, 36>& arr)
{
uuid_t uuid{0, 0};
const auto hex_digit_to_uint64 = [](char c) -> std::uint64_t
{
static_assert('a' + 1 == 'b' && 'b' + 1 == 'c' && 'c' + 1 == 'd' && 'd'...
Note that your throw std::runtime_error("UUID parsing failed: invalid hex character"); statement will probably prevent the loops that call your hex_digit_to_uint64 from being vectorized, because each iteration can then exit the loop early
Why?
more readable
hot damn, i was playing around with this for a bit now
the gcc codegen is atrocious
what on earth

So it's probably better to insert a check that returns early in each of the loops?
I don't know, you'll have to check the generated Assembly + benchmark it, cause this sort of stuff is hard to predict for humans