/** * Copyright (C) 2015 Topology LP * Copyright (C) 2018 Jakob Petsovits * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef CPPCODEC_DETAIL_STREAM_CODEC #define CPPCODEC_DETAIL_STREAM_CODEC #include #include // for abort() #include #include "../parse_error.hpp" #include "config.hpp" namespace cppcodec { namespace detail { using alphabet_index_t = uint_fast16_t; template class stream_codec { public: template static void encode( Result& encoded_result, ResultState&, const uint8_t* binary, size_t binary_size); template static void decode( Result& binary_result, ResultState&, const char* encoded, size_t encoded_size); static constexpr size_t encoded_size(size_t binary_size) noexcept; static constexpr size_t decoded_max_size(size_t encoded_size) noexcept; }; template // default for CodecVariant::generates_padding() == false struct padder { template CPPCODEC_ALWAYS_INLINE void operator()(Result&, ResultState&, EncodedBlockSizeT) { } }; template<> // specialization for CodecVariant::generates_padding() == true struct padder { template CPPCODEC_ALWAYS_INLINE void operator()( Result& encoded, ResultState& state, EncodedBlockSizeT num_padding_characters) { for (EncodedBlockSizeT i = 0; i < num_padding_characters; ++i) { data::put(encoded, state, CodecVariant::padding_symbol()); } } }; template struct enc { // Block encoding: Go from 0 to (block size - 1), append a symbol for each iteration unconditionally. template static CPPCODEC_ALWAYS_INLINE void block(Result& encoded, ResultState& state, const uint8_t* src) { using EncodedBlockSizeT = decltype(Codec::encoded_block_size()); constexpr static const EncodedBlockSizeT SymbolIndex = static_cast(I - 1); enc().template block(encoded, state, src); data::put(encoded, state, CodecVariant::symbol(Codec::template index(src))); } // Tail encoding: Go from 0 until (runtime) num_symbols, append a symbol for each iteration. template static CPPCODEC_ALWAYS_INLINE void tail( Result& encoded, ResultState& state, const uint8_t* src, EncodedBlockSizeT num_symbols) { constexpr static const EncodedBlockSizeT SymbolIndex = Codec::encoded_block_size() - I; constexpr static const EncodedBlockSizeT NumSymbols = SymbolIndex + static_cast(1); if (num_symbols == NumSymbols) { data::put(encoded, state, CodecVariant::symbol(Codec::template index_last(src))); padder pad; #ifdef _MSC_VER pad.operator()(encoded, state, Codec::encoded_block_size() - NumSymbols); #else pad.template operator()(encoded, state, Codec::encoded_block_size() - NumSymbols); #endif return; } data::put(encoded, state, CodecVariant::symbol(Codec::template index(src))); enc().template tail(encoded, state, src, num_symbols); } }; template<> // terminating specialization struct enc<0> { template static CPPCODEC_ALWAYS_INLINE void block(Result&, ResultState&, const uint8_t*) { } template static CPPCODEC_ALWAYS_INLINE void tail(Result&, ResultState&, const uint8_t*, EncodedBlockSizeT) { abort(); // Not reached: block() should be called if num_symbols == block size, not tail(). } }; template template inline void stream_codec::encode( Result& encoded_result, ResultState& state, const uint8_t* src, size_t src_size) { using encoder = enc; const uint8_t* src_end = src + src_size; if (src_size >= Codec::binary_block_size()) { src_end -= Codec::binary_block_size(); for (; src <= src_end; src += Codec::binary_block_size()) { encoder::template block(encoded_result, state, src); } src_end += Codec::binary_block_size(); } if (src_end > src) { auto remaining_src_len = src_end - src; if (!remaining_src_len || remaining_src_len >= Codec::binary_block_size()) { abort(); return; } auto num_symbols = Codec::num_encoded_tail_symbols(static_cast(remaining_src_len)); encoder::template tail(encoded_result, state, src, num_symbols); } } // Range & lookup table generation, see // http://stackoverflow.com/questions/13313980/populate-an-array-using-constexpr-at-compile-time // and http://cplusadd.blogspot.ca/2013/02/c11-compile-time-lookup-tablearray-with.html template struct seq {}; template struct gen_seq : gen_seq { // Clang up to 3.6 has a limit of 256 for template recursion, // so pass a few more symbols at once to make it work. static_assert(N % 4 == 0, "I must be divisible by 4 to eventually end at 0"); }; template struct gen_seq<0, Is...> : seq {}; template struct lookup_table_t { alphabet_index_t lookup[N]; static constexpr size_t size = N; }; template constexpr lookup_table_t make_lookup_table(seq, LambdaType value_for_index) { return { { value_for_index(Is)... } }; } template constexpr lookup_table_t make_lookup_table(LambdaType evalFunc) { return make_lookup_table(gen_seq(), evalFunc); } // CodecVariant::symbol() provides a symbol for an index. // Use recursive templates to get the inverse lookup table for fast decoding. template static CPPCODEC_ALWAYS_INLINE constexpr size_t num_possible_values() { return static_cast( static_cast(std::numeric_limits::max()) - static_cast(std::numeric_limits::min()) + 1); } template struct index_if_in_alphabet { static CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t for_symbol(char symbol) { return (CodecVariant::symbol( static_cast(CodecVariant::alphabet_size() - I)) == symbol) ? static_cast(CodecVariant::alphabet_size() - I) : index_if_in_alphabet::for_symbol(symbol); } }; template struct index_if_in_alphabet { // terminating specialization static CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t for_symbol(char) { return InvalidIdx; } }; template struct padding_searcher { static CPPCODEC_ALWAYS_INLINE constexpr bool exists_padding_symbol() { // Clang up to 3.6 has a limit of 256 for template recursion, // so pass a few more symbols at once to make it work. static_assert(I % 4 == 0, "I must be divisible by 4 to eventually end at 0"); return CodecVariant::is_padding_symbol( static_cast(num_possible_values() - I - 4)) || CodecVariant::is_padding_symbol( static_cast(num_possible_values() - I - 3)) || CodecVariant::is_padding_symbol( static_cast(num_possible_values() - I - 2)) || CodecVariant::is_padding_symbol( static_cast(num_possible_values() - I - 1)) || padding_searcher::exists_padding_symbol(); } }; template struct padding_searcher { // terminating specialization static CPPCODEC_ALWAYS_INLINE constexpr bool exists_padding_symbol() { return false; } }; template struct alphabet_index_info { static constexpr const size_t num_possible_symbols = num_possible_values(); static constexpr const alphabet_index_t padding_idx = 1 << 8; static constexpr const alphabet_index_t invalid_idx = 1 << 9; static constexpr const alphabet_index_t eof_idx = 1 << 10; static constexpr const alphabet_index_t stop_character_mask = static_cast(~0xFFu); static constexpr const bool padding_allowed = padding_searcher< CodecVariant, num_possible_symbols>::exists_padding_symbol(); static CPPCODEC_ALWAYS_INLINE constexpr bool allows_padding() { return padding_allowed; } static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding(alphabet_index_t idx) { return allows_padding() ? (idx == padding_idx) : false; } static CPPCODEC_ALWAYS_INLINE constexpr bool is_invalid(alphabet_index_t idx) { return idx == invalid_idx; } static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(alphabet_index_t idx) { return idx == eof_idx; } static CPPCODEC_ALWAYS_INLINE constexpr bool is_stop_character(alphabet_index_t idx) { return (idx & stop_character_mask) != 0; } private: static CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t valid_index_or(alphabet_index_t a, alphabet_index_t b) { return a == invalid_idx ? b : a; } using idx_if_in_alphabet = index_if_in_alphabet< CodecVariant, invalid_idx, CodecVariant::alphabet_size()>; static CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t index_of(char symbol) { return valid_index_or(idx_if_in_alphabet::for_symbol(symbol), CodecVariant::is_eof_symbol(symbol) ? eof_idx : CodecVariant::is_padding_symbol(symbol) ? padding_idx : invalid_idx); } // GCC <= 4.9 has a bug with retaining constexpr when passing a function pointer. // To get around this, we'll create a callable with operator() and pass that one. // Unfortunately, MSVC prior to VS 2017 (for MinSizeRel or Release builds) // chokes on this by compiling the project in 20 minutes instead of seconds. // So let's define two separate variants and remove the old GCC one whenever we // decide not to support GCC < 5.0 anymore. #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 struct index_at { CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t operator()(size_t symbol) const { return index_of(CodecVariant::normalized_symbol(static_cast(symbol))); } }; #else static CPPCODEC_ALWAYS_INLINE constexpr alphabet_index_t index_at(size_t symbol) { return index_of(CodecVariant::normalized_symbol(static_cast(symbol))); } #endif public: struct lookup { static CPPCODEC_ALWAYS_INLINE alphabet_index_t for_symbol(char symbol) { #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 static constexpr const auto t = make_lookup_table(index_at()); #else static constexpr const auto t = make_lookup_table(&index_at); #endif static_assert(t.size == num_possible_symbols, "lookup table must cover each possible (character) symbol"); return t.lookup[static_cast(symbol)]; } }; }; // // At long last! The actual decode/encode functions. template template inline void stream_codec::decode( Result& binary_result, ResultState& state, const char* src_encoded, size_t src_size) { using alphabet_index_lookup = typename alphabet_index_info::lookup; const char* src = src_encoded; const char* src_end = src + src_size; alphabet_index_t alphabet_indexes[Codec::encoded_block_size()] = {}; alphabet_indexes[0] = alphabet_index_info::eof_idx; alphabet_index_t* const alphabet_index_start = &alphabet_indexes[0]; alphabet_index_t* const alphabet_index_end = &alphabet_indexes[Codec::encoded_block_size()]; alphabet_index_t* alphabet_index_ptr = &alphabet_indexes[0]; while (src < src_end) { if (CodecVariant::should_ignore(*src)) { ++src; continue; } *alphabet_index_ptr = alphabet_index_lookup::for_symbol(*src); if (alphabet_index_info::is_stop_character(*alphabet_index_ptr)) { break; } ++src; ++alphabet_index_ptr; if (alphabet_index_ptr == alphabet_index_end) { Codec::decode_block(binary_result, state, alphabet_indexes); alphabet_index_ptr = alphabet_index_start; } } if (alphabet_index_info::is_invalid(*alphabet_index_ptr)) { throw symbol_error(*src); } ++src; alphabet_index_t* last_index_ptr = alphabet_index_ptr; if (alphabet_index_info::is_padding(*last_index_ptr)) { if (last_index_ptr == alphabet_index_start) { // Don't accept padding at the start of a block. // The encoder should have omitted that padding altogether. throw padding_error(); } // We're in here because we just read a (first) padding character. Try to read more. // Count with last_index_ptr, but store in alphabet_index_ptr so we don't // overflow the array in case the input data is too long. ++last_index_ptr; while (src < src_end) { *alphabet_index_ptr = alphabet_index_lookup::for_symbol(*(src++)); if (alphabet_index_info::is_eof(*alphabet_index_ptr)) { *alphabet_index_ptr = alphabet_index_info::padding_idx; break; } if (!alphabet_index_info::is_padding(*alphabet_index_ptr)) { throw padding_error(); } ++last_index_ptr; if (last_index_ptr > alphabet_index_end) { throw padding_error(); } } } if (last_index_ptr != alphabet_index_start) { if ((CodecVariant::requires_padding() || alphabet_index_info::is_padding(*alphabet_index_ptr) ) && last_index_ptr != alphabet_index_end) { // If the input is not a multiple of the block size then the input is incorrect. throw padding_error(); } if (alphabet_index_ptr >= alphabet_index_end) { abort(); return; } Codec::decode_tail(binary_result, state, alphabet_indexes, static_cast(alphabet_index_ptr - alphabet_index_start)); } } template inline constexpr size_t stream_codec::encoded_size(size_t binary_size) noexcept { using C = Codec; // constexpr rules make this a lot harder to read than it actually is. return CodecVariant::generates_padding() // With padding, the encoded size is a multiple of the encoded block size. // To calculate that, round the binary size up to multiple of the binary block size, // then convert to encoded by multiplying with { base32: 8/5, base64: 4/3 }. ? (binary_size + (C::binary_block_size() - 1) - ((binary_size + (C::binary_block_size() - 1)) % C::binary_block_size())) * C::encoded_block_size() / C::binary_block_size() // No padding: only pad to the next multiple of 5 bits, i.e. at most a single extra byte. : (binary_size * C::encoded_block_size() / C::binary_block_size()) + (((binary_size * C::encoded_block_size()) % C::binary_block_size()) ? 1 : 0); } template inline constexpr size_t stream_codec::decoded_max_size(size_t encoded_size) noexcept { using C = Codec; return CodecVariant::requires_padding() ? (encoded_size / C::encoded_block_size() * C::binary_block_size()) : (encoded_size / C::encoded_block_size() * C::binary_block_size()) + ((encoded_size % C::encoded_block_size()) * C::binary_block_size() / C::encoded_block_size()); } } // namespace detail } // namespace cppcodec #endif // CPPCODEC_DETAIL_STREAM_CODEC