Add MD5 hashing APIs to Azure::Core available from azure/core/md5.hpp. (#1617)

* Add MD5 hashing APIs to Azure::Core available from azure/core/md5.hpp.

* Add simplified header test for md5 and base64.

* Add changelog entry.

* Remove unnecessary include.

* Address feedback - add back ptr, length APIs.

* Address PR feedback - docs and typo fixes.
This commit is contained in:
Ahson Khan 2021-02-08 15:34:41 -08:00 committed by GitHub
parent d9ae3cf5fd
commit 568d4bcc4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 344 additions and 1 deletions

View File

@ -5,6 +5,7 @@
### New Features
- Added support for HTTP conditional requests `MatchConditions` and `RequestConditions`.
- Added MD5 hashing APIs to the `Azure::Core` namespace available from `azure/core/md5.hpp`.
### Breaking Changes

View File

@ -64,6 +64,7 @@ set(
inc/azure/core/etag.hpp
inc/azure/core/exception.hpp
inc/azure/core/match_conditions.hpp
inc/azure/core/md5.hpp
inc/azure/core/nullable.hpp
inc/azure/core/operation.hpp
inc/azure/core/operation_status.hpp
@ -94,6 +95,7 @@ set(
src/base64.cpp
src/context.cpp
src/datetime.cpp
src/md5.cpp
src/operation_status.cpp
src/strings.cpp
src/version.cpp
@ -117,7 +119,7 @@ create_code_coverage(core azure-core azure-core-test)
target_link_libraries(azure-core INTERFACE Threads::Threads)
if(WIN32)
target_link_libraries(azure-core PRIVATE crypt32)
target_link_libraries(azure-core PRIVATE bcrypt crypt32)
else()
find_package(OpenSSL REQUIRED)
target_link_libraries(azure-core PRIVATE OpenSSL::SSL)

View File

@ -11,12 +11,14 @@
*/
// azure/core
#include "azure/core/base64.hpp"
#include "azure/core/context.hpp"
#include "azure/core/credentials.hpp"
#include "azure/core/datetime.hpp"
#include "azure/core/dll_import_export.hpp"
#include "azure/core/etag.hpp"
#include "azure/core/match_conditions.hpp"
#include "azure/core/md5.hpp"
#include "azure/core/nullable.hpp"
#include "azure/core/request_conditions.hpp"
#include "azure/core/response.hpp"

View File

@ -0,0 +1,75 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT
/**
* @file
* @brief Utility functions to help compute the MD5 hash value for the input binary data.
*/
#pragma once
#include <cstdint>
#include <vector>
namespace Azure { namespace Core {
/**
* @brief Represents the class for the MD5 hash function which maps binary data of an arbitrary
* length to small binary data of a fixed length.
*/
class Md5 {
public:
  /**
   * @brief Construct a default instance of #Md5.
   */
  explicit Md5();

  /**
   * @brief Cleanup any state when destroying the instance of #Md5.
   */
  ~Md5();

  // The instance exclusively owns the opaque context behind m_md5Context and frees it in the
  // destructor. A member-wise copy would leave two instances freeing the same context, so copying
  // is disabled (which also suppresses the implicit move operations).
  Md5(const Md5&) = delete;
  Md5& operator=(const Md5&) = delete;

  /**
   * @brief Used to append partial binary input data to compute the hash in a streaming fashion.
   * @remark Once all the data has been added, call #Digest() to get the computed hash value.
   * @param data The pointer to the current block of binary data that is used for hash
   * calculation.
   * @param length The size of the data provided, in bytes.
   */
  void Update(const uint8_t* data, std::size_t length);

  /**
   * @brief Computes the hash value of all the binary input data appended to the instance so far.
   * @remark Use #Update() to add more partial data before calling this function.
   * @remark This finalizes the underlying platform hash computation, so the instance should not
   * be reused for further #Update() or #Digest() calls afterwards.
   * @return The computed MD5 hash value corresponding to the input provided.
   */
  std::vector<uint8_t> Digest() const;

  /**
   * @brief Computes the hash value of the specified binary input data.
   * @param data The pointer to binary data to compute the hash value for.
   * @param length The size of the data provided, in bytes.
   * @return The computed MD5 hash value corresponding to the input provided.
   */
  static std::vector<uint8_t> Hash(const uint8_t* data, std::size_t length)
  {
    Md5 instance;
    instance.Update(data, length);
    return instance.Digest();
  }

  /**
   * @brief Computes the hash value of the specified binary input data.
   * @param data The input vector to compute the hash value for.
   * @return The computed MD5 hash value corresponding to the input provided.
   */
  static std::vector<uint8_t> Hash(const std::vector<uint8_t>& data)
  {
    return Hash(data.data(), data.size());
  }

private:
  // Opaque, platform-specific hashing state; allocated by the constructor and released by the
  // destructor in the implementation file.
  void* m_md5Context;
};
}} // namespace Azure::Core

View File

@ -0,0 +1,169 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT
#include "azure/core/md5.hpp"
#include "azure/core/platform.hpp"
#if defined(AZ_PLATFORM_WINDOWS)
// <windows.h> must be included before <bcrypt.h>
#include <windows.h>
#include <bcrypt.h>
#elif defined(AZ_PLATFORM_POSIX)
#include <openssl/md5.h>
#endif
#include <stdexcept>
#include <vector>
namespace Azure { namespace Core {
#if defined(AZ_PLATFORM_WINDOWS)
namespace Details {
struct AlgorithmProviderInstance
{
BCRYPT_ALG_HANDLE Handle;
std::size_t ContextSize;
std::size_t HashLength;
AlgorithmProviderInstance()
{
NTSTATUS status = BCryptOpenAlgorithmProvider(&Handle, BCRYPT_MD5_ALGORITHM, nullptr, 0);
if (!BCRYPT_SUCCESS(status))
{
throw std::runtime_error("BCryptOpenAlgorithmProvider failed");
}
DWORD objectLength = 0;
DWORD dataLength = 0;
status = BCryptGetProperty(
Handle,
BCRYPT_OBJECT_LENGTH,
reinterpret_cast<PBYTE>(&objectLength),
sizeof(objectLength),
&dataLength,
0);
if (!BCRYPT_SUCCESS(status))
{
throw std::runtime_error("BCryptGetProperty failed");
}
ContextSize = objectLength;
DWORD hashLength = 0;
status = BCryptGetProperty(
Handle,
BCRYPT_HASH_LENGTH,
reinterpret_cast<PBYTE>(&hashLength),
sizeof(hashLength),
&dataLength,
0);
if (!BCRYPT_SUCCESS(status))
{
throw std::runtime_error("BCryptGetProperty failed");
}
HashLength = hashLength;
}
~AlgorithmProviderInstance() { BCryptCloseAlgorithmProvider(Handle, 0); }
};
/**
 * Per-instance hashing state for the BCrypt implementation.
 */
struct Md5HashContext
{
  // Backing storage handed to BCryptCreateHash for the hash object. A byte vector is used rather
  // than std::string because this is raw binary storage, and <vector> is already included here
  // whereas <string> is not.
  std::vector<uint8_t> buffer;
  // Handle filled in by BCryptCreateHash; null until then.
  BCRYPT_HASH_HANDLE hashHandle = nullptr;
  // Length in bytes of the digest produced by the provider.
  std::size_t hashLength = 0;
};
} // namespace Details
/**
 * Creates the BCrypt hash object that backs this instance.
 * Throws std::runtime_error if the algorithm provider or the hash object cannot be created.
 */
Md5::Md5() : m_md5Context(nullptr)
{
  // Opened once and shared by every instance.
  static Details::AlgorithmProviderInstance AlgorithmProvider{};

  Details::Md5HashContext* md5Context = new Details::Md5HashContext;
  try
  {
    md5Context->buffer.resize(AlgorithmProvider.ContextSize);
    md5Context->hashLength = AlgorithmProvider.HashLength;

    NTSTATUS status = BCryptCreateHash(
        AlgorithmProvider.Handle,
        &md5Context->hashHandle,
        reinterpret_cast<PUCHAR>(&md5Context->buffer[0]),
        static_cast<ULONG>(md5Context->buffer.size()),
        nullptr,
        0,
        0);
    if (!BCRYPT_SUCCESS(status))
    {
      throw std::runtime_error("BCryptCreateHash failed");
    }
  }
  catch (...)
  {
    // ~Md5() is not invoked when the constructor throws, so release the context here instead of
    // leaking it.
    delete md5Context;
    throw;
  }

  // Publish the context only once it is fully initialized.
  m_md5Context = md5Context;
}
/**
 * Destroys the BCrypt hash object and then frees the context that holds its backing buffer.
 */
Md5::~Md5()
{
  auto* context = static_cast<Details::Md5HashContext*>(m_md5Context);
  // The hash object lives inside context->buffer, so it is destroyed before the buffer is freed.
  BCryptDestroyHash(context->hashHandle);
  delete context;
}
void Md5::Update(const uint8_t* data, std::size_t length)
{
Details::Md5HashContext* md5Context = static_cast<Details::Md5HashContext*>(m_md5Context);
NTSTATUS status = BCryptHashData(
md5Context->hashHandle,
reinterpret_cast<PBYTE>(const_cast<uint8_t*>(data)),
static_cast<ULONG>(length),
0);
if (!BCRYPT_SUCCESS(status))
{
throw std::runtime_error("BCryptHashData failed");
}
}
std::vector<uint8_t> Md5::Digest() const
{
Details::Md5HashContext* md5Context = static_cast<Details::Md5HashContext*>(m_md5Context);
std::vector<uint8_t> hash;
hash.resize(md5Context->hashLength);
NTSTATUS status = BCryptFinishHash(
md5Context->hashHandle,
reinterpret_cast<PUCHAR>(&hash[0]),
static_cast<ULONG>(hash.size()),
0);
if (!BCRYPT_SUCCESS(status))
{
throw std::runtime_error("BCryptFinishHash failed");
}
return hash;
}
#elif defined(AZ_PLATFORM_POSIX)
/**
 * Allocates and initializes the OpenSSL MD5 context that backs this instance.
 * Throws std::runtime_error if MD5_Init reports failure.
 */
Md5::Md5() : m_md5Context(nullptr)
{
  MD5_CTX* md5Context = new MD5_CTX;
  // MD5_Init returns 1 on success; do not keep an uninitialized context around on failure.
  if (MD5_Init(md5Context) != 1)
  {
    // ~Md5() is not invoked when the constructor throws, so free the context here.
    delete md5Context;
    throw std::runtime_error("MD5_Init failed");
  }
  m_md5Context = md5Context;
}
/**
 * Frees the MD5 context allocated by the constructor.
 */
Md5::~Md5()
{
  // m_md5Context is stored type-erased; restore the real type so the right object is deleted.
  delete static_cast<MD5_CTX*>(m_md5Context);
}
/**
 * Feeds `length` bytes starting at `data` into the running hash.
 * Throws std::runtime_error if MD5_Update reports failure.
 */
void Md5::Update(const uint8_t* data, std::size_t length)
{
  MD5_CTX* md5Context = static_cast<MD5_CTX*>(m_md5Context);
  // MD5_Update returns 1 on success; surface a failure instead of producing a wrong digest later.
  if (MD5_Update(md5Context, data, length) != 1)
  {
    throw std::runtime_error("MD5_Update failed");
  }
}
/**
 * Finishes the hash computation and returns the MD5_DIGEST_LENGTH digest bytes.
 * Throws std::runtime_error if MD5_Final reports failure.
 */
std::vector<uint8_t> Md5::Digest() const
{
  MD5_CTX* md5Context = static_cast<MD5_CTX*>(m_md5Context);
  unsigned char hash[MD5_DIGEST_LENGTH];
  // MD5_Final returns 1 on success; never hand back the uninitialized buffer on failure.
  if (MD5_Final(hash, md5Context) != 1)
  {
    throw std::runtime_error("MD5_Final failed");
  }
  return std::vector<uint8_t>(std::begin(hash), std::end(hash));
}
#endif
}} // namespace Azure::Core

View File

@ -41,6 +41,7 @@ add_executable (
logging.cpp
main.cpp
match_conditions.cpp
md5.cpp
nullable.cpp
operation.cpp
operation_status.cpp

View File

@ -0,0 +1,91 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT
#include <algorithm>
#include <azure/core/base64.hpp>
#include <azure/core/md5.hpp>
#include <gtest/gtest.h>
#include <random>
#include <string>
#include <vector>
using namespace Azure::Core;
// Test helper: hashes the bytes of a string through the std::vector overload of Md5::Hash.
static std::vector<uint8_t> Hash(const std::string& data)
{
  // Copy the characters into a byte vector; each char converts to the uint8_t with the same bits.
  const std::vector<uint8_t> bytes(data.begin(), data.end());
  return Md5::Hash(bytes);
}
// Per-thread 64-bit Mersenne Twister used by the random helpers in this file, seeded from
// std::random_device.
static thread_local std::mt19937_64 random_generator(std::random_device{}());

// Returns one character drawn uniformly from the digits and ASCII letters.
static char RandomCharGenerator()
{
  const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  // sizeof counts the terminating '\0', so the last selectable index is sizeof - 2.
  const std::size_t lastIndex = sizeof(alphabet) - 2;
  std::uniform_int_distribution<std::size_t> pickIndex(0, lastIndex);
  const std::size_t index = pickIndex(random_generator);
  return alphabet[index];
}
// Returns a buffer of `length` random bytes (empty when `length` is 0).
std::vector<uint8_t> RandomBuffer(std::size_t length)
{
  std::vector<uint8_t> result(length);

  // A default-constructed distribution covers the full [0, max] range of uint64_t.
  std::uniform_int_distribution<uint64_t> distribution;

  // Draw 64 random bits at a time and spread them over up to eight output bytes. Writing byte by
  // byte avoids both indexing into an empty vector when length is 0 and storing a uint64_t
  // through a pointer into a uint8_t array.
  std::size_t offset = 0;
  while (offset < length)
  {
    const uint64_t word = distribution(random_generator);
    for (std::size_t i = 0; i < sizeof(word) && offset < length; ++i, ++offset)
    {
      result[offset] = static_cast<uint8_t>(word >> (8 * i));
    }
  }
  return result;
}
// Returns a uniformly distributed integer in the closed range [minNumber, maxNumber].
uint64_t RandomInt(uint64_t minNumber, uint64_t maxNumber)
{
  using Distribution = std::uniform_int_distribution<uint64_t>;
  return Distribution(minNumber, maxNumber)(random_generator);
}
TEST(Md5, Basic)
{
  // Fixed inputs compared against their expected Base64-encoded digests.
  EXPECT_EQ(Base64Encode(Hash("")), "1B2M2Y8AsgTpgAmY7PhCfg==");
  EXPECT_EQ(Base64Encode(Hash("Hello Azure!")), "Pz8543xut4RVSbb2g52Mww==");

  // The hash can be obtained in two ways: the one-shot static `Hash()` API, or the streaming API
  // where blocks are appended with repeated `Update()` calls and `Digest()` is called at the end.
  // Feed a 16MB random buffer to the streaming API in randomly sized chunks of 0-4MB and verify
  // that the result matches the one-shot hash of the whole buffer.
  const std::size_t totalSize = 16777216;
  const uint64_t maxChunkSize = 4194304;

  auto data = RandomBuffer(totalSize);
  Md5 streamingHasher;

  std::size_t consumed = 0;
  while (consumed < data.size())
  {
    const std::size_t remaining = data.size() - consumed;
    const std::size_t requested = static_cast<std::size_t>(RandomInt(0, maxChunkSize));
    const std::size_t chunkSize = std::min(requested, remaining);

    streamingHasher.Update(&data[consumed], chunkSize);
    // A zero-length update is issued as well; the final comparison shows it leaves the hash
    // unaffected.
    streamingHasher.Update(&data[consumed], 0);

    consumed += chunkSize;
  }

  EXPECT_EQ(streamingHasher.Digest(), Md5::Hash(data));
}

View File

@ -22,6 +22,8 @@ TEST(SimplifiedHeader, core)
EXPECT_NO_THROW(Azure::Core::Context c);
EXPECT_NO_THROW(Azure::Core::DateTime(2020, 11, 03, 15, 30, 44));
EXPECT_NO_THROW(Azure::Core::ETag e);
EXPECT_NO_THROW(Azure::Core::Base64Decode("foo"));
EXPECT_NO_THROW(Azure::Core::Md5 m);
EXPECT_NO_THROW(Azure::Core::Http::RawResponse r(
1, 1, Azure::Core::Http::HttpStatusCode::Accepted, "phrase"));
EXPECT_NO_THROW(Azure::Core::MatchConditions mc);