Adding missing fields in DataLake service (#1520)

This commit is contained in:
Kan Tang 2021-02-01 14:30:32 +08:00 committed by GitHub
parent 517b8e6245
commit 721473a568
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 88 additions and 39 deletions

View File

@ -5,7 +5,7 @@
### New Features
- Added `Owner`, `Permissions`, and `Group` to `GetDataLakePathAccessControlResult`.
- `ReadDataLakeFileResult` now has a new field `FileSize`.
- `DownloadDataLakeFileResult` now has a new field `FileSize`.
- Added support for `GetAccessPolicy` and `SetAccessPolicy` in `DataLakeFileSystemClient`.
- Moved all protocol layer generated result types to `Details` namespace.
- Renamed `FileSystem` type returned from `ListDataLakeFileSystems` to be `FileSystemItem`. Member object name `FileSystems` is renamed to `Items`.
@ -17,13 +17,14 @@
- Added `RequestId` in each return type for REST API calls, except for concurrent APIs.
- Added `UpdateAccessControlListRecursiveSinglePage` to update the access control recursively for a datalake path.
- Added `RemoveAccessControlListRecursiveSinglePage` to remove the access control recursively for a datalake path.
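- Added new properties to `GetDataLakePathPropertiesResult` (`IsDirectory`, `ArchiveStatus`, `RehydratePriority`, `CopyStatusDescription`, `IsIncrementalCopy`, `IncrementalCopyDestinationSnapshot`, `VersionId`, and `IsCurrentVersion`) and to `DownloadDataLakeFileResult` (`CopyId`, `CopySource`, `CopyStatus`, `CopyStatusDescription`, `CopyProgress`, `CopyCompletedOn`, `VersionId`, and `IsCurrentVersion`).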
### Breaking Changes
- Removed `GetDfsUri` in all clients since they are currently implementation details.
- Removed `Data` suffix for `FlushData` and `AppendData` and modified all related structs to align the change.
- `DataLakePathClient` can no longer set permissions with `SetAccessControl`; a new API, `SetPermissions`, was added for that purpose. The original API was renamed to `SetAccessControlList` to be more precise.
- `ContentRange` in `ReadDataLakeFileResult` is now `Azure::Core::Http::Range`.
- `ContentRange` in `DownloadDataLakeFileResult` is now `Azure::Core::Http::Range`.
- Removed `ContentRange` in `PathGetPropertiesResult`.
- Renamed `ContentLength` in `GetDataLakePathPropertiesResult` and `CreateDataLakePathResult` to `FileSize` to be more accurate.
- Renamed `GetUri` to `GetUrl`.
@ -47,6 +48,7 @@
- Renamed `ListDataLakeFileSystemsIncludeItem` to `ListDataLakeFileSystemsIncludeFlags`.
- Removed `DataLakeDirectoryClient::Delete` and `DataLakeDirectoryClient::DeleteIfExists`. Added `DataLakeDirectoryClient::DeleteEmpty`, `DataLakeDirectoryClient::DeleteEmptyIfExists`, `DataLakeDirectoryClient::DeleteRecursive` and `DataLakeDirectoryClient::DeleteRecursiveIfExists` instead.
- Removed `ContinuationToken` in `DeleteDataLakePathResult` and `DeleteDataLakeDirectoryResult`, as it is never returned for HNS-enabled accounts.
- Renamed `DataLakeFileClient::Read` to `DataLakeFileClient::Download`. Also replaced the option member `Azure::Core::Nullable<bool> RangeGetContentMd5` with `Azure::Core::Nullable<HashAlgorithm> RangeHashAlgorithm`.
### Other Changes and Improvements

View File

@ -11,5 +11,6 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
constexpr static const char* DataLakeFilesystemNotFound = "FilesystemNotFound";
constexpr static const char* DataLakePathNotFound = "PathNotFound";
constexpr static const char* DataLakePathAlreadyExists = "PathAlreadyExists";
// Metadata key looked up by MetadataIncidatesIsDirectory; a stored value of "true" is treated
// as marking the path as a directory.
constexpr static const char* DataLakeIsDirectoryKey = "hdi_isFolder";
}}}}} // namespace Azure::Storage::Files::DataLake::Details

View File

@ -161,14 +161,16 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
const DeleteDataLakeFileOptions& options = DeleteDataLakeFileOptions()) const;
/**
* @brief Read the contents of a file. For read operations, range requests are supported.
* @param options Optional parameters to read the content from the resource the path points to.
* @return Azure::Core::Response<Models::ReadDataLakeFileResult> containing the information and
* content returned when reading from a file.
* @brief Download the contents of a file. For download operations, range requests are
* supported.
* @param options Optional parameters to download the content from the resource the path points
* to.
* @return Azure::Core::Response<Models::DownloadDataLakeFileResult> containing the information
* and content returned when downloading from a file.
* @remark This request is sent to the blob endpoint.
*/
Azure::Core::Response<Models::ReadDataLakeFileResult> Read(
const ReadDataLakeFileOptions& options = ReadDataLakeFileOptions()) const;
Azure::Core::Response<Models::DownloadDataLakeFileResult> Download(
const DownloadDataLakeFileOptions& options = DownloadDataLakeFileOptions()) const;
/**
* @brief Creates a new file, or updates the content of an existing file. Updating

View File

@ -568,7 +568,7 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
* More details:
* https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/read
*/
struct ReadDataLakeFileOptions
struct DownloadDataLakeFileOptions
{
/**
* @brief Context for cancelling long running operations.
@ -581,14 +581,9 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
Azure::Core::Nullable<Core::Http::Range> Range;
/**
* @brief When this header is set to "true" and specified together with the Range header,
* the service returns the MD5 hash for the range, as long as the range is less than
* or equal to 4MB in size. If this header is specified without the Range header,
* the service returns status code 400 (Bad Request). If this header is set to true
* when the range exceeds 4 MB in size, the service returns status code 400 (Bad
* Request).
* @brief The hash algorithm used to calculate the hash for the returned content.
*/
Azure::Core::Nullable<bool> RangeGetContentMd5;
Azure::Core::Nullable<HashAlgorithm> RangeHashAlgorithm;
/**
* @brief Specify the access condition for the path.

View File

@ -101,6 +101,8 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
using ReleaseDataLakeLeaseResult = Blobs::Models::ReleaseBlobLeaseResult;
using ChangeDataLakeLeaseResult = Blobs::Models::ChangeBlobLeaseResult;
using BreakDataLakeLeaseResult = Blobs::Models::BreakBlobLeaseResult;
using RehydratePriority = Blobs::Models::RehydratePriority;
using DataLakeArchiveStatus = Blobs::Models::BlobArchiveStatus;
struct Acl
{
@ -160,6 +162,14 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
Azure::Core::Nullable<Core::DateTime> CopyCompletedOn;
Azure::Core::Nullable<Core::DateTime> ExpiresOn;
Azure::Core::Nullable<Core::DateTime> LastAccessedOn;
bool IsDirectory = false;
Azure::Core::Nullable<DataLakeArchiveStatus> ArchiveStatus;
Azure::Core::Nullable<Models::RehydratePriority> RehydratePriority;
Azure::Core::Nullable<std::string> CopyStatusDescription;
Azure::Core::Nullable<bool> IsIncrementalCopy;
Azure::Core::Nullable<std::string> IncrementalCopyDestinationSnapshot;
Azure::Core::Nullable<std::string> VersionId;
Azure::Core::Nullable<bool> IsCurrentVersion;
std::string RequestId;
};
@ -206,8 +216,9 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
using AppendDataLakeFileResult = Details::PathAppendDataResult;
using FlushDataLakeFileResult = Details::PathFlushDataResult;
using ScheduleDataLakeFileDeletionResult = Blobs::Models::SetBlobExpiryResult;
using CopyStatus = Blobs::Models::CopyStatus;
struct ReadDataLakeFileResult
struct DownloadDataLakeFileResult
{
std::unique_ptr<Azure::Core::Http::BodyStream> Body;
PathHttpHeaders HttpHeaders;
@ -223,6 +234,14 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
Core::DateTime CreatedOn;
Azure::Core::Nullable<Core::DateTime> ExpiresOn;
Azure::Core::Nullable<Core::DateTime> LastAccessedOn;
Azure::Core::Nullable<std::string> CopyId;
Azure::Core::Nullable<std::string> CopySource;
Azure::Core::Nullable<Models::CopyStatus> CopyStatus;
Azure::Core::Nullable<std::string> CopyStatusDescription;
Azure::Core::Nullable<std::string> CopyProgress;
Azure::Core::Nullable<Azure::Core::DateTime> CopyCompletedOn;
Azure::Core::Nullable<std::string> VersionId;
Azure::Core::Nullable<bool> IsCurrentVersion;
std::string RequestId;
};

View File

@ -19,4 +19,6 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
const std::string& string,
std::string::const_iterator& cur);
bool MetadataIncidatesIsDirectory(const Storage::Metadata& metadata);
}}}}} // namespace Azure::Storage::Files::DataLake::Details

View File

@ -78,7 +78,7 @@ void DataLakeGettingStarted()
fileClient.Flush(str1.size() + str2.size());
// Read
auto result = fileClient.Read();
auto result = fileClient.Download();
Azure::Core::Context context;
std::vector<uint8_t> downloaded
= Azure::Core::Http::BodyStream::ReadToEnd(context, *(result->Body));

View File

@ -278,19 +278,21 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
std::move(ret), result.ExtractRawResponse());
}
Azure::Core::Response<Models::ReadDataLakeFileResult> DataLakeFileClient::Read(
const ReadDataLakeFileOptions& options) const
Azure::Core::Response<Models::DownloadDataLakeFileResult> DataLakeFileClient::Download(
const DownloadDataLakeFileOptions& options) const
{
Blobs::DownloadBlobOptions blobOptions;
blobOptions.Context = options.Context;
blobOptions.Range = options.Range;
blobOptions.RangeHashAlgorithm = options.RangeHashAlgorithm;
blobOptions.Range = options.Range;
blobOptions.AccessConditions.IfMatch = options.AccessConditions.IfMatch;
blobOptions.AccessConditions.IfNoneMatch = options.AccessConditions.IfNoneMatch;
blobOptions.AccessConditions.IfModifiedSince = options.AccessConditions.IfModifiedSince;
blobOptions.AccessConditions.IfUnmodifiedSince = options.AccessConditions.IfUnmodifiedSince;
blobOptions.AccessConditions.LeaseId = options.AccessConditions.LeaseId;
auto result = m_blobClient.Download(blobOptions);
Models::ReadDataLakeFileResult ret;
Models::DownloadDataLakeFileResult ret;
ret.Body = std::move(result->BodyStream);
ret.HttpHeaders = FromBlobHttpHeaders(std::move(result->HttpHeaders));
ret.ContentRange = std::move(result->ContentRange);
@ -312,8 +314,16 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
ret.CreatedOn = std::move(result->CreatedOn);
ret.ExpiresOn = std::move(result->ExpiresOn);
ret.LastAccessedOn = std::move(result->LastAccessedOn);
ret.CopyId = std::move(result->CopyId);
ret.CopySource = std::move(result->CopySource);
ret.CopyStatus = std::move(result->CopyStatus);
ret.CopyStatusDescription = std::move(result->CopyStatusDescription);
ret.CopyProgress = std::move(result->CopyProgress);
ret.CopyCompletedOn = std::move(result->CopyCompletedOn);
ret.VersionId = std::move(result->VersionId);
ret.IsCurrentVersion = std::move(result->IsCurrentVersion);
ret.RequestId = std::move(result->RequestId);
return Azure::Core::Response<Models::ReadDataLakeFileResult>(
return Azure::Core::Response<Models::DownloadDataLakeFileResult>(
std::move(ret), result.ExtractRawResponse());
}

View File

@ -388,6 +388,14 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake {
ret.LastAccessedOn = std::move(result->LastAccessedOn);
ret.FileSize = result->BlobSize;
ret.RequestId = std::move(result->RequestId);
ret.ArchiveStatus = std::move(result->ArchiveStatus);
ret.RehydratePriority = std::move(result->RehydratePriority);
ret.CopyStatusDescription = std::move(result->CopyStatusDescription);
ret.IsIncrementalCopy = std::move(result->IsIncrementalCopy);
ret.IncrementalCopyDestinationSnapshot = std::move(result->IncrementalCopyDestinationSnapshot);
ret.VersionId = std::move(result->VersionId);
ret.IsCurrentVersion = std::move(result->IsCurrentVersion);
ret.IsDirectory = Details::MetadataIncidatesIsDirectory(ret.Metadata);
return Azure::Core::Response<Models::GetDataLakePathPropertiesResult>(
std::move(ret), result.ExtractRawResponse());
}

View File

@ -5,6 +5,7 @@
#include <azure/storage/common/crypt.hpp>
#include "azure/storage/files/datalake/datalake_constants.hpp"
#include "azure/storage/files/datalake/protocol/datalake_rest_client.hpp"
namespace Azure { namespace Storage { namespace Files { namespace DataLake { namespace Details {
@ -65,4 +66,10 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { nam
}
return std::string(begin, end);
}
/**
 * @brief Checks whether the given metadata marks a path as a directory.
 * @param metadata The metadata collection to inspect.
 * @return true when the metadata contains an entry under DataLakeIsDirectoryKey whose value is
 * exactly "true"; false when the key is absent or holds any other value.
 */
bool MetadataIncidatesIsDirectory(const Storage::Metadata& metadata)
{
const auto entry = metadata.find(DataLakeIsDirectoryKey);
if (entry == metadata.end())
{
// No directory marker present at all.
return false;
}
return entry->second == "true";
}
}}}}} // namespace Azure::Storage::Files::DataLake::Details

View File

@ -79,6 +79,7 @@ namespace Azure { namespace Storage { namespace Test {
for (const auto& client : directoryClient)
{
auto response = client.GetProperties();
EXPECT_TRUE(response->IsDirectory);
Files::DataLake::DeleteDataLakeDirectoryOptions options1;
options1.AccessConditions.IfNoneMatch = response->ETag;
EXPECT_THROW(client.DeleteEmpty(options1), StorageException);

View File

@ -73,6 +73,7 @@ namespace Azure { namespace Storage { namespace Test {
for (const auto& client : fileClient)
{
auto response = client.GetProperties();
EXPECT_FALSE(response->IsDirectory);
Files::DataLake::DeleteDataLakeFileOptions options1;
options1.AccessConditions.IfModifiedSince = response->LastModified;
EXPECT_TRUE(IsValidTime(response->LastModified));
@ -319,7 +320,7 @@ namespace Azure { namespace Storage { namespace Test {
EXPECT_NE(properties2->ETag, properties3->ETag);
// Read
auto result = m_fileClient->Read();
auto result = m_fileClient->Download();
auto downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(buffer, downloaded);
}
@ -350,7 +351,7 @@ namespace Azure { namespace Storage { namespace Test {
EXPECT_NE(properties2->ETag, properties3->ETag);
// Read
auto result = newFileClient->Read();
auto result = newFileClient->Download();
auto downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(buffer, downloaded);
EXPECT_EQ(bufferSize, result->FileSize);
@ -360,11 +361,11 @@ namespace Azure { namespace Storage { namespace Test {
// Read Range
{
auto firstHalf = std::vector<uint8_t>(buffer.begin(), buffer.begin() + (bufferSize / 2));
Files::DataLake::ReadDataLakeFileOptions options;
Files::DataLake::DownloadDataLakeFileOptions options;
options.Range = Azure::Core::Http::Range();
options.Range.GetValue().Offset = 0;
options.Range.GetValue().Length = bufferSize / 2;
result = newFileClient->Read(options);
result = newFileClient->Download(options);
downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(firstHalf.size(), downloaded.size());
EXPECT_EQ(firstHalf, downloaded);
@ -374,11 +375,11 @@ namespace Azure { namespace Storage { namespace Test {
}
{
auto secondHalf = std::vector<uint8_t>(buffer.begin() + bufferSize / 2, buffer.end());
Files::DataLake::ReadDataLakeFileOptions options;
Files::DataLake::DownloadDataLakeFileOptions options;
options.Range = Azure::Core::Http::Range();
options.Range.GetValue().Offset = bufferSize / 2;
options.Range.GetValue().Length = bufferSize / 2;
result = newFileClient->Read(options);
result = newFileClient->Download(options);
downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(secondHalf, downloaded);
EXPECT_EQ(bufferSize, result->FileSize);
@ -388,25 +389,25 @@ namespace Azure { namespace Storage { namespace Test {
{
// Read with last modified access condition.
auto response = newFileClient->GetProperties();
Files::DataLake::ReadDataLakeFileOptions options1;
Files::DataLake::DownloadDataLakeFileOptions options1;
options1.AccessConditions.IfModifiedSince = response->LastModified;
EXPECT_TRUE(IsValidTime(response->LastModified));
EXPECT_THROW(newFileClient->Read(options1), StorageException);
Files::DataLake::ReadDataLakeFileOptions options2;
EXPECT_THROW(newFileClient->Download(options1), StorageException);
Files::DataLake::DownloadDataLakeFileOptions options2;
options2.AccessConditions.IfUnmodifiedSince = response->LastModified;
EXPECT_NO_THROW(result = newFileClient->Read(options2));
EXPECT_NO_THROW(result = newFileClient->Download(options2));
downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(buffer, downloaded);
}
{
// Read with if match access condition.
auto response = newFileClient->GetProperties();
Files::DataLake::ReadDataLakeFileOptions options1;
Files::DataLake::DownloadDataLakeFileOptions options1;
options1.AccessConditions.IfNoneMatch = response->ETag;
EXPECT_THROW(newFileClient->Read(options1), StorageException);
Files::DataLake::ReadDataLakeFileOptions options2;
EXPECT_THROW(newFileClient->Download(options1), StorageException);
Files::DataLake::DownloadDataLakeFileOptions options2;
options2.AccessConditions.IfMatch = response->ETag;
EXPECT_NO_THROW(result = newFileClient->Read(options2));
EXPECT_NO_THROW(result = newFileClient->Download(options2));
downloaded = ReadBodyStream(result->Body);
EXPECT_EQ(buffer, downloaded);
}
@ -593,7 +594,7 @@ namespace Azure { namespace Storage { namespace Test {
std::this_thread::sleep_for(std::chrono::seconds(30));
EXPECT_NO_THROW(anonymousClient.Read());
EXPECT_NO_THROW(anonymousClient.Download());
}
}
}}} // namespace Azure::Storage::Test

View File

@ -1,4 +1,4 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT
#include "datalake_path_client_test.hpp"
@ -107,6 +107,7 @@ namespace Azure { namespace Storage { namespace Test {
// Last modified Etag works.
auto properties1 = m_pathClient->GetProperties();
auto properties2 = m_pathClient->GetProperties();
EXPECT_FALSE(properties1->IsDirectory);
EXPECT_EQ(properties1->ETag, properties2->ETag);
EXPECT_EQ(properties1->LastModified, properties2->LastModified);

View File

@ -67,7 +67,7 @@ namespace Azure { namespace Storage { namespace Test {
auto verify_file_read = [&](const std::string& sas) {
EXPECT_NO_THROW(fileClient0.Create());
auto fileClient = Files::DataLake::DataLakeFileClient(fileUrl + sas);
auto downloadedContent = fileClient.Read();
auto downloadedContent = fileClient.Download();
EXPECT_TRUE(ReadBodyStream(downloadedContent->Body).empty());
};