Scenario
This article focuses on providing guidance and a library for copying an Azure BLOB to Amazon S3. Using .NET (C#), we have created a few classes which will help us copy a large BLOB into Amazon S3.
Points to be considered
- As we're targeting large data objects to be copied, we need to perform a Multipart upload.
- From Azure BLOB, in order to download data in parts, we can utilize DownloadRangeToStream method.
- Amazon S3 also provides us the facility to perform Multipart upload using the APIs exposed through AWS .NET SDK.
- But first we need to understand the nature and different options the SDK provides and select the one which best suits our needs.
- AWS .NET SDK for Multipart upload provides High-Level API and Low-Level API.
- High-Level API provides us with some sophisticated way of performing few common operations on S3 by encapsulating core level implementations.
- Low-Level API as you might have guessed, exposes the core Multipart operations on S3.
- For our scenario, the Low-Level API will be the best fit as we need more fine-grained control over the operations.
- Maximum of 10000 parts are allowed per upload.
- Part size can be of range 5MB to 5GB with an exception to the last part which can be less than 5MB.
- To handle our scenario, we're going to stick to 100MB as part size in our library.
Library Source code
The following classes have been created as a library for developers, encapsulating the implementation logic of the copy operation and providing Request and Response objects for ease of use and code management.
- BlobToS3Manager: Holds the implementation logic of copying BLOB data into S3.
- IBlobToS3Manager: Prototype of BlobToS3Manager.
- BlobToS3Request: To hold the BLOB source and S3 target data needed for copy operation.
- BlobToS3Response: To hold the response of the copy operation.
Snippet 1 (BlobToS3Manager)
namespace AWSSampleConsoleApp1.BlobToS3
{
    using Amazon.S3.Model;
    using Microsoft.WindowsAzure.Storage.Blob;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Security.Cryptography;
    using System.Threading.Tasks;

    /// <summary>
    /// Copies an Azure block BLOB into an Amazon S3 object using the S3
    /// low-level multipart upload API, streaming the BLOB down in fixed-size
    /// ranges so the whole object is never held in memory at once.
    /// </summary>
    public class BlobToS3Manager : IBlobToS3Manager
    {
        /// <summary>
        /// Failure message returned when the source container does not exist.
        /// </summary>
        private const string ContainerNotExists = "BLOB Container doesn't exists.";

        /// <summary>
        /// Failure message returned when the source BLOB does not exist.
        /// </summary>
        private const string BlobNotExists = "BLOB doesn't exists.";

        /// <summary>
        /// Failure message returned when the source BLOB has zero length.
        /// </summary>
        private const string BlobIsEmpty = "BLOB is empty; nothing to copy.";

        /// <summary>
        /// Part size to read from BLOB and upload to S3 (100 MB).
        /// S3 allows parts of 5 MB-5 GB (last part may be smaller) and at
        /// most 10,000 parts per upload, so 100 MB supports objects up to ~1 TB.
        /// </summary>
        private const long PartSize = 104857600; // 100 MB.

        /// <summary>
        /// The request describing the source BLOB and the target S3 object.
        /// </summary>
        private readonly BlobToS3Request blobToS3Request;

        /// <summary>
        /// Initializes a new instance of the <see cref="BlobToS3Manager"/> class.
        /// </summary>
        /// <param name="blobToS3Request">The BLOB to S3 request.</param>
        public BlobToS3Manager(BlobToS3Request blobToS3Request)
        {
            this.blobToS3Request = blobToS3Request;
        }

        /// <summary>
        /// Copies the source BLOB to the target S3 object part by part and
        /// computes a SHA-256 checksum of the transferred bytes. On any
        /// failure the multipart upload is aborted so that no incomplete
        /// (and billable) parts are left behind on S3.
        /// </summary>
        /// <returns>The BLOB to S3 response.</returns>
        public async Task<BlobToS3Response> CopyFromBlobToS3Async()
        {
            BlobToS3Response blobToS3Response = new BlobToS3Response();

            var validation = await this.Validate().ConfigureAwait(false);
            if (!validation.Item1)
            {
                return validation.Item2;
            }

            var sourceBlob = validation.Item3;
            await sourceBlob.FetchAttributesAsync().ConfigureAwait(false);
            var remainingBytes = sourceBlob.Properties.Length;

            // S3 multipart upload requires at least one part, and a 0-byte
            // BLOB would otherwise reach BitConverter.ToString with a null
            // hash (the checksum is never finalized) - so fail fast.
            if (remainingBytes == 0)
            {
                blobToS3Response.IsSuccess = false;
                blobToS3Response.Message = BlobIsEmpty;
                return blobToS3Response;
            }

            long readPosition = 0; // Offset from where to start reading from the BLOB.

            InitiateMultipartUploadRequest initiateMultipartUploadRequest = new InitiateMultipartUploadRequest
            {
                BucketName = this.blobToS3Request.TargetS3Bucket,
                Key = this.blobToS3Request.TargetS3File
            };

            // Every subsequent part, the completion and any abort must carry
            // the UploadId from this response. Use the async SDK call: the
            // original blocked on the synchronous overload inside an async
            // method, which risks thread-pool starvation/deadlocks.
            InitiateMultipartUploadResponse initiateMultipartUploadResponse =
                await this.blobToS3Request.S3Client.InitiateMultipartUploadAsync(initiateMultipartUploadRequest).ConfigureAwait(false);

            List<UploadPartResponse> uploadPartResponses = new List<UploadPartResponse>();
            try
            {
                int partCounter = 0; // Incremented per part; S3 part numbers start at 1.

                // SHA256Managed is obsolete and the hash object is IDisposable,
                // so create it through the factory and dispose it deterministically.
                using (SHA256 sha256 = SHA256.Create())
                {
                    while (remainingBytes > 0)
                    {
                        // Every part is PartSize except possibly the final one.
                        long bytesToCopy = Math.Min(PartSize, remainingBytes);

                        using (MemoryStream memoryStream = new MemoryStream())
                        {
                            // Download only this range of the BLOB (up to 100 MB in memory).
                            await sourceBlob.DownloadRangeToStreamAsync(memoryStream, readPosition, bytesToCopy).ConfigureAwait(false);
                            memoryStream.Position = 0;
                            partCounter++;

                            UploadPartRequest uploadRequest = new UploadPartRequest
                            {
                                BucketName = this.blobToS3Request.TargetS3Bucket,
                                Key = this.blobToS3Request.TargetS3File,
                                UploadId = initiateMultipartUploadResponse.UploadId,
                                PartNumber = partCounter,
                                PartSize = bytesToCopy,
                                InputStream = memoryStream
                            };

                            // Async upload for the same reason as the initiate call above.
                            UploadPartResponse uploadPartResponse =
                                await this.blobToS3Request.S3Client.UploadPartAsync(uploadRequest).ConfigureAwait(false);
                            uploadPartResponses.Add(uploadPartResponse);

                            remainingBytes -= bytesToCopy;
                            readPosition += bytesToCopy;

                            // Fold this part into the running checksum; the final
                            // part finalizes the hash so sha256.Hash becomes valid.
                            byte[] partBytes = memoryStream.ToArray();
                            if (remainingBytes <= 0)
                            {
                                sha256.TransformFinalBlock(partBytes, 0, (int)bytesToCopy);
                            }
                            else
                            {
                                sha256.TransformBlock(partBytes, 0, (int)bytesToCopy, partBytes, 0);
                            }
                        }
                    }

                    blobToS3Response.Sha256CheckSum = BitConverter.ToString(sha256.Hash).Replace("-", string.Empty);
                }

                CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest
                {
                    BucketName = this.blobToS3Request.TargetS3Bucket,
                    Key = this.blobToS3Request.TargetS3File,
                    UploadId = initiateMultipartUploadResponse.UploadId
                };

                // S3 needs the ETag of every uploaded part to assemble the object.
                completeMultipartUploadRequest.AddPartETags(uploadPartResponses);
                CompleteMultipartUploadResponse completeMultipartUploadResponse =
                    await this.blobToS3Request.S3Client.CompleteMultipartUploadAsync(completeMultipartUploadRequest).ConfigureAwait(false);

                blobToS3Response.IsSuccess = true;
                blobToS3Response.S3Path = completeMultipartUploadResponse.Location;
            }
            catch (Exception exception)
            {
                blobToS3Response.IsSuccess = false;
                blobToS3Response.Message = exception.Message;

                // Abort so S3 discards (and stops billing for) the parts uploaded so far.
                AbortMultipartUploadRequest abortMultipartUploadRequest = new AbortMultipartUploadRequest
                {
                    BucketName = this.blobToS3Request.TargetS3Bucket,
                    Key = this.blobToS3Request.TargetS3File,
                    UploadId = initiateMultipartUploadResponse.UploadId
                };
                await this.blobToS3Request.S3Client.AbortMultipartUploadAsync(abortMultipartUploadRequest).ConfigureAwait(false);
            }

            return blobToS3Response;
        }

        /// <summary>
        /// Validates that the source container and BLOB both exist.
        /// </summary>
        /// <returns>
        /// A tuple of (isValid, failure response when invalid, source BLOB when valid).
        /// </returns>
        private async Task<Tuple<bool, BlobToS3Response, CloudBlockBlob>> Validate()
        {
            CloudBlobContainer cloudBlobContainer =
                this.blobToS3Request.BlobClient.GetContainerReference(this.blobToS3Request.SourceBlobContainer);
            if (!await cloudBlobContainer.ExistsAsync().ConfigureAwait(false))
            {
                var failure = new BlobToS3Response { IsSuccess = false, Message = ContainerNotExists };
                return new Tuple<bool, BlobToS3Response, CloudBlockBlob>(false, failure, null);
            }

            CloudBlockBlob cloudBlockBlob = cloudBlobContainer.GetBlockBlobReference(this.blobToS3Request.SourceBlob);
            if (await cloudBlockBlob.ExistsAsync().ConfigureAwait(false))
            {
                return new Tuple<bool, BlobToS3Response, CloudBlockBlob>(true, null, cloudBlockBlob);
            }

            var notFound = new BlobToS3Response { IsSuccess = false, Message = BlobNotExists };
            return new Tuple<bool, BlobToS3Response, CloudBlockBlob>(false, notFound, null);
        }
    }
}
Snippet 2 (CopyFromBlobToS3: Sample on how to create instances and call the library)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Amazon.S3; | |
using AWSSampleConsoleApp1.BlobToS3; | |
using Microsoft.Azure; | |
using Microsoft.WindowsAzure.Storage; | |
using System; | |
using System.Threading.Tasks; | |
namespace AWSSampleConsoleApp1 | |
{ | |
/// <summary>
/// Console entry point demonstrating how to invoke the BLOB-to-S3 copy library
/// and report the outcome.
/// </summary>
public class CopyFromBlobToS3
{
    /// <summary>
    /// Runs the copy and prints the result (S3 path and SHA-256 checksum on
    /// success, the failure reason otherwise).
    /// </summary>
    public static void Main()
    {
        CopyFromBlobToS3Client copyFromBlobToS3Client = new CopyFromBlobToS3Client();

        // GetAwaiter().GetResult() instead of .Result: Main cannot be async
        // here, but this at least surfaces the original exception rather than
        // an AggregateException wrapper.
        BlobToS3Response blobToS3Response = copyFromBlobToS3Client.ProcessAsync().GetAwaiter().GetResult();

        Console.WriteLine($"Is copy from BLOB to S3 successful: {blobToS3Response.IsSuccess}");
        if (blobToS3Response.IsSuccess)
        {
            Console.WriteLine($"S3 Path: {blobToS3Response.S3Path}");
            Console.WriteLine($"SHA256 checksum of uploaded file: {blobToS3Response.Sha256CheckSum}");
        }
        else
        {
            Console.WriteLine($"Failure reason: {blobToS3Response.Message}");
        }
    }
}
/// <summary>
/// Wires configuration settings into a <see cref="BlobToS3Request"/> and
/// delegates the copy to <see cref="BlobToS3Manager"/>.
/// All settings are read from application configuration via
/// CloudConfigurationManager on each access.
/// </summary>
public class CopyFromBlobToS3Client
{
    // NOTE: the duplicated PartSize constant and the unused logger field from
    // the original were removed - part sizing is owned by BlobToS3Manager,
    // and nothing in this class ever logged.

    /// <summary>
    /// AWS access key id.
    /// </summary>
    private string AwsAccessKeyId => CloudConfigurationManager.GetSetting("AwsAccessKeyId");

    /// <summary>
    /// AWS secret key.
    /// </summary>
    private string AwsSecretKey => CloudConfigurationManager.GetSetting("AwsSecretKey");

    /// <summary>
    /// S3 bucket name.
    /// </summary>
    private string AwsS3BucketName => CloudConfigurationManager.GetSetting("AwsS3BucketName");

    /// <summary>
    /// S3 file name to be copied to.
    /// </summary>
    private string TargetFileName => CloudConfigurationManager.GetSetting("TargetFileName");

    /// <summary>
    /// Azure storage account connection string.
    /// </summary>
    private string StorageAccount => CloudConfigurationManager.GetSetting("StorageAccount");

    /// <summary>
    /// Azure BLOB container name.
    /// </summary>
    private string ContainerName => CloudConfigurationManager.GetSetting("ContainerName");

    /// <summary>
    /// Azure BLOB file name to be copied.
    /// </summary>
    private string BlobFileName => CloudConfigurationManager.GetSetting("BlobFileName");

    /// <summary>
    /// Builds the request from configuration and runs the BLOB-to-S3 copy.
    /// </summary>
    /// <returns>The copy operation response.</returns>
    public async Task<BlobToS3Response> ProcessAsync()
    {
        BlobToS3Request blobToS3Request = new BlobToS3Request
        {
            BlobClient = CloudStorageAccount.Parse(StorageAccount).CreateCloudBlobClient(),
            SourceBlob = BlobFileName,
            SourceBlobContainer = ContainerName,
            S3Client = new AmazonS3Client(AwsAccessKeyId, AwsSecretKey, Amazon.RegionEndpoint.APSouth1),
            TargetS3Bucket = AwsS3BucketName,
            TargetS3File = TargetFileName
        };

        IBlobToS3Manager blobToS3Manager = new BlobToS3Manager(blobToS3Request);
        return await blobToS3Manager.CopyFromBlobToS3Async();
    }
}
} |
Full source code can be found in my github repo here.
References
- https://docs.microsoft.com/en-us/dotnet/api/microsoft.windowsazure.storage.blob.cloudblob.downloadrangetostream?view=azure-dotnet
- https://docs.microsoft.com/en-us/dotnet/api/microsoft.windowsazure.storage.blob.cloudblob.downloadrangetostreamasync?view=azure-dotnet
- https://docs.aws.amazon.com/AmazonS3/latest/dev/LLuploadFileDotNet.html
- https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
No comments:
Post a Comment