Aws-sdk S3: Best Way To List All Keys With ListObjectsV2


Answer :

In my opinion, this is the best way to do it:

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

/**
 * Lists the contents of a bucket by calling s3.listObjectsV2 repeatedly,
 * following continuation tokens until a page reports it is not truncated.
 *
 * Note that the collected items are the entries of each page's `Contents`
 * array as returned by the SDK, not just their `Key` strings.
 *
 * @param {Object} params - Parameters passed to s3.listObjectsV2 (e.g. `{Bucket}`).
 *   The object is copied and never modified.
 * @param {Array} [out=[]] - Array that every page's `Contents` entries are appended to.
 * @returns {Promise<Array>} Resolves with `out` after the last page; rejects
 *   with the error of the first request that fails.
 */
const listAllKeys = async (params, out = []) => {
  // Page through a private copy so the caller's params object never ends up
  // carrying a ContinuationToken from this listing into a later call.
  const request = Object.assign({}, params);

  for (;;) {
    const { Contents, IsTruncated, NextContinuationToken } =
      await s3.listObjectsV2(request).promise();

    out.push(...Contents);

    if (!IsTruncated) {
      return out;
    }
    request.ContinuationToken = NextContinuationToken;
  }
};

listAllKeys({ Bucket: 'bucket-name' })
  .then(console.log)
  .catch(console.log);

Here is the code to get the list of keys from a bucket.

// Request parameters shared by every page request; listAllKeys() stores the
// continuation token on this object between pages.
const params = {
  Bucket: 'bucket-name',
};

// Filled with the `Key` of every object as pages arrive.
const allKeys = [];
listAllKeys();

/**
 * Fetches one page with s3.listObjectsV2, appends the page's keys to
 * `allKeys`, and calls itself again while the response is truncated.
 *
 * NOTE: `s3` is not defined in this snippet; an AWS.S3 client with that name
 * must already be in scope.
 *
 * @param {function(Error|null, string[]=): void} [done] - Optional callback.
 *   Called with the error if a request fails, or with `(null, allKeys)` once
 *   the last page has been processed. When omitted, the function only logs
 *   errors and fills `allKeys`, exactly as before.
 */
function listAllKeys(done) {
  s3.listObjectsV2(params, (err, data) => {
    if (err) {
      console.log(err, err.stack); // an error occurred
      if (typeof done === 'function') {
        done(err);
      }
      return;
    }

    data.Contents.forEach((content) => {
      allKeys.push(content.Key);
    });

    if (data.IsTruncated) {
      // More pages remain: continue from where this page stopped.
      params.ContinuationToken = data.NextContinuationToken;
      console.log("get further list...");
      listAllKeys(done);
      return;
    }

    // Last page reached: the listing is complete.
    if (typeof done === 'function') {
      done(null, allKeys);
    }
  });
}

Building on previous answers, here is an approach that takes advantage of the Prefix parameter to make multiple calls to s3.listObjectsV2() in parallel.

This has led to a 2–15x speedup for me, depending on how evenly the keys are distributed across the prefixes and on whether the code is running locally or on AWS.

You should make sure that the prefixes cover every character that a key in your bucket can start with; keys that begin with a character not in the list will not be returned. The code below covers all "safe" characters, but S3 supports a wider range of UTF-8 characters.

Note that this example uses async/await so ES2017/Node 8 is required. The example is a Node 8.10 Lambda function.

const AWS = require('aws-sdk'); const s3 = new AWS.S3();  exports.handler = async (event) => {   // Prefixes are used to fetch data in parallel.   const numbers = '0123456789'.split('');   const letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'.split('');   const special = "!-_'.*()".split(''); // "Safe" S3 special chars   const prefixes = [...numbers, ...letters, ...special];    // array of params used to call listObjectsV2 in parallel for each prefix above   const arrayOfParams = prefixes.map((prefix) => {     return { Bucket: 'YOUR-BUCKET-NAME', Prefix: prefix }   });    const allKeys = [];   await Promise.all(arrayOfParams.map(params => getAllKeys(params, allKeys)));   return allKeys.length; };  async function getAllKeys(params,  allKeys = []){   const response = await s3.listObjectsV2(params).promise();   response.Contents.forEach(obj => allKeys.push(obj.Key));    if (response.NextContinuationToken) {     params.ContinuationToken = response.NextContinuationToken;     await getAllKeys(params, allKeys); // RECURSIVE CALL   }   return allKeys; } 

Also, for completeness, here is a simpler, non-prefixed async/await version:

const AWS = require('aws-sdk'); const s3 = new AWS.S3();  exports.handler = async (event) => {   const allKeys = await getAllKeys({ Bucket: 'YOUR-BUCKET-NAME' });   return allKeys.length; };  async function getAllKeys(params,  allKeys = []){   const response = await s3.listObjectsV2(params).promise();   response.Contents.forEach(obj => allKeys.push(obj.Key));    if (response.NextContinuationToken) {     params.ContinuationToken = response.NextContinuationToken;     await getAllKeys(params, allKeys); // RECURSIVE CALL   }   return allKeys; } 

Comments

Popular posts from this blog

Chemistry - Bond Angles In NH3 And NCl3

Are Regular VACUUM ANALYZE Still Recommended Under 9.1?

Change The Font Size Of Visual Studio Solution Explorer