EMR 018 S3DistCp Manifest - qyjohn/AWS_Tutorials GitHub Wiki

Use the following Python (boto3) script to create a manifest file. It prints one JSON entry per object found under the given prefix.

import json

import boto3
client = boto3.client('s3')

bucket = 'bucket-name'
prefix = 'output/'
# Keys requested per ListObjectsV2 call. The printed manifest does not depend
# on this value; it only controls how many requests are needed.
page_size = 1000

# Make a non-empty prefix end with '/', and derive the source directory URI
# as the same location without its trailing '/'. An empty prefix (whole
# bucket) is left as-is and yields 's3://<bucket>' as the source directory.
if prefix and not prefix.endswith('/'):
    prefix = prefix + '/'
src_dir = ('s3://' + bucket + '/' + prefix)[:-1]
prefix_len = len(prefix)


def _iter_objects(s3_client, bucket_name, key_prefix, max_keys):
    """Yield every object record listed under key_prefix in bucket_name.

    Calls list_objects_v2 repeatedly, passing NextContinuationToken back as
    ContinuationToken while the response reports IsTruncated. A response
    without a 'Contents' entry (nothing matched) yields nothing instead of
    raising KeyError.
    """
    request = {
        'Bucket': bucket_name,
        'MaxKeys': max_keys,
        'Prefix': key_prefix,
    }
    while True:
        response = s3_client.list_objects_v2(**request)
        for item in response.get('Contents', []):
            yield item
        if not response.get('IsTruncated'):
            break
        request['ContinuationToken'] = response['NextContinuationToken']


# Print one manifest entry per line, in the order the objects are listed.
for item in _iter_objects(client, bucket, prefix, page_size):
    key = item['Key']
    entry = {
        'path': 's3://' + bucket + '/' + key,
        # Key relative to the prefix.
        'baseName': key[prefix_len:],
        'srcDir': src_dir,
        'size': item['Size'],
    }
    # json.dumps escapes quotes, backslashes and control characters in keys.
    # Compact separators and ensure_ascii=False keep the same line layout the
    # hand-built string had for ordinary keys.
    print(json.dumps(entry, separators=(',', ':'), ensure_ascii=False))