EMR 018 S3DistCp Manifest - qyjohn/AWS_Tutorials GitHub Wiki
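Use the following Python (boto3) script to create a manifest file. It prints one JSON manifest entry per object found under the given S3 prefix; redirect the output to a file to save it.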
import json

import boto3
# Bucket and key prefix whose objects should be listed in the manifest.
bucket = 'bucket-name'
prefix = 'output/'

# Number of keys requested per list_objects_v2 call. 1000 is the largest
# page S3 returns, so this minimises the number of round-trips.
PAGE_SIZE = 1000


def normalize_prefix(bucket, prefix):
    """Return ``(listing_prefix, src_dir)`` for the given bucket and prefix.

    ``listing_prefix`` is the prefix with a trailing '/' appended when it is
    non-empty and lacks one. ``src_dir`` is the matching ``s3://`` URL
    without the trailing '/'. An empty prefix (the whole bucket) is kept
    empty, and ``src_dir`` is then just ``s3://<bucket>``.
    """
    # endswith() is safe on an empty string, unlike prefix[-1].
    if prefix and not prefix.endswith('/'):
        prefix = prefix + '/'
    # The URL always ends with exactly one '/' that belongs to the
    # separator; drop that single character only.
    src_dir = ('s3://' + bucket + '/' + prefix)[:-1]
    return prefix, src_dir


def iter_objects(client, bucket, prefix, page_size=PAGE_SIZE):
    """Yield every object record under ``prefix``, following pagination.

    ``client`` must provide ``list_objects_v2``. Each yielded item is one
    entry of the response's 'Contents' list. Nothing is yielded when the
    prefix matches no objects.
    """
    request = {'Bucket': bucket, 'MaxKeys': page_size, 'Prefix': prefix}
    while True:
        response = client.list_objects_v2(**request)
        # 'Contents' is absent from the response when no key matches.
        for entry in response.get('Contents', []):
            yield entry
        if not response.get('IsTruncated'):
            return
        request['ContinuationToken'] = response['NextContinuationToken']


def manifest_line(bucket, key, size, prefix_len, src_dir):
    """Return one manifest entry for an object, serialised as compact JSON.

    ``baseName`` is the key with its first ``prefix_len`` characters (the
    listing prefix) removed.
    """
    entry = {
        'path': 's3://' + bucket + '/' + key,
        'baseName': key[prefix_len:],
        'srcDir': src_dir,
        'size': size,
    }
    # json.dumps escapes quotes, backslashes and control characters in keys.
    # The compact separators and ensure_ascii=False keep the output
    # identical to plain concatenation for keys that need no escaping.
    return json.dumps(entry, separators=(',', ':'), ensure_ascii=False)


def main():
    """Print one manifest line per object under the configured prefix."""
    client = boto3.client('s3')
    listing_prefix, src_dir = normalize_prefix(bucket, prefix)
    prefix_len = len(listing_prefix)
    for entry in iter_objects(client, bucket, listing_prefix):
        print(manifest_line(bucket, entry['Key'], entry['Size'],
                            prefix_len, src_dir))


if __name__ == '__main__':
    main()