スレッドを使って並列ダウンロード。
#!/usr/bin/env ruby
# Downloads one S3 object by requesting byte ranges of it in parallel threads
# and concatenating the pieces in order.

require 'rubygems'
require 'base64'
require 'cgi'
require 'net/http'
require 'nokogiri'
require 'openssl'
require 'time'

Net::HTTP.version_1_2

# Computes the signature used in the "AWS <key>:<signature>" Authorization
# header for a GET request that carries no Content-MD5 / Content-Type.
#
# @param secret_access_key_id [String] secret key used as the HMAC-SHA1 key
# @param date [String] exact value sent in the Date header
# @param bucket [String] bucket name
# @param path [String] resource path beginning with "/"
# @return [String] Base64-encoded HMAC-SHA1 digest, without line breaks
def aws_sign(secret_access_key_id, date, bucket, path)
  string_to_sign = "GET\n\n\n#{date}\n/#{bucket}#{path}"
  digest = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA1.new, secret_access_key_id, string_to_sign)
  Base64.strict_encode64(digest)
end

# Looks up the size of an object by listing the bucket with the object's key
# as prefix and max-keys=1, then reading the first <Size> element.
#
# @param bucket [String] bucket name
# @param date [String] value for the Date header (must match the signature)
# @param path [String] object path; a leading "/" is stripped for the prefix
# @param access_key_id [String] access key placed in the Authorization header
# @param secret_access_key_id [String] secret key used for signing
# @return [Integer] size in bytes reported by the listing
# @raise [Net::HTTPExceptions] when the listing request is not answered with 2xx
# @raise [RuntimeError] when the listing contains no size element
def gauge(bucket, date, path, access_key_id, secret_access_key_id)
  host = "#{bucket}.s3.amazonaws.com"
  # The listing is a request against the bucket root, so "/" is what gets signed.
  signature = aws_sign(secret_access_key_id, date, bucket, '/')
  query = "/?prefix=#{CGI.escape(path.sub(%r{\A/}, ''))}&max-keys=1"
  header = {
    'Host' => host,
    'Date' => date,
    'Authorization' => "AWS #{access_key_id}:#{signature}"
  }

  response = Net::HTTP.start(host, 80) { |http| http.get(query, header) }
  response.value # raise on non-2xx instead of parsing an error document

  # NOTE(review): the document is parsed with the HTML parser and queried with
  # a lowercase selector; presumably this sidesteps XML namespaces - confirm.
  size_node = Nokogiri::HTML(response.body).at_css('size')
  raise "no object found for #{path} in bucket #{bucket}" unless size_node

  size_node.content.to_i
end

# Splits the byte span 0...size into consecutive inclusive ranges of at most
# chunk_size bytes each.
#
# @param size [Integer] total number of bytes
# @param chunk_size [Integer] maximum bytes per range; must be positive
# @return [Array<Array(Integer, Integer, Integer)>] [index, first_byte, last_byte]
#   triples in ascending order; empty when size is zero
# @raise [ArgumentError] when chunk_size is not positive
def split(size, chunk_size)
  raise ArgumentError, 'chunk_size must be positive' unless chunk_size > 0

  (0...size).step(chunk_size).each_with_index.map do |start, index|
    # The last range is clamped so it never runs past the final byte.
    [index, start, [start + chunk_size, size].min - 1]
  end
end

# Downloads an object by issuing one ranged GET per chunk, each in its own
# thread, and joining the bodies in range order.
#
# @param bucket [String] bucket name
# @param path [String] object path beginning with "/"
# @param access_key_id [String] access key placed in the Authorization header
# @param secret_access_key_id [String] secret key used for signing
# @param chunk_size [Integer] bytes requested per thread (default 1 MiB)
# @return [String] the concatenated response bodies
# @raise [Net::HTTPExceptions] when any request is not answered with 2xx
def get_multi(bucket, path, access_key_id, secret_access_key_id, chunk_size = 1024 * 1024)
  host = "#{bucket}.s3.amazonaws.com"
  date = Time.now.rfc2822
  signature = aws_sign(secret_access_key_id, date, bucket, path)
  header = {
    'Host' => host,
    'Date' => date,
    'Authorization' => "AWS #{access_key_id}:#{signature}"
  }

  size = gauge(bucket, date, path, access_key_id, secret_access_key_id)

  workers = split(size, chunk_size).map do |_index, start, finish|
    # Each thread gets its own header hash; the shared one is never mutated.
    ranged_header = header.merge('Range' => "bytes=#{start}-#{finish}")
    Thread.new do
      Net::HTTP.start(host, 80) do |http|
        response = http.get(path, ranged_header)
        response.value # raise on non-2xx rather than splicing an error body in
        response.body
      end
    end
  end

  # Thread#value joins the thread, re-raises its exception, and yields its
  # result; mapping in creation order keeps the chunks in byte order.
  workers.map(&:value).join
end

BucketName = 'my_bucket'
Path = '/path/to/object'
AWSAccessKeyId = '<MyAWSAccessKeyId>'
AWSSecretAccessKey = '<MyAWSSecretAccessKey>'

# Only download when executed as a script, so the file can also be required.
if __FILE__ == $PROGRAM_NAME
  content = get_multi(BucketName, Path, AWSAccessKeyId, AWSSecretAccessKey)
  puts(content.bytesize / 1024 / 1024)
end
10MBのオブジェクトを普通にダウンロードすると…
19秒かかるけど、並列にダウンロードすると…
~$ time ./s3get.rb
10
real 0m19.856s
user 0m0.046s
sys 0m0.015s
11秒でダウンロードが終わる。
~$ time ./s3getm.rb
10
real 0m11.855s
user 0m0.046s
sys 0m0.015s