Merge pull request #191 from jedmund/jedmund/parallelize-downloaders
Parallelize downloaders and update sizes
This commit is contained in:
commit
a6ede6ecf7
7 changed files with 134 additions and 78 deletions
9
Gemfile
9
Gemfile
|
|
@ -76,10 +76,13 @@ gem 'strscan'
|
|||
# New Relic Ruby Agent
|
||||
gem 'newrelic_rpm'
|
||||
|
||||
# Parallel processing made simple and fast
|
||||
gem 'parallel'
|
||||
|
||||
# The Sentry SDK for Rails
|
||||
gem "stackprof"
|
||||
gem "sentry-ruby"
|
||||
gem "sentry-rails"
|
||||
gem 'sentry-rails'
|
||||
gem 'sentry-ruby'
|
||||
gem 'stackprof'
|
||||
|
||||
group :doc do
|
||||
gem 'apipie-rails'
|
||||
|
|
|
|||
|
|
@ -482,6 +482,7 @@ DEPENDENCIES
|
|||
mini_magick
|
||||
newrelic_rpm
|
||||
oj
|
||||
parallel
|
||||
pg
|
||||
pg_query
|
||||
pg_search
|
||||
|
|
|
|||
|
|
@ -35,26 +35,31 @@ module Granblue
|
|||
# @param verbose [Boolean] When true, enables detailed logging
|
||||
# @param storage [Symbol] Storage mode (:local, :s3, or :both)
|
||||
# @return [void]
|
||||
def initialize(id, test_mode: false, verbose: false, storage: :both)
|
||||
def initialize(id, test_mode: false, verbose: false, storage: :both, logger: nil)
|
||||
@id = id
|
||||
@base_url = base_url
|
||||
@test_mode = test_mode
|
||||
@verbose = verbose
|
||||
@storage = storage
|
||||
@logger = logger || Logger.new($stdout) # fallback logger
|
||||
@aws_service = AwsService.new
|
||||
ensure_directories_exist unless @test_mode
|
||||
end
|
||||
|
||||
# Download images for the selected size, or for every size in SIZES when none is given
|
||||
# @param selected_size [String] The size to download
|
||||
# @return [void]
|
||||
def download
|
||||
log_info "-> #{@id}"
|
||||
def download(selected_size = nil)
|
||||
log_info("-> #{@id}")
|
||||
return if @test_mode
|
||||
|
||||
SIZES.each_with_index do |size, index|
|
||||
# If a specific size is provided, use only that; otherwise, use all available sizes.
|
||||
sizes = selected_size ? [selected_size] : SIZES
|
||||
|
||||
sizes.each_with_index do |size, index|
|
||||
path = download_path(size)
|
||||
url = build_url(size)
|
||||
process_download(url, size, path, last: index == SIZES.size - 1)
|
||||
process_download(url, size, path, last: index == sizes.size - 1)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -128,10 +133,10 @@ module Granblue
|
|||
download.rewind
|
||||
|
||||
# Upload to S3 if it doesn't exist
|
||||
unless @aws_service.file_exists?(s3_key)
|
||||
return if @aws_service.file_exists?(s3_key)
|
||||
|
||||
@aws_service.upload_stream(download, s3_key)
|
||||
end
|
||||
end
|
||||
|
||||
# Check if file should be downloaded based on storage mode
|
||||
# @param local_path [String] Local file path
|
||||
|
|
@ -182,7 +187,7 @@ module Granblue
|
|||
# Log informational message if verbose
|
||||
# @param message [String] Message
|
||||
def log_info(message)
|
||||
puts message if @verbose
|
||||
@logger.info(message) if @verbose
|
||||
end
|
||||
|
||||
# Download elemental variant image
|
||||
|
|
@ -197,13 +202,11 @@ module Granblue
|
|||
filepath = "#{path}/#{filename}"
|
||||
URI.open(url) do |file|
|
||||
content = file.read
|
||||
if content
|
||||
raise "Failed to read content from #{url}" unless content
|
||||
|
||||
File.open(filepath, 'wb') do |output|
|
||||
output.write(content)
|
||||
end
|
||||
else
|
||||
raise "Failed to read content from #{url}"
|
||||
end
|
||||
end
|
||||
log_info "-> #{size}:\t#{url}..."
|
||||
rescue OpenURI::HTTPError
|
||||
|
|
|
|||
|
|
@ -15,24 +15,27 @@ module Granblue
|
|||
# Downloads images for all variants of a character based on their uncap status.
|
||||
# Overrides {BaseDownloader#download} to handle character-specific variants.
|
||||
#
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Skips download if character is not found in database
|
||||
# @note Downloads FLB/ULB variants only if character has those uncaps
|
||||
# @see #download_variants
|
||||
def download
|
||||
def download(selected_size = nil)
|
||||
character = Character.find_by(granblue_id: @id)
|
||||
return unless character
|
||||
|
||||
download_variants(character)
|
||||
download_variants(character, selected_size)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Downloads all variants of a character's images
|
||||
#
|
||||
# @param character [Character] Character model instance to download images for
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Only downloads variants that should exist based on character uncap status
|
||||
def download_variants(character)
|
||||
def download_variants(character, selected_size = nil)
|
||||
# All characters have 01 and 02 variants
|
||||
variants = %W[#{@id}_01 #{@id}_02]
|
||||
|
||||
|
|
@ -45,18 +48,22 @@ module Granblue
|
|||
log_info "Downloading character variants: #{variants.join(', ')}" if @verbose
|
||||
|
||||
variants.each do |variant_id|
|
||||
download_variant(variant_id)
|
||||
download_variant(variant_id, selected_size)
|
||||
end
|
||||
end
|
||||
|
||||
# Downloads a specific variant's images in the selected size, or in every size when none is given
|
||||
#
|
||||
# @param variant_id [String] Character variant ID (e.g., "3040001000_01")
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
def download_variant(variant_id, selected_size = nil)
  log_info "-> #{variant_id}" if @verbose
  return if @test_mode

  # A single requested size is wrapped in an array so both cases share one loop.
  sizes = selected_size ? [selected_size] : SIZES
  # The "last" flag must be measured against the list actually iterated;
  # comparing with SIZES.size never flags the final download when only one
  # size was requested.
  last_index = sizes.size - 1

  sizes.each_with_index do |size, index|
    path = download_path(size)
    url = build_variant_url(variant_id, size)
    process_download(url, size, path, last: index == last_index)
  end
end
|
||||
|
||||
# Builds URL for a specific variant and size
|
||||
#
|
||||
# @param variant_id [String] Character variant ID
|
||||
# @param size [String] Image size variant ("main", "grid", or "square")
|
||||
# @param size [String] Image size variant ("main", "grid", "square", or "detail")
|
||||
# @return [String] Complete URL for downloading the image
|
||||
def build_variant_url(variant_id, size)
  directory = directory_for_size(size)
  # "detail" art is fetched as PNG; every other size is fetched as JPEG.
  # to_s keeps the extension choice correct if a Symbol size is ever passed
  # (the sibling summon downloader normalises sizes the same way).
  extension = size.to_s == 'detail' ? 'png' : 'jpg'

  "#{@base_url}/#{directory}/#{variant_id}.#{extension}"
end
|
||||
|
||||
# Gets object type for file paths and storage keys
|
||||
# @return [String] Returns "character"
|
||||
|
|
@ -85,6 +98,7 @@ module Granblue
|
|||
end
|
||||
|
||||
# Gets directory name for a size variant
|
||||
#
|
||||
# @param size [String] Image size variant
|
||||
# @return [String] Directory name in game asset URL structure
|
||||
# @note Maps "main" -> "f", "grid" -> "m", "square" -> "s", "detail" -> "detail"
|
||||
|
|
@ -93,6 +107,7 @@ module Granblue
|
|||
when 'main' then 'f'
|
||||
when 'grid' then 'm'
|
||||
when 'square' then 's'
|
||||
when 'detail' then 'detail'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -15,25 +15,28 @@ module Granblue
|
|||
# Downloads images for all variants of a summon based on their uncap status.
|
||||
# Overrides {BaseDownloader#download} to handle summon-specific variants.
|
||||
#
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Skips download if summon is not found in database
|
||||
# @note Downloads ULB and transcendence variants only if summon has those uncaps
|
||||
# @see #download_variants
|
||||
def download
|
||||
def download(selected_size = nil)
|
||||
summon = Summon.find_by(granblue_id: @id)
|
||||
return unless summon
|
||||
|
||||
download_variants(summon)
|
||||
download_variants(summon, selected_size)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Downloads all variants of a summon's images
|
||||
#
|
||||
# @param summon [Summon] Summon model instance to download images for
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Only downloads variants that should exist based on summon uncap status
|
||||
# @note Handles special transcendence art variants for 6★ summons
|
||||
def download_variants(summon)
|
||||
def download_variants(summon, selected_size = nil)
|
||||
# All summons have base variant
|
||||
variants = [@id]
|
||||
|
||||
|
|
@ -41,26 +44,28 @@ module Granblue
|
|||
variants << "#{@id}_02" if summon.ulb
|
||||
|
||||
# Add Transcendence variants if available
|
||||
if summon.transcendence
|
||||
variants.push("#{@id}_03", "#{@id}_04")
|
||||
end
|
||||
variants.push("#{@id}_03", "#{@id}_04") if summon.transcendence
|
||||
|
||||
log_info "Downloading summon variants: #{variants.join(', ')}" if @verbose
|
||||
|
||||
variants.each do |variant_id|
|
||||
download_variant(variant_id)
|
||||
download_variant(variant_id, selected_size)
|
||||
end
|
||||
end
|
||||
|
||||
# Downloads a specific variant's images in the selected size, or in every size when none is given
|
||||
#
|
||||
# @param variant_id [String] Summon variant ID (e.g., "2040001000_02")
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Downloads every size in SIZES for the given variant unless selected_size narrows it to one
|
||||
def download_variant(variant_id, selected_size = nil)
  log_info "-> #{variant_id}" if @verbose
  return if @test_mode

  # A single requested size is wrapped in an array so both cases share one loop.
  sizes = selected_size ? [selected_size] : SIZES
  # The "last" flag must be measured against the list actually iterated;
  # comparing with SIZES.size never flags the final download when only one
  # size was requested.
  last_index = sizes.size - 1

  sizes.each_with_index do |size, index|
    path = download_path(size)
    url = build_variant_url(variant_id, size)
    process_download(url, size, path, last: index == last_index)
  end
end
|
||||
|
||||
# Builds URL for a specific variant and size
|
||||
#
|
||||
# @param variant_id [String] Summon variant ID
|
||||
# @param size [String] Image size variant ("main", "grid", or "square")
|
||||
# @param size [String] Image size variant ("main", "grid", "square", or "detail")
|
||||
# @return [String] Complete URL for downloading the image
|
||||
def build_variant_url(variant_id, size)
  directory = directory_for_size(size)
  # "detail" art is fetched as PNG; every other size is fetched as JPEG.
  # directory_for_size matches on size.to_s, so normalise here as well:
  # otherwise a Symbol size would resolve the detail directory but get ".jpg".
  extension = size.to_s == 'detail' ? 'png' : 'jpg'

  "#{@base_url}/#{directory}/#{variant_id}.#{extension}"
end
|
||||
|
||||
# Gets object type for file paths and storage keys
|
||||
# @return [String] Returns "summon"
|
||||
|
|
@ -89,14 +99,16 @@ module Granblue
|
|||
end
|
||||
|
||||
# Gets directory name for a size variant
|
||||
#
|
||||
# @param size [String] Image size variant
|
||||
# @return [String] Directory name in game asset URL structure
|
||||
# @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "detail" -> "detail"
|
||||
def directory_for_size(size)
|
||||
case size.to_s
|
||||
when 'main' then 'party_main'
|
||||
when 'grid' then 'party_sub'
|
||||
when 'main' then 'ls'
|
||||
when 'grid' then 'm'
|
||||
when 'square' then 's'
|
||||
when 'detail' then 'detail'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -16,49 +16,54 @@ module Granblue
|
|||
# Downloads images for all variants of a weapon based on their uncap status.
|
||||
# Overrides {BaseDownloader#download} to handle weapon-specific variants.
|
||||
#
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Skips download if weapon is not found in database
|
||||
# @note Downloads transcendence variants only if weapon has those uncaps
|
||||
# @see #download_variants
|
||||
def download
|
||||
def download(selected_size = nil)
|
||||
weapon = Weapon.find_by(granblue_id: @id)
|
||||
return unless weapon
|
||||
|
||||
download_variants(weapon)
|
||||
download_variants(weapon, selected_size)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Downloads all variants of a weapon's images
|
||||
#
|
||||
# @param weapon [Weapon] Weapon model instance to download images for
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Only downloads variants that should exist based on weapon uncap status
|
||||
# @note Handles special transcendence art variants for transcendable weapons
|
||||
def download_variants(weapon)
|
||||
def download_variants(weapon, selected_size = nil)
|
||||
# All weapons have base variant
|
||||
variants = [@id]
|
||||
|
||||
# Add transcendence variants if available
|
||||
if weapon.transcendence
|
||||
variants.push("#{@id}_02", "#{@id}_03")
|
||||
end
|
||||
variants.push("#{@id}_02", "#{@id}_03") if weapon.transcendence
|
||||
|
||||
log_info "Downloading weapon variants: #{variants.join(', ')}" if @verbose
|
||||
|
||||
variants.each do |variant_id|
|
||||
download_variant(variant_id)
|
||||
download_variant(variant_id, selected_size)
|
||||
end
|
||||
end
|
||||
|
||||
# Downloads a specific variant's images in the selected size, or in every size when none is given
|
||||
#
|
||||
# @param variant_id [String] Weapon variant ID (e.g., "1040001000_02")
|
||||
# @param selected_size [String] The size to download. If nil, downloads all sizes.
|
||||
# @return [void]
|
||||
# @note Downloads every size in SIZES for the given variant unless selected_size narrows it to one
|
||||
def download_variant(variant_id, selected_size = nil)
  log_info "-> #{variant_id}" if @verbose
  return if @test_mode

  # A single requested size is wrapped in an array so both cases share one loop.
  sizes = selected_size ? [selected_size] : SIZES
  # The "last" flag must be measured against the list actually iterated;
  # comparing with SIZES.size never flags the final download when only one
  # size was requested.
  last_index = sizes.size - 1

  sizes.each_with_index do |size, index|
    path = download_path(size)
    url = build_variant_url(variant_id, size)
    process_download(url, size, path, last: index == last_index)
  end
end
|
||||
|
||||
# Builds URL for a specific variant and size
|
||||
#
|
||||
# @param variant_id [String] Weapon variant ID
|
||||
# @param size [String] Image size variant ("main", "grid", or "square")
|
||||
# @param size [String] Image size variant ("main", "grid", "square", or "raw")
|
||||
# @return [String] Complete URL for downloading the image
|
||||
def build_variant_url(variant_id, size)
  directory = directory_for_size(size)
  # "raw" art is fetched as PNG; every other size is fetched as JPEG.
  # to_s keeps the extension choice correct if a Symbol size is ever passed
  # (the sibling summon downloader normalises sizes the same way).
  extension = size.to_s == 'raw' ? 'png' : 'jpg'

  "#{@base_url}/#{directory}/#{variant_id}.#{extension}"
end
|
||||
|
||||
# Gets object type for file paths and storage keys
|
||||
# @return [String] Returns "weapon"
|
||||
|
|
@ -87,6 +97,7 @@ module Granblue
|
|||
end
|
||||
|
||||
# Gets directory name for a size variant
|
||||
#
|
||||
# @param size [String] Image size variant
|
||||
# @return [String] Directory name in game asset URL structure
|
||||
# @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "raw" -> "b"
|
||||
|
|
@ -95,6 +106,7 @@ module Granblue
|
|||
when 'main' then 'ls'
|
||||
when 'grid' then 'm'
|
||||
when 'square' then 's'
|
||||
when 'raw' then 'b'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,40 +1,50 @@
|
|||
namespace :granblue do
|
||||
def _progress_reporter(count:, total:, result:, bar_len: 40, multi: true)
|
||||
filled_len = (bar_len * count / total).round
|
||||
status = File.basename(result)
|
||||
percents = (100.0 * count / total).round(1)
|
||||
bar = '=' * filled_len + '-' * (bar_len - filled_len)
|
||||
desc 'Downloads all images for the given object type'
|
||||
# Downloads all images for a specific type of game object (e.g. summons, weapons)
|
||||
# Uses the appropriate downloader class based on the object type
|
||||
#
|
||||
# @param object [String] Type of object to download images for (e.g. 'summon', 'weapon')
|
||||
# @example Download all summon images
|
||||
# rake granblue:download_all_images\[summon\]
|
||||
# @example Download all weapon images
|
||||
# rake granblue:download_all_images\[weapon\]
|
||||
# @example Download all character images
|
||||
# rake granblue:download_all_images\[character\]
|
||||
task :download_all_images, %i[object threads size] => :environment do |_t, args|
|
||||
require 'parallel'
|
||||
require 'logger'
|
||||
|
||||
if !multi
|
||||
print("[#{bar}] #{percents}% ...#{' ' * 14}#{status}\n")
|
||||
# Use a thread-safe logger (or Rails.logger if preferred)
|
||||
logger = Logger.new($stdout)
|
||||
logger.level = Logger::INFO # set to WARN or INFO to reduce debug noise
|
||||
|
||||
# Load downloader classes
|
||||
require_relative '../granblue/downloaders/base_downloader'
|
||||
Dir[Rails.root.join('lib', 'granblue', 'downloaders', '*.rb')].each { |file| require file }
|
||||
|
||||
object = args[:object]
|
||||
specified_size = args[:size]
|
||||
klass = object.classify.constantize
|
||||
ids = klass.pluck(:granblue_id)
|
||||
|
||||
puts "Downloading images for #{ids.count} #{object.pluralize}..."
|
||||
|
||||
logger.info "Downloading images for #{ids.count} #{object.pluralize}..."
|
||||
thread_count = (args[:threads] || 4).to_i
|
||||
logger.info "Using #{thread_count} threads for parallel downloads..."
|
||||
logger.info "Downloading only size: #{specified_size}" if specified_size
|
||||
|
||||
Parallel.each(ids, in_threads: thread_count) do |id|
|
||||
ActiveRecord::Base.connection_pool.with_connection do
|
||||
downloader_class = "Granblue::Downloaders::#{object.classify}Downloader".constantize
|
||||
downloader = downloader_class.new(id, verbose: true, logger: logger)
|
||||
if specified_size
|
||||
downloader.download(specified_size)
|
||||
else
|
||||
print "\n"
|
||||
end
|
||||
end
|
||||
|
||||
desc 'Downloads images for the given object type at the given size'
|
||||
task :download_all_images, %i[object size] => :environment do |_t, args|
|
||||
require 'open-uri'
|
||||
|
||||
filename = "export/#{args[:object]}-#{args[:size]}.txt"
|
||||
count = `wc -l #{filename}`.split.first.to_i
|
||||
|
||||
path = "#{Rails.root}/download/#{args[:object]}-#{args[:size]}"
|
||||
FileUtils.mkdir_p(path) unless Dir.exist?(path)
|
||||
|
||||
puts "Downloading #{count} images from #{args[:object]}-#{args[:size]}.txt..."
|
||||
if File.exist?(filename)
|
||||
File.readlines(filename).each_with_index do |line, i|
|
||||
download = URI.parse(line.strip).open
|
||||
download_URI = "#{path}/#{download.base_uri.to_s.split('/')[-1]}"
|
||||
if File.exist?(download_URI)
|
||||
puts "Skipping #{line}"
|
||||
else
|
||||
IO.copy_stream(download, "#{path}/#{download.base_uri.to_s.split('/')[-1]}")
|
||||
_progress_reporter(count: i, total: count, result: download_URI, bar_len: 40, multi: false)
|
||||
downloader.download
|
||||
end
|
||||
rescue StandardError => e
|
||||
puts "#{e}: #{line}"
|
||||
logger.error "Error downloading #{object} #{id}: #{e.message}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Reference in a new issue