Merge pull request #191 from jedmund/jedmund/parallelize-downloaders

Parallelize downloaders and update sizes
This commit is contained in:
Justin Edmund 2025-03-01 05:46:08 -08:00 committed by GitHub
commit a6ede6ecf7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 134 additions and 78 deletions

View file

@ -76,10 +76,13 @@ gem 'strscan'
# New Relic Ruby Agent # New Relic Ruby Agent
gem 'newrelic_rpm' gem 'newrelic_rpm'
# Parallel processing made simple and fast
gem 'parallel'
# The Sentry SDK for Rails # The Sentry SDK for Rails
gem "stackprof" gem 'sentry-rails'
gem "sentry-ruby" gem 'sentry-ruby'
gem "sentry-rails" gem 'stackprof'
group :doc do group :doc do
gem 'apipie-rails' gem 'apipie-rails'

View file

@ -482,6 +482,7 @@ DEPENDENCIES
mini_magick mini_magick
newrelic_rpm newrelic_rpm
oj oj
parallel
pg pg
pg_query pg_query
pg_search pg_search

View file

@ -35,26 +35,31 @@ module Granblue
# @param verbose [Boolean] When true, enables detailed logging # @param verbose [Boolean] When true, enables detailed logging
# @param storage [Symbol] Storage mode (:local, :s3, or :both) # @param storage [Symbol] Storage mode (:local, :s3, or :both)
# @return [void] # @return [void]
def initialize(id, test_mode: false, verbose: false, storage: :both) def initialize(id, test_mode: false, verbose: false, storage: :both, logger: nil)
@id = id @id = id
@base_url = base_url @base_url = base_url
@test_mode = test_mode @test_mode = test_mode
@verbose = verbose @verbose = verbose
@storage = storage @storage = storage
@logger = logger || Logger.new($stdout) # fallback logger
@aws_service = AwsService.new @aws_service = AwsService.new
ensure_directories_exist unless @test_mode ensure_directories_exist unless @test_mode
end end
# Download images for all sizes # Download images for one size, or for every size when none is given
# @param selected_size [String, nil] The size to download; when nil, every size in SIZES is downloaded
# @return [void] # @return [void]
def download def download(selected_size = nil)
log_info "-> #{@id}" log_info("-> #{@id}")
return if @test_mode return if @test_mode
SIZES.each_with_index do |size, index| # If a specific size is provided, use only that; otherwise, use all available sizes.
sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size) path = download_path(size)
url = build_url(size) url = build_url(size)
process_download(url, size, path, last: index == SIZES.size - 1) process_download(url, size, path, last: index == sizes.size - 1)
end end
end end
@ -128,9 +133,9 @@ module Granblue
download.rewind download.rewind
# Upload to S3 if it doesn't exist # Upload to S3 if it doesn't exist
unless @aws_service.file_exists?(s3_key) return if @aws_service.file_exists?(s3_key)
@aws_service.upload_stream(download, s3_key)
end @aws_service.upload_stream(download, s3_key)
end end
# Check if file should be downloaded based on storage mode # Check if file should be downloaded based on storage mode
@ -182,7 +187,7 @@ module Granblue
# Log informational message if verbose # Log informational message if verbose
# @param message [String] Message # @param message [String] Message
def log_info(message) def log_info(message)
puts message if @verbose @logger.info(message) if @verbose
end end
# Download elemental variant image # Download elemental variant image
@ -197,12 +202,10 @@ module Granblue
filepath = "#{path}/#{filename}" filepath = "#{path}/#{filename}"
URI.open(url) do |file| URI.open(url) do |file|
content = file.read content = file.read
if content raise "Failed to read content from #{url}" unless content
File.open(filepath, 'wb') do |output|
output.write(content) File.open(filepath, 'wb') do |output|
end output.write(content)
else
raise "Failed to read content from #{url}"
end end
end end
log_info "-> #{size}:\t#{url}..." log_info "-> #{size}:\t#{url}..."

View file

@ -15,24 +15,27 @@ module Granblue
# Downloads images for all variants of a character based on their uncap status. # Downloads images for all variants of a character based on their uncap status.
# Overrides {BaseDownloader#download} to handle character-specific variants. # Overrides {BaseDownloader#download} to handle character-specific variants.
# #
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Skips download if character is not found in database # @note Skips download if character is not found in database
# @note Downloads FLB/ULB variants only if character has those uncaps # @note Downloads FLB/ULB variants only if character has those uncaps
# @see #download_variants # @see #download_variants
def download def download(selected_size = nil)
character = Character.find_by(granblue_id: @id) character = Character.find_by(granblue_id: @id)
return unless character return unless character
download_variants(character) download_variants(character, selected_size)
end end
private private
# Downloads all variants of a character's images # Downloads all variants of a character's images
#
# @param character [Character] Character model instance to download images for # @param character [Character] Character model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Only downloads variants that should exist based on character uncap status # @note Only downloads variants that should exist based on character uncap status
def download_variants(character) def download_variants(character, selected_size = nil)
# All characters have 01 and 02 variants # All characters have 01 and 02 variants
variants = %W[#{@id}_01 #{@id}_02] variants = %W[#{@id}_01 #{@id}_02]
@ -45,18 +48,22 @@ module Granblue
log_info "Downloading character variants: #{variants.join(', ')}" if @verbose log_info "Downloading character variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id| variants.each do |variant_id|
download_variant(variant_id) download_variant(variant_id, selected_size)
end end
end end
# Downloads a specific variant's images in all sizes # Downloads a specific variant's images in all sizes
#
# @param variant_id [String] Character variant ID (e.g., "3040001000_01") # @param variant_id [String] Character variant ID (e.g., "3040001000_01")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
def download_variant(variant_id) def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose log_info "-> #{variant_id}" if @verbose
return if @test_mode return if @test_mode
SIZES.each_with_index do |size, index| sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size) path = download_path(size)
url = build_variant_url(variant_id, size) url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1) process_download(url, size, path, last: index == SIZES.size - 1)
@ -64,12 +71,18 @@ module Granblue
end end
# Builds URL for a specific variant and size # Builds URL for a specific variant and size
#
# @param variant_id [String] Character variant ID # @param variant_id [String] Character variant ID
# @param size [String] Image size variant ("main", "grid", or "square") # @param size [String] Image size variant ("main", "grid", "square", or "detail")
# @return [String] Complete URL for downloading the image # @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size) def build_variant_url(variant_id, size)
directory = directory_for_size(size) directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg"
if size == 'detail'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end end
# Gets object type for file paths and storage keys # Gets object type for file paths and storage keys
@ -85,6 +98,7 @@ module Granblue
end end
# Gets directory name for a size variant # Gets directory name for a size variant
#
# @param size [String] Image size variant # @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure # @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "f", "grid" -> "m", "square" -> "s" # @note Maps "main" -> "f", "grid" -> "m", "square" -> "s", "detail" -> "detail"
@ -93,6 +107,7 @@ module Granblue
when 'main' then 'f' when 'main' then 'f'
when 'grid' then 'm' when 'grid' then 'm'
when 'square' then 's' when 'square' then 's'
when 'detail' then 'detail'
end end
end end
end end

View file

@ -15,25 +15,28 @@ module Granblue
# Downloads images for all variants of a summon based on their uncap status. # Downloads images for all variants of a summon based on their uncap status.
# Overrides {BaseDownloader#download} to handle summon-specific variants. # Overrides {BaseDownloader#download} to handle summon-specific variants.
# #
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Skips download if summon is not found in database # @note Skips download if summon is not found in database
# @note Downloads ULB and transcendence variants only if summon has those uncaps # @note Downloads ULB and transcendence variants only if summon has those uncaps
# @see #download_variants # @see #download_variants
def download def download(selected_size = nil)
summon = Summon.find_by(granblue_id: @id) summon = Summon.find_by(granblue_id: @id)
return unless summon return unless summon
download_variants(summon) download_variants(summon, selected_size)
end end
private private
# Downloads all variants of a summon's images # Downloads all variants of a summon's images
#
# @param summon [Summon] Summon model instance to download images for # @param summon [Summon] Summon model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Only downloads variants that should exist based on summon uncap status # @note Only downloads variants that should exist based on summon uncap status
# @note Handles special transcendence art variants for 6★ summons # @note Handles special transcendence art variants for 6★ summons
def download_variants(summon) def download_variants(summon, selected_size = nil)
# All summons have base variant # All summons have base variant
variants = [@id] variants = [@id]
@ -41,26 +44,28 @@ module Granblue
variants << "#{@id}_02" if summon.ulb variants << "#{@id}_02" if summon.ulb
# Add Transcendence variants if available # Add Transcendence variants if available
if summon.transcendence variants.push("#{@id}_03", "#{@id}_04") if summon.transcendence
variants.push("#{@id}_03", "#{@id}_04")
end
log_info "Downloading summon variants: #{variants.join(', ')}" if @verbose log_info "Downloading summon variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id| variants.each do |variant_id|
download_variant(variant_id) download_variant(variant_id, selected_size)
end end
end end
# Downloads a specific variant's images in all sizes # Downloads a specific variant's images in all sizes
#
# @param variant_id [String] Summon variant ID (e.g., "2040001000_02") # @param variant_id [String] Summon variant ID (e.g., "2040001000_02")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Downloads all size variants (main/grid/square) for the given variant # @note Downloads only selected_size when given; otherwise every size in SIZES
def download_variant(variant_id) def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose log_info "-> #{variant_id}" if @verbose
return if @test_mode return if @test_mode
SIZES.each_with_index do |size, index| sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size) path = download_path(size)
url = build_variant_url(variant_id, size) url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1) process_download(url, size, path, last: index == SIZES.size - 1)
@ -68,12 +73,17 @@ module Granblue
end end
# Builds URL for a specific variant and size # Builds URL for a specific variant and size
#
# @param variant_id [String] Summon variant ID # @param variant_id [String] Summon variant ID
# @param size [String] Image size variant ("main", "grid", or "square") # @param size [String] Image size variant ("main", "grid", "square", or "detail")
# @return [String] Complete URL for downloading the image # @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size) def build_variant_url(variant_id, size)
directory = directory_for_size(size) directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg" if size == 'detail'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end end
# Gets object type for file paths and storage keys # Gets object type for file paths and storage keys
@ -89,14 +99,16 @@ module Granblue
end end
# Gets directory name for a size variant # Gets directory name for a size variant
#
# @param size [String] Image size variant # @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure # @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "party_main", "grid" -> "party_sub", "square" -> "s" # @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "detail" -> "detail"
def directory_for_size(size) def directory_for_size(size)
case size.to_s case size.to_s
when 'main' then 'party_main' when 'main' then 'ls'
when 'grid' then 'party_sub' when 'grid' then 'm'
when 'square' then 's' when 'square' then 's'
when 'detail' then 'detail'
end end
end end
end end

View file

@ -16,49 +16,54 @@ module Granblue
# Downloads images for all variants of a weapon based on their uncap status. # Downloads images for all variants of a weapon based on their uncap status.
# Overrides {BaseDownloader#download} to handle weapon-specific variants. # Overrides {BaseDownloader#download} to handle weapon-specific variants.
# #
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Skips download if weapon is not found in database # @note Skips download if weapon is not found in database
# @note Downloads transcendence variants only if weapon has those uncaps # @note Downloads transcendence variants only if weapon has those uncaps
# @see #download_variants # @see #download_variants
def download def download(selected_size = nil)
weapon = Weapon.find_by(granblue_id: @id) weapon = Weapon.find_by(granblue_id: @id)
return unless weapon return unless weapon
download_variants(weapon) download_variants(weapon, selected_size)
end end
private private
# Downloads all variants of a weapon's images # Downloads all variants of a weapon's images
#
# @param weapon [Weapon] Weapon model instance to download images for # @param weapon [Weapon] Weapon model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Only downloads variants that should exist based on weapon uncap status # @note Only downloads variants that should exist based on weapon uncap status
# @note Handles special transcendence art variants for transcendable weapons # @note Handles special transcendence art variants for transcendable weapons
def download_variants(weapon) def download_variants(weapon, selected_size = nil)
# All weapons have base variant # All weapons have base variant
variants = [@id] variants = [@id]
# Add transcendence variants if available # Add transcendence variants if available
if weapon.transcendence variants.push("#{@id}_02", "#{@id}_03") if weapon.transcendence
variants.push("#{@id}_02", "#{@id}_03")
end
log_info "Downloading weapon variants: #{variants.join(', ')}" if @verbose log_info "Downloading weapon variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id| variants.each do |variant_id|
download_variant(variant_id) download_variant(variant_id, selected_size)
end end
end end
# Downloads a specific variant's images in all sizes # Downloads a specific variant's images in all sizes
#
# @param variant_id [String] Weapon variant ID (e.g., "1040001000_02") # @param variant_id [String] Weapon variant ID (e.g., "1040001000_02")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void] # @return [void]
# @note Downloads all size variants (main/grid/square) for the given variant # @note Downloads only selected_size when given; otherwise every size in SIZES
def download_variant(variant_id) def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose log_info "-> #{variant_id}" if @verbose
return if @test_mode return if @test_mode
SIZES.each_with_index do |size, index| sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size) path = download_path(size)
url = build_variant_url(variant_id, size) url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1) process_download(url, size, path, last: index == SIZES.size - 1)
@ -66,12 +71,17 @@ module Granblue
end end
# Builds URL for a specific variant and size # Builds URL for a specific variant and size
#
# @param variant_id [String] Weapon variant ID # @param variant_id [String] Weapon variant ID
# @param size [String] Image size variant ("main", "grid", or "square") # @param size [String] Image size variant ("main", "grid", "square", or "raw")
# @return [String] Complete URL for downloading the image # @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size) def build_variant_url(variant_id, size)
directory = directory_for_size(size) directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg" if size == 'raw'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end end
# Gets object type for file paths and storage keys # Gets object type for file paths and storage keys
@ -87,6 +97,7 @@ module Granblue
end end
# Gets directory name for a size variant # Gets directory name for a size variant
#
# @param size [String] Image size variant # @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure # @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s" # @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "raw" -> "b"
@ -95,6 +106,7 @@ module Granblue
when 'main' then 'ls' when 'main' then 'ls'
when 'grid' then 'm' when 'grid' then 'm'
when 'square' then 's' when 'square' then 's'
when 'raw' then 'b'
end end
end end
end end

View file

@ -1,40 +1,50 @@
namespace :granblue do namespace :granblue do
def _progress_reporter(count:, total:, result:, bar_len: 40, multi: true) desc 'Downloads all images for the given object type'
filled_len = (bar_len * count / total).round # Downloads all images for a specific type of game object (e.g. summons, weapons)
status = File.basename(result) # Uses the appropriate downloader class based on the object type
percents = (100.0 * count / total).round(1) #
bar = '=' * filled_len + '-' * (bar_len - filled_len) # @param object [String] Type of object to download images for (e.g. 'summon', 'weapon')
# @example Download all summon images
# rake granblue:download_all_images\[summon\]
# @example Download all weapon images
# rake granblue:download_all_images\[weapon\]
# @example Download all character images
# rake granblue:download_all_images\[character\]
task :download_all_images, %i[object threads size] => :environment do |_t, args|
require 'parallel'
require 'logger'
if !multi # Use a thread-safe logger (or Rails.logger if preferred)
print("[#{bar}] #{percents}% ...#{' ' * 14}#{status}\n") logger = Logger.new($stdout)
else logger.level = Logger::INFO # set to WARN or INFO to reduce debug noise
print "\n"
end
end
desc 'Downloads images for the given object type at the given size' # Load downloader classes
task :download_all_images, %i[object size] => :environment do |_t, args| require_relative '../granblue/downloaders/base_downloader'
require 'open-uri' Dir[Rails.root.join('lib', 'granblue', 'downloaders', '*.rb')].each { |file| require file }
filename = "export/#{args[:object]}-#{args[:size]}.txt" object = args[:object]
count = `wc -l #{filename}`.split.first.to_i specified_size = args[:size]
klass = object.classify.constantize
ids = klass.pluck(:granblue_id)
path = "#{Rails.root}/download/#{args[:object]}-#{args[:size]}" puts "Downloading images for #{ids.count} #{object.pluralize}..."
FileUtils.mkdir_p(path) unless Dir.exist?(path)
puts "Downloading #{count} images from #{args[:object]}-#{args[:size]}.txt..." logger.info "Downloading images for #{ids.count} #{object.pluralize}..."
if File.exist?(filename) thread_count = (args[:threads] || 4).to_i
File.readlines(filename).each_with_index do |line, i| logger.info "Using #{thread_count} threads for parallel downloads..."
download = URI.parse(line.strip).open logger.info "Downloading only size: #{specified_size}" if specified_size
download_URI = "#{path}/#{download.base_uri.to_s.split('/')[-1]}"
if File.exist?(download_URI) Parallel.each(ids, in_threads: thread_count) do |id|
puts "Skipping #{line}" ActiveRecord::Base.connection_pool.with_connection do
downloader_class = "Granblue::Downloaders::#{object.classify}Downloader".constantize
downloader = downloader_class.new(id, verbose: true, logger: logger)
if specified_size
downloader.download(specified_size)
else else
IO.copy_stream(download, "#{path}/#{download.base_uri.to_s.split('/')[-1]}") downloader.download
_progress_reporter(count: i, total: count, result: download_URI, bar_len: 40, multi: false)
end end
rescue StandardError => e rescue StandardError => e
puts "#{e}: #{line}" logger.error "Error downloading #{object} #{id}: #{e.message}"
end end
end end
end end