Merge pull request #191 from jedmund/jedmund/parallelize-downloaders

Parallelize downloaders and update sizes
This commit is contained in:
Justin Edmund 2025-03-01 05:46:08 -08:00 committed by GitHub
commit a6ede6ecf7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 134 additions and 78 deletions

View file

@ -76,10 +76,13 @@ gem 'strscan'
# New Relic Ruby Agent
gem 'newrelic_rpm'
# Parallel processing made simple and fast
gem 'parallel'
# The Sentry SDK for Rails
gem "stackprof"
gem "sentry-ruby"
gem "sentry-rails"
gem 'sentry-rails'
gem 'sentry-ruby'
gem 'stackprof'
group :doc do
gem 'apipie-rails'

View file

@ -482,6 +482,7 @@ DEPENDENCIES
mini_magick
newrelic_rpm
oj
parallel
pg
pg_query
pg_search

View file

@ -35,26 +35,31 @@ module Granblue
# @param verbose [Boolean] When true, enables detailed logging
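# @param storage [Symbol] Storage mode (:local, :s3, or :both)
# @param logger [Logger, nil] Logger used for informational output; falls back to a Logger writing to $stdout when nil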
# @return [void]
def initialize(id, test_mode: false, verbose: false, storage: :both)
def initialize(id, test_mode: false, verbose: false, storage: :both, logger: nil)
@id = id
@base_url = base_url
@test_mode = test_mode
@verbose = verbose
@storage = storage
@logger = logger || Logger.new($stdout) # fallback logger
@aws_service = AwsService.new
ensure_directories_exist unless @test_mode
end
# Download images for all sizes, or only for the selected size
# @param selected_size [String, nil] The size to download. If nil, downloads all sizes.
# @return [void]
def download
log_info "-> #{@id}"
def download(selected_size = nil)
log_info("-> #{@id}")
return if @test_mode
SIZES.each_with_index do |size, index|
# If a specific size is provided, use only that; otherwise, use all available sizes.
sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size)
url = build_url(size)
process_download(url, size, path, last: index == SIZES.size - 1)
process_download(url, size, path, last: index == sizes.size - 1)
end
end
@ -128,9 +133,9 @@ module Granblue
download.rewind
# Upload to S3 if it doesn't exist
unless @aws_service.file_exists?(s3_key)
@aws_service.upload_stream(download, s3_key)
end
return if @aws_service.file_exists?(s3_key)
@aws_service.upload_stream(download, s3_key)
end
# Check if file should be downloaded based on storage mode
@ -182,7 +187,7 @@ module Granblue
# Log informational message if verbose
# @param message [String] Message
def log_info(message)
puts message if @verbose
@logger.info(message) if @verbose
end
# Download elemental variant image
@ -197,12 +202,10 @@ module Granblue
filepath = "#{path}/#{filename}"
URI.open(url) do |file|
content = file.read
if content
File.open(filepath, 'wb') do |output|
output.write(content)
end
else
raise "Failed to read content from #{url}"
raise "Failed to read content from #{url}" unless content
File.open(filepath, 'wb') do |output|
output.write(content)
end
end
log_info "-> #{size}:\t#{url}..."

View file

@ -15,24 +15,27 @@ module Granblue
# Downloads images for all variants of a character based on their uncap status.
# Overrides {BaseDownloader#download} to handle character-specific variants.
#
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Skips download if character is not found in database
# @note Downloads FLB/ULB variants only if character has those uncaps
# @see #download_variants
def download
def download(selected_size = nil)
character = Character.find_by(granblue_id: @id)
return unless character
download_variants(character)
download_variants(character, selected_size)
end
private
# Downloads all variants of a character's images
#
# @param character [Character] Character model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Only downloads variants that should exist based on character uncap status
def download_variants(character)
def download_variants(character, selected_size = nil)
# All characters have 01 and 02 variants
variants = %W[#{@id}_01 #{@id}_02]
@ -45,18 +48,22 @@ module Granblue
log_info "Downloading character variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id|
download_variant(variant_id)
download_variant(variant_id, selected_size)
end
end
# Downloads a specific variant's images in all sizes, or only in the selected size
#
# @param variant_id [String] Character variant ID (e.g., "3040001000_01")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
def download_variant(variant_id)
def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose
return if @test_mode
SIZES.each_with_index do |size, index|
sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size)
url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1)
@ -64,12 +71,18 @@ module Granblue
end
# Builds URL for a specific variant and size
#
# @param variant_id [String] Character variant ID
# @param size [String] Image size variant ("main", "grid", or "square")
# @param size [String] Image size variant ("main", "grid", "square", or "detail")
# @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size)
directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg"
if size == 'detail'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end
# Gets object type for file paths and storage keys
@ -85,6 +98,7 @@ module Granblue
end
# Gets directory name for a size variant
#
# @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "f", "grid" -> "m", "square" -> "s", "detail" -> "detail"
@ -93,6 +107,7 @@ module Granblue
when 'main' then 'f'
when 'grid' then 'm'
when 'square' then 's'
when 'detail' then 'detail'
end
end
end

View file

@ -15,25 +15,28 @@ module Granblue
# Downloads images for all variants of a summon based on their uncap status.
# Overrides {BaseDownloader#download} to handle summon-specific variants.
#
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Skips download if summon is not found in database
# @note Downloads ULB and transcendence variants only if summon has those uncaps
# @see #download_variants
def download
def download(selected_size = nil)
summon = Summon.find_by(granblue_id: @id)
return unless summon
download_variants(summon)
download_variants(summon, selected_size)
end
private
# Downloads all variants of a summon's images
#
# @param summon [Summon] Summon model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Only downloads variants that should exist based on summon uncap status
# @note Handles special transcendence art variants for 6★ summons
def download_variants(summon)
def download_variants(summon, selected_size = nil)
# All summons have base variant
variants = [@id]
@ -41,26 +44,28 @@ module Granblue
variants << "#{@id}_02" if summon.ulb
# Add Transcendence variants if available
if summon.transcendence
variants.push("#{@id}_03", "#{@id}_04")
end
variants.push("#{@id}_03", "#{@id}_04") if summon.transcendence
log_info "Downloading summon variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id|
download_variant(variant_id)
download_variant(variant_id, selected_size)
end
end
# Downloads a specific variant's images in all sizes, or only in the selected size
#
# @param variant_id [String] Summon variant ID (e.g., "2040001000_02")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Downloads every size in SIZES for the given variant unless selected_size is given
def download_variant(variant_id)
def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose
return if @test_mode
SIZES.each_with_index do |size, index|
sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size)
url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1)
@ -68,12 +73,17 @@ module Granblue
end
# Builds URL for a specific variant and size
#
# @param variant_id [String] Summon variant ID
# @param size [String] Image size variant ("main", "grid", or "square")
# @param size [String] Image size variant ("main", "grid", "square", or "detail")
# @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size)
directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg"
if size == 'detail'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end
# Gets object type for file paths and storage keys
@ -89,14 +99,16 @@ module Granblue
end
# Gets directory name for a size variant
#
# @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "detail" -> "detail"
def directory_for_size(size)
case size.to_s
when 'main' then 'party_main'
when 'grid' then 'party_sub'
when 'main' then 'ls'
when 'grid' then 'm'
when 'square' then 's'
when 'detail' then 'detail'
end
end
end

View file

@ -16,49 +16,54 @@ module Granblue
# Downloads images for all variants of a weapon based on their uncap status.
# Overrides {BaseDownloader#download} to handle weapon-specific variants.
#
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Skips download if weapon is not found in database
# @note Downloads transcendence variants only if weapon has those uncaps
# @see #download_variants
def download
def download(selected_size = nil)
weapon = Weapon.find_by(granblue_id: @id)
return unless weapon
download_variants(weapon)
download_variants(weapon, selected_size)
end
private
# Downloads all variants of a weapon's images
#
# @param weapon [Weapon] Weapon model instance to download images for
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Only downloads variants that should exist based on weapon uncap status
# @note Handles special transcendence art variants for transcendable weapons
def download_variants(weapon)
def download_variants(weapon, selected_size = nil)
# All weapons have base variant
variants = [@id]
# Add transcendence variants if available
if weapon.transcendence
variants.push("#{@id}_02", "#{@id}_03")
end
variants.push("#{@id}_02", "#{@id}_03") if weapon.transcendence
log_info "Downloading weapon variants: #{variants.join(', ')}" if @verbose
variants.each do |variant_id|
download_variant(variant_id)
download_variant(variant_id, selected_size)
end
end
# Downloads a specific variant's images in all sizes, or only in the selected size
#
# @param variant_id [String] Weapon variant ID (e.g., "1040001000_02")
# @param selected_size [String] The size to download. If nil, downloads all sizes.
# @return [void]
# @note Downloads every size in SIZES for the given variant unless selected_size is given
def download_variant(variant_id)
def download_variant(variant_id, selected_size = nil)
log_info "-> #{variant_id}" if @verbose
return if @test_mode
SIZES.each_with_index do |size, index|
sizes = selected_size ? [selected_size] : SIZES
sizes.each_with_index do |size, index|
path = download_path(size)
url = build_variant_url(variant_id, size)
process_download(url, size, path, last: index == SIZES.size - 1)
@ -66,12 +71,17 @@ module Granblue
end
# Builds URL for a specific variant and size
#
# @param variant_id [String] Weapon variant ID
# @param size [String] Image size variant ("main", "grid", or "square")
# @param size [String] Image size variant ("main", "grid", "square", or "raw")
# @return [String] Complete URL for downloading the image
def build_variant_url(variant_id, size)
directory = directory_for_size(size)
"#{@base_url}/#{directory}/#{variant_id}.jpg"
if size == 'raw'
"#{@base_url}/#{directory}/#{variant_id}.png"
else
"#{@base_url}/#{directory}/#{variant_id}.jpg"
end
end
# Gets object type for file paths and storage keys
@ -87,6 +97,7 @@ module Granblue
end
# Gets directory name for a size variant
#
# @param size [String] Image size variant
# @return [String] Directory name in game asset URL structure
# @note Maps "main" -> "ls", "grid" -> "m", "square" -> "s", "raw" -> "b"
@ -95,6 +106,7 @@ module Granblue
when 'main' then 'ls'
when 'grid' then 'm'
when 'square' then 's'
when 'raw' then 'b'
end
end
end

View file

@ -1,40 +1,50 @@
namespace :granblue do
def _progress_reporter(count:, total:, result:, bar_len: 40, multi: true)
filled_len = (bar_len * count / total).round
status = File.basename(result)
percents = (100.0 * count / total).round(1)
bar = '=' * filled_len + '-' * (bar_len - filled_len)
desc 'Downloads all images for the given object type'
# Downloads all images for a specific type of game object (e.g. summons, weapons)
# Uses the appropriate downloader class based on the object type
#
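# @param object [String] Type of object to download images for (e.g. 'summon', 'weapon')
# @param threads [Integer] Number of threads used for parallel downloads (defaults to 4)
# @param size [String] Optional single size to download; when omitted, all sizes are downloaded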
# @example Download all summon images
# rake granblue:download_all_images\[summon\]
# @example Download all weapon images
# rake granblue:download_all_images\[weapon\]
# @example Download all character images
# rake granblue:download_all_images\[character\]
task :download_all_images, %i[object threads size] => :environment do |_t, args|
require 'parallel'
require 'logger'
if !multi
print("[#{bar}] #{percents}% ...#{' ' * 14}#{status}\n")
else
print "\n"
end
end
# Use a thread-safe logger (or Rails.logger if preferred)
logger = Logger.new($stdout)
logger.level = Logger::INFO # set to WARN or INFO to reduce debug noise
desc 'Downloads images for the given object type at the given size'
task :download_all_images, %i[object size] => :environment do |_t, args|
require 'open-uri'
# Load downloader classes
require_relative '../granblue/downloaders/base_downloader'
Dir[Rails.root.join('lib', 'granblue', 'downloaders', '*.rb')].each { |file| require file }
filename = "export/#{args[:object]}-#{args[:size]}.txt"
count = `wc -l #{filename}`.split.first.to_i
object = args[:object]
specified_size = args[:size]
klass = object.classify.constantize
ids = klass.pluck(:granblue_id)
path = "#{Rails.root}/download/#{args[:object]}-#{args[:size]}"
FileUtils.mkdir_p(path) unless Dir.exist?(path)
puts "Downloading images for #{ids.count} #{object.pluralize}..."
puts "Downloading #{count} images from #{args[:object]}-#{args[:size]}.txt..."
if File.exist?(filename)
File.readlines(filename).each_with_index do |line, i|
download = URI.parse(line.strip).open
download_URI = "#{path}/#{download.base_uri.to_s.split('/')[-1]}"
if File.exist?(download_URI)
puts "Skipping #{line}"
logger.info "Downloading images for #{ids.count} #{object.pluralize}..."
thread_count = (args[:threads] || 4).to_i
logger.info "Using #{thread_count} threads for parallel downloads..."
logger.info "Downloading only size: #{specified_size}" if specified_size
Parallel.each(ids, in_threads: thread_count) do |id|
ActiveRecord::Base.connection_pool.with_connection do
downloader_class = "Granblue::Downloaders::#{object.classify}Downloader".constantize
downloader = downloader_class.new(id, verbose: true, logger: logger)
if specified_size
downloader.download(specified_size)
else
IO.copy_stream(download, "#{path}/#{download.base_uri.to_s.split('/')[-1]}")
_progress_reporter(count: i, total: count, result: download_URI, bar_len: 40, multi: false)
downloader.download
end
rescue StandardError => e
puts "#{e}: #{line}"
logger.error "Error downloading #{object} #{id}: #{e.message}"
end
end
end