Refactor download_all_images task

This refactor parallelizes the download_all_images task: it now accepts a thread count and downloads images concurrently, which makes downloading large numbers of images much faster.
This commit is contained in:
Justin Edmund 2025-03-01 05:43:57 -08:00
parent ffbc8d0c1e
commit e75578bea3

View file

@ -1,40 +1,50 @@
namespace :granblue do namespace :granblue do
def _progress_reporter(count:, total:, result:, bar_len: 40, multi: true) desc 'Downloads all images for the given object type'
filled_len = (bar_len * count / total).round # Downloads all images for a specific type of game object (e.g. summons, weapons)
status = File.basename(result) # Uses the appropriate downloader class based on the object type
percents = (100.0 * count / total).round(1) #
bar = '=' * filled_len + '-' * (bar_len - filled_len) # @param object [String] Type of object to download images for (e.g. 'summon', 'weapon')
# @example Download all summon images
# rake granblue:download_all_images\[summon\]
# @example Download all weapon images
# rake granblue:download_all_images\[weapon\]
# @example Download all character images
# rake granblue:download_all_images\[character\]
task :download_all_images, %i[object threads size] => :environment do |_t, args|
require 'parallel'
require 'logger'
if !multi # Use a thread-safe logger (or Rails.logger if preferred)
print("[#{bar}] #{percents}% ...#{' ' * 14}#{status}\n") logger = Logger.new($stdout)
else logger.level = Logger::INFO # set to WARN or INFO to reduce debug noise
print "\n"
end
end
desc 'Downloads images for the given object type at the given size' # Load downloader classes
task :download_all_images, %i[object size] => :environment do |_t, args| require_relative '../granblue/downloaders/base_downloader'
require 'open-uri' Dir[Rails.root.join('lib', 'granblue', 'downloaders', '*.rb')].each { |file| require file }
filename = "export/#{args[:object]}-#{args[:size]}.txt" object = args[:object]
count = `wc -l #{filename}`.split.first.to_i specified_size = args[:size]
klass = object.classify.constantize
ids = klass.pluck(:granblue_id)
path = "#{Rails.root}/download/#{args[:object]}-#{args[:size]}" puts "Downloading images for #{ids.count} #{object.pluralize}..."
FileUtils.mkdir_p(path) unless Dir.exist?(path)
puts "Downloading #{count} images from #{args[:object]}-#{args[:size]}.txt..." logger.info "Downloading images for #{ids.count} #{object.pluralize}..."
if File.exist?(filename) thread_count = (args[:threads] || 4).to_i
File.readlines(filename).each_with_index do |line, i| logger.info "Using #{thread_count} threads for parallel downloads..."
download = URI.parse(line.strip).open logger.info "Downloading only size: #{specified_size}" if specified_size
download_URI = "#{path}/#{download.base_uri.to_s.split('/')[-1]}"
if File.exist?(download_URI) Parallel.each(ids, in_threads: thread_count) do |id|
puts "Skipping #{line}" ActiveRecord::Base.connection_pool.with_connection do
downloader_class = "Granblue::Downloaders::#{object.classify}Downloader".constantize
downloader = downloader_class.new(id, verbose: true, logger: logger)
if specified_size
downloader.download(specified_size)
else else
IO.copy_stream(download, "#{path}/#{download.base_uri.to_s.split('/')[-1]}") downloader.download
_progress_reporter(count: i, total: count, result: download_URI, bar_len: 40, multi: false)
end end
rescue StandardError => e rescue StandardError => e
puts "#{e}: #{line}" logger.error "Error downloading #{object} #{id}: #{e.message}"
end end
end end
end end