# frozen_string_literal: true

# lib/tasks/fetch_wiki.rake
#
# Adds a task for fetching data from the wiki. Can be used to fetch one object
# or multiple.
#
# Provenance: introduced by commit 311c2188635987ae3103648ab267b1c526f36e10
# (Justin Edmund, 2025-03-02).

# Helpers for the granblue:fetch_wiki_data task. They use plain Ruby only, so
# the wikitext handling can be exercised without loading the application.
module FetchWikiTask
  # Model names the task is allowed to constantize.
  VALID_TYPES = %w[Character Weapon Summon].freeze

  # Matches a MediaWiki redirect directive and captures the target page title.
  # The keyword is case-insensitive, the space (and an optional colon) before
  # the link is optional, and anything after a "#" (section anchor) or "|"
  # (label) is not part of the page title.
  REDIRECT_PATTERN = /#REDIRECT\s*:?\s*\[\[\s*([^\]|#]+)/i

  module_function

  # Extracts the page a redirect stub points at.
  #
  # @param wiki_text [String, nil] raw wikitext as returned by the wiki parser
  # @return [String, nil] the target page title, or nil when the text is not a
  #   redirect (or the redirect has no usable target)
  def redirect_target(wiki_text)
    title = wiki_text.to_s[REDIRECT_PATTERN, 1]&.strip
    title unless title.nil? || title.empty?
  end
end

namespace :granblue do
  desc <<~DESC
    Fetch and store raw wiki data for objects (Character, Weapon, Summon).

    Usage:
      rake granblue:fetch_wiki_data                 # Fetch all Characters (default)
      rake granblue:fetch_wiki_data type=Weapon     # Fetch all Weapons
      rake granblue:fetch_wiki_data type=Summon     # Fetch all Summons
      rake granblue:fetch_wiki_data type=Character id=<granblue_id>
                                                    # Fetch one Character by its granblue_id
      rake granblue:fetch_wiki_data force=true      # Force re-download even if data exists
      rake granblue:fetch_wiki_data verbose=true    # Also print the fetched wikitext
  DESC
  task fetch_wiki_data: :environment do
    # Parameters come from the environment (rake VAR=value syntax).
    # An empty "type=" falls back to the default instead of failing validation.
    type = (ENV['type'].presence || 'Character').classify
    id = ENV['id']
    force = ENV['force'] == 'true'
    verbose = ENV['verbose'] == 'true'

    # Validate against the allow-list BEFORE constantizing, so arbitrary
    # class names from the environment are never resolved.
    unless FetchWikiTask::VALID_TYPES.include?(type)
      abort "Error: Invalid type '#{type}'. Must be one of: #{FetchWikiTask::VALID_TYPES.join(', ')}"
    end

    klass = type.constantize

    # Either a single record (looked up by granblue_id, not the primary key)
    # or every record of the requested type.
    query = id.present? ? klass.where(granblue_id: id) : klass.all

    # A mistyped id would otherwise finish with a misleading "complete for 0" message.
    abort "Error: No #{type} found with granblue_id '#{id}'." if id.present? && !query.exists?

    # A fresh parser per request, exactly as many instances as fetches.
    fetch_page = ->(page) { Granblue::Parsers::Wiki.new.fetch(page) }

    errors = []
    count = 0

    query.find_each do |object|
      # Skip objects that already have wiki_raw unless a re-download is forced.
      if object.wiki_raw.present? && !force
        puts "Skipping #{object.name_en} (already has wiki_raw)."
        next
      end

      # Nothing to fetch when no wiki page is recorded for the object.
      if object.wiki_en.blank?
        puts "Skipping #{object.name_en} (no wiki_en set)."
        next
      end

      begin
        # 1) Fetch raw wikitext from the wiki.
        wiki_text = fetch_page.call(object.wiki_en)

        # 2) If the page is a redirect stub, follow it once.
        redirect_target = FetchWikiTask.redirect_target(wiki_text)
        if redirect_target
          # Persist the new page name first so later runs do not keep
          # fetching the old page, even if the second fetch fails.
          object.update!(wiki_en: redirect_target)
          wiki_text = fetch_page.call(redirect_target)
        end
        puts wiki_text if verbose

        # 3) Save the raw wikitext on the record.
        object.update!(wiki_raw: wiki_text)
        puts "Saved wiki data for #{object.name_en} (#{object.id})"
        count += 1
      rescue StandardError => e
        # Best effort: record the failure and carry on with the next object.
        errors << { object_id: object.id, type: type, error: e.message }
        puts "Error fetching data for #{object.name_en}: #{e.message}"
      end
    end

    if errors.any?
      puts "#{errors.size} #{type.pluralize(errors.size)} had errors:"
      errors.each { |err| puts "  - #{err[:type]} ##{err[:object_id]} => #{err[:error]}" }
    else
      puts "Wiki data fetch complete for #{count} #{type.pluralize(count)} with no errors!"
    end
  end
end