Learning how to parse

This commit is contained in:
Justin Edmund 2022-12-30 03:42:30 -08:00
parent f7ecbe16ec
commit 3313f21a47
3 changed files with 141 additions and 0 deletions

View file

@ -43,6 +43,8 @@ gem 'pg_search'
# Pagination library
gem 'will_paginate', '~> 3.3'
gem 'httparty'
group :doc do
gem 'apipie-rails'
gem 'sdoc'
@ -53,6 +55,7 @@ group :development, :test do
gem 'dotenv-rails'
gem 'factory_bot_rails'
gem 'faker'
gem 'pry'
gem 'rspec_junit_formatter'
gem 'rspec-rails'
end

View file

@ -78,6 +78,7 @@ GEM
msgpack (~> 1.0)
builder (3.2.4)
byebug (11.1.3)
coderay (1.1.3)
concurrent-ruby (1.1.9)
crass (1.0.6)
database_cleaner (2.0.1)
@ -113,6 +114,9 @@ GEM
gemoji (>= 2.1.0)
globalid (0.5.2)
activesupport (>= 5.0)
httparty (0.20.0)
mime-types (~> 3.0)
multi_xml (>= 0.5.2)
i18n (1.8.10)
concurrent-ruby (~> 1.0)
jaro_winkler (1.5.4)
@ -130,9 +134,13 @@ GEM
mini_mime (>= 0.1.1)
marcel (1.0.2)
method_source (1.0.0)
mime-types (3.4.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2022.0105)
mini_mime (1.1.2)
minitest (5.14.4)
msgpack (1.4.2)
multi_xml (0.6.0)
nio4r (2.5.8)
nokogiri (1.12.5-arm64-darwin)
racc (~> 1.4)
@ -146,6 +154,9 @@ GEM
pg_search (2.3.5)
activerecord (>= 5.2)
activesupport (>= 5.2)
pry (0.14.1)
coderay (~> 1.1)
method_source (~> 1.0)
puma (5.5.2)
nio4r (~> 2.0)
racc (1.6.0)
@ -296,10 +307,12 @@ DEPENDENCIES
faker
figaro
gemoji-parser
httparty
listen
oj
pg
pg_search
pry
puma
rack-cors
rails

View file

@ -0,0 +1,125 @@
# frozen_string_literal: true
require 'rubygems'
require 'httparty'
require 'strscan'
require 'pry'
# GranblueWiki fetches a page's wikitext from the gbf.wiki MediaWiki API and
# turns its `|key=value` template lines into plain Ruby hashes.
#
# Typical flow: #fetch -> #parse_string -> #extract_abilities / #extract_ougis.
class GranblueWiki
  # Endpoint of the wiki's MediaWiki API.
  URL = 'https://gbf.wiki/api.php'

  # Ability fields whose value is itself a `{{Template|k=v|...}}` call. Maps the
  # key suffix (the part after `a<N>_`) to the name the parsed hash is stored under.
  NESTED_ABILITY_FIELDS = {
    'cd' => 'cooldown',
    'dur' => 'duration',
    'oblevel' => 'obtained'
  }.freeze
  private_constant :NESTED_ABILITY_FIELDS

  # Requests the wikitext of +page+ from the API.
  #
  # @param page [String] wiki page title
  # @return [Object] whatever the response holds under parse -> wikitext -> '*'
  #   (presumably the raw wikitext String; confirm against the API)
  # @raise [NoMethodError] when the response has no 'parse' or 'wikitext' entry,
  #   because the chained lookup then indexes into nil
  def fetch(page)
    # Hand the parameters to HTTParty instead of interpolating them into the
    # URL, so titles containing spaces, '&', '=' or '#' are URL-encoded rather
    # than corrupting the query string.
    response = HTTParty.get(URL, query: params(page))
    response['parse']['wikitext']['*']
  end

  # Fetches +page+, parses it and pretty-prints the merged ability/ougi data.
  #
  # @param page [String] wiki page title
  # @return [Object] the return value of `ap`
  def parse(page)
    parsed = parse_string(fetch(page))
    # NOTE(review): `ap` is not defined by anything this file requires
    # (presumably awesome_print) - confirm it is loaded wherever this runs.
    ap extract_abilities(parsed).merge(extract_ougis(parsed))
  end

  # Collects every `a<N>_<field>` entry of +hash+ into one hash per ability number.
  #
  # Fields listed in NESTED_ABILITY_FIELDS are run through #parse_substring and
  # stored under their mapped name; every other field is stored verbatim under
  # its own suffix.
  #
  # @param hash [Hash{String => String}] key/value pairs as produced by #parse_string
  # @return [Hash] `{ 'abilities' => [Hash, ...] }`, ordered by ability number
  #   with unused numbers dropped
  def extract_abilities(hash)
    abilities = []
    hash.each do |key, value|
      match = /\Aa(\d+)_/.match(key)
      next unless match

      # The array is indexed by ability number; gaps stay nil until compacted below.
      ability = abilities[match[1].to_i] ||= {}
      field = match.post_match
      nested_name = NESTED_ABILITY_FIELDS[field]
      if nested_name
        ability[nested_name] = parse_substring(value)
      else
        ability[field] = value
      end
    end
    { 'abilities' => abilities.compact }
  end

  # Parses the `|key=value` arguments found inside +string+.
  #
  # @param string [String] text such as `{{InfoCd|cooldown=7|cooldown1=6}}`
  # @return [Hash{String => String}] argument names mapped to their values, with
  #   a `}}` at the end of a value (or of a line within it) removed
  def parse_substring(string)
    string.scan(/\|([^|=]+?)=([^|]+)/).to_h do |key, value|
      [key, value.gsub(/\}\}$/, '')]
    end
  end

  # Collects every `ougi<N>_<field>` entry of +hash+ into one hash per number.
  # A key without a number (`ougi_<field>`) is filed under index 0.
  #
  # @param hash [Hash{String => String}] key/value pairs as produced by #parse_string
  # @return [Hash] `{ 'ougis' => [Hash, ...] }`, ordered by number with unused
  #   numbers dropped
  def extract_ougis(hash)
    ougis = []
    hash.each do |key, value|
      match = /\Aougi(\d*)_(.*)/.match(key)
      next unless match

      # An empty number capture converts to 0 via to_i.
      (ougis[match[1].to_i] ||= {})[match[2]] = value
    end
    { 'ougis' => ougis.compact }
  end

  # Extracts the top-level `|key=value` lines of +string+.
  #
  # Only lines that start with '|' and are longer than two characters are
  # considered; each is split on its first '=' and both halves are stripped.
  # Lines without '=' are ignored. A repeated key keeps its last value.
  #
  # @param string [String] raw wikitext
  # @return [Hash{String => String}]
  def parse_string(string)
    string.split("\n").each_with_object({}) do |line, data|
      next unless line[0] == '|' && line.size > 2

      # Limit of 2 keeps any further '=' characters inside the value.
      key, value = line[1..].split('=', 2)
      next unless value

      data[key.strip] = value.strip
    end
  end

  private

  # Query parameters for an `action=parse` request for the wikitext of +page+.
  #
  # @param page [String] wiki page title
  # @return [Hash{Symbol => String}]
  def params(page)
    {
      action: 'parse',
      format: 'json',
      page: page,
      prop: 'wikitext'
    }
  end
end