null+****@clear*****
null+****@clear*****
Sat Jan 21 12:12:05 JST 2012
SHIMADA Koji	2012-01-21 12:12:05 +0900 (Sat, 21 Jan 2012)

  New Revision: e26c69d5847563e549f28e1f1c19e4a3d92abe03

  Merged c742d06: Merge pull request #21 from logaling/import-gene95

  Log:
    Refactor Gene95#convert

  Modified files:
    lib/logaling/external_glossaries/gene95.rb

  Modified: lib/logaling/external_glossaries/gene95.rb (+11 -8)
===================================================================
--- lib/logaling/external_glossaries/gene95.rb 2012-01-21 12:11:23 +0900 (b3dea7c)
+++ lib/logaling/external_glossaries/gene95.rb 2012-01-21 12:12:05 +0900 (2bcd16d)
@@ -25,15 +25,18 @@ module Logaling
     def convert
       CSV.generate do |csv|
         puts "downloading gene95 dictionary..."
-        Zlib::GzipReader.open(open('http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz')) do |gz|
-          contents = false
+        url = 'http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz'
+        Zlib::GzipReader.open(open(url)) do |gz|
           puts "importing gene95 dictionary..."
-          gz.readlines.map{|l| l.encode("UTF-8", "CP932", undef: :replace, replace: '').strip }.each_slice(2) do |source, target|
-            if contents
-              csv << [ source.split(" ").map(&:strip)[0], target ]
-            else
-              contents = true
-            end
+
+          2.times { gz.gets } # skip header
+
+          preprocessed_lines = gz.readlines.map do |line|
+            line.encode("UTF-8", "CP932", undef: :replace, replace: '').chomp
+          end
+
+          preprocessed_lines.each_slice(2) do |source, target|
+            csv << [source.sub(/( .*)/, ''), target]
           end
         end
       end