スペル修正プログラム Ruby版 (改)
初はてぶなのに転載...。
いや、スーパーpre記法を試したくて。
# Frequency-based spelling corrector.
#
# A word-frequency model is trained from a text corpus; a query word is then
# replaced by the most frequent known word found among, in order of priority:
# the word itself, words one edit away, and words two edits away.
class SpellChecker
  # Letters used to generate alteration and insertion edits by default.
  ALPHABET = ('a'..'z').to_a.freeze

  # @param filename [String, nil] path of the training corpus. When omitted the
  #   checker starts with an empty model, so every word is returned unchanged
  #   until a model is supplied to #correct.
  def initialize(filename = nil)
    @nwords = train(filename ? word(File.read(filename)) : [])
  end

  # Splits text into lowercase words made only of the letters a-z.
  #
  # @param text [String]
  # @return [Array<String>]
  def word(text)
    text.downcase.scan(/[a-z]+/)
  end

  # Builds the frequency model from a list of words.
  #
  # The hash default is 1, so unseen words read as count 1 and a word seen
  # k times is stored as k + 1.
  #
  # @param features [Array<String>]
  # @return [Hash{String => Integer}]
  def train(features)
    features.each_with_object(Hash.new(1)) { |f, model| model[f] += 1 }
  end

  # Generates every distinct string that is one edit away from +w+.
  #
  # @param w [String] the source word
  # @param a [Array<String>] strings substituted or inserted at each position
  # @return [Array<String>] deletions, transpositions, alterations and
  #   insertions, in that order, without duplicates
  def edits1(w, a = ALPHABET)
    n = w.size
    deletions      = (0...n).map { |i| w[0...i] + w[i + 1...n] }
    transpositions = (0...n - 1).map { |i| w[0...i] + w[i + 1] + w[i] + w[i + 2...n] }
    alterations    = a.flat_map { |c| (0...n).map { |i| w[0...i] + c + w[i + 1...n] } }
    insertions     = a.flat_map { |c| (0..n).map { |i| w[0...i] + c + w[i...n] } }
    (deletions + transpositions + alterations + insertions).uniq
  end

  # Known words that are two edits away from +word+.
  #
  # @param word [String]
  # @param known_words [Array<String>] vocabulary to intersect with
  # @return [Array<String>]
  def known_edits2(word, known_words = @nwords.keys)
    edits1(word).flat_map { |s1| edits1(s1) } & known_words
  end

  # Filters +words+ down to those present in the vocabulary.
  #
  # @param words [Array<String>]
  # @param known_words [Array<String>] vocabulary to intersect with
  # @return [Array<String>]
  def known(words, known_words = @nwords.keys)
    words & known_words
  end

  # Returns the most likely spelling of +word+.
  #
  # Candidate tiers are tried in priority order and the first non-empty tier
  # wins, so a word that is already known is never replaced by a more frequent
  # neighbour, and the two-edit expansion only runs when nothing closer exists.
  #
  # @param word [String]
  # @param nwords [Hash{String => Integer}] frequency model; note that it also
  #   replaces the checker's stored model for subsequent calls
  # @return [String] the best candidate, or +word+ itself when nothing is known
  def correct(word, nwords = @nwords)
    @nwords = nwords
    candidates = known([word])
    candidates = known(edits1(word)) if candidates.empty?
    candidates = known_edits2(word) if candidates.empty?
    candidates = [word] if candidates.empty?
    candidates.max_by { |c| @nwords[c] }
  end
end

if __FILE__ == $PROGRAM_NAME
  corpus = 'big.txt'
  if File.file?(corpus)
    checker = SpellChecker.new corpus
    if ARGV.empty?
      puts 'Ctrl+c to stop'
      while (line = $stdin.gets)
        # Strip the line terminator: left in place it would count as an extra
        # character and consume one of the available edits.
        query = line.strip
        puts checker.correct(query) unless query.empty?
      end
    else
      puts checker.correct(ARGV[0])
    end
  else
    warn "#{corpus}: training corpus not found"
  end
end