POPFileの単純ベイズの実装

これを見ながら書いた。
たぶんどこか間違っているはず。

# Naive Bayes text classifier.
#
# A classifier owns a fixed set of named buckets. A bucket is trained by
# appending word lists to it (see Bucket#<<); #classify then returns the name
# of the bucket with the highest log-score for a new word list.
class Classifier
  # One category of the classifier together with its own word frequencies.
  class Bucket
    attr_reader :name
    attr_reader :words

    # @param name [Object] label that Classifier#classify returns for this bucket
    # @param classifier [#words, #word_count] owner whose corpus-wide counts
    #   are updated whenever this bucket is trained
    def initialize(name, classifier)
      @name = name
      @classifier = classifier
      @words = {} # word => number of occurrences seen in this bucket
    end

    # Trains the bucket with a list of words, updating both the bucket's own
    # counts and the classifier-wide counts.
    #
    # @param words [#each] word tokens; repeated words are counted each time
    # @return [Bucket] self, so calls can be chained
    def <<(words)
      words.each do |word|
        @classifier.words[word] = @classifier.words.fetch(word, 0) + 1
        @words[word] = @words.fetch(word, 0) + 1
      end
      self
    end

    # Prior probability of this bucket: its share of all trained words.
    #
    # @return [Float] value in 0.0..1.0; 0.0 when nothing has been trained yet
    def p_bi
      total = @classifier.word_count
      return 0.0 if total.zero?

      word_count.to_f / total
    end

    # Likelihood of seeing +word+ +count+ times in this bucket.
    #
    # A word this bucket has never seen gets a small floor value
    # (1 / (10 * total trained words)) instead of zero, so a single unknown
    # word cannot wipe out the whole score. The floor is Infinity while the
    # classifier is completely untrained.
    #
    # @param word [Object] the word to look up
    # @param count [Integer] number of occurrences; each is treated as independent
    # @return [Float]
    def probability(word, count = 1)
      single =
        if @words.key?(word)
          @words[word].to_f / word_count
        else
          1.0 / (@classifier.word_count * 10.0)
        end
      single**count
    end

    # @return [Integer] total number of word occurrences trained into this bucket
    def word_count
      @words.values.inject(0) { |sum, n| sum + n }
    end

    # @return [Boolean] true once the bucket has been trained with at least one word
    def valid?
      !@words.empty?
    end
  end

  attr_reader :words

  # @param buckets [Array<Object>] names of the buckets to create
  def initialize(*buckets)
    @words = {} # word => number of occurrences across all buckets
    @buckets = buckets.map { |name| Bucket.new(name, self) }
  end

  # Looks up a bucket by name so it can be trained: classifier["spam"] << words
  #
  # @param name [Object] bucket name as given to #initialize
  # @return [Bucket, nil] nil when no bucket has that name
  def [](name)
    @buckets.find { |bucket| bucket.name == name }
  end

  # Picks the most likely bucket for a list of words.
  #
  # @param words [#each_with_object] word tokens of the text to classify
  # @return [Object, nil] name of the best-scoring trained bucket, or nil when
  #   no bucket has been trained yet
  def classify(words)
    counts = words.each_with_object(Hash.new(0)) { |word, tally| tally[word] += 1 }
    best = @buckets.select(&:valid?).max_by { |bucket| score(bucket, counts) }
    best && best.name
  end

  # @return [Integer] total number of word occurrences trained into any bucket
  def word_count
    @words.values.inject(0) { |sum, n| sum + n }
  end

  private

  # Log-score of a bucket: log prior plus count-weighted log likelihoods.
  # Summing logs (rather than multiplying probabilities) avoids float underflow.
  def score(bucket, counts)
    counts.inject(Math.log(bucket.p_bi)) do |total, (word, count)|
      total + count * Math.log(bucket.probability(word))
    end
  end
end