Streamを読み込むタイプのパーサ用 Yahoo検索APIクラス

とりあえず動くものを書いてみた。
HTTPレスポンスの読み取り部分は net/http からいくつかのメソッドを借りている。

ソースコード

require 'socket'

# Minimal client for the Yahoo! web search API (HOST/PATH below) that hands the
# open response socket to the caller instead of buffering the body, so the
# body can be fed straight into a stream-oriented parser.
class YahooSearchAPI
  HOST = 'api.search.yahoo.co.jp'
  PATH = '/WebSearchService/V1/webSearch'

  # @return [Array(String, String, String), nil] HTTP version, status code and
  #   reason phrase of the most recent response; nil before the first query.
  attr_reader :status_line

  # @param appid [String] application ID, sent as the "appid" parameter
  # @param options [Hash] extra request parameters sent with every query;
  #   an Array value is sent as one "key=value" pair per element
  def initialize(appid, options = {})
    @params = Hash[options]
    @params['appid'] = appid
  end

  # Sends one search request and yields the response to the given block.
  #
  # @param query_string [String] text sent as the "query" parameter
  # @yieldparam sock [TCPSocket] socket positioned at the start of the body
  # @yieldparam header [String] raw response header lines
  # @return [false] when the status code is not "200" (the block is not called)
  # @return [Object] the block's value, or true when the block returns nil/false
  def query(query_string)
    params = @params.merge('query' => query_string)

    # The block form of TCPSocket.open closes the socket when the block exits.
    TCPSocket.open(HOST, 'http') do |sock|
      request(params, sock)
      @status_line = read_status_line(sock)
      break false unless @status_line[1] == '200'

      header = read_header(sock)
      yield(sock, header) || true
    end
  end

  private

  # Writes an HTTP/1.0 POST request carrying +params+ as a form-encoded body.
  def request(params, sock)
    body = encode_params(params)
    head = [
      "POST #{PATH} HTTP/1.0",
      'Content-Type: application/x-www-form-urlencoded',
      # Content-Length counts bytes, not characters.
      "Content-Length: #{body.bytesize}"
    ].join("\r\n")
    sock.write("#{head}\r\n\r\n#{body}")
  end

  # Builds the "k=v&k=v" form body; Array values expand to repeated keys.
  def encode_params(params)
    params.map { |key, value|
      name = urlencode(key.to_s)
      if value.kind_of?(Array)
        value.map { |item| "#{name}=#{urlencode(item.to_s)}" }.join('&')
      else
        "#{name}=#{urlencode(value.to_s)}"
      end
    }.join('&')
  end

  # Reads and parses the status line.
  #
  # @return [Array] [http_version (nil when absent), status_code, reason_phrase]
  # @raise [RuntimeError] when the line does not look like an HTTP status line
  def read_status_line(sock)
    status_line = sock.readline.chomp
    m = /\AHTTP(?:\/(\d+\.\d+))?\s+(\d\d\d)\s*(.*)\z/in.match(status_line)
    raise "wrong status line: #{status_line.dump}" unless m

    m.captures
  end

  # Collects raw header lines up to (not including) the blank separator line.
  # Also stops at EOF so a truncated response cannot raise on a nil line.
  def read_header(sock)
    lines = []
    while (line = sock.gets)
      break if line.chomp.empty?

      lines << line
    end
    lines.join
  end

  # Percent-encodes every byte outside [A-Za-z0-9_.-].
  # Works on the binary view of the string so that multi-byte characters are
  # escaped byte by byte (e.g. U+3042 becomes "%e3%81%82").
  def urlencode(str)
    str.b.gsub(/[^a-zA-Z0-9_.\-]/) { |byte| format('%%%02x', byte.ord) }
  end
end

使い方

こんな感じで。

require "rexml/document"
require "rexml/streamlistener"

# Stream listener that prints every text node it is given to standard output.
class Listener
  include REXML::StreamListener

  # Callback for character data; writes it to $stdout followed by a newline.
  def text(content)
    $stdout.puts(content)
  end
end

# Run one search: print the raw response header, then stream the response
# body through REXML so each text node is handed to Listener#text.
searcher = YahooSearchAPI.new('アプリケーションID')
searcher.query('RubyでXML') do |stream, response_header|
  puts response_header
  REXML::Document.parse_stream(stream, Listener.new)
end