Sparse HashのRubyバインディング: charを使う

charを使えば少しは早くなるかもと思って、少し修正。

#include "google/sparse_hash_map"
#include "google/dense_hash_map"

#include "sparsehash_internal.h"
#include <cstring>

namespace {

template <class T>
struct Sparsehash {
  typename T *m;

  static void rb_sparsehash_mark(Sparsehash<T> *p) {
    if (!p->m) {
      return;
    }

    T *m = p->m;

    for(T::iterator i = m->begin(); i != m->end(); i++) {
      rb_gc_mark(i->first);
      rb_gc_mark(i->second);
    }
  }

  static void rb_sparsehash_free(Sparsehash<T> *p) {
    if (p->m) {
      delete p->m;
    }

    delete p;
  }

  static VALUE rb_sparsehash_alloc(VALUE klass) {
    Sparsehash<T> *p;

    p = new Sparsehash<T>;
    p->m = NULL;
    
    return Data_Wrap_Struct(klass, &rb_sparsehash_mark, &rb_sparsehash_free, p);
  }

  static VALUE rb_sparsehash_get(VALUE self, VALUE key) {
    Sparsehash<T> *p;

    // added
    Check_Type(key, T_STRING);

    Data_Get_Struct(self, Sparsehash<T>, p);
    T &m = *(p->m);

    VALUE value = m[key];

    return(value ? value : Qnil);
  }

  static VALUE rb_sparsehash_set(VALUE self, VALUE key, VALUE value) {
    Sparsehash<T> *p;

    // added
    Check_Type(key, T_STRING);

    Data_Get_Struct(self, Sparsehash<T>, p);
    T &m = *(p->m);

    m[key] = value;

    return value;
  }

  static VALUE rb_sparsehash_each0(VALUE args) {
    return rb_yield(args);
  }

  static VALUE rb_sparsehash_each(VALUE self) {
    Sparsehash<T> *p;

    rb_need_block();

    Data_Get_Struct(self, Sparsehash<T>, p);
    T &m = *(p->m);
    int status = 0;

    for(T::iterator i = m.begin(); i != m.end(); i++) {
      VALUE args = rb_ary_new3(2, i->first, i->second);

      rb_protect(rb_sparsehash_each0, args, &status);

      if (status != 0) {
        break;
      }
    }

    if (status != 0) {
      rb_jump_tag(status);
    }

    return self;
  }


  static VALUE rb_sparsehash_erase(VALUE self, VALUE key) {
    Sparsehash<T> *p;

    // added
    Check_Type(key, T_STRING);

    Data_Get_Struct(self, Sparsehash<T>, p);
    T &m = *(p->m);

    VALUE value = m[key];
    m.erase(key);

    return(value ? value : Qnil);
  }

  static void init(VALUE &rb_mSparsehash, const char *name) {
    VALUE rb_cSparseHashMap = rb_define_class_under(rb_mSparsehash, name, rb_cObject);

    rb_define_alloc_func(rb_cSparseHashMap, &rb_sparsehash_alloc);
    rb_define_private_method(rb_cSparseHashMap, "initialize", __F(rb_sparsehash_initialize<T>), 0);
    rb_define_method(rb_cSparseHashMap, "[]", __F(&rb_sparsehash_get), 1);
    rb_define_method(rb_cSparseHashMap, "[]=", __F(&rb_sparsehash_set), 2);
    rb_define_method(rb_cSparseHashMap, "each", __F(&rb_sparsehash_each), 0);
    rb_define_method(rb_cSparseHashMap, "erase", __F(&rb_sparsehash_erase), 1);
    rb_define_method(rb_cSparseHashMap, "delete", __F(rb_sparsehash_erase), 1);
  }
};

/*
struct rb_sparsehash_equal_key {
  bool operator() (const VALUE &x, const VALUE &y) const {
    return (rb_funcall(x, rb_intern("=="), 1, y) == Qtrue);
  }
};
*/

struct rb_sparsehash_equal_key {
  bool operator() (const VALUE &x, const VALUE &y) const {
    const char *key_x = RSTRING_PTR(x);
    const char *key_y = RSTRING_PTR(y);
    return (::strcmp(key_x, key_y) == 0);
  }
};

/*
struct rb_sparsehash_hash {
  size_t operator() (const VALUE &x) const {
    VALUE hash = rb_funcall(x, rb_intern("hash"), 0);
    return NUM2INT(hash);
  }
};
*/

struct rb_sparsehash_hash {
  stdext::hash_compare<const char *> hash;

  size_t operator() (const VALUE &x) const {
    const char *key = RSTRING_PTR(x);
    return rb_sparsehash_hash::hash(key);
  }
};

typedef google::sparse_hash_map<VALUE, VALUE, rb_sparsehash_hash, rb_sparsehash_equal_key> SHMap;
typedef google::dense_hash_map<VALUE, VALUE, rb_sparsehash_hash, rb_sparsehash_equal_key> DHMap;

template <class T>
VALUE rb_sparsehash_initialize(VALUE self) {
  Sparsehash<T> *p;

  Data_Get_Struct(self, Sparsehash<T>, p);
  p->m = new T;

  return Qnil;
}

VALUE empty_string = rb_str_new("", 0);

template <>
VALUE rb_sparsehash_initialize<DHMap>(VALUE self) {
  Sparsehash<DHMap> *p;

  Data_Get_Struct(self, Sparsehash<DHMap>, p);
  p->m = new DHMap;
  p->m->set_empty_key(empty_string);

  return Qnil;
}

} // namespace

void Init_sparsehash() {
  VALUE rb_mSparsehash = rb_define_module("Sparsehash");
  rb_const_set(rb_mSparsehash, rb_intern("EMPTY_STRING"), empty_string);
  Sparsehash<SHMap>::init(rb_mSparsehash, "SparseHashMap");
  Sparsehash<DHMap>::init(rb_mSparsehash, "DenseHashMap");
}
rnum = 100 10000 1000000
Hash 0.297 sec. / 0.145 MB 0.187 sec. / 2.566 MB 13.032 sec. / 245.555 MB
SparseHashMap 0.156 sec. / 0.035 MB 2.297 sec. / 0.043 MB 計測不能
DenseHashMap 0.141 sec. / 0.066 MB 0.266 sec. / 0.133 MB 45.719 sec. / 115.617 MB

うーん、微妙。