鬼車: 回してみる

#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include "oniguruma.h"

static regex_t *reg_compile(const char *pattern) {
  regex_t* reg;
  OnigErrorInfo einfo;
  int r;

  r = onig_new(&reg, (UChar *) pattern, (UChar *) (pattern + strlen(pattern)),
    ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);

  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "%s\n", s);
    return NULL;
  }

  return reg;
}

/*
 * Search `str` repeatedly with `reg`, calling `handler` once for every match.
 *
 * Each search resumes at the end of the previous match, so reported matches
 * never overlap. `handler` receives the original string and the region
 * describing the current match; the region is reused between iterations.
 *
 * Returns 1 when the scan finished (with or without matches), or 0 when the
 * region could not be allocated or onig_search() reported an error other
 * than ONIG_MISMATCH; in that case the error message is written to stderr.
 */
static int reg_search_all(regex_t *reg, const char *str, void (*handler)(const char *src, OnigRegion *region)) {
  UChar *ustr, *start, *end;
  OnigRegion *region;
  int r, len;

  ustr = (UChar *) str;
  len = (int) strlen(str);
  end = ustr + len;
  start = ustr;

  region = onig_region_new();
  if (region == NULL) {
    /* Without a region the match offsets cannot be read back. */
    r = ONIGERR_MEMORY;
  } else {
    while ((r = onig_search(reg, ustr, end, start, end, region, ONIG_OPTION_NONE)) >= 0) {
      /* Read the offsets before the callback runs, in case it alters the region. */
      int mbeg = region->beg[0];
      int mend = region->end[0];

      handler(str, region);

      if (mend >= len) {
        break;
      }

      /*
       * A zero-length match would restart the search at the same position
       * and match there forever, so step past it. One byte is one character
       * for the single-byte ASCII encoding this file compiles patterns with;
       * a multibyte encoding would need to step a whole character instead.
       */
      start = ustr + mend + (mend == mbeg ? 1 : 0);
      onig_region_clear(region);
    }

    onig_region_free(region, 1);
  }

  /* ONIG_MISMATCH only means "no further match" and is not an error. */
  if (r < 0 && r != ONIG_MISMATCH) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "%s\n", s);
    return 0;
  }

  return 1;
}

/*
 * Match callback: print the matched text followed by the byte range of
 * every capture group in `region`.
 *
 * src    - the string that was searched; region offsets index into it.
 * region - match result; entry 0 is the whole match, the remaining entries
 *          are the capture groups.
 */
void handler(const char *src, OnigRegion *region) {
  int i;
  int match_len = region->end[0] - region->beg[0];

  /*
   * A precision-limited %s prints just the matched bytes straight from the
   * source string, so no temporary NUL-terminated copy (and no stack
   * allocation proportional to the match length) is needed.
   */
  printf("%.*s\n", match_len, src + region->beg[0]);

  for (i = 0; i < region->num_regs; i++) {
    printf("  %d: (%d-%d)\n", i, region->beg[i], region->end[i]);
  }
}

/*
 * Demo entry point: compile the pattern "o(o+)", report every match in a
 * fixed sample string through handler(), then release the regex and shut
 * the library down.
 *
 * Returns 0 on success, or -1 when the pattern fails to compile or the
 * search reports an error.
 */
int main(int argc, char* argv[]) {
  regex_t *reg;
  int status = 0;

  /* The command line is not used. */
  (void) argc;
  (void) argv;

  reg = reg_compile("o(o+)");
  if (reg == NULL) {
    return -1;
  }

  /* reg_search_all() returns 0 on a search error; surface it in the exit status. */
  if (!reg_search_all(reg, "ooxxoooxxooooxxoooo", handler)) {
    status = -1;
  }

  onig_free(reg);
  onig_end();
  return status;
}

出力はこんな感じ。


oo
  0: (0-2)
  1: (1-2)
ooo
  0: (4-7)
  1: (5-7)
oooo
  0: (9-13)
  1: (10-13)
oooo
  0: (15-19)
  1: (16-19)