Ragelのマニュアルとにらめっこしていてもさっぱりなので、Mongrelのソースを読んでみる、と。
……おー、わかりやすい。
RFCチックなHTTP/1.1のリクエストの定義は簡潔でなかなか気持ちいい。
Zed ShawがCOOLだと言うだけのことはある。
http11_parser_common.rl
%%{
# Names this Ragel machine. The grammar below only references actions by name;
# none of them (mark, request_method, request_uri, fragment, http_version,
# request_path, start_query, query_string, start_field, write_field,
# start_value, write_value, done) are defined in this block.
# NOTE(review): presumably they are supplied by the parser spec that includes
# this machine -- confirm against the including .rl file.
machine http_parser_common;
#### HTTP PROTOCOL GRAMMAR
# Ragel operator reminders for reading the rules below:
#   >act  runs when the sub-machine is entered
#   %act  runs when the sub-machine is left
#   @act  runs on the transition into a final state
#   --    strong difference (left side minus anything containing the right side)
#   :>    entry-guarded concatenation (left side ends as soon as right side starts)
# line endings
CRLF = "\r\n";
# character types
# Control characters: the builtin cntrl class plus character code 127.
CTL = (cntrl | 127);
safe = ("$" | "-" | "_" | ".");
extra = ("!" | "*" | "'" | "(" | ")" | ",");
reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
# Whatever remains of `any` once every class defined above has been removed.
national = any -- (alpha | digit | reserved | extra | safe | unsafe);
unreserved = (alpha | digit | safe | extra | national);
# Percent-escape: "%" followed by exactly two hexadecimal digits.
escape = ("%" xdigit xdigit);
uchar = (unreserved | escape);
# Path character: uchar plus a subset of the reserved characters
# (";", "/" and "?" are left out).
pchar = (uchar | ":" | "@" | "&" | "=" | "+");
# Separator characters that may not appear inside a token.
tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# elements
# One ascii character that is neither a control character nor a tspecial.
token = (ascii -- (CTL | tspecials));
# URI schemes and absolute paths
# NOTE(review): the "*" repetition accepts an empty scheme and a non-letter
# first character; RFC 3986 requires a leading ALPHA -- confirm this laxness
# is intentional.
scheme = ( alpha | digit | "+" | "-" | "." )* ;
# scheme, ":", then any run of uchar/reserved characters.
absolute_uri = (scheme ":" (uchar | reserved )*);
# One or more pchar, followed by zero or more "/"-prefixed segments.
path = ( pchar+ ( "/" pchar* )* ) ;
# query_string runs when the query sub-machine is left.
query = ( uchar | reserved )* %query_string ;
param = ( pchar | "/" )* ;
params = ( param ( ";" param )* ) ;
# Optional path (request_path runs on leaving it), optional ";"-introduced
# params, then an optional "?" query. start_query is attached to the "?"
# itself, so it runs on leaving the "?", i.e. where the query text begins.
rel_path = ( path? %request_path (";" params)? ) ("?" %start_query query)?;
# One or more leading slashes followed by a rel_path.
absolute_path = ( "/"+ rel_path );
# mark runs on entry and request_uri on exit, bracketing the whole URI.
Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
Fragment = ( uchar | reserved )* >mark %fragment;
# Between 1 and 20 characters drawn from upper-case letters, digits and `safe`.
Method = ( upper | digit | safe ){1,20} >mark %request_method;
http_number = ( digit+ "." digit+ ) ;
HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
# Method SP Request_URI [ "#" Fragment ] SP HTTP_Version CRLF
Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " HTTP_Version CRLF ) ;
# Header name: one or more token characters, none of which is ":".
field_name = ( token -- ":" )+ >start_field %write_field;
# Header value accepts any characters; where it ends is decided by the
# ":>" guard in message_header.
field_value = any* >start_value %write_value;
# name ":" optional spaces value; ":>" makes the CRLF terminate field_value
# even though field_value itself would match "\r\n".
message_header = field_name ":" " "* field_value :> CRLF;
# Request line, zero or more headers, then an empty line. done runs on the
# transition that completes that final CRLF.
Request = Request_Line ( message_header )* ( CRLF @done );
# Instantiate the machine with Request as its top-level rule.
main := Request;
}%%
基本的な文字列の扱いとしては、状態に入るところでそのオフセットを記録しておいて
#define MARK(M,FPC) (parser->M = (FPC) - buffer) // Cのマクロ
action mark {MARK(mark, fpc); } // アクションの定義
~~~アクションが実行されたときのポインタ
Method = ( upper | digit | safe ){1,20} >mark %request_method; // 状態の定義
~~~~~状態に入るときにアクションmarkを実行
状態から去るときに、その状態の開始ポインタと長さを使って、パースする文字列から部分文字列を切り出してやると。
// Cのマクロ。bufferはパースする文字列
#define MARK(M,FPC) (parser->M = (FPC) - buffer)
#define PTR_TO(F) (buffer + parser->F)
// アクションの定義
action request_method {
/* Hand the text matched since the `mark` offset to the user-supplied
   request_method callback, if one is installed.
   PTR_TO(mark) resolves to buffer + parser->mark, i.e. where the match began.
   NOTE(review): LEN is not defined in this excerpt -- presumably it yields
   the distance from the mark to fpc; confirm against the full source. */
if(parser->request_method != NULL)
parser->request_method(parser->data, PTR_TO(mark), LEN(mark, fpc));
}
Method = ( upper | digit | safe ){1,20} >mark %request_method;
~~~~~~~~~~~~~~~状態を抜けたときにアクションrequest_methodを実行
なんかホストがRubyでも同じインターフェースのような気がする…。
しょうがないのかなー。