kcar RubyGem user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: kcar-public@bogomips.org
Subject: [PATCH 02/11] begin implementing request parsing
Date: Sat,  1 Dec 2018 13:31:16 +0000	[thread overview]
Message-ID: <20181201133125.5524-3-e@80x24.org> (raw)
In-Reply-To: <20181201133125.5524-1-e@80x24.org>

Not wired up yet, but for now everything compiles
and the existing tests still run.
---
 ext/kcar/kcar.rl             | 107 ++++++++++++++++++++++++++++++++++-
 ext/kcar/kcar_http_common.rl |  36 +++++++++++-
 2 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 79f65db..033e6ea 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -15,6 +15,10 @@
 
 static VALUE eParserError;
 static ID id_uminus, id_sq, id_sq_set;
+static VALUE g_rack_url_scheme,
+  g_HOST, g_PATH_INFO, g_QUERY_STRING,
+  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI;
+static VALUE e413, e414;
 
 /** Defines common length and error messages for input length validation. */
 #define DEF_MAX_LENGTH(N, length) \
@@ -31,10 +35,20 @@ static ID id_uminus, id_sq, id_sq_set;
     rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
 } while (0)
 
+#define VALIDATE_MAX_URI_LENGTH(len, N) do { \
+  if (len > MAX_##N##_LENGTH) \
+    rb_raise(e414, MAX_##N##_LENGTH_ERR); \
+} while (0)
+
 /* Defines the maximum allowed lengths for various input elements.*/
 DEF_MAX_LENGTH(FIELD_NAME, 256);
 DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
 DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
+DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
+DEF_MAX_LENGTH(FRAGMENT, 1024); /* just in case (stolen from Mongrel) */
+DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
+DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
+
 
 #define UH_FL_CHUNKED  0x1
 #define UH_FL_HASBODY  0x2
@@ -90,6 +104,13 @@ static unsigned int ulong2uint(unsigned long n)
 #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
 #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
 
+/* Downcases a single ASCII character.  Locale-agnostic. */
+static void downcase_char(char *c)
+{
+  if (*c >= 'A' && *c <= 'Z')
+    *c |= 0x20;
+}
+
 static int is_lws(char c)
 {
   return (c == ' ' || c == '\t');
@@ -153,7 +174,54 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
 }
 
 static void
-http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
+request_method(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len));
+}
+
+static void
+url_scheme(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len));
+}
+
+static void
+request_host(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len));
+}
+
+static void
+request_uri(VALUE env, const char *ptr, size_t len)
+{
+  VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
+  rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len));
+}
+
+static void
+query_string(VALUE env, const char *ptr, size_t len)
+{
+  VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
+  rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
+}
+
+static void
+request_path(VALUE env, const char *ptr, size_t len)
+{
+  VALUE val;
+
+  VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
+  val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len));
+
+  /* rack says PATH_INFO must start with "/" or be empty */
+  if (CONST_MEM_EQ("*", ptr, len))
+    val = rb_str_new(NULL, 0);
+
+  rb_hash_aset(env, g_PATH_INFO, val);
+}
+
+static void
+http_version(struct http_parser *hp, const char *ptr, size_t len)
 {
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
@@ -328,12 +396,24 @@ static void write_value(VALUE hdr, struct http_parser *hp,
 
   action mark {MARK(mark, fpc); }
 
+  action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
+  action downcase_char { downcase_char(deconst(fpc)); }
+  action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+
+  action start_query { MARK(start.query, fpc); }
+  action query_string {
+    query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc));
+  }
+  action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action start_field { MARK(start.field, fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
   action write_value { write_value(hdr, hp, buffer, fpc); }
   action write_cont_value { write_cont_value(hp, buffer, fpc); }
-  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); }
   action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action add_to_chunk_size {
@@ -720,6 +800,7 @@ static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
 
 void Init_kcar_ext(void)
 {
+  static VALUE globals;
   VALUE mKcar = rb_define_module("Kcar");
   VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
 
@@ -729,6 +810,10 @@ void Init_kcar_ext(void)
    * This is raised if there are parsing errors.
    */
   eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
+  e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
+                               eParserError);
+  e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
+                               eParserError);
 
   rb_define_alloc_func(cParser, kcar_alloc);
   rb_define_method(cParser, "initialize", initialize, 0);
@@ -759,4 +844,22 @@ void Init_kcar_ext(void)
   id_sq = rb_intern("[]");
   id_sq_set = rb_intern("[]=");
   id_uminus = rb_intern("-@");
+
+  /* TODO: gperf to make a perfect hash of common strings */
+#define C(ary, var, cstr) do { \
+  var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
+  rb_ary_push((ary), (var)); \
+} while (0);
+
+  globals = rb_ary_new();
+  rb_global_variable(&globals);
+  C(globals, g_HOST, "HOST");
+  C(globals, g_PATH_INFO, "PATH_INFO");
+  C(globals, g_QUERY_STRING, "QUERY_STRING");
+  C(globals, g_REQUEST_METHOD, "REQUEST_METHOD");
+  C(globals, g_REQUEST_PATH, "REQUEST_PATH");
+  C(globals, g_REQUEST_URI, "REQUEST_URI");
+  C(globals, g_rack_url_scheme, "rack.url_scheme");
+  OBJ_FREEZE(globals);
+#undef C
 }
diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl
index cb89248..0c596bc 100644
--- a/ext/kcar/kcar_http_common.rl
+++ b/ext/kcar/kcar_http_common.rl
@@ -25,10 +25,38 @@
 
 # elements
   token = (ascii -- (CTL | tspecials));
+
+# URI schemes and absolute paths
+  scheme = ( "http"i ("s"i)? ) $downcase_char >mark %url_scheme;
+  hostname = ((alnum | "-" | "." | "_")+ | ("[" (":" | xdigit)+ "]"));
+  host_with_port = (hostname (":" digit*)?) >mark %host;
+  userinfo = ((unreserved | escape | ";" | ":" | "&" | "=" | "+")+ "@")*;
+
+  path = ( pchar+ ( "/" pchar* )* ) ;
+  query = ( uchar | reserved )* %query_string ;
+  param = ( pchar | "/" )* ;
+  params = ( param ( ";" param )* ) ;
+  rel_path = (path? (";" params)? %request_path) ("?" %start_query query)?;
+  absolute_path = ( "/"+ rel_path );
+  path_uri = absolute_path > mark %request_uri;
+  Absolute_URI = (scheme "://" userinfo host_with_port path_uri);
+
+  Request_URI = ((absolute_path | "*") >mark %request_uri) | Absolute_URI;
+
+  # do not waste cycles setting fragment in the request;
+  # valid clients do not send it, so we just silently ignore it.
+  Fragment = ( uchar | reserved )*;
+
+  Method = (token){1,20} >mark %request_method;
+  GetOnly = "GET" >mark %request_method;
+
+  http_number = ( digit+ "." digit+ ) ;
+  HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
+  Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " "
+                   HTTP_Version CRLF ) ;
+
   phrase = (any -- CRLF)+;
   Status_Phrase = (digit+ (" "+ phrase)?) >mark %status_phrase ;
-  http_number = (digit+ "." digit+) ;
-  HTTP_Version = ("HTTP/" http_number) >mark %http_version ;
   Status_Line = HTTP_Version " "+ Status_Phrase :> CRLF;
 
   field_name = ( token -- ":" )+ >start_field %write_field;
@@ -51,7 +79,9 @@
   Trailers := (message_header)* CRLF @end_trailers;
 
   FullResponse = Status_Line (message_header)* CRLF @header_done;
+  FullRequest = Request_Line (message_header)* CRLF @header_done;
+  SimpleRequest = GetOnly " " Request_URI ("#"Fragment){0,1} CRLF @header_done;
 
-main := FullResponse;
+main := FullResponse | FullRequest | SimpleRequest;
 
 }%%

  parent reply	other threads:[~2018-12-01 13:31 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
2018-12-01 13:31 ` Eric Wong [this message]
2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead flags + macros Eric Wong
2018-12-01 13:31 ` [PATCH 04/11] implement request parsing with tests Eric Wong
2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
2018-12-01 13:31 ` [PATCH 07/11] flesh out filter_body for request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://yhbt.net/kcar/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181201133125.5524-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=kcar-public@bogomips.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://yhbt.net/kcar.git/

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).