Ethereal-dev: [Ethereal-dev] HTTP chunked encoding patch
Note: This archive is from the project's previous web site, ethereal.com. This list is no longer active.
From: Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
Date: Sun, 25 Apr 2004 21:39:51 -0700
Howdy List! Here's a patch to add support for decoding chunked transfer-encoded HTTP entities. I also updated my email address. I had originally started by using composite tvbuffs, but backed out when I found out that they're not really ready for prime time. I left the code in, commented out, but I can resubmit without it (or with any other changes). -- GPG public key: http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x9D5B8762
Index: AUTHORS
===================================================================
RCS file: /cvsroot/ethereal/AUTHORS,v
retrieving revision 1.999
diff -u -r1.999 AUTHORS
--- AUTHORS 22 Apr 2004 08:22:07 -0000 1.999
+++ AUTHORS 26 Apr 2004 04:01:20 -0000
@@ -285,7 +285,7 @@
updates to BGP (Border Gateway Protocol) support
}
-Jerry Talkington <jerryt[AT]netapp.com> {
+Jerry Talkington <jtalkington[AT]users.sourceforge.net> {
updates to HTTP support
Filter selection/editing GUI improvements
WCCP 1.0 support
Index: packet-http.c
===================================================================
RCS file: /cvsroot/ethereal/packet-http.c,v
retrieving revision 1.96
diff -u -r1.96 packet-http.c
--- packet-http.c 12 Apr 2004 22:14:37 -0000 1.96
+++ packet-http.c 26 Apr 2004 04:01:21 -0000
@@ -73,6 +73,9 @@
static gint ett_http = -1;
static gint ett_http_ntlmssp = -1;
static gint ett_http_request = -1;
+static gint ett_http_chunked_response = -1;
+static gint ett_http_chunk_data = -1;
+static gint ett_http_encoded_entity = -1;
static dissector_handle_t data_handle;
static dissector_handle_t http_handle;
@@ -125,6 +128,8 @@
static int is_http_request_or_reply(const gchar *data, int linelen, http_type_t *type,
RequestDissector *req_dissector, int *req_strlen);
+static int chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo,
+ proto_tree *tree, int offset);
static void process_header(tvbuff_t *tvb, int offset, int next_offset,
const guchar *line, int linelen, int colon_offset, packet_info *pinfo,
proto_tree *tree, headers_t *eh_ptr);
@@ -589,6 +594,7 @@
*/
tvbuff_t *next_tvb;
void *save_private_data = NULL;
+ gint chunks_decoded = 0;
/*
* Create a tvbuff for the payload.
@@ -608,6 +614,29 @@
reported_datalen);
/*
+ * Handle transfer encodings other than "identity".
+ */
+ if (headers.transfer_encoding != NULL &&
+ strcasecmp(headers.transfer_encoding, "identity") != 0) {
+ if (strcasecmp(headers.transfer_encoding, "chunked")
+ == 0) {
+
+ chunks_decoded = chunked_encoding_dissector(
+ &next_tvb, pinfo, tree, 0);
+
+ } else {
+ /*
+ * We currently can't handle, for example, "gzip",
+ * "compress", or "deflate"; just handle them
+ * as data for now.
+ */
+ call_dissector(data_handle, next_tvb, pinfo,
+ http_tree);
+ goto body_dissected;
+ }
+ }
+
+ /*
* Handle content encodings other than "identity" (which
* shouldn't appear in a Content-Encoding header, but
* we handle it in any case).
@@ -619,23 +648,28 @@
* "compress", or "deflate"; just handle them as
* data for now.
*/
- call_dissector(data_handle, next_tvb, pinfo,
- http_tree);
- goto body_dissected;
- }
+ if (chunks_decoded != 0) {
+ /*
+ * There is a chunked response tree, so put
+ * the entity body below it.
+ */
+ proto_item *e_ti = NULL;
+ proto_tree *e_tree = NULL;
- /*
- * Handle transfer encodings other than "identity".
- */
- if (headers.transfer_encoding != NULL &&
- strcasecmp(headers.transfer_encoding, "identity") != 0) {
- /*
- * We currently can't handle, for example, "chunked",
- * "gzip", "compress", or "deflate"; just handle them
- * as data for now.
- */
- call_dissector(data_handle, next_tvb, pinfo,
- http_tree);
+ e_ti = proto_tree_add_text(tree, next_tvb,
+ 0, tvb_length(next_tvb),
+ "Encoded entity-body (%s)",
+ headers.content_encoding);
+
+ e_tree = proto_item_add_subtree(e_ti,
+ ett_http_encoded_entity);
+
+ call_dissector(data_handle, next_tvb, pinfo,
+ e_tree);
+ } else {
+ call_dissector(data_handle, next_tvb, pinfo,
+ http_tree);
+ }
goto body_dissected;
}
@@ -751,6 +785,182 @@
}
/*
+ * Dissect the http data chunks and add them to the tree.
+ */
+static int
+chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo,
+ proto_tree *tree, int offset)
+{
+ guint8 *chunk_string = NULL;
+ gint chunk_size = 0;
+ gint chunk_offset = 0;
+ gint datalen = 0;
+ gint linelen = 0;
+ gint chunks_decoded = 0;
+ tvbuff_t *tvb = NULL;
+ tvbuff_t *new_tvb = NULL;
+ gint chunked_data_size = 0;
+ proto_tree *subtree = NULL;
+ proto_item *ti = NULL;
+
+ if (tvb_ptr == NULL || *tvb_ptr == NULL) {
+ return 0;
+ }
+
+ tvb = *tvb_ptr;
+
+ datalen = tvb_reported_length_remaining(tvb, offset);
+
+ if (tree) {
+ ti = proto_tree_add_text(tree, tvb, offset, datalen,
+ "HTTP chunked response");
+ subtree = proto_item_add_subtree(ti, ett_http_chunked_response);
+ }
+
+
+ while (datalen != 0) {
+ proto_item *chunk_ti = NULL;
+ proto_tree *chunk_subtree = NULL;
+ tvbuff_t *data_tvb = NULL;
+ gchar *c = NULL;
+
+ linelen = tvb_find_line_end(tvb, offset, -1, &chunk_offset, TRUE);
+
+ if (linelen <= 0) {
+ /* Can't get the chunk size line */
+ return 0;
+ }
+
+ chunk_string = tvb_get_string(tvb, offset, linelen);
+
+ if (chunk_string == NULL) {
+ /* Can't get the chunk size line */
+ return 0;
+ }
+
+ c = chunk_string;
+
+ /*
+ * We don't care about the extensions.
+ */
+ if ((c = strchr(c, ';'))) {
+ *c = '\0';
+ }
+
+ if (sscanf(chunk_string, "%x", &chunk_size) != 1) {
+ g_free(chunk_string);
+ return 0;
+ }
+
+ g_free(chunk_string);
+
+
+ if (chunk_size > datalen) {
+ /*
+ * The chunk size is more than what's in the tvbuff,
+ * so either the user hasn't enabled decoding, or all
+ * of the segments weren't captured.
+ */
+ chunk_size = datalen;
+ }/* else if (new_tvb == NULL) {
+ new_tvb = tvb_new_composite();
+ }
+
+
+
+ if (new_tvb != NULL && chunk_size != 0) {
+ tvbuff_t *chunk_tvb = NULL;
+
+ chunk_tvb = tvb_new_subset(tvb, chunk_offset,
+ chunk_size, datalen);
+
+ tvb_composite_append(new_tvb, chunk_tvb);
+
+ }
+ */
+
+ chunked_data_size += chunk_size;
+
+ if (chunk_size != 0) {
+ guint8 *raw_data = g_malloc(chunked_data_size);
+ gint raw_len = 0;
+
+ if (new_tvb != NULL) {
+ raw_len = tvb_length_remaining(new_tvb, 0);
+ tvb_memcpy(new_tvb, raw_data, 0, raw_len);
+
+ tvb_free(new_tvb);
+ }
+
+ tvb_memcpy(tvb, (guint8 *)(raw_data + raw_len),
+ chunk_offset, chunk_size);
+
+ new_tvb = tvb_new_real_data(raw_data,
+ chunked_data_size, chunked_data_size);
+
+ }
+
+
+
+ if (subtree) {
+ if (chunk_size == 0) {
+ chunk_ti = proto_tree_add_text(subtree, tvb,
+ offset,
+ chunk_offset - offset + chunk_size + 2,
+ "Data chunk (last chunk)");
+ } else {
+ chunk_ti = proto_tree_add_text(subtree, tvb,
+ offset,
+ chunk_offset - offset + chunk_size + 2,
+ "Data chunk (%u octets)", chunk_size);
+ }
+
+ chunk_subtree = proto_item_add_subtree(chunk_ti,
+ ett_http_chunk_data);
+
+ proto_tree_add_text(chunk_subtree, tvb, offset,
+ chunk_offset - offset, "Chunk size: %u octets",
+ chunk_size);
+
+ data_tvb = tvb_new_subset(tvb, chunk_offset, chunk_size,
+ datalen);
+
+
+ if (chunk_size > 0) {
+ call_dissector(data_handle, data_tvb, pinfo,
+ chunk_subtree);
+ }
+
+ proto_tree_add_text(chunk_subtree, tvb, chunk_offset +
+ chunk_size, 2, "Chunk boundry");
+ }
+
+ chunks_decoded++;
+ offset = chunk_offset + chunk_size + 2;
+ datalen = tvb_reported_length_remaining(tvb, offset);
+ }
+
+ if (new_tvb != NULL) {
+
+ /*
+ tvb_composite_finalize(new_tvb);
+ //tvb_set_reported_length(new_tvb, chunked_data_size);
+ */
+
+ tvb_set_child_real_data_tvbuff(tvb, new_tvb);
+ add_new_data_source(pinfo, new_tvb, "De-chunked entity body");
+
+ tvb_free(*tvb_ptr);
+ *tvb_ptr = new_tvb;
+
+ }
+
+ return chunks_decoded;
+
+}
+
+
+/*
* XXX - this won't handle HTTP 0.9 replies, but they're all data
* anyway.
*/
@@ -1271,6 +1481,9 @@
&ett_http,
&ett_http_ntlmssp,
&ett_http_request,
+ &ett_http_chunked_response,
+ &ett_http_chunk_data,
+ &ett_http_encoded_entity,
};
module_t *http_module;
Index: packet-wccp.c
===================================================================
RCS file: /cvsroot/ethereal/packet-wccp.c,v
retrieving revision 1.33
diff -u -r1.33 packet-wccp.c
--- packet-wccp.c 28 Aug 2002 21:00:37 -0000 1.33
+++ packet-wccp.c 26 Apr 2004 04:01:22 -0000
@@ -1,6 +1,6 @@
/* packet-wccp.c
* Routines for Web Cache Coordination Protocol dissection
- * Jerry Talkington <jerryt@xxxxxxxxxx>
+ * Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
*
* $Id: packet-wccp.c,v 1.33 2002/08/28 21:00:37 jmayer Exp $
*
Index: req_resp_hdrs.c
===================================================================
RCS file: /cvsroot/ethereal/req_resp_hdrs.c,v
retrieving revision 1.3
diff -u -r1.3 req_resp_hdrs.c
--- req_resp_hdrs.c 29 Dec 2003 22:33:18 -0000 1.3
+++ req_resp_hdrs.c 26 Apr 2004 04:01:22 -0000
@@ -30,6 +30,7 @@
#include <glib.h>
#include <epan/packet.h>
#include <epan/strutil.h>
+#include <string.h>
#include "req_resp_hdrs.h"
@@ -47,6 +48,7 @@
int linelen;
long int content_length;
gboolean content_length_found = FALSE;
+ gboolean chunked_encoding = FALSE;
/*
* Do header desegmentation if we've been told to.
@@ -131,8 +133,8 @@
}
/*
- * Is this a Content-Length header?
- * If not, it either means that we are in
+ * Is this a Content-Length or Transfer-Encoding
+ * header? If not, it either means that we are in
* a different header line, or that we are
* at the end of the headers, or that there
* isn't enough data; the two latter cases
@@ -151,6 +153,44 @@
"%li", &content_length)
== 1)
content_length_found = TRUE;
+ } else if (tvb_strncaseeql(tvb,
+ next_offset_sav,
+ "Transfer-Encoding:", 18) == 0) {
+ gchar *chunk_type = tvb_get_string(tvb,
+ next_offset_sav + 18, linelen - 18);
+ /*
+ * Find out if this Transfer-Encoding is
+ * chunked. It should be, since there
+ * really aren't any other types, but
+ * RFC 2616 allows for them.
+ */
+
+ if (chunk_type != NULL) {
+ gchar *c = chunk_type;
+ gint len = strlen(chunk_type);
+
+
+ /* start after any white-space */
+ while (c != NULL && c <
+ chunk_type + len &&
+ (*c == ' ' ||
+ *c == 0x09)) {
+ c++;
+ }
+
+ if (c <= chunk_type + len ) {
+ if (strncasecmp(c, "chunked", 7)
+ == 0) {
+ /*
+ * Don't bother looking for extensions;
+ * since we don't understand them,
+ * they should be ignored.
+ */
+ chunked_encoding = TRUE;
+ }
+ }
+ g_free(chunk_type);
+ }
}
}
}
@@ -158,30 +198,139 @@
/*
* The above loop ends when we reached the end of the headers, so
- * there should be content_length byte after the 4 terminating bytes
+ * there should be content_length bytes after the 4 terminating bytes
* and next_offset points to after the end of the headers.
*/
- if (desegment_body && content_length_found) {
- /* next_offset has been set because content-length was found */
- if (!tvb_bytes_exist(tvb, next_offset, content_length)) {
- length_remaining = tvb_length_remaining(tvb,
- next_offset);
- reported_length_remaining =
- tvb_reported_length_remaining(tvb, next_offset);
- if (length_remaining < reported_length_remaining) {
+ if (desegment_body) {
+ if (content_length_found) {
+ /* next_offset has been set to the end of the headers */
+ if (!tvb_bytes_exist(tvb, next_offset, content_length)) {
+ length_remaining = tvb_length_remaining(tvb,
+ next_offset);
+ reported_length_remaining =
+ tvb_reported_length_remaining(tvb, next_offset);
+ if (length_remaining < reported_length_remaining) {
+ /*
+ * It's a waste of time asking for more
+ * data, because that data wasn't captured.
+ */
+ return TRUE;
+ }
+ if (length_remaining == -1)
+ length_remaining = 0;
+ pinfo->desegment_offset = offset;
+ pinfo->desegment_len =
+ content_length - length_remaining;
+ return FALSE;
+ }
+ } else if (chunked_encoding) {
+ /*
+ * This data is chunked, so we need to keep pulling
+ * data until we reach the end of the stream, or a
+ * zero sized chunk.
+ *
+ * XXX
+ * This doesn't bother with trailing headers; I don't
+ * think they are really used, and we'd have to use
+ * is_http_request_or_reply() to determine if it was
+ * a trailing header, or the start of a new response.
+ */
+ gboolean done_chunking = FALSE;
+
+ while (!done_chunking) {
+ gint chunk_size = 0;
+ gint chunk_offset = 0;
+ gchar *chunk_string = NULL;
+ gchar *c = NULL;
+
+ length_remaining = tvb_length_remaining(tvb,
+ next_offset);
+ reported_length_remaining =
+ tvb_reported_length_remaining(tvb,
+ next_offset);
+
+ if (reported_length_remaining < 1) {
+ pinfo->desegment_offset = offset;
+ pinfo->desegment_len = 1;
+ return FALSE;
+ }
+
+ linelen = tvb_find_line_end(tvb, next_offset,
+ -1, &chunk_offset, TRUE);
+
+ if (linelen == -1 &&
+ length_remaining >=
+ reported_length_remaining) {
+ pinfo->desegment_offset = offset;
+ pinfo->desegment_len = 2;
+ return FALSE;
+ }
+
+ /* We have a line with the chunk size in it.*/
+ chunk_string = tvb_get_string(tvb, next_offset,
+ linelen);
+ c = chunk_string;
+
/*
- * It's a waste of time asking for more
- * data, because that data wasn't captured.
+ * We don't care about the extensions.
*/
- return TRUE;
+ if ((c = strchr(c, ';'))) {
+ *c = '\0';
+ }
+
+ if ((sscanf(chunk_string, "%x",
+ &chunk_size) < 0) || chunk_size < 0) {
+ /* We couldn't get the chunk size,
+ * so stop trying.
+ */
+ return TRUE;
+ }
+
+ if (chunk_size == 0) {
+ /*
+ * This is the last chunk. Let's pull in the
+ * trailing CRLF.
+ */
+ linelen = tvb_find_line_end(tvb,
+ chunk_offset, -1, &chunk_offset, TRUE);
+
+ if (linelen == -1 &&
+ length_remaining >=
+ reported_length_remaining) {
+ pinfo->desegment_offset = offset;
+ pinfo->desegment_len = 1;
+ return FALSE;
+ }
+
+ pinfo->desegment_offset = chunk_offset;
+ pinfo->desegment_len = 0;
+ done_chunking = TRUE;
+ } else {
+ /*
+ * Skip to the next chunk if we
+ * already have it
+ */
+ if (reported_length_remaining >
+ chunk_size) {
+
+ next_offset = chunk_offset
+ + chunk_size + 2;
+ } else {
+ /*
+ * Fetch this chunk, plus the
+ * trailing CRLF.
+ */
+ pinfo->desegment_offset = offset;
+ pinfo->desegment_len =
+ chunk_size + 1 -
+ reported_length_remaining;
+ return FALSE;
+ }
+ }
+
}
- if (length_remaining == -1)
- length_remaining = 0;
- pinfo->desegment_offset = offset;
- pinfo->desegment_len =
- content_length - length_remaining;
- return FALSE;
}
+
}
/*
Index: doc/ethereal.pod
===================================================================
RCS file: /cvsroot/ethereal/doc/ethereal.pod,v
retrieving revision 1.109
diff -u -r1.109 ethereal.pod
--- doc/ethereal.pod 23 Apr 2004 19:53:36 -0000 1.109
+++ doc/ethereal.pod 26 Apr 2004 04:01:23 -0000
@@ -1893,7 +1893,7 @@
Warren Young <tangent[AT]mail.com>
Heikki Vatiainen <hessu[AT]cs.tut.fi>
Greg Hankins <gregh[AT]twoguys.org>
- Jerry Talkington <jerryt[AT]netapp.com>
+ Jerry Talkington <jtalkington[AT]users.sourceforge.net>
Dave Chapeskie <dchapes[AT]ddm.on.ca>
James Coe <jammer[AT]cin.net>
Bert Driehuis <driehuis[AT]playbeing.org>
- Prev by Date: Re: [Ethereal-dev] Possible IEEE 802.11 dissector bug
- Next by Date: [Ethereal-dev] FW: [Ethereal-users] Ethereal v0.10.3 dissection of SNMP Integer and Counter64
- Previous by thread: Re: [Ethereal-dev] Coloring rules dialog - check-in please
- Next by thread: RE: [Ethereal-dev] HTTP chunked encoding patch
- Index(es):





