Savarese Software Research Corporation
parse_query.cc
Go to the documentation of this file.
00001 /* Copyright 2006-2011 Savarese Software Research Corporation
00002  *
00003  * Licensed under the Apache License, Version 2.0 (the "License");
00004  * you may not use this file except in compliance with the License.
00005  * You may obtain a copy of the License at
00006  *
00007  *     https://www.savarese.com/software/ApacheLicense-2.0
00008  *
00009  * Unless required by applicable law or agreed to in writing, software
00010  * distributed under the License is distributed on an "AS IS" BASIS,
00011  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012  * See the License for the specific language governing permissions and
00013  * limitations under the License.
00014  */
00015 
00016 #include <ssrc/wispers/fcgi/parse_query.h>
00017 #include <ssrc/wispers/fcgi/URI.h>
00018 #include <ssrc/wispers/utility/WebStrings.h>
00019 
00020 #include <boost/algorithm/string/trim.hpp>
00021 
00022 __BEGIN_NS_SSRC_WSPR_FCGI
00023 
// Extracts the value of a named attribute from a header value.
//
// Note, this is an approximation because a header value could contain:
//   Foo-Header: foobar; bar=" foo=value"; foo=value
// In practice, that will not be the case for well-behaved programs.
//
// The text searched for is " <attribute>=" (the attribute name must be
// preceded by a single space) and the search begins at offset pos.
//
// A value that starts with a double quote extends up to, but does not
// include, the next double quote.  If there is no closing quote the result
// is empty.  An unquoted value extends up to the next ';', space, CR or LF,
// or to the end of header_value, and may be empty (e.g. " foo=; bar=x").
//
// If end is non-null, *end is set to the offset at which the value stops:
// the closing quote or terminating delimiter, header_value.size() when the
// value runs to the end of the string, or the offset just after the opening
// quote when a quoted value is unterminated.  *end is set to 0 when the
// attribute is not found or nothing follows the '='.
//
// Returns the attribute value, or an empty string if there is none.
string parse_header_attribute(const string & header_value,
                              const string & attribute,
                              string::size_type pos,
                              string::size_type *end)
{
  string attr(" ");
  attr.append(attribute).append("=");

  // Both offsets remain 0 unless a value is located, which makes the
  // final substr() call yield an empty string.
  string::size_type value_begin = 0;
  string::size_type value_end = 0;

  const string::size_type found = header_value.find(attr, pos);

  if(found != string::npos && found + attr.size() < header_value.size()) {
    value_begin = found + attr.size();

    if(header_value[value_begin] == '"') {
      ++value_begin;
      value_end = header_value.find('"', value_begin);

      if(value_end == string::npos) {
        // Unterminated quoted value: treat it as empty.
        value_end = value_begin;
      }
    } else {
      // Start the search at value_begin itself, not one past it, so that
      // an empty unquoted value is recognized instead of swallowing its
      // own terminating delimiter.
      value_end = header_value.find_first_of("; \r\n", value_begin);

      if(value_end == string::npos) {
        value_end = header_value.size();
      }
    }
  }

  if(end) {
    *end = value_end;
  }

  return header_value.substr(value_begin, value_end - value_begin);
}
00068 
00069 void parse_query_string(const string & query_str, parameter_map & parameters) {
00070   using NS_SSRC_WSPR_UTILITY::unescape_url;
00071 
00072   if(query_str.empty())
00073     return;
00074 
00075   string::size_type and_pos = query_str.find('&');
00076   string::size_type eq_pos = query_str.find('=');
00077   string::size_type pos = 0;
00078   string name, value;
00079 
00080   while(eq_pos != string::npos) {
00081     while(and_pos < eq_pos || and_pos == string::npos) {
00082       if(and_pos == string::npos) {
00083         and_pos = query_str.size();
00084       } else {
00085         pos = and_pos + 1;
00086         and_pos = query_str.find('&', pos);
00087       }
00088     }
00089 
00090     const string::size_type key_size = eq_pos - pos;
00091     const string::size_type value_size = and_pos - eq_pos - 1;
00092 
00093     if(key_size > 0) {
00094       name.assign(query_str, pos, key_size);
00095       value.assign(query_str, eq_pos + 1, value_size);
00096       unescape_url(name);
00097       unescape_url(value);
00098 
00099       // We do not allow leading or trailing whitespace in input values.
00100       boost::algorithm::trim(value);
00101 
00102       parameters.insert(parameter_map::value_type(name, value));
00103     }
00104 
00105     eq_pos = query_str.find('=', and_pos);
00106   }
00107 }
00108 
00109 void parse_multipart_data(const string & data, const string & boundary,
00110                           parameter_map & parameters)
00111 {
00112   const string::size_type step = boundary.size();
00113   const string::size_type max_pos = data.size();
00114   string::size_type pos = data.find(boundary), start;
00115 
00116   if(pos != string::npos) {
00117     pos+=step;   
00118 
00119     while(pos < max_pos && data[pos] != '-') {
00120       // Since we don't handle file data, we don't bother looking
00121       // for specific header fields.  Instead, we assume only the
00122       // Content-Disposition header contains a name attribute, thereby
00123       // allowing us to search only for the next name attribute.
00124       const string && name = parse_header_attribute(data, "name", pos, &pos);
00125 
00126       if(name.empty()) {
00127         break;
00128       }
00129 
00130       // Look for start of data.
00131       if((pos = data.find("\r\n\r\n", pos)) == string::npos) {
00132         break;
00133       }
00134 
00135       start = pos + 4;
00136 
00137       // Look for end of data.
00138       if((pos = data.find(boundary, start)) == string::npos) {
00139         break;
00140       }
00141 
00142       if(pos - 2 >= start) {
00143         parameters.insert(parameter_map::value_type(name, data.substr(start, pos - start - 2)));
00144       }
00145 
00146       pos+=step;
00147     }
00148   }
00149 }
00150 
00151 void parse_get_parameters(const FCGIRequest & request,
00152                           parameter_map & parameters)
00153 {
00154   const char *query_str = request.fcgx_get_param("QUERY_STRING");
00155 
00156   if(!query_str || *query_str == 0) {
00157     const char *request_uri = request.fcgx_get_param("REQUEST_URI");
00158     if(request_uri) {
00159       URI uri(request_uri);
00160       parse_query_string(uri.query(), parameters);
00161     }
00162   } else
00163     parse_query_string(query_str, parameters);
00164 }
00165 
00166 void parse_post_parameters(const FCGIRequest & request,
00167                            parameter_map & parameters)
00168 {
00169   const string && content_type = request.content_type();
00170   const bool is_urlencoded =
00171     (content_type.find("application/x-www-form-urlencoded") == 0);
00172   const bool is_multipart =
00173     (is_urlencoded ? false : (content_type.find( "multipart/form-data") == 0));
00174 
00175   // TODO: Check that content length is less than some maximum value first.
00176   if(is_urlencoded || is_multipart) {
00177     long length = request.content_length();
00178 
00179     if(length > 0) {
00180       string data(length, ' ');
00181       int bytes_read = 0, total = 0;
00182 
00183       // Using &data[i] as a contiguous buffer may not work with some
00184       // STL implementations, but C++ 0x requires the behavior.
00185       while(length > 0) {
00186         bytes_read = request.fcgx_get_str(&data[total], length);
00187         if(bytes_read <= 0)
00188           break;
00189         total+=bytes_read;
00190         length-=bytes_read;
00191       }
00192 
00193       // Ignore short reads.  The error will be handled elsewhere.
00194       if(length != 0)
00195         data.resize(total);
00196 
00197       if(is_urlencoded) {
00198         parse_query_string(data, parameters);
00199       } else {
00200         string boundary("--");
00201         boundary.append(parse_header_attribute(content_type, "boundary"));
00202         if(boundary.size() > 2) {
00203           parse_multipart_data(data, boundary, parameters);
00204         }
00205       }
00206     }
00207   }
00208 }
00209 
00210 __END_NS_SSRC_WSPR_FCGI

Savarese Software Research Corporation
Copyright © 2006-2011 Savarese Software Research Corporation. All rights reserved.