"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request methods;
see SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- HTTPServer: TCP server that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
|
19 |
|
20 |
|
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html
|
38 |
|
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
|
68 |
|
# Version of this module; it is embedded in the default
# BaseHTTPRequestHandler.server_version string ("BaseHTTP/<version>").
__version__ = "0.3"

# Names exported by a star-import of this module (test() is not exported).
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]

import sys
import time
import socket # For getfqdn()
from warnings import filterwarnings, catch_warnings
with catch_warnings():
    # Only when the interpreter has Python 3 compatibility warnings enabled:
    # suppress the DeprecationWarning raised by importing mimetools.  The
    # filter is scoped to this "with" block by catch_warnings().
    if sys.py3kwarning:
        filterwarnings("ignore", ".*mimetools has been removed",
                        DeprecationWarning)
    import mimetools
import SocketServer

# Default error message template.  It is %-formatted by send_error() with a
# mapping that provides the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
|
98 |
|
99 def _quote_html(html): |
|
100 return html.replace("&", "&").replace("<", "<").replace(">", ">") |
|
101 |
|
class HTTPServer(SocketServer.TCPServer):
    """TCP server that records its own host name and port after binding."""

    # Kept from the original author: enabling address reuse "seems to make
    # sense in testing environment".
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port.

        server_name is the fully qualified name socket.getfqdn() reports
        for the bound host; server_port is the bound port number.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        # Only the first two items are used; longer address tuples are
        # truncated by the slice.
        host, port = bound_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
|
112 |
|
113 |
|
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    # Longest request line (line terminator included) that is read from the
    # client.  Longer lines are answered with 414 instead of being buffered
    # without bound.
    _max_requestline_length = 65536

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline (the request line without its
        line terminator) and self.close_connection are set as well.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a blank request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = self.raw_requestline
        # Strip exactly one line terminator: CRLF, or a bare LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone would also accept a sign ("HTTP/-1.1",
                # "HTTP/+1.1"); the version grammar allows digits only.
                if not (version_number[0].isdigit() and
                        version_number[1].isdigit()):
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank line: nothing to answer.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        A request line longer than _max_requestline_length is answered
        with a 414 error.  A socket.timeout raised while reading the
        request or running the handler is logged and closes the
        connection.

        """
        try:
            limit = self._max_requestline_length
            # Read at most one character past the limit so that an oversized
            # line can be detected without buffering all of it.
            self.raw_requestline = self.rfile.readline(limit + 1)
            if len(self.raw_requestline) > limit:
                # parse_request() has not run yet; give the attributes read
                # by send_error() and the logging methods a defined value.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.close_connection = 1
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():  # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if wfile is buffered.
            self.wfile.flush()
        except socket.timeout as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary.

        Requests are served one after another until close_connection is
        set; at least one request is always attempted.
        """
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML body
        is omitted for HEAD requests and for codes that must not carry
        a body (below 200, 204 and 304).

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if message is None:
            message = short
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message),
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)

    # Template and content type of the error page; override in subclasses
    # to customize what send_error() emits.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.  If message is None, the short
        message for code from the responses table is used ('' for an
        unknown code).  Nothing is written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        A 'Connection' header additionally updates close_connection
        ('close' sets it, 'keep-alive' clears it), even for HTTP/0.9
        requests where the header itself is not written.
        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        timestamp is a number of seconds since the epoch; it defaults to
        the current time.  The result is expressed in GMT.
        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (0 is Monday).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1..12); index 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version returns the name socket.getfqdn() reports for the
        host part of client_address.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
|
566 |
|
567 |
|
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Run an HTTP server with the given handler class until interrupted.

    The server listens on all interfaces, on the port given as the first
    command line argument (8000 when there is none).  protocol is stored
    on HandlerClass as its protocol_version before the server is created.

    """
    # First command line argument, if any, overrides the default port.
    port = int(sys.argv[1]) if sys.argv[1:] else 8000

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    # Single parenthesized string: valid as a print statement and as a call.
    print("Serving HTTP on %s port %s ..." % (bound[0], bound[1]))
    httpd.serve_forever()


if __name__ == '__main__':
    test()