|
1 #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
|
2 #define DATE_TIME_TZ_DB_BASE_HPP__ |
|
3 |
|
4 /* Copyright (c) 2003-2005 CrystalClear Software, Inc. |
|
5 * Subject to the Boost Software License, Version 1.0. |
|
6 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
|
7 * Author: Jeff Garland, Bart Garst |
|
8 * $Date: 2008-11-12 14:37:53 -0500 (Wed, 12 Nov 2008) $ |
|
9 */ |
|
10 |
|
11 #include <map> |
|
12 #include <vector> |
|
13 #include <string> |
|
14 #include <sstream> |
|
15 #include <fstream> |
|
16 #include <stdexcept> |
|
17 #include <boost/tokenizer.hpp> |
|
18 #include <boost/shared_ptr.hpp> |
|
19 #include <boost/throw_exception.hpp> |
|
20 #include <boost/date_time/compiler_config.hpp> |
|
21 #include <boost/date_time/time_zone_names.hpp> |
|
22 #include <boost/date_time/time_zone_base.hpp> |
|
23 #include <boost/date_time/time_parsing.hpp> |
|
24 |
|
25 namespace boost { |
|
26 namespace date_time { |
|
27 |
|
//! Exception raised when the tz database cannot find or open its data file
/*! Derives from std::logic_error. what() yields a fixed message which,
 *  when a file specification is supplied, is followed by that filespec. */
class data_not_accessible : public std::logic_error
{
public:
  //! Creates the exception carrying only the generic message
  data_not_accessible()
    : std::logic_error(std::string("Unable to locate or access the required datafile."))
  {}

  //! Creates the exception with the generic message followed by \a filespec
  data_not_accessible(const std::string& filespec)
    : std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: ") + filespec)
  {}
};
|
39 |
|
//! Exception raised when a record in the data file has an unexpected field layout
/*! Derives from std::out_of_range; the text passed to the constructor is
 *  forwarded unchanged and is what what() reports. */
class bad_field_count : public std::out_of_range
{
public:
  //! Creates the exception with \a s as its message
  bad_field_count(const std::string& s)
    : std::out_of_range(s)
  {}
};
|
48 |
|
49 //! Creates a database of time_zones from csv datafile |
|
50 /*! The csv file containing the zone_specs used by the |
|
51 * tz_db_base is intended to be customized by the |
|
52 * library user. When customizing this file (or creating your own) the |
|
53 * file must follow a specific format. |
|
54 * |
|
55 * The first line is expected to contain column headings and is therefore |
|
56 * not processed by the tz_db_base. |
|
57 * |
|
58 * Each record (line) must have eleven fields. Some of those fields can |
|
59 * be empty. Every field (even empty ones) must be enclosed in |
|
60 * double-quotes. |
|
61 * Ex: |
|
62 * @code |
|
63 * "America/Phoenix" <- string enclosed in quotes |
|
64 * "" <- empty field |
|
65 * @endcode |
|
66 * |
|
67 * Some fields represent a length of time. The format of these fields |
|
68 * must be: |
|
69 * @code |
|
70 * "{+|-}hh:mm[:ss]" <- length-of-time format |
|
71 * @endcode |
|
72 * Where the plus or minus is mandatory and the seconds are optional. |
|
73 * |
|
74 * Since some time zones do not use daylight savings it is not always |
|
75 * necessary for every field in a zone_spec to contain a value. All |
|
76 * zone_specs must have at least ID and GMT offset. Zones that use |
|
77 * daylight savings must have all fields filled except: |
|
78 * STD ABBR, STD NAME, DST NAME. You should take note |
|
79 * that DST ABBR is mandatory for zones that use daylight savings |
|
80 * (see field descriptions for further details). |
|
81 * |
|
82 * ******* Fields and their description/details ********* |
|
83 * |
|
84 * ID: |
|
85 * Contains the identifying string for the zone_spec. Any string will |
|
86 * do as long as it's unique. No two IDs can be the same. |
|
87 * |
|
88 * STD ABBR: |
|
89 * STD NAME: |
|
90 * DST ABBR: |
|
91 * DST NAME: |
|
92 * These four are all the names and abbreviations used by the time |
|
93 * zone being described. While any string will do in these fields, |
|
94 * care should be taken. These fields hold the strings that will be |
|
95 * used in the output of many of the local_time classes. |
|
96 * Ex: |
|
97 * @code |
|
98 * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); |
|
99 * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); |
|
100 * cout << ny_time.to_long_string() << endl; |
|
101 * // 2004-Aug-30 00:00:00 Eastern Daylight Time |
|
102 * cout << ny_time.to_short_string() << endl; |
|
103 * // 2004-Aug-30 00:00:00 EDT |
|
104 * @endcode |
|
105 * |
|
106 * NOTE: The exact format/function names may vary - see local_time |
|
107 * documentation for further details. |
|
108 * |
|
109 * GMT offset: |
|
110 * This is the number of hours added to utc to get the local time |
|
111 * before any daylight savings adjustments are made. Some examples |
|
112 * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. |
|
113 * The format must follow the length-of-time format described above. |
|
114 * |
|
115 * DST adjustment: |
|
116 * The amount of time added to gmt_offset when daylight savings is in |
|
117 * effect. The format must follow the length-of-time format described |
|
118 * above. |
|
119 * |
|
120 * DST Start Date rule: |
|
121 * This is a specially formatted string that describes the day of year |
|
122 * in which the transition takes place. It holds three fields of its own, |
|
123 * separated by semicolons. |
|
124 * The first field indicates the "nth" weekday of the month. The possible |
|
125 * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), |
|
126 * and -1 (last). |
|
127 * The second field indicates the day-of-week from 0-6 (Sun=0). |
|
128 * The third field indicates the month from 1-12 (Jan=1). |
|
129 * |
|
130 * Examples are: "-1;5;9"="Last Friday of September", |
|
131 * "2;1;3"="Second Monday of March" |
|
132 * |
|
133 * Start time: |
|
134 * Start time is the number of hours past midnight, on the day of the |
|
135 * start transition, the transition takes place. More simply put, the |
|
136 * time of day the transition is made (in 24 hours format). The format |
|
137 * must follow the length-of-time format described above with the |
|
138 * exception that it must always be positive. |
|
139 * |
|
140 * DST End date rule: |
|
141 * See DST Start date rule. The difference here is this is the day |
|
142 * daylight savings ends (transition to STD). |
|
143 * |
|
144 * End time: |
|
145 * Same as Start time. |
|
146 */ |
|
147 template<class time_zone_type, class rule_type> |
|
148 class tz_db_base { |
|
149 public: |
|
150 /* Having CharT as a template parameter created problems |
|
151 * with posix_time::duration_from_string. Templatizing |
|
152 * duration_from_string was not possible at this time, however, |
|
153 * it should be possible in the future (when poor compilers get |
|
154 * fixed or stop being used). |
|
155 * Since this class was designed to use CharT as a parameter it |
|
156 * is simply typedef'd here to ease converting in back to a |
|
157 * parameter the future */ |
|
158 typedef char char_type; |
|
159 |
|
160 typedef typename time_zone_type::base_type time_zone_base_type; |
|
161 typedef typename time_zone_type::time_duration_type time_duration_type; |
|
162 typedef time_zone_names_base<char_type> time_zone_names; |
|
163 typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; |
|
164 typedef std::basic_string<char_type> string_type; |
|
165 |
|
166 //! Constructs an empty database |
|
167 tz_db_base() {} |
|
168 |
|
169 //! Process csv data file, may throw exceptions |
|
170 /*! May throw data_not_accessible, or bad_field_count exceptions */ |
|
171 void load_from_file(const std::string& pathspec) |
|
172 { |
|
173 string_type in_str; |
|
174 std::string buff; |
|
175 |
|
176 std::ifstream ifs(pathspec.c_str()); |
|
177 if(!ifs){ |
|
178 boost::throw_exception(data_not_accessible(pathspec)); |
|
179 } |
|
180 std::getline(ifs, buff); // first line is column headings |
|
181 |
|
182 while( std::getline(ifs, buff)) { |
|
183 parse_string(buff); |
|
184 } |
|
185 } |
|
186 |
|
187 //! returns true if record successfully added to map |
|
188 /*! Takes a region name in the form of "America/Phoenix", and a |
|
189 * time_zone object for that region. The id string must be a unique |
|
190 * name that does not already exist in the database. */ |
|
191 bool add_record(const string_type& region, |
|
192 boost::shared_ptr<time_zone_base_type> tz) |
|
193 { |
|
194 typename map_type::value_type p(region, tz); |
|
195 return (m_zone_map.insert(p)).second; |
|
196 } |
|
197 |
|
198 //! Returns a time_zone object built from the specs for the given region |
|
199 /*! Returns a time_zone object built from the specs for the given |
|
200 * region. If region does not exist a local_time::record_not_found |
|
201 * exception will be thrown */ |
|
202 boost::shared_ptr<time_zone_base_type> |
|
203 time_zone_from_region(const string_type& region) const |
|
204 { |
|
205 // get the record |
|
206 typename map_type::const_iterator record = m_zone_map.find(region); |
|
207 if(record == m_zone_map.end()){ |
|
208 return boost::shared_ptr<time_zone_base_type>(); //null pointer |
|
209 } |
|
210 return record->second; |
|
211 } |
|
212 |
|
213 //! Returns a vector of strings holding the time zone regions in the database |
|
214 std::vector<std::string> region_list() const |
|
215 { |
|
216 typedef std::vector<std::string> vector_type; |
|
217 vector_type regions; |
|
218 typename map_type::const_iterator itr = m_zone_map.begin(); |
|
219 while(itr != m_zone_map.end()) { |
|
220 regions.push_back(itr->first); |
|
221 ++itr; |
|
222 } |
|
223 return regions; |
|
224 } |
|
225 |
|
226 private: |
|
227 typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; |
|
228 map_type m_zone_map; |
|
229 |
|
230 // start and end rule are of the same type |
|
231 typedef typename rule_type::start_rule::week_num week_num; |
|
232 |
|
233 /* TODO: mechanisms need to be put in place to handle different |
|
234 * types of rule specs. parse_rules() only handles nth_kday |
|
235 * rule types. */ |
|
236 |
|
237 //! parses rule specs for transition day rules |
|
238 rule_type* parse_rules(const string_type& sr, const string_type& er) const |
|
239 { |
|
240 using namespace gregorian; |
|
241 // start and end rule are of the same type, |
|
242 // both are included here for readability |
|
243 typedef typename rule_type::start_rule start_rule; |
|
244 typedef typename rule_type::end_rule end_rule; |
|
245 |
|
246 // these are: [start|end] nth, day, month |
|
247 int s_nth = 0, s_d = 0, s_m = 0; |
|
248 int e_nth = 0, e_d = 0, e_m = 0; |
|
249 split_rule_spec(s_nth, s_d, s_m, sr); |
|
250 split_rule_spec(e_nth, e_d, e_m, er); |
|
251 |
|
252 typename start_rule::week_num s_wn, e_wn; |
|
253 s_wn = get_week_num(s_nth); |
|
254 e_wn = get_week_num(e_nth); |
|
255 |
|
256 |
|
257 return new rule_type(start_rule(s_wn, s_d, s_m), |
|
258 end_rule(e_wn, e_d, e_m)); |
|
259 } |
|
260 //! helper function for parse_rules() |
|
261 week_num get_week_num(int nth) const |
|
262 { |
|
263 typedef typename rule_type::start_rule start_rule; |
|
264 switch(nth){ |
|
265 case 1: |
|
266 return start_rule::first; |
|
267 case 2: |
|
268 return start_rule::second; |
|
269 case 3: |
|
270 return start_rule::third; |
|
271 case 4: |
|
272 return start_rule::fourth; |
|
273 case 5: |
|
274 case -1: |
|
275 return start_rule::fifth; |
|
276 default: |
|
277 // shouldn't get here - add error handling later |
|
278 break; |
|
279 } |
|
280 return start_rule::fifth; // silence warnings |
|
281 } |
|
282 |
|
283 //! splits the [start|end]_date_rule string into 3 ints |
|
284 void split_rule_spec(int& nth, int& d, int& m, string_type rule) const |
|
285 { |
|
286 typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; |
|
287 typedef boost::tokenizer<char_separator_type, |
|
288 std::basic_string<char_type>::const_iterator, |
|
289 std::basic_string<char_type> > tokenizer; |
|
290 typedef boost::tokenizer<char_separator_type, |
|
291 std::basic_string<char_type>::const_iterator, |
|
292 std::basic_string<char_type> >::iterator tokenizer_iterator; |
|
293 |
|
294 const char_type sep_char[] = { ';', '\0'}; |
|
295 char_separator_type sep(sep_char); |
|
296 tokenizer tokens(rule, sep); // 3 fields |
|
297 |
|
298 tokenizer_iterator tok_iter = tokens.begin(); |
|
299 nth = std::atoi(tok_iter->c_str()); ++tok_iter; |
|
300 d = std::atoi(tok_iter->c_str()); ++tok_iter; |
|
301 m = std::atoi(tok_iter->c_str()); |
|
302 } |
|
303 |
|
304 |
|
305 //! Take a line from the csv, turn it into a time_zone_type. |
|
306 /*! Take a line from the csv, turn it into a time_zone_type, |
|
307 * and add it to the map. Zone_specs in csv file are expected to |
|
308 * have eleven fields that describe the time zone. Returns true if |
|
309 * zone_spec successfully added to database */ |
|
310 bool parse_string(string_type& s) |
|
311 { |
|
312 std::vector<string_type> result; |
|
313 typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; |
|
314 |
|
315 token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); |
|
316 |
|
317 token_iter_type end; |
|
318 while (i != end) { |
|
319 result.push_back(*i); |
|
320 i++; |
|
321 } |
|
322 |
|
323 enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, |
|
324 DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, |
|
325 END_TIME, FIELD_COUNT }; |
|
326 |
|
327 //take a shot at fixing gcc 4.x error |
|
328 const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); |
|
329 if (result.size() != expected_fields) { |
|
330 std::ostringstream msg; |
|
331 msg << "Expecting " << FIELD_COUNT << " fields, got " |
|
332 << result.size() << " fields in line: " << s; |
|
333 boost::throw_exception(bad_field_count(msg.str())); |
|
334 BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach |
|
335 } |
|
336 |
|
337 // initializations |
|
338 bool has_dst = true; |
|
339 if(result[DSTABBR] == std::string()){ |
|
340 has_dst = false; |
|
341 } |
|
342 |
|
343 |
|
344 // start building components of a time_zone |
|
345 time_zone_names names(result[STDNAME], result[STDABBR], |
|
346 result[DSTNAME], result[DSTABBR]); |
|
347 |
|
348 time_duration_type utc_offset = |
|
349 str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); |
|
350 |
|
351 dst_adjustment_offsets adjust(time_duration_type(0,0,0), |
|
352 time_duration_type(0,0,0), |
|
353 time_duration_type(0,0,0)); |
|
354 |
|
355 boost::shared_ptr<rule_type> rules; |
|
356 |
|
357 if(has_dst){ |
|
358 adjust = dst_adjustment_offsets( |
|
359 str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), |
|
360 str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), |
|
361 str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) |
|
362 ); |
|
363 |
|
364 rules = |
|
365 boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], |
|
366 result[END_DATE_RULE])); |
|
367 } |
|
368 string_type id(result[ID]); |
|
369 boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); |
|
370 return (add_record(id, zone)); |
|
371 |
|
372 } |
|
373 |
|
374 }; |
|
375 |
|
376 } } // namespace |
|
377 |
|
378 #endif // DATE_TIME_TZ_DB_BASE_HPP__ |