|
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
|
2 See the file COPYING for copying permission. |
|
3 */ |
|
4 #include <string.h> |
|
5 #include "xmlmime.h" |
|
6 |
|
/* Scan the next token of a MIME header value starting at *pp.

   A token is one of:
     - an atom: a run of characters ended by whitespace, '(', ';', '/',
       '=', '"' or the end of the string;
     - a quoted string: the returned pointer addresses the opening quote
       and *pp is left just past the closing quote;
     - one of the single characters ';', '/' or '=', with *pp left just
       past it.

   Whitespace and parenthesised comments (which may nest) before a token
   are skipped.  A backslash always skips the character that follows it.

   Returns a pointer to the first character of the token; for an atom, *pp
   is left on the character that ended it.  Returns 0 when the input ends
   without a complete atom, when a backslash is the last character, or
   when a ')' is met outside a comment or quoted string. */
static const char *
getTok(const char **pp)
{
  /* Values above inComment record deeper comment nesting. */
  enum { inAtom, inString, init, inComment };
  const char *cur = *pp;
  const char *first = 0;
  const char *result = 0;
  int mode = init;

  for (;;) {
    const char ch = *cur;
    const int blank = (ch == ' ' || ch == '\r' || ch == '\t' || ch == '\n');

    if (ch == '\0') {
      /* Only an atom may be ended by the end of the input. */
      if (mode == inAtom)
        result = first;
      break;
    }

    if (ch == '\\') {
      /* Escaped pair: step over the backslash and whatever follows. */
      ++cur;
      if (*cur == '\0')
        break;
      ++cur;
      continue;
    }

    if (mode == inAtom) {
      if (ch == ')')
        break;
      if (blank || ch == '(' || ch == ';' || ch == '/' || ch == '='
          || ch == '"') {
        result = first;
        break;
      }
    }
    else if (mode == inString) {
      if (ch == '"') {
        ++cur;
        result = first;
        break;
      }
    }
    else if (mode == init) {
      if (ch == ')')
        break;
      if (ch == ';' || ch == '/' || ch == '=') {
        result = cur++;
        break;
      }
      if (ch == '(') {
        mode = inComment;
      }
      else if (ch == '"') {
        first = cur;
        mode = inString;
      }
      else if (!blank) {
        first = cur;
        mode = inAtom;
      }
    }
    else {
      /* Inside a comment: only the nesting depth changes. */
      if (ch == '(')
        ++mode;
      else if (ch == ')')
        --mode;
    }

    ++cur;
  }

  *pp = cur;
  return result;
}
|
76 |
|
/* Compare the token [start, end) against key, ignoring ASCII case in the
   token.

   key must be a NUL-terminated string of lowercase ASCII letters; each
   token character may be the lowercase letter itself or its uppercase
   counterpart.

   Returns 1 when the token spells exactly key (same length, same letters)
   and 0 otherwise, including when start is a null pointer (no token). */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than key.  Stop here instead of comparing
       against the terminator: where char is signed, the byte 0xE0 equals
       'A' + ('\0' - 'a') and would otherwise be accepted, stepping key
       past the end of its buffer. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Equal only if key has been consumed completely as well. */
  return *key == '\0';
}
|
89 |
|
90 void |
|
91 getXMLCharset(const char *buf, char *charset) |
|
92 { |
|
93 const char *next, *p; |
|
94 |
|
95 charset[0] = '\0'; |
|
96 next = buf; |
|
97 p = getTok(&next); |
|
98 if (matchkey(p, next, "text")) |
|
99 strcpy(charset, "us-ascii"); |
|
100 else if (!matchkey(p, next, "application")) |
|
101 return; |
|
102 p = getTok(&next); |
|
103 if (!p || *p != '/') |
|
104 return; |
|
105 p = getTok(&next); |
|
106 #if 0 |
|
107 if (!matchkey(p, next, "xml") && charset[0] == '\0') |
|
108 return; |
|
109 #endif |
|
110 p = getTok(&next); |
|
111 while (p) { |
|
112 if (*p == ';') { |
|
113 p = getTok(&next); |
|
114 if (matchkey(p, next, "charset")) { |
|
115 p = getTok(&next); |
|
116 if (p && *p == '=') { |
|
117 p = getTok(&next); |
|
118 if (p) { |
|
119 char *s = charset; |
|
120 if (*p == '"') { |
|
121 while (++p != next - 1) { |
|
122 if (*p == '\\') |
|
123 ++p; |
|
124 if (s == charset + CHARSET_MAX - 1) { |
|
125 charset[0] = '\0'; |
|
126 break; |
|
127 } |
|
128 *s++ = *p; |
|
129 } |
|
130 *s++ = '\0'; |
|
131 } |
|
132 else { |
|
133 if (next - p > CHARSET_MAX - 1) |
|
134 break; |
|
135 while (p != next) |
|
136 *s++ = *p++; |
|
137 *s = 0; |
|
138 break; |
|
139 } |
|
140 } |
|
141 } |
|
142 break; |
|
143 } |
|
144 } |
|
145 else |
|
146 p = getTok(&next); |
|
147 } |
|
148 } |
|
149 |
|
150 #ifdef TEST |
|
151 |
|
152 #include <stdio.h> |
|
153 |
|
154 int |
|
155 main(int argc, char *argv[]) |
|
156 { |
|
157 char buf[CHARSET_MAX]; |
|
158 if (argc <= 1) |
|
159 return 1; |
|
160 printf("%s\n", argv[1]); |
|
161 getXMLCharset(argv[1], buf); |
|
162 printf("charset=\"%s\"\n", buf); |
|
163 return 0; |
|
164 } |
|
165 |
|
166 #endif /* TEST */ |