|
1 #include <string.h> |
|
2 #include "xmlmime.h" |
|
3 |
|
/*
 * Extract the next lexical token from the header value at *pp.
 *
 * Whitespace and parenthesised comments (which may nest) are skipped.
 * A token is one of:
 *   - an atom: a run of characters ended by whitespace, '(', '"', one of
 *     the specials ';' '/' '=', or the end of the input;
 *   - a quoted string: returned starting at its opening '"';
 *   - a single special character ';', '/' or '='.
 * A backslash always causes the following character to be skipped over.
 *
 * Returns a pointer to the first character of the token.  On return *pp
 * addresses the character at which the atom stopped, or the character just
 * past a special / the closing quote of a string.  Returns 0 when no token
 * can be produced: end of input outside an atom (including inside an
 * unterminated string or comment), a ')' seen outside a comment or string,
 * or a backslash that is the last character of the input.
 */
static const char *
getTok(const char **pp)
{
  /* ST_COMMENT is comment depth one; ST_COMMENT + 1 is depth two, etc. */
  enum { ST_ATOM, ST_STRING, ST_INIT, ST_COMMENT };
  int st = ST_INIT;
  const char *begin = 0;

  /* Every `continue` below advances *pp by one via the loop increment. */
  for (;; ++*pp) {
    const char c = **pp;

    if (c == '\0')
      return st == ST_ATOM ? begin : 0;

    if (c == ' ' || c == '\r' || c == '\t' || c == '\n') {
      if (st == ST_ATOM)
        return begin;
      continue;
    }

    if (c == '(') {
      if (st == ST_ATOM)
        return begin;
      /* Opens a comment, or nests one level deeper inside a comment. */
      if (st != ST_STRING)
        st += 1;
      continue;
    }

    if (c == ')') {
      if (st > ST_INIT)
        st -= 1;               /* leave one comment level */
      else if (st != ST_STRING)
        return 0;              /* ')' with no comment open */
      continue;
    }

    if (c == ';' || c == '/' || c == '=') {
      if (st == ST_ATOM)
        return begin;
      if (st == ST_INIT) {
        /* The special is itself a one-character token. */
        const char *special = *pp;
        *pp = special + 1;
        return special;
      }
      continue;
    }

    if (c == '\\') {
      /* Step onto the escaped character; the loop increment then skips it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      continue;
    }

    if (c == '"') {
      if (st == ST_STRING) {
        ++*pp;                 /* consume the closing quote */
        return begin;
      }
      if (st == ST_ATOM)
        return begin;          /* quote ends the atom and is not consumed */
      if (st == ST_INIT) {
        begin = *pp;
        st = ST_STRING;
      }
      continue;                /* quotes inside comments are ignored */
    }

    /* Any other character starts an atom if nothing is in progress. */
    if (st == ST_INIT) {
      begin = *pp;
      st = ST_ATOM;
    }
  }
  /* not reached */
}
|
73 |
|
/*
 * Case-insensitively compare the token [start, end) against key.
 *
 * key must be a NUL-terminated string of lowercase ASCII letters; a token
 * character matches when it equals the key character or its uppercase form.
 *
 * Returns 1 when the token has exactly the length of key and every character
 * matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key.  This has to be tested explicitly: where
       char is signed, the byte 0xE0 compares equal to 'A' + ('\0' - 'a'),
       so it would otherwise "match" the terminator and push key past the
       end of its string. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Token exhausted: it matches only if key is exhausted as well. */
  return *key == '\0';
}
|
86 |
|
87 void |
|
88 getXMLCharset(const char *buf, char *charset) |
|
89 { |
|
90 const char *next, *p; |
|
91 |
|
92 charset[0] = '\0'; |
|
93 next = buf; |
|
94 p = getTok(&next); |
|
95 if (matchkey(p, next, "text")) |
|
96 strcpy(charset, "us-ascii"); |
|
97 else if (!matchkey(p, next, "application")) |
|
98 return; |
|
99 p = getTok(&next); |
|
100 if (!p || *p != '/') |
|
101 return; |
|
102 p = getTok(&next); |
|
103 #if 0 |
|
104 if (!matchkey(p, next, "xml") && charset[0] == '\0') |
|
105 return; |
|
106 #endif |
|
107 p = getTok(&next); |
|
108 while (p) { |
|
109 if (*p == ';') { |
|
110 p = getTok(&next); |
|
111 if (matchkey(p, next, "charset")) { |
|
112 p = getTok(&next); |
|
113 if (p && *p == '=') { |
|
114 p = getTok(&next); |
|
115 if (p) { |
|
116 char *s = charset; |
|
117 if (*p == '"') { |
|
118 while (++p != next - 1) { |
|
119 if (*p == '\\') |
|
120 ++p; |
|
121 if (s == charset + CHARSET_MAX - 1) { |
|
122 charset[0] = '\0'; |
|
123 break; |
|
124 } |
|
125 *s++ = *p; |
|
126 } |
|
127 *s++ = '\0'; |
|
128 } |
|
129 else { |
|
130 if (next - p > CHARSET_MAX - 1) |
|
131 break; |
|
132 while (p != next) |
|
133 *s++ = *p++; |
|
134 *s = 0; |
|
135 break; |
|
136 } |
|
137 } |
|
138 } |
|
139 break; |
|
140 } |
|
141 } |
|
142 else |
|
143 p = getTok(&next); |
|
144 } |
|
145 } |
|
146 |
|
147 #ifdef TEST |
|
148 |
|
149 #include <stdio.h> |
|
150 |
|
151 int |
|
152 main(int argc, char *argv[]) |
|
153 { |
|
154 char buf[CHARSET_MAX]; |
|
155 if (argc <= 1) |
|
156 return 1; |
|
157 printf("%s\n", argv[1]); |
|
158 getXMLCharset(argv[1], buf); |
|
159 printf("charset=\"%s\"\n", buf); |
|
160 return 0; |
|
161 } |
|
162 |
|
163 #endif /* TEST */ |