|
/* Size in bytes of the buffer that receives a charset name, including the
   terminating NUL; names longer than CHARSET_MAX - 1 characters are not
   stored. */
#define CHARSET_MAX 41
|
2 |
|
/*
 * Scan the next token of a MIME header value, starting at *pp.
 *
 * A token is one of:
 *   - an atom: a run of characters that are not whitespace, parentheses,
 *     ';', '/', '=' or '"';
 *   - a double-quoted string, returned including both quote characters;
 *   - one of the single characters ';', '/' or '='.
 *
 * Whitespace between tokens is skipped, as is text inside parentheses
 * (comments, which may nest).  A backslash makes the scanner step over the
 * character that follows it without interpreting it; outside a token the
 * backslash and that character are simply skipped.
 *
 * On success the return value points at the first character of the token and
 * *pp is advanced to just past its last character, so the token occupies
 * [return value, *pp).
 *
 * Returns 0 when no token can be produced: the input is exhausted, the input
 * ends inside a quoted string or comment or directly after a backslash, or a
 * ')' appears outside a quoted string or comment.
 */
static const char *
getTok(const char **pp)
{
  /* States above init count comment nesting: inComment is one level deep,
     inComment + 1 is two levels, and so on. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is a complete token.
         *pp stays on the terminator, so the following call returns 0. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Opens a comment, or nests one level deeper inside a comment. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;
      else if (state != inString)
        return 0; /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside any token these characters are tokens by themselves. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step onto the escaped character; the increment at the bottom of the
         loop then moves past it without examining it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        /* Closing quote: consume it so that it is part of the token. */
        ++*pp;
        return tokStart;
      case inAtom:
        /* The quote ends the atom and is left for the next call. */
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote has no special meaning. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
|
69 |
|
/*
 * Compare the token [start, end) with key, ignoring ASCII case in the token.
 *
 * key must be a NUL-terminated lowercase ASCII string.  A letter in key
 * matches either case of that letter in the token; any other key character
 * must match exactly.
 *
 * Returns 1 when the token and key have the same length and every character
 * matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;
    /* Token is longer than key: stop before stepping past key's NUL. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Accept the uppercase form only for genuine lowercase letters. */
    if (k >= 'a' && k <= 'z' && *start == k - 'a' + 'A')
      continue;
    return 0;
  }
  return *key == '\0';
}
|
82 |
|
83 void |
|
84 getXMLCharset(const char *buf, char *charset) |
|
85 { |
|
86 const char *next, *p; |
|
87 |
|
88 charset[0] = '\0'; |
|
89 next = buf; |
|
90 p = getTok(&next); |
|
91 if (matchkey(p, next, "text")) |
|
92 strcpy(charset, "us-ascii"); |
|
93 else if (!matchkey(p, next, "application")) |
|
94 return; |
|
95 p = getTok(&next); |
|
96 if (!p || *p != '/') |
|
97 return; |
|
98 p = getTok(&next); |
|
99 if (matchkey(p, next, "xml")) |
|
100 isXml = 1; |
|
101 p = getTok(&next); |
|
102 while (p) { |
|
103 if (*p == ';') { |
|
104 p = getTok(&next); |
|
105 if (matchkey(p, next, "charset")) { |
|
106 p = getTok(&next); |
|
107 if (p && *p == '=') { |
|
108 p = getTok(&next); |
|
109 if (p) { |
|
110 char *s = charset; |
|
111 if (*p == '"') { |
|
112 while (++p != next - 1) { |
|
113 if (*p == '\\') |
|
114 ++p; |
|
115 if (s == charset + CHARSET_MAX - 1) { |
|
116 charset[0] = '\0'; |
|
117 break; |
|
118 } |
|
119 *s++ = *p; |
|
120 } |
|
121 *s++ = '\0'; |
|
122 } |
|
123 else { |
|
124 if (next - p > CHARSET_MAX - 1) |
|
125 break; |
|
126 while (p != next) |
|
127 *s++ = *p++; |
|
128 *s = 0; |
|
129 break; |
|
130 } |
|
131 } |
|
132 } |
|
133 } |
|
134 } |
|
135 else |
|
136 p = getTok(&next); |
|
137 } |
|
138 } |
|
139 |
|
140 int |
|
141 main(int argc, char **argv) |
|
142 { |
|
143 char buf[CHARSET_MAX]; |
|
144 getXMLCharset(argv[1], buf); |
|
145 printf("charset = \"%s\"\n", buf); |
|
146 return 0; |
|
147 } |