|
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
|
2 See the file COPYING for copying permission. |
|
3 */ |
|
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
|
5 |
|
/* Scan the next token of a MIME header value.

   *pp is the current scan position in a NUL-terminated string.  White
   space and parenthesised comments (which may nest) are skipped, and a
   backslash always escapes the character that follows it.

   Tokens and the resulting *pp:
     - ';', '/' or '=' : returns a pointer to that character; *pp is
       advanced just past it.
     - quoted string   : returns a pointer to the opening '"'; *pp is
       advanced just past the closing '"'.
     - atom            : returns a pointer to its first character; *pp
       is left on the delimiter (white space, '(', ';', '/', '=', '"'
       or the terminating NUL) that ended it.

   Returns 0 when no token is available: end of input, an unterminated
   quoted string or comment, an unbalanced ')', or a trailing
   backslash. */
static const char *
getTok(const char **pp)
{
  /* The order matters: every value above init is a comment nesting
     depth (inComment is depth 1, inComment + 1 is depth 2, ...), so
     '(' and ')' can simply increment and decrement the state. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the terminator is a complete token;
         dropping it would lose the last word of the input.  *pp stays
         on the NUL, so the following call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Parentheses are literal inside a quoted string; elsewhere they
         open a comment or deepen the current one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;
      else if (state != inString)
        return 0; /* ')' with no matching '(' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;
      /* Inside a string or comment these are ordinary characters. */
      break;
    case '\\':
      /* Skip the escaped character so it is never seen as a delimiter. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote has no special meaning. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
|
72 |
|
/* Compare the token [start, end) with key, ignoring the case of ASCII
   letters in the token.

   key must be a NUL-terminated string whose letters are lowercase
   ASCII.  Characters of key that are not lowercase letters must match
   exactly.

   Returns 1 when the token has the same length as key and every
   character matches, 0 otherwise.  A null start (no token) never
   matches. */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key: stop here rather than reading past
       the key's terminator. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only a lowercase letter has an uppercase twin; applying the
       offset to other characters would accept unrelated bytes. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}
|
85 |
|
86 void |
|
87 getXMLCharset(const char *buf, char *charset) |
|
88 { |
|
89 const char *next, *p; |
|
90 |
|
91 charset[0] = '\0'; |
|
92 next = buf; |
|
93 p = getTok(&next); |
|
94 if (matchkey(p, next, "text")) |
|
95 strcpy(charset, "us-ascii"); |
|
96 else if (!matchkey(p, next, "application")) |
|
97 return; |
|
98 p = getTok(&next); |
|
99 if (!p || *p != '/') |
|
100 return; |
|
101 p = getTok(&next); |
|
102 if (matchkey(p, next, "xml")) |
|
103 isXml = 1; |
|
104 p = getTok(&next); |
|
105 while (p) { |
|
106 if (*p == ';') { |
|
107 p = getTok(&next); |
|
108 if (matchkey(p, next, "charset")) { |
|
109 p = getTok(&next); |
|
110 if (p && *p == '=') { |
|
111 p = getTok(&next); |
|
112 if (p) { |
|
113 char *s = charset; |
|
114 if (*p == '"') { |
|
115 while (++p != next - 1) { |
|
116 if (*p == '\\') |
|
117 ++p; |
|
118 if (s == charset + CHARSET_MAX - 1) { |
|
119 charset[0] = '\0'; |
|
120 break; |
|
121 } |
|
122 *s++ = *p; |
|
123 } |
|
124 *s++ = '\0'; |
|
125 } |
|
126 else { |
|
127 if (next - p > CHARSET_MAX - 1) |
|
128 break; |
|
129 while (p != next) |
|
130 *s++ = *p++; |
|
131 *s = 0; |
|
132 break; |
|
133 } |
|
134 } |
|
135 } |
|
136 } |
|
137 } |
|
138 else |
|
139 p = getTok(&next); |
|
140 } |
|
141 } |
|
142 |
|
143 int |
|
144 main(int argc, char **argv) |
|
145 { |
|
146 char buf[CHARSET_MAX]; |
|
147 getXMLCharset(argv[1], buf); |
|
148 printf("charset = \"%s\"\n", buf); |
|
149 return 0; |
|
150 } |