/*
 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
24 |
|
|
25 |
#include "config.h"
|
|
26 |
#include "TextDocument.h"
|
|
27 |
|
|
28 |
#include "Element.h"
|
|
29 |
#include "HTMLNames.h"
|
|
30 |
#include "HTMLViewSourceDocument.h"
|
|
31 |
#include "SegmentedString.h"
|
|
32 |
#include "Text.h"
|
|
33 |
#include "XMLTokenizer.h"
|
|
34 |
|
|
35 |
using namespace std;
|
|
36 |
|
|
37 |
namespace WebCore {
|
|
38 |
|
|
39 |
using namespace HTMLNames;
|
|
40 |
|
|
41 |
class TextTokenizer : public Tokenizer {
|
|
42 |
public:
|
|
43 |
TextTokenizer(Document*);
|
|
44 |
virtual ~TextTokenizer();
|
|
45 |
TextTokenizer(HTMLViewSourceDocument*);
|
|
46 |
|
|
47 |
virtual void write(const SegmentedString&, bool appendData);
|
|
48 |
virtual void finish();
|
|
49 |
virtual bool isWaitingForScripts() const;
|
|
50 |
|
|
51 |
inline void checkBuffer(int len = 10)
|
|
52 |
{
|
|
53 |
if ((m_dest - m_buffer) > m_size - len) {
|
|
54 |
// Enlarge buffer
|
|
55 |
int newSize = std::max(m_size * 2, m_size + len);
|
|
56 |
int oldOffset = m_dest - m_buffer;
|
|
57 |
m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
|
|
58 |
m_dest = m_buffer + oldOffset;
|
|
59 |
m_size = newSize;
|
|
60 |
}
|
|
61 |
}
|
|
62 |
|
|
63 |
private:
|
|
64 |
Document* m_doc;
|
|
65 |
Element* m_preElement;
|
|
66 |
|
|
67 |
bool m_skipLF;
|
|
68 |
|
|
69 |
int m_size;
|
|
70 |
UChar* m_buffer;
|
|
71 |
UChar* m_dest;
|
|
72 |
};
|
|
73 |
|
|
74 |
// Creates a tokenizer that writes into the given document, starting with no
// <pre> element, no pending CR, and an empty 254-UChar scratch buffer.
//
// m_buffer and m_dest are initialized from m_size and m_buffer respectively;
// this relies on the members being declared in the order m_size, m_buffer,
// m_dest.
TextTokenizer::TextTokenizer(Document* doc)
    : m_doc(doc)
    , m_preElement(0)
    , m_skipLF(false)
    , m_size(254)
    , m_buffer(static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size)))
    , m_dest(m_buffer)
{
}
|
|
84 |
|
|
85 |
// Creates a tokenizer for an HTMLViewSourceDocument. The base Tokenizer is
// constructed with `true` (presumably the view-source flag that
// inViewSourceMode() later reports -- confirm against Tokenizer). Otherwise
// the initial state matches the Document* constructor: no <pre> element, no
// pending CR, and an empty 254-UChar scratch buffer.
//
// m_buffer and m_dest are initialized from m_size and m_buffer respectively;
// this relies on the members being declared in the order m_size, m_buffer,
// m_dest.
TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc)
    : Tokenizer(true)
    , m_doc(doc)
    , m_preElement(0)
    , m_skipLF(false)
    , m_size(254)
    , m_buffer(static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size)))
    , m_dest(m_buffer)
{
}
|
|
96 |
|
|
97 |
// Destroys the tokenizer. finish() is expected to have run first and to have
// released the scratch buffer; the assertion documents that contract.
TextTokenizer::~TextTokenizer()
{
    // finish() should have been called to prevent any leaks
    ASSERT(!m_buffer);

    // If the assertion is compiled out or is not fatal, do not leak the
    // buffer when finish() was skipped: release it here instead.
    if (m_buffer) {
        fastFree(m_buffer);
        m_buffer = 0;
        m_dest = 0;
    }
}
|
|
102 |
|
|
103 |
void TextTokenizer::write(const SegmentedString& s, bool)
|
|
104 |
{
|
|
105 |
ExceptionCode ec;
|
|
106 |
|
|
107 |
m_dest = m_buffer;
|
|
108 |
|
|
109 |
SegmentedString str = s;
|
|
110 |
while (!str.isEmpty()) {
|
|
111 |
UChar c = *str;
|
|
112 |
|
|
113 |
if (c == '\r') {
|
|
114 |
*m_dest++ = '\n';
|
|
115 |
|
|
116 |
// possibly skip an LF in the case of an CRLF sequence
|
|
117 |
m_skipLF = true;
|
|
118 |
} else if (c == '\n') {
|
|
119 |
if (!m_skipLF)
|
|
120 |
*m_dest++ = c;
|
|
121 |
else
|
|
122 |
m_skipLF = false;
|
|
123 |
} else {
|
|
124 |
*m_dest++ = c;
|
|
125 |
m_skipLF = false;
|
|
126 |
}
|
|
127 |
|
|
128 |
str.advance();
|
|
129 |
|
|
130 |
// Maybe enlarge the buffer
|
|
131 |
checkBuffer();
|
|
132 |
}
|
|
133 |
|
|
134 |
if (!m_preElement && !inViewSourceMode()) {
|
|
135 |
RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false);
|
|
136 |
m_doc->appendChild(rootElement, ec);
|
|
137 |
|
|
138 |
RefPtr<Element> body = m_doc->createElement(bodyTag, false);
|
|
139 |
rootElement->appendChild(body, ec);
|
|
140 |
|
|
141 |
RefPtr<Element> preElement = m_doc->createElement(preTag, false);
|
|
142 |
preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);
|
|
143 |
|
|
144 |
body->appendChild(preElement, ec);
|
|
145 |
|
|
146 |
m_preElement = preElement.get();
|
|
147 |
}
|
|
148 |
|
|
149 |
String string = String(m_buffer, m_dest - m_buffer);
|
|
150 |
if (inViewSourceMode()) {
|
|
151 |
static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
|
|
152 |
return;
|
|
153 |
}
|
|
154 |
|
|
155 |
unsigned charsLeft = string.length();
|
|
156 |
while (charsLeft) {
|
|
157 |
// split large text to nodes of manageable size
|
|
158 |
RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
|
|
159 |
m_preElement->appendChild(text, ec);
|
|
160 |
}
|
|
161 |
}
|
|
162 |
|
|
163 |
void TextTokenizer::finish()
|
|
164 |
{
|
|
165 |
if (!m_preElement)
|
|
166 |
write(SegmentedString(), true); // Create document structure for an empty text document.
|
|
167 |
m_preElement = 0;
|
|
168 |
fastFree(m_buffer);
|
|
169 |
m_buffer = 0;
|
|
170 |
m_dest = 0;
|
|
171 |
|
|
172 |
m_doc->finishedParsing();
|
|
173 |
}
|
|
174 |
|
|
175 |
bool TextTokenizer::isWaitingForScripts() const
|
|
176 |
{
|
|
177 |
// A text document is never waiting for scripts
|
|
178 |
return false;
|
|
179 |
}
|
|
180 |
|
|
181 |
// Constructs a text document for the given frame; all set-up is delegated to
// the HTMLDocument base class.
TextDocument::TextDocument(Frame* frame)
    : HTMLDocument(frame)
{
}
|
|
185 |
|
|
186 |
// Returns a newly allocated TextTokenizer bound to this document. The
// tokenizer is created with `new`; the caller receives the raw pointer.
Tokenizer* TextDocument::createTokenizer()
{
    Tokenizer* tokenizer = new TextTokenizer(this);
    return tokenizer;
}
|
|
190 |
|
|
191 |
// Returns a newly allocated TextTokenizer for a view-source document, using
// the HTMLViewSourceDocument* constructor. The tokenizer is created with
// `new`; the caller receives the raw pointer.
Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document)
{
    Tokenizer* tokenizer = new TextTokenizer(document);
    return tokenizer;
}
|
|
195 |
|
|
196 |
}
|