"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""
|
23 |
|
24 import os as _os |
|
25 import __builtin__ |
|
26 import UserDict |
|
27 |
|
# Private reference to the builtin open().  This module defines its own
# open() function further down, which shadows the builtin name at module
# level, so all file I/O in this module goes through _open instead.
_open = __builtin__.open

# Alignment unit, in bytes, for values stored in the data file: every value
# starts at an offset that is a multiple of _BLOCKSIZE.
_BLOCKSIZE = 512

error = IOError  # For anydbm
|
33 |
|
class _Database(UserDict.DictMixin):
    """Dictionary-like database of string keys and string values on disk.

    The database lives in three files derived from one base name: the
    directory (index) file '<base>.dir', the data file '<base>.dat' and an
    optional backup of the directory file '<base>.bak'.  The mapping
    interface is completed by UserDict.DictMixin from the primitive
    methods defined here.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    # filebasename is the path of the database without extension; mode is
    # the permission mode passed to os.chmod() for files this object
    # creates or rewrites.
    def __init__(self, filebasename, mode):
        # The index is an in-memory dict, mirroring the directory file.
        # It is assigned before anything that can fail so that __del__()
        # (which calls _commit()) always finds the attribute, and finds it
        # None (meaning "nothing to commit") if construction is aborted.
        self._index = None  # maps keys to (pos, siz) pairs

        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
            try:
                self._chmod(self._datfile)
            finally:
                f.close()
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.  A missing or
    # unreadable directory file yields an empty index.
    def _update(self):
        # Build the index in a local dict and publish it only once the
        # whole file has been parsed.  If a line fails to parse, the
        # exception propagates and self._index keeps its previous value,
        # so a later _commit() (e.g. from __del__) cannot overwrite the
        # directory file with a partially read index.
        index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            pass
        else:
            try:
                for line in f:
                    line = line.rstrip()
                    # NOTE(security): eval() executes whatever expression
                    # the directory file contains.  Only open databases
                    # whose files come from a trusted source.
                    key, pos_and_siz_pair = eval(line)
                    index[key] = pos_and_siz_pair
            finally:
                f.close()
        self._index = index

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open(self._dirfile, 'w')
        try:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            # Close the file even if a write fails part-way.
            f.close()

    sync = _commit

    # Return the value stored for key by reading siz bytes at offset pos
    # of the data file.  Raises KeyError if key is not in the index.
    def __getitem__(self, key):
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)                # seek to end of file
            pos = int(f.tell())
            # Round pos up to the next multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        f = _open(self._dirfile, 'a')
        try:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    # Store val under key.  Both must be exactly of type str, otherwise
    # TypeError is raised.
    def __setitem__(self, key, val):
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    # Remove key from the database.  Raises KeyError if key is absent.
    def __delitem__(self, key):
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  _setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        return self._index.keys()

    def has_key(self, key):
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    # Write the index to disk and mark the object closed.  After this the
    # index and file names are None, so a second close() (or __del__) is
    # a no-op in _commit().
    def close(self):
        self._commit()
        self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    # Apply the mode given to __init__ to file, on platforms whose os
    # module provides chmod().  Goes through self._os (not the module
    # global) because it is called from _commit().
    def _chmod(self, file):
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
|
223 |
|
224 |
|
225 def open(file, flag=None, mode=0666): |
|
226 """Open the database file, filename, and return corresponding object. |
|
227 |
|
228 The flag argument, used to control how the database is opened in the |
|
229 other DBM implementations, is ignored in the dumbdbm module; the |
|
230 database is always opened for update, and will be created if it does |
|
231 not exist. |
|
232 |
|
233 The optional mode argument is the UNIX mode of the file, used only when |
|
234 the database has to be created. It defaults to octal code 0666 (and |
|
235 will be modified by the prevailing umask). |
|
236 |
|
237 """ |
|
238 # flag argument is currently ignored |
|
239 |
|
240 # Modify mode depending on the umask |
|
241 try: |
|
242 um = _os.umask(0) |
|
243 _os.umask(um) |
|
244 except AttributeError: |
|
245 pass |
|
246 else: |
|
247 # Turn off any bits that are set in the umask |
|
248 mode = mode & (~um) |
|
249 |
|
250 return _Database(file, mode) |