Skip to content
Navigation Menu
{{ message }}
-
-
Notifications
You must be signed in to change notification settings - Fork 193
Expand file tree
/
Copy pathcache.py
More file actions
306 lines (245 loc) · 8.68 KB
/
Copy pathcache.py
File metadata and controls
306 lines (245 loc) · 8.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Item cache.
Between runs of Planet we need somewhere to store the feed information
we parsed, this is so we don't lose information when a particular feed
goes away or is too short to hold enough items.
This module provides the code to handle this cache transparently enough
that the rest of the code can take the persistence for granted.
"""
import os
import re
# Patterns applied, in this order, by filename() to turn a feed URL into
# a name that is safe to use for a cache file.
re_url_scheme = re.compile(r"^[^:]*://")    # leading "scheme://" prefix
re_slash = re.compile(r"[?/]+")             # any run of "?" or "/"
re_initial_cruft = re.compile(r"^[,.]*")    # commas and dots at the start
re_final_cruft = re.compile(r"[,.]*$")      # commas and dots at the end
class CachedInfo:
    """Cached information.

    This class is designed to hold information that is stored in a cache
    between instances.  It can act both as a dictionary (c['foo']) and
    as an object (c.foo) to get and set values and supports both string
    and date values.

    If you wish to support special fields you can derive a class off this
    and implement get_FIELD and set_FIELD functions which will be
    automatically called.

    Layout in the backing cache (a mapping that also provides sync()):

      * the names of the stored keys, joined with single spaces, live
        under " keys" for a root object and under the escaped id
        otherwise;
      * each value lives under cache_key(key) and the name of its type
        under cache_key(key) + " type".

    Spaces in the id are replaced by "%20" and spaces in key names by "_"
    so that these space-separated entries can be split apart again.
    """

    # Type names stored next to each value; get() uses them to pick the
    # matching get_as_<type> method.
    STRING = "string"
    DATE = "date"
    NULL = "null"

    def __init__(self, cache, id_, root=0):
        """Create an empty info object bound to a cache.

        cache -- mapping used as the persistent store
        id_   -- identifier of this object within the cache
        root  -- if true, keys are stored without the id prefix and the
                 key list is kept under " keys"
        """
        # Underscore-prefixed names bypass the key/value machinery, see
        # __setattr__.
        self._type = {}
        self._value = {}
        self._cached = {}

        self._cache = cache
        self._id = id_.replace(" ", "%20")
        self._root = root

    def _keys_key(self):
        """Return the cache entry name that holds this object's key list."""
        if self._root:
            return " keys"
        return self._id

    def _stored_keys(self):
        """Return the key names recorded in the cache, or None if absent.

        An object written without any cached keys stores an empty string
        and "".split(" ") yields [""]; that empty name is dropped so
        callers never touch a value entry that was never written.
        """
        keys_key = self._keys_key()
        if keys_key not in self._cache:
            return None
        return [key for key in self._cache[keys_key].split(" ") if key]

    def cache_key(self, key):
        """Return the cache key name for the given key."""
        key = key.replace(" ", "_")
        if self._root:
            return key
        return self._id + " " + key

    def cache_read(self):
        """Read information from the cache.

        Does nothing if the cache holds no key list for this object.
        Keys that were set locally as non-cached are left untouched.
        """
        keys = self._stored_keys()
        if keys is None:
            return

        for key in keys:
            cache_key = self.cache_key(key)
            if key not in self._cached or self._cached[key]:
                # Key either hasn't been loaded, or is one for the cache
                self._value[key] = self._cache[cache_key]
                self._type[key] = self._cache[cache_key + " type"]
                self._cached[key] = 1

    def cache_write(self, sync=1):
        """Write information to the cache.

        Entries written previously are removed first, then every key that
        is marked as cached is stored together with its type.  If sync is
        true the cache's sync() method is called afterwards.
        """
        self.cache_clear(sync=0)

        keys = []
        for key in self.keys():
            cache_key = self.cache_key(key)
            if not self._cached[key]:
                if cache_key in self._cache:
                    # Non-cached keys need to be cleared
                    del self._cache[cache_key]
                    del self._cache[cache_key + " type"]
                continue

            keys.append(key)
            self._cache[cache_key] = self._value[key]
            self._cache[cache_key + " type"] = self._type[key]

        self._cache[self._keys_key()] = " ".join(keys)
        if sync:
            self._cache.sync()

    def cache_clear(self, sync=1):
        """Remove information from the cache.

        Returns without syncing if the cache holds no key list for this
        object.  If sync is true the cache's sync() method is called
        after the entries have been deleted.
        """
        keys = self._stored_keys()
        if keys is None:
            return
        del self._cache[self._keys_key()]

        for key in keys:
            cache_key = self.cache_key(key)
            del self._cache[cache_key]
            del self._cache[cache_key + " type"]

        if sync:
            self._cache.sync()

    def has_key(self, key):
        """Check whether the key exists."""
        key = key.replace(" ", "_")
        return key in self._value

    def key_type(self, key):
        """Return the key type.

        Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")
        return self._type[key]

    def set(self, key, value, cached=1):
        """Set the value of the given key.

        If a set_KEY function exists that is called otherwise the
        string function is called and the date function if that fails
        (it nearly always will).

        cached is handed on to the typed setter; a false value marks the
        key as one that cache_write() must not store.  A set_KEY function
        is called as set_KEY(key, value) and does not receive it.
        """
        key = key.replace(" ", "_")

        try:
            func = getattr(self, "set_" + key)
        except AttributeError:
            pass
        else:
            return func(key, value)

        if value is None:
            return self.set_as_null(key, value, cached)
        try:
            return self.set_as_string(key, value, cached)
        except TypeError:
            # Not something that can be stored as a string, so treat it
            # as a date tuple.
            return self.set_as_date(key, value, cached)

    def get(self, key):
        """Return the value of the given key.

        If a get_KEY function exists that is called otherwise the
        correctly typed function is called if that exists.

        Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")

        try:
            func = getattr(self, "get_" + key)
        except AttributeError:
            pass
        else:
            return func(key)

        try:
            func = getattr(self, "get_as_" + self._type[key])
        except AttributeError:
            pass
        else:
            return func(key)

        # Unknown type name: hand back the raw stored value.
        return self._value[key]

    def set_as_string(self, key, value, cached=1):
        """Set the key to the string value.

        The value is passed through utf8() before it is stored.
        """
        value = utf8(value)

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.STRING
        self._cached[key] = cached

    def get_as_string(self, key):
        """Return the key as a string value.

        Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return self._value[key]

    def set_as_date(self, key, value, cached=1):
        """Set the key to the date value.

        The date should be a 9-item tuple as returned by time.gmtime().
        It is stored as its items joined with single spaces.
        """
        value = " ".join([str(s) for s in value])

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.DATE
        self._cached[key] = cached

    def get_as_date(self, key):
        """Return the key as a date value.

        The stored space-separated text is turned back into a tuple of
        integers.  Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        value = self._value[key]
        return tuple([int(i) for i in value.split(" ")])

    def set_as_null(self, key, value, cached=1):
        """Set the key to the null value.

        This only exists to make things less magic.  The value argument
        is ignored; an empty string is stored.
        """
        key = key.replace(" ", "_")
        self._value[key] = ""
        self._type[key] = self.NULL
        self._cached[key] = cached

    def get_as_null(self, key):
        """Return the key as the null value.

        Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return None

    def del_key(self, key):
        """Delete the given key.

        Raises KeyError if the key has not been set.
        """
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        del self._value[key]
        del self._type[key]
        del self._cached[key]

    def keys(self):
        """Return the list of cached keys."""
        return list(self._value)

    def __iter__(self):
        """Iterate the cached keys."""
        # Iterate over a snapshot so keys may be changed while looping.
        return iter(list(self._value))

    # Special methods
    __contains__ = has_key
    # Item assignment always stores a string; it does not go through set().
    __setitem__ = set_as_string
    __getitem__ = get
    __delitem__ = del_key
    __delattr__ = del_key

    def __setattr__(self, key, value):
        """Store underscore-prefixed names on the instance, others as keys."""
        if key.startswith("_"):
            self.__dict__[key] = value
        else:
            self.set(key, value)

    def __getattr__(self, key):
        """Return the value of a key accessed as an attribute.

        Only called when normal attribute lookup fails.  Raises
        AttributeError if there is no such key.
        """
        # Check __dict__ directly: on an instance whose __init__ has not
        # run, reading self._value would re-enter __getattr__ endlessly.
        if "_value" in self.__dict__ and self.has_key(key):
            return self.get(key)
        raise AttributeError(key)
def filename(directory, filename):
    """Build the path of a cache file inside directory.

    The given name has its URL scheme removed, every run of "?" or "/"
    turned into a single comma, and leading and trailing commas and dots
    stripped; the result is joined onto directory.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (re_url_scheme, ""),
        (re_slash, ","),
        (re_initial_cruft, ""),
        (re_final_cruft, ""),
    )

    cleaned = filename
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    return os.path.join(directory, cleaned)
def utf8(value):
    """Return the value as a UTF-8 encoded byte string.

    Text (unicode) values are encoded directly.  Byte strings are decoded
    as UTF-8 and, failing that, as ISO-8859-1 before being re-encoded.

    Raises TypeError if value is neither text nor something that can be
    decoded, e.g. a tuple.
    """
    # The text type is called "unicode" on Python 2 and "str" on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of the text type.
    if isinstance(value, text_type):
        return value.encode("utf-8")

    for encoding in ("utf-8", "iso-8859-1"):
        try:
            return text_type(value, encoding).encode("utf-8")
        except UnicodeError:
            continue

    # Last resort, kept from the original implementation: decode as ASCII
    # and substitute anything that does not fit.
    return text_type(value, "ascii", "replace").encode("utf-8")
You can’t perform that action at this time.
