pyRdfa.rdfs.cache
Managing Vocab Caching.
@summary: RDFa parser (distiller)
@requires: U{RDFLib<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
# -*- coding: utf-8 -*-
"""
Managing Vocab Caching.

@summary: RDFa parser (distiller)
@requires: U{RDFLib<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
"""
import os, sys, datetime, re

from rdflib import Graph

from ..utils import create_file_name

from . import VocabCachingInfo

# Regular expression object for a general XML application media type
xml_application_media_type = re.compile(r"application/[a-zA-Z0-9]+\+xml")

#===========================================================================================

import pickle

# Protocol to be used for pickle files. 0 is good for debug, it stores the data in ASCII; 1 is better for deployment,
# it stores data in binary format. Care should be taken for consistency; when changing from 0 to 1 or back, all
# cached data should be removed/regenerated, otherwise mess may occur...
_Pickle_Protocol = 1


def _load(fname):
    """
    Load a cached file and return the resulting object.

    The file is opened in binary mode: pickle data is a byte stream, and reading it
    through a text-mode file fails on Python 3.

    NOTE(security): unpickling executes code embedded in the file. Only files from the
    local vocabulary cache directory should ever be passed here.

    @param fname: file name
    @return: the unpickled Python object
    """
    with open(fname, "rb") as f:
        return pickle.load(f)


def _dump(obj, fname):
    """
    Dump an object into a cached file (pickled with L{_Pickle_Protocol}).

    @param obj: Python object to store
    @param fname: file name
    """
    # Binary mode is required: protocol 1 produces bytes, not text.
    with open(fname, "wb") as f:
        pickle.dump(obj, f, _Pickle_Protocol)
        f.flush()


#===========================================================================================
class CachedVocabIndex:
    """
    Class to manage the cache index. Takes care of finding the vocab directory, and manages the index
    to the individual vocab data.

    The vocab directory is set to a platform specific area, unless an environment variable
    sets it explicitly. The environment variable is "PyRdfaCacheDir"

    Every time the index is changed, the index is put back (via pickle) to the directory.

    @ivar app_data_dir: directory for the vocabulary cache directory
    @ivar index_fname: the full path of the index file on the disc
    @ivar indeces: the in-memory version of the index (a dictionary mapping URI-s to tuples)
    @ivar options: the error handler (option) object to send warnings to
    @type options: L{options.Options}
    @ivar report: whether details on the caching should be reported
    @type report: Boolean
    @ivar cache_writeable: False when the cache directory could not be created or is not usable for writing
    @type cache_writeable: Boolean
    @cvar vocabs: File name used for the index in the cache directory
    @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix)
    @type preference_path: dictionary, keyed by "mac", "win", and "unix"
    @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix"
    @type architectures: dictionary, mapping architectures to "mac", "win", or "unix"
    """
    # File Name used for the index in the cache directory
    vocabs = "cache_index"
    # Cache directories for the three major platforms...
    preference_path = {
        "mac": "Library/Application Support/pyRdfa-cache",
        "win": "pyRdfa-cache",
        "unix": ".pyRdfa-cache"
    }
    # various architectures as returned by the python call, and their mapping on platform.
    # If an architecture is not here, it is considered as unix
    architectures = {
        "darwin": "mac",
        "nt": "win",
        "win32": "win",
        "cygwin": "win"
    }

    def __init__(self, options = None):
        """
        @param options: the error handler (option) object to send warnings to
        @type options: L{options.Options}
        """
        self.options = options
        self.report = options is not None and options.vocab_cache_report
        # Defined on every path, so that it can always be read after construction
        self.cache_writeable = True

        # This is where the cache files should be
        self.app_data_dir = self._give_preference_path()
        self.index_fname = os.path.join(self.app_data_dir, self.vocabs)
        self.indeces = {}

        # Check whether that directory exists.
        if not os.path.isdir(self.app_data_dir):
            try:
                # makedirs (rather than mkdir) so that a nested cache location can be created, too
                os.makedirs(self.app_data_dir)
            except Exception as exc:
                self.cache_writeable = False
                self._report_info("Could not create the vocab cache area %s" % exc)
                return
        else:
            # check whether it is at least readable
            if not os.access(self.app_data_dir, os.R_OK):
                self.cache_writeable = False
                self._report_info("Vocab cache directory is not readable")
                return
            if not os.access(self.app_data_dir, os.W_OK):
                self.cache_writeable = False
                self._report_info("Vocab cache directory is not writeable, but readable")
                return

        if os.path.exists(self.index_fname):
            if os.access(self.index_fname, os.R_OK):
                # A failure to unpickle is deliberately propagated to the caller
                self.indeces = _load(self.index_fname)
            else:
                self._report_info("Vocab cache index not readable")
        else:
            # This is the very initial phase, creation of a new index
            if os.access(self.app_data_dir, os.W_OK):
                # This is then put into a pickle file to put the stake in the ground...
                try:
                    _dump(self.indeces, self.index_fname)
                except Exception as exc:
                    self._report_info("Could not create the vocabulary index %s" % exc)
            else:
                self._report_info("Vocabulary cache directory is not writeable")
                self.cache_writeable = False

    def _report_info(self, txt, context = None):
        """
        Send an informational message to the option object, if reporting is switched on.

        @param txt: the message
        @param context: optional context passed on to C{add_info}
        """
        if not self.report:
            return
        if context is None:
            self.options.add_info(txt, VocabCachingInfo)
        else:
            self.options.add_info(txt, VocabCachingInfo, context)

    def add_ref(self, uri, vocab_reference):
        """
        Add a new entry to the index, possibly removing the previous one.

        @param uri: the URI that serves as a key in the index dictionary
        @param vocab_reference: tuple consisting of file name, modification date, and expiration date
        """
        # Store the index right away
        self.indeces[uri] = vocab_reference
        try:
            _dump(self.indeces, self.index_fname)
        except Exception as exc:
            self._report_info("Could not store the cache index %s" % exc)

    def get_ref(self, uri):
        """
        Get an index entry, if available, None otherwise.
        The return value is a tuple: file name, modification date, and expiration date

        @param uri: the URI that serves as a key in the index dictionary
        """
        try:
            return tuple(self.indeces[uri])
        except KeyError:
            return None

    def _give_preference_path(self):
        """
        Find the vocab cache directory.

        @return: the value of the environment variable named by C{CACHE_DIR_VAR} if set, otherwise
        the platform specific default location
        """
        from ...pyRdfa import CACHE_DIR_VAR
        if CACHE_DIR_VAR in os.environ:
            return os.environ[CACHE_DIR_VAR]

        # find the preference path on the architecture; unknown platforms are handled as unix
        system = self.architectures.get(sys.platform, "unix")
        if system == "win":
            # there is a user variable set for that purpose
            app_data = os.path.expandvars("%APPDATA%")
            return os.path.join(app_data, self.preference_path[system])
        return os.path.join(os.path.expanduser('~'), self.preference_path[system])


#===========================================================================================
class CachedVocab(CachedVocabIndex):
    """
    Cache for a specific vocab. The content of the cache is the graph. These are also the data that are stored
    on the disc (in pickled form)

    @ivar graph: the RDF graph
    @ivar uri: vocabulary URI
    @ivar filename: file name (not the complete path) of the cached version
    @ivar creation_date: creation date of the cache
    @type creation_date: datetime
    @ivar expiration_date: expiration date of the cache
    @type expiration_date: datetime
    @ivar caching: whether the cache area could be accessed at all
    @type caching: Boolean
    """
    def __init__(self, URI, options = None, verify = True):
        """
        @param URI: real URI for the vocabulary file
        @param options: the error handler (option) object to send warnings to
        @type options: L{options.Options}
        @param verify: whether the SSL certificate needs to be verified.
        @type verify: bool
        """
        # First see if this particular vocab has been handled before. If yes, it is extracted and everything
        # else can be forgotten.
        self.uri = URI
        (self.filename, self.creation_date, self.expiration_date) = ("", None, None)
        self.graph = Graph()
        # Defaults, so that the error branch below works even if the index set-up fails very early
        self.options = options
        self.report = False

        try:
            CachedVocabIndex.__init__(self, options)
            vocab_reference = self.get_ref(URI)
            self.caching = True
        except Exception as exc:
            # what this means is that the caching becomes impossible through some system error...
            self._report_info("Could not access the vocabulary cache area %s" % exc, URI)
            vocab_reference = None
            self.caching = False

        if vocab_reference is None:
            # This has never been cached before
            self._report_info("No cache exists for %s, generating one" % URI)

            # Store all the cache data unless caching proves to be impossible
            if self._get_vocab_data(verify, newCache = True) and self.caching:
                self.filename = create_file_name(self.uri)
                self._store_caches()
                self._report_info("Generated a cache for %s, with an expiration date of %s" % (URI, self.expiration_date), URI)
            return

        (self.filename, self.creation_date, self.expiration_date) = vocab_reference
        self._report_info("Found a cache for %s, expiring on %s" % (URI, self.expiration_date))
        fname = os.path.join(self.app_data_dir, self.filename)
        # options may be None, or an object without the refresh flag; both mean "no forced refresh"
        refresh = getattr(options, "refresh_vocab_cache", False)
        # The index stores naive UTC datetimes, hence utcnow(); an entry without a date counts as expired
        still_valid = self.expiration_date is not None and datetime.datetime.utcnow() <= self.expiration_date

        if not refresh and still_valid:
            # We are fine, we can just extract the data from the cache and we're done
            self._report_info("Cache for %s is still valid; extracting the data" % URI)
            try:
                self.graph = _load(fname)
            except Exception as exc:
                # what this means is that the caching becomes impossible
                sys.excepthook(*sys.exc_info())
                self._report_info("Could not access the vocab cache %s (%s)" % (exc, fname), URI)
            return

        if refresh:
            self._report_info("Time check is bypassed; refreshing the cache for %s" % URI)
        else:
            self._report_info("Cache timeout; refreshing the cache for %s" % URI)

        # we have to refresh the graph
        if not self._get_vocab_data(verify, newCache = False):
            # bugger; the cache could not be refreshed, using the current one, and setting the cache artificially
            # to be valid for the coming hour, hoping that the access issues will be resolved by then...
            self._report_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, URI)
            try:
                self.graph = _load(fname)
                self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
            except Exception as exc:
                # what this means is that the caching becomes impossible
                sys.excepthook(*sys.exc_info())
                self._report_info("Could not access the vocabulary cache %s (%s)" % (exc, fname), URI)
                # Neither fresh nor cached data is available; do not overwrite the cache with nothing
                return

        self.creation_date = datetime.datetime.utcnow()
        self._report_info("Generated a new cache for %s, with an expiration date of %s" % (URI, self.expiration_date), URI)
        self._store_caches()

    def _get_vocab_data(self, verify, newCache = True):
        """
        Just a macro like function to get the data to be cached.

        @param verify: whether the SSL certificate needs to be verified
        @param newCache: passed through to C{return_graph}
        @return: whether a graph could be retrieved
        """
        from .process import return_graph
        self.graph, self.expiration_date = return_graph(self.uri, self.options, newCache, verify)
        return self.graph is not None

    def _store_caches(self):
        """Called if the creation date, etc, have been refreshed or new, and
        all content must be put into a cache file
        """
        # Store the cached version of the vocabulary file
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            _dump(self.graph, fname)
        except Exception as exc:
            self._report_info("Could not write cache file %s (%s)" % (fname, exc), self.uri)
        # Update the index
        self.add_ref(self.uri, (self.filename, self.creation_date, self.expiration_date))


#########################################################################################################################################

def offline_cache_generation(args, verify = True):
    """Generate a cache for the vocabulary in args.

    @param args: array of vocabulary URIs.
    @param verify: whether the SSL certificate needs to be verified.
    """
    class LocalOption:
        """Minimal stand-in for the option object: prints every message on the standard output."""
        def __init__(self):
            self.vocab_cache_report = True
            # Read by CachedVocab; False means that valid caches are not refreshed
            self.refresh_vocab_cache = False

        def pr(self, wae, txt, warning_type, context):
            print( "====" )
            if warning_type is not None: print( warning_type )
            print( wae + ": " + txt )
            if context is not None: print( context )
            print( "====" )

        def add_warning(self, txt, warning_type=None, context=None):
            """Add a warning to the processor graph.
            @param txt: the warning text.
            @keyword warning_type: Warning Class
            @type warning_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Warning", txt, warning_type, context)

        def add_info(self, txt, info_type=None, context=None):
            """Add an informational comment to the processor graph.
            @param txt: the information text.
            @keyword info_type: Info Class
            @type info_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Info", txt, info_type, context)

        def add_error(self, txt, err_type=None, context=None):
            """Add an error to the processor graph.
            @param txt: the information text.
            @keyword err_type: Error Class
            @type err_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Error", txt, err_type, context)

    for uri in args:
        # This should write the cache
        print( ">>>>> Writing Cache <<<<<" )
        CachedVocab(uri, options = LocalOption(), verify = verify)
        # Now read it back and print the content for tracing
        print( ">>>>> Reading Cache <<<<<" )
        rd = CachedVocab(uri, options = LocalOption(), verify = verify)
        print( "URI: " + uri )
        # CachedVocab only exposes the graph itself, so its size is what can be traced here
        print( "number of triples: %s" % (len(rd.graph) if rd.graph is not None else 0) )
class CachedVocabIndex:
    """
    Class to manage the cache index. Takes care of finding the vocab directory, and manages the index
    to the individual vocab data.

    The vocab directory is set to a platform specific area, unless an environment variable
    sets it explicitly. The environment variable is "PyRdfaCacheDir"

    Every time the index is changed, the index is put back (via pickle) to the directory.

    @ivar app_data_dir: directory for the vocabulary cache directory
    @ivar index_fname: the full path of the index file on the disc
    @ivar indeces: the in-memory version of the index (a dictionary mapping URI-s to tuples)
    @ivar options: the error handler (option) object to send warnings to
    @type options: L{options.Options}
    @ivar report: whether details on the caching should be reported
    @type report: Boolean
    @ivar cache_writeable: False when the cache directory could not be created or is not usable for writing
    @type cache_writeable: Boolean
    @cvar vocabs: File name used for the index in the cache directory
    @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix)
    @type preference_path: dictionary, keyed by "mac", "win", and "unix"
    @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix"
    @type architectures: dictionary, mapping architectures to "mac", "win", or "unix"
    """
    # File Name used for the index in the cache directory
    vocabs = "cache_index"
    # Cache directories for the three major platforms...
    preference_path = {
        "mac": "Library/Application Support/pyRdfa-cache",
        "win": "pyRdfa-cache",
        "unix": ".pyRdfa-cache"
    }
    # various architectures as returned by the python call, and their mapping on platform.
    # If an architecture is not here, it is considered as unix
    architectures = {
        "darwin": "mac",
        "nt": "win",
        "win32": "win",
        "cygwin": "win"
    }

    def __init__(self, options = None):
        """
        @param options: the error handler (option) object to send warnings to
        @type options: L{options.Options}
        """
        self.options = options
        self.report = options is not None and options.vocab_cache_report
        # Defined on every path, so that it can always be read after construction
        self.cache_writeable = True

        # This is where the cache files should be
        self.app_data_dir = self._give_preference_path()
        self.index_fname = os.path.join(self.app_data_dir, self.vocabs)
        self.indeces = {}

        # Check whether that directory exists.
        if not os.path.isdir(self.app_data_dir):
            try:
                # makedirs (rather than mkdir) so that a nested cache location can be created, too
                os.makedirs(self.app_data_dir)
            except Exception as exc:
                self.cache_writeable = False
                self._report_info("Could not create the vocab cache area %s" % exc)
                return
        else:
            # check whether it is at least readable
            if not os.access(self.app_data_dir, os.R_OK):
                self.cache_writeable = False
                self._report_info("Vocab cache directory is not readable")
                return
            if not os.access(self.app_data_dir, os.W_OK):
                self.cache_writeable = False
                self._report_info("Vocab cache directory is not writeable, but readable")
                return

        if os.path.exists(self.index_fname):
            if os.access(self.index_fname, os.R_OK):
                # A failure to unpickle is deliberately propagated to the caller
                self.indeces = _load(self.index_fname)
            else:
                self._report_info("Vocab cache index not readable")
        else:
            # This is the very initial phase, creation of a new index
            if os.access(self.app_data_dir, os.W_OK):
                # This is then put into a pickle file to put the stake in the ground...
                try:
                    _dump(self.indeces, self.index_fname)
                except Exception as exc:
                    self._report_info("Could not create the vocabulary index %s" % exc)
            else:
                self._report_info("Vocabulary cache directory is not writeable")
                self.cache_writeable = False

    def _report_info(self, txt, context = None):
        """
        Send an informational message to the option object, if reporting is switched on.

        @param txt: the message
        @param context: optional context passed on to C{add_info}
        """
        if not self.report:
            return
        if context is None:
            self.options.add_info(txt, VocabCachingInfo)
        else:
            self.options.add_info(txt, VocabCachingInfo, context)

    def add_ref(self, uri, vocab_reference):
        """
        Add a new entry to the index, possibly removing the previous one.

        @param uri: the URI that serves as a key in the index dictionary
        @param vocab_reference: tuple consisting of file name, modification date, and expiration date
        """
        # Store the index right away
        self.indeces[uri] = vocab_reference
        try:
            _dump(self.indeces, self.index_fname)
        except Exception as exc:
            self._report_info("Could not store the cache index %s" % exc)

    def get_ref(self, uri):
        """
        Get an index entry, if available, None otherwise.
        The return value is a tuple: file name, modification date, and expiration date

        @param uri: the URI that serves as a key in the index dictionary
        """
        try:
            return tuple(self.indeces[uri])
        except KeyError:
            return None

    def _give_preference_path(self):
        """
        Find the vocab cache directory.

        @return: the value of the environment variable named by C{CACHE_DIR_VAR} if set, otherwise
        the platform specific default location
        """
        from ...pyRdfa import CACHE_DIR_VAR
        if CACHE_DIR_VAR in os.environ:
            return os.environ[CACHE_DIR_VAR]

        # find the preference path on the architecture; unknown platforms are handled as unix
        system = self.architectures.get(sys.platform, "unix")
        if system == "win":
            # there is a user variable set for that purpose
            app_data = os.path.expandvars("%APPDATA%")
            return os.path.join(app_data, self.preference_path[system])
        return os.path.join(os.path.expanduser('~'), self.preference_path[system])
Class to manage the cache index. Takes care of finding the vocab directory, and manages the index to the individual vocab data.
The vocab directory is set to a platform specific area, unless an environment variable sets it explicitly. The environment variable is "PyRdfaCacheDir"
Every time the index is changed, the index is put back (via pickle) to the directory.
@ivar app_data_dir: directory for the vocabulary cache directory @ivar index_fname: the full path of the index file on the disc @ivar indeces: the in-memory version of the index (a dictionary mapping URI-s to tuples) @ivar options: the error handler (option) object to send warnings to @type options: L{options.Options} @ivar report: whether details on the caching should be reported @type report: Boolean @cvar vocabs: File name used for the index in the cache directory @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix) @type preference_path: dictionary, keyed by "mac", "win", and "unix" @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix" @type architectures: dictionary, mapping architectures to "mac", "win", or "unix"
98 def __init__(self, options = None): 99 """ 100 @param options: the error handler (option) object to send warnings to 101 @type options: L{options.Options} 102 """ 103 self.options = options 104 self.report = options is not None and options.vocab_cache_report 105 106 # This is where the cache files should be 107 self.app_data_dir = self._give_preference_path() 108 self.index_fname = os.path.join(self.app_data_dir, self.vocabs) 109 self.indeces = {} 110 111 # Check whether that directory exists. 112 if not os.path.isdir(self.app_data_dir): 113 try: 114 os.mkdir(self.app_data_dir) 115 except Exception: 116 _t, value, _traceback = sys.exc_info() 117 if self.report: options.add_info("Could not create the vocab cache area %s" % value, VocabCachingInfo) 118 return 119 else: 120 # check whether it is at least readable 121 if not os.access(self.app_data_dir, os.R_OK): 122 if self.report: options.add_info("Vocab cache directory is not readable", VocabCachingInfo) 123 return 124 if not os.access(self.app_data_dir, os.W_OK): 125 if self.report: options.add_info("Vocab cache directory is not writeable, but readable", VocabCachingInfo) 126 return 127 128 if os.path.exists(self.index_fname): 129 if os.access(self.index_fname, os.R_OK): 130 self.indeces = _load(self.index_fname) 131 else: 132 if self.report: options.add_info("Vocab cache index not readable", VocabCachingInfo) 133 else: 134 # This is the very initial phase, creation 135 # of a a new index 136 if os.access(self.app_data_dir, os.W_OK): 137 # This is then put into a pickle file to put the stake in the ground... 138 try: 139 _dump(self.indeces, self.index_fname) 140 except Exception: 141 _t, value, _traceback = sys.exc_info() 142 if self.report: options.add_info("Could not create the vocabulary index %s" % value, VocabCachingInfo) 143 else: 144 if self.report: options.add_info("Vocabulary cache directory is not writeable", VocabCachingInfo) 145 self.cache_writeable = False
@param options: the error handler (option) object to send warnings to @type options: L{options.Options}
147 def add_ref(self, uri, vocab_reference): 148 """ 149 Add a new entry to the index, possibly removing the previous one. 150 151 @param uri: the URI that serves as a key in the index directory 152 @param vocab_reference: tuple consisting of file name, modification date, and expiration date 153 """ 154 # Store the index right away 155 self.indeces[uri] = vocab_reference 156 try: 157 _dump(self.indeces, self.index_fname) 158 except Exception: 159 _t, value, _traceback = sys.exc_info() 160 if self.report: self.options.add_info("Could not store the cache index %s" % value, VocabCachingInfo)
Add a new entry to the index, possibly removing the previous one.
@param uri: the URI that serves as a key in the index dictionary @param vocab_reference: tuple consisting of file name, modification date, and expiration date
162 def get_ref(self, uri): 163 """ 164 Get an index entry, if available, None otherwise. 165 The return value is a tuple: file name, modification date, and expiration date 166 167 @param uri: the URI that serves as a key in the index directory 168 """ 169 if uri in self.indeces: 170 return tuple(self.indeces[uri]) 171 else: 172 return None
Get an index entry, if available, None otherwise. The return value is a tuple: file name, modification date, and expiration date
@param uri: the URI that serves as a key in the index dictionary
class CachedVocab(CachedVocabIndex):
    """
    Cache for a specific vocab. The content of the cache is the graph. These are also the data that are stored
    on the disc (in pickled form)

    @ivar graph: the RDF graph
    @ivar uri: vocabulary URI
    @ivar filename: file name (not the complete path) of the cached version
    @ivar creation_date: creation date of the cache
    @type creation_date: datetime
    @ivar expiration_date: expiration date of the cache
    @type expiration_date: datetime
    @ivar caching: whether the cache area could be accessed at all
    @type caching: Boolean
    """
    def __init__(self, URI, options = None, verify = True):
        """
        @param URI: real URI for the vocabulary file
        @param options: the error handler (option) object to send warnings to
        @type options: L{options.Options}
        @param verify: whether the SSL certificate needs to be verified.
        @type verify: bool
        """
        # First see if this particular vocab has been handled before. If yes, it is extracted and everything
        # else can be forgotten.
        self.uri = URI
        (self.filename, self.creation_date, self.expiration_date) = ("", None, None)
        self.graph = Graph()
        # Defaults, so that the error branch below works even if the index set-up fails very early
        self.options = options
        self.report = False

        try:
            CachedVocabIndex.__init__(self, options)
            vocab_reference = self.get_ref(URI)
            self.caching = True
        except Exception as exc:
            # what this means is that the caching becomes impossible through some system error...
            self._report_info("Could not access the vocabulary cache area %s" % exc, URI)
            vocab_reference = None
            self.caching = False

        if vocab_reference is None:
            # This has never been cached before
            self._report_info("No cache exists for %s, generating one" % URI)

            # Store all the cache data unless caching proves to be impossible
            if self._get_vocab_data(verify, newCache = True) and self.caching:
                self.filename = create_file_name(self.uri)
                self._store_caches()
                self._report_info("Generated a cache for %s, with an expiration date of %s" % (URI, self.expiration_date), URI)
            return

        (self.filename, self.creation_date, self.expiration_date) = vocab_reference
        self._report_info("Found a cache for %s, expiring on %s" % (URI, self.expiration_date))
        fname = os.path.join(self.app_data_dir, self.filename)
        # options may be None, or an object without the refresh flag; both mean "no forced refresh"
        refresh = getattr(options, "refresh_vocab_cache", False)
        # The index stores naive UTC datetimes, hence utcnow(); an entry without a date counts as expired
        still_valid = self.expiration_date is not None and datetime.datetime.utcnow() <= self.expiration_date

        if not refresh and still_valid:
            # We are fine, we can just extract the data from the cache and we're done
            self._report_info("Cache for %s is still valid; extracting the data" % URI)
            try:
                self.graph = _load(fname)
            except Exception as exc:
                # what this means is that the caching becomes impossible
                sys.excepthook(*sys.exc_info())
                self._report_info("Could not access the vocab cache %s (%s)" % (exc, fname), URI)
            return

        if refresh:
            self._report_info("Time check is bypassed; refreshing the cache for %s" % URI)
        else:
            self._report_info("Cache timeout; refreshing the cache for %s" % URI)

        # we have to refresh the graph
        if not self._get_vocab_data(verify, newCache = False):
            # bugger; the cache could not be refreshed, using the current one, and setting the cache artificially
            # to be valid for the coming hour, hoping that the access issues will be resolved by then...
            self._report_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, URI)
            try:
                self.graph = _load(fname)
                self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
            except Exception as exc:
                # what this means is that the caching becomes impossible
                sys.excepthook(*sys.exc_info())
                self._report_info("Could not access the vocabulary cache %s (%s)" % (exc, fname), URI)
                # Neither fresh nor cached data is available; do not overwrite the cache with nothing
                return

        self.creation_date = datetime.datetime.utcnow()
        self._report_info("Generated a new cache for %s, with an expiration date of %s" % (URI, self.expiration_date), URI)
        self._store_caches()

    def _report_info(self, txt, context = None):
        """
        Send an informational message to the option object, if reporting is switched on.

        @param txt: the message
        @param context: optional context passed on to C{add_info}
        """
        if not self.report:
            return
        if context is None:
            self.options.add_info(txt, VocabCachingInfo)
        else:
            self.options.add_info(txt, VocabCachingInfo, context)

    def _get_vocab_data(self, verify, newCache = True):
        """
        Just a macro like function to get the data to be cached.

        @param verify: whether the SSL certificate needs to be verified
        @param newCache: passed through to C{return_graph}
        @return: whether a graph could be retrieved
        """
        from .process import return_graph
        self.graph, self.expiration_date = return_graph(self.uri, self.options, newCache, verify)
        return self.graph is not None

    def _store_caches(self):
        """Called if the creation date, etc, have been refreshed or new, and
        all content must be put into a cache file
        """
        # Store the cached version of the vocabulary file
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            _dump(self.graph, fname)
        except Exception as exc:
            # The message has to be formatted with %; passing the tuple as a separate argument breaks add_info
            self._report_info("Could not write cache file %s (%s)" % (fname, exc), self.uri)
        # Update the index
        self.add_ref(self.uri, (self.filename, self.creation_date, self.expiration_date))
Cache for a specific vocab. The content of the cache is the graph. These are also the data that are stored on the disc (in pickled form)
@ivar graph: the RDF graph @ivar uri: vocabulary URI @ivar filename: file name (not the complete path) of the cached version @ivar creation_date: creation date of the cache @type creation_date: datetime @ivar expiration_date: expiration date of the cache @type expiration_date: datetime @cvar runtime_cache : a run-time cache for already 'seen' vocabulary files. Apart from (marginally) speeding up processing, this also prevents recursion @type runtime_cache : dictionary
def __init__(self, URI, options = None, verify = True):
    """
    Set up the cache for one vocabulary. If the index has no entry for the URI, the
    vocabulary is fetched and, when caching is possible, stored. If an entry exists and
    is still valid, the pickled graph is loaded from the cache file; otherwise the
    vocabulary is fetched again, falling back to the old cache file (with its expiration
    extended by an hour) when the refresh fails.

    @param URI: real URI for the vocabulary file
    @param options: the error handler (option) object to send warnings to
    @type options: L{options.Options}
    @param verify: whether the SSL certificate needs to be verified.
    @type verify: bool
    """
    # First see if this particular vocab has been handled before. If yes, it is extracted and everything
    # else can be forgotten.
    self.uri = URI
    (self.filename, self.creation_date, self.expiration_date) = ("", None, None)
    self.graph = Graph()

    try:
        CachedVocabIndex.__init__(self, options)
        vocab_reference = self.get_ref(URI)
        self.caching = True
    except Exception as err:
        # what this means is that the caching becomes impossible through some system error...
        if self.report: options.add_info("Could not access the vocabulary cache area %s" % err, VocabCachingInfo, URI)
        vocab_reference = None
        self.caching = False

    # 'options' defaults to None and stand-in option objects may lack the attribute;
    # in both cases behave as if no forced refresh was requested.
    refresh_flag = getattr(options, "refresh_vocab_cache", False)

    if vocab_reference is None:
        # This has never been cached before
        if self.report: options.add_info("No cache exists for %s, generating one" % URI, VocabCachingInfo)

        # Store all the cache data unless caching proves to be impossible
        if self._get_vocab_data(verify, newCache = True) and self.caching:
            self.filename = create_file_name(self.uri)
            self._store_caches()
            if self.report:
                options.add_info("Generated a cache for %s, with an expiration date of %s" % (URI, self.expiration_date), VocabCachingInfo, URI)
        return

    (self.filename, self.creation_date, self.expiration_date) = vocab_reference
    if self.report: options.add_info("Found a cache for %s, expiring on %s" % (URI, self.expiration_date), VocabCachingInfo)

    # Check if the expiration date is still away. An entry without an expiration date
    # cannot be compared with the current time, so it is treated as expired.
    still_valid = self.expiration_date is not None and datetime.datetime.utcnow() <= self.expiration_date
    if refresh_flag == False and still_valid:
        # We are fine, we can just extract the data from the cache and we're done
        if self.report: options.add_info("Cache for %s is still valid; extracting the data" % URI, VocabCachingInfo)
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            self.graph = _load(fname)
        except Exception:
            # The cache file could not be read; the traceback is shown and the problem is reported
            exc_type, value, tb = sys.exc_info()
            sys.excepthook(exc_type, value, tb)
            if self.report: options.add_info("Could not access the vocab cache %s (%s)" % (value, fname), VocabCachingInfo, URI)
        return

    if self.report:
        if refresh_flag == True:
            options.add_info("Time check is bypassed; refreshing the cache for %s" % URI, VocabCachingInfo)
        else:
            options.add_info("Cache timeout; refreshing the cache for %s" % URI, VocabCachingInfo)

    # we have to refresh the graph
    if not self._get_vocab_data(verify, newCache = False):
        # bugger; the cache could not be refreshed, using the current one, and setting the cache artificially
        # to be valid for the coming hour, hoping that the access issues will be resolved by then...
        if self.report:
            options.add_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, VocabCachingInfo, URI)
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            self.graph = _load(fname)
            self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
        except Exception:
            exc_type, value, tb = sys.exc_info()
            sys.excepthook(exc_type, value, tb)
            if self.report: options.add_info("Could not access the vocabulary cache %s (%s)" % (value, fname), VocabCachingInfo, URI)
            # Neither the refresh nor the old cache file produced a graph (self.graph is
            # None at this point): storing it would overwrite the cache file and the
            # index entry with unusable data, so stop here.
            return

    self.creation_date = datetime.datetime.utcnow()
    if self.report:
        options.add_info("Generated a new cache for %s, with an expiration date of %s" % (URI, self.expiration_date), VocabCachingInfo, URI)

    self._store_caches()
@param URI: real URI for the vocabulary file @param options: the error handler (option) object to send warnings to @type options: L{options.Options} @param verify: whether the SSL certificate needs to be verified. @type verify: bool
Inherited Members
def offline_cache_generation(args, verify = True):
    """Generate a cache for the vocabulary in args.

    For every URI a L{CachedVocab} is created twice: the first instance should write the
    cache, the second one reads it back so that the result can be printed for tracing.
    All messages of the cache handling are printed on the standard output.

    @param args: array of vocabulary URIs.
    @param verify: whether the SSL certificate needs to be verified.
    """
    class LocalOption:
        """Minimal stand-in for the options object: every message is simply printed."""
        def __init__(self):
            self.vocab_cache_report = True
            # CachedVocab.__init__ reads this attribute as soon as an index entry exists
            # for the URI (i.e. on the read-back below); without it the second
            # construction fails with an AttributeError. False: use the cache if valid.
            self.refresh_vocab_cache = False

        def pr(self, wae, txt, warning_type, context):
            """Print one message, framed by separator lines.
            @param wae: message category label ("Warning", "Info" or "Error")
            @param txt: the message text
            @param warning_type: message class, printed if not None
            @param context: message context, printed if not None
            """
            print( "====" )
            if warning_type is not None: print( warning_type )
            print( wae + ": " + txt )
            if context is not None: print( context )
            print( "====" )

        def add_warning(self, txt, warning_type=None, context=None):
            """Add a warning to the processor graph.
            @param txt: the warning text.
            @keyword warning_type: Warning Class
            @type warning_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Warning", txt, warning_type, context)

        def add_info(self, txt, info_type=None, context=None):
            """Add an informational comment to the processor graph.
            @param txt: the information text.
            @keyword info_type: Info Class
            @type info_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Info", txt, info_type, context)

        def add_error(self, txt, err_type=None, context=None):
            """Add an error to the processor graph.
            @param txt: the information text.
            @keyword err_type: Error Class
            @type err_type: URIRef
            @keyword context: possible context to be added to the processor graph
            @type context: URIRef or String
            """
            self.pr("Error", txt, err_type, context)

    for uri in args:
        # This should write the cache
        print( ">>>>> Writing Cache <<<<<" )
        CachedVocab(uri, options = LocalOption(), verify = verify)
        # Now read it back and print the content for tracing
        print( ">>>>> Reading Cache <<<<<" )
        rd = CachedVocab(uri, options = LocalOption(), verify = verify)
        print( "URI: " + uri )
        # NOTE(review): the visible CachedVocab.__init__ does not set 'vocabulary', 'terms'
        # or 'ns'; they are read defensively so that tracing cannot fail on a missing
        # attribute. Confirm whether these attributes still exist.
        print( "default vocab: %s" % (getattr(rd, "vocabulary", None),) )
        print( "terms: %s prefixes: %s" % (getattr(rd, "terms", None), getattr(rd, "ns", None)) )
Generate a cache for the vocabulary in args.
@param args: array of vocabulary URIs. @param verify: whether the SSL certificate needs to be verified.