pyRdfa.rdfs.cache

Managing Vocab Caching.

@summary: RDFa parser (distiller) @requires: U{RDFLib<http://rdflib.net>} @organization: U{World Wide Web Consortium<http://www.w3.org>} @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>} @license: This software is available for use under the U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}

  1# -*- coding: utf-8 -*-
  2"""
  3Managing Vocab Caching.
  4
  5@summary: RDFa parser (distiller)
  6@requires: U{RDFLib<http://rdflib.net>}
  7@organization: U{World Wide Web Consortium<http://www.w3.org>}
  8@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
  9@license: This software is available for use under the
 10U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
 11"""
 12import os, sys, datetime, re
 13
 14from rdflib import Graph
 15
 16from ..utils import create_file_name
 17
 18from . import VocabCachingInfo
 19
 20# Regular expression object for a general XML application media type
 21xml_application_media_type = re.compile(r"application/[a-zA-Z0-9]+\+xml")
 22
 23#===========================================================================================
 24
 25import pickle
 26
 27# Protocol to be used for pickle files. 0 is good for debug, it stores the data in ASCII; 1 is better for deployment,
 28# it stores data in binary format. Care should be taken for consistency; when changing from 0 to 1 or back, all
 29# cached data should be removed/regenerated, otherwise mess may occur...
 30_Pickle_Protocol = 1
 31
 32# If I could rely on python 2.5 or 2.6 (or higher) I could use the with...as... idiom for what is below, it
 33# is indeed nicer. But I cannot...
 34def _load(fname):
 35    """
 36    Load a cached file and return the resulting object
 37    @param fname: file name
 38    """
 39    try:
 40        f = open(fname)
 41        return pickle.load(f)
 42    finally:
 43        f.close()
 44
 45def _dump(obj, fname):
 46    """
 47    Dump an object into cached file
 48    @param obj: Python object to store
 49    @param fname: file name
 50    """
 51    try:
 52        f = open(fname, "w")
 53        pickle.dump(obj, f, _Pickle_Protocol)
 54        f.flush()
 55    finally:
 56        f.close()
 57
 58#===========================================================================================
 59class CachedVocabIndex:
 60    """
 61    Class to manage the cache index. Takes care of finding the vocab directory, and manages the index
 62    to the individual vocab data.
 63
 64    The vocab directory is set to a platform specific area, unless an environment variable
 65    sets it explicitly. The environment variable is "PyRdfaCacheDir"
 66
 67    Every time the index is changed, the index is put back (via pickle) to the directory.
 68
 69    @ivar app_data_dir: directory for the vocabulary cache directory
 70    @ivar index_fname: the full path of the index file on the disc
 71    @ivar indeces: the in-memory version of the index (a directory mapping URI-s to tuples)
 72    @ivar options: the error handler (option) object to send warnings to
 73    @type options: L{options.Options}
 74    @ivar report: whether details on the caching should be reported
 75    @type report: Boolean
 76    @cvar vocabs: File name used for the index in the cache directory
 77    @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix)
 78    @type preference_path: directory, keyed by "mac", "win", and "unix"
 79    @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix"
 80    @type architectures: directory, mapping architectures to "mac", "win", or "unix"
 81    """
 82    # File Name used for the index in the cache directory
 83    vocabs = "cache_index"
 84    # Cache directories for the three major platforms...
 85    preference_path = {
 86        "mac":  "Library/Application Support/pyRdfa-cache",
 87        "win":  "pyRdfa-cache",
 88        "unix": ".pyRdfa-cache"
 89    }
 90    # various architectures as returned by the python call, and their mapping on platorm. If an architecture is not here, it is considered as unix
 91    architectures = {
 92        "darwin": "mac",
 93        "nt":     "win",
 94        "win32":  "win",
 95        "cygwin": "win"
 96    }
 97    def __init__(self, options = None):
 98        """
 99        @param options: the error handler (option) object to send warnings to
100        @type options: L{options.Options}
101        """
102        self.options = options
103        self.report = options is not None and options.vocab_cache_report
104
105        # This is where the cache files should be
106        self.app_data_dir = self._give_preference_path()
107        self.index_fname = os.path.join(self.app_data_dir, self.vocabs)
108        self.indeces = {}
109
110        # Check whether that directory exists.
111        if not os.path.isdir(self.app_data_dir):
112            try:
113                os.mkdir(self.app_data_dir)
114            except Exception:
115                _t, value, _traceback = sys.exc_info()
116                if self.report: options.add_info("Could not create the vocab cache area %s" % value, VocabCachingInfo)
117                return
118        else:
119            # check whether it is at least readable
120            if not os.access(self.app_data_dir, os.R_OK):
121                if self.report: options.add_info("Vocab cache directory is not readable", VocabCachingInfo)
122                return
123            if not os.access(self.app_data_dir, os.W_OK):
124                if self.report: options.add_info("Vocab cache directory is not writeable, but readable", VocabCachingInfo)
125                return
126
127        if os.path.exists(self.index_fname):
128            if os.access(self.index_fname, os.R_OK):
129                self.indeces = _load(self.index_fname)
130            else:
131                if self.report: options.add_info("Vocab cache index not readable", VocabCachingInfo)
132        else:
133            # This is the very initial phase, creation
134            # of a a new index
135            if os.access(self.app_data_dir, os.W_OK):
136                # This is then put into a pickle file to put the stake in the ground...
137                try:
138                    _dump(self.indeces, self.index_fname)
139                except Exception:
140                    _t, value, _traceback = sys.exc_info()
141                    if self.report: options.add_info("Could not create the vocabulary index %s" % value, VocabCachingInfo)
142            else:
143                if self.report: options.add_info("Vocabulary cache directory is not writeable", VocabCachingInfo)
144                self.cache_writeable = False
145
146    def add_ref(self, uri, vocab_reference):
147        """
148        Add a new entry to the index, possibly removing the previous one.
149
150        @param uri: the URI that serves as a key in the index directory
151        @param vocab_reference: tuple consisting of file name, modification date, and expiration date
152        """
153        # Store the index right away
154        self.indeces[uri] = vocab_reference
155        try:
156            _dump(self.indeces, self.index_fname)
157        except Exception:
158            _t, value, _traceback = sys.exc_info()
159            if self.report: self.options.add_info("Could not store the cache index %s" % value, VocabCachingInfo)
160
161    def get_ref(self, uri):
162        """
163        Get an index entry, if available, None otherwise.
164        The return value is a tuple: file name, modification date, and expiration date
165
166        @param uri: the URI that serves as a key in the index directory
167        """
168        if uri in self.indeces:
169            return tuple(self.indeces[uri])
170        else:
171            return None
172
173    def _give_preference_path(self):
174        """
175        Find the vocab cache directory.
176        """
177        from ...pyRdfa import CACHE_DIR_VAR
178        if CACHE_DIR_VAR in os.environ:
179            return os.environ[CACHE_DIR_VAR]
180        else:
181            # find the preference path on the architecture
182            platform = sys.platform
183            if platform in self.architectures:
184                system = self.architectures[platform]
185            else:
186                system = "unix"
187
188            if system == "win":
189                # there is a user variable set for that purpose
190                app_data = os.path.expandvars("%APPDATA%")
191                return os.path.join(app_data, self.preference_path[system])
192            else:
193                return os.path.join(os.path.expanduser('~'), self.preference_path[system])
194
195#===========================================================================================
class CachedVocab(CachedVocabIndex):
    """
    Cache for a specific vocab. The content of the cache is the graph. These are also the data that are stored
    on the disc (in pickled form)

    @ivar graph: the RDF graph
    @ivar uri: vocabulary URI
    @ivar filename: file name (not the complete path) of the cached version
    @ivar creation_date: creation date of the cache
    @type creation_date: datetime
    @ivar expiration_date: expiration date of the cache
    @type expiration_date: datetime
    @ivar caching: whether the cache index could be set up without raising an error
    @type caching: Boolean
    """
    def __init__(self, URI, options = None, verify = True):
        """
        Get the graph for the vocabulary: from the cache when a valid entry exists, otherwise
        by retrieving it (and, when caching is possible, storing it in the cache).

        @param URI: real URI for the vocabulary file
        @param options: the error handler (option) object to send warnings to
        @type options: L{options.Options}
        @param verify: whether the SSL certificate needs to be verified.
        @type verify: bool
        """
        # First see if this particular vocab has been handled before. If yes, it is extracted and everything
        # else can be forgotten.
        self.uri = URI
        (self.filename, self.creation_date, self.expiration_date) = ("", None, None)
        self.graph = Graph()

        try:
            CachedVocabIndex.__init__(self, options)
            vocab_reference = self.get_ref(URI)
            self.caching = True
        except Exception:
            # what this means is that the caching becomes impossible through some system error...
            _t, value, _traceback = sys.exc_info()
            # the parent constructor may have failed before setting the report flag
            self.report = getattr(self, "report", False)
            if self.report: options.add_info("Could not access the vocabulary cache area %s" % value, VocabCachingInfo, URI)
            vocab_reference = None
            self.caching = False

        if vocab_reference is None:
            # This has never been cached before
            if self.report: options.add_info("No cache exists for %s, generating one" % URI, VocabCachingInfo)

            # Store all the cache data unless caching proves to be impossible
            if self._get_vocab_data(verify, newCache = True) and self.caching:
                self.filename = create_file_name(self.uri)
                # record when the cache was created, so that the index entry does not carry None
                self.creation_date = datetime.datetime.utcnow()
                self._store_caches()
                if self.report:
                    options.add_info("Generated a cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)
        else:
            (self.filename, self.creation_date, self.expiration_date) = vocab_reference
            if self.report: options.add_info("Found a cache for %s, expiring on %s" % (URI,self.expiration_date), VocabCachingInfo)
            # options may be None, or an object that does not define the refresh flag
            refresh = getattr(options, "refresh_vocab_cache", False)
            # Check if the expiration date is still away; an entry without a date is handled as expired
            still_valid = self.expiration_date is not None and datetime.datetime.utcnow() <= self.expiration_date
            if not refresh and still_valid:
                # We are fine, we can just extract the data from the cache and we're done
                if self.report: options.add_info("Cache for %s is still valid; extracting the data" % URI, VocabCachingInfo)
                fname = os.path.join(self.app_data_dir, self.filename)
                try:
                    self.graph = _load(fname)
                except Exception:
                    # what this means is that the caching becomes impossible
                    t, value, traceback = sys.exc_info()
                    sys.excepthook(t, value, traceback)
                    if self.report: options.add_info("Could not access the vocab cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
            else:
                if self.report:
                    if refresh:
                        options.add_info("Time check is bypassed; refreshing the cache for %s" % URI, VocabCachingInfo)
                    else:
                        options.add_info("Cache timeout; refreshing the cache for %s" % URI, VocabCachingInfo)
                # we have to refresh the graph
                if not self._get_vocab_data(verify, newCache = False):
                    # the cache could not be refreshed, using the current one, and setting the cache artificially
                    # to be valid for the coming hour, hoping that the access issues will be resolved by then...
                    if self.report:
                        options.add_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, VocabCachingInfo, URI)
                    fname = os.path.join(self.app_data_dir, self.filename)
                    try:
                        self.graph = _load(fname)
                        self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
                    except Exception:
                        # what this means is that the caching becomes impossible
                        t, value, traceback = sys.exc_info()
                        sys.excepthook(t, value, traceback)
                        if self.report: options.add_info("Could not access the vocabulary cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
                # If both the refresh and the reading of the old cache failed there is no graph;
                # storing it would overwrite the existing cache entry with unusable data.
                if self.graph is not None:
                    self.creation_date = datetime.datetime.utcnow()
                    if self.report:
                        options.add_info("Generated a new cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)

                    self._store_caches()

    def _get_vocab_data(self, verify, newCache = True):
        """Just a macro like function to get the data to be cached: sets the graph and
        the expiration date from the values returned by C{return_graph}.

        @param verify: whether the SSL certificate needs to be verified.
        @param newCache: passed on unchanged to C{return_graph}
        @return: whether a graph has been returned
        @rtype: Boolean
        """
        from .process import return_graph
        self.graph, self.expiration_date = return_graph(self.uri, self.options, newCache, verify)
        return self.graph is not None

    def _store_caches(self):
        """Called if the creation date, etc, have been refreshed or new, and
        all content must be put into a cache file. A failure to write the file is
        reported (when reporting is on) but not raised; the index is updated in all cases.
        """
        # Store the cached version of the vocabulary file
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            _dump(self.graph, fname)
        except Exception:
            _t, value, _traceback = sys.exc_info()
            # the message has to be formatted here; add_info takes the text, the info class and the context
            if self.report : self.options.add_info("Could not write cache file %s (%s)" % (fname, value), VocabCachingInfo, self.uri)
        # Update the index
        self.add_ref(self.uri,(self.filename, self.creation_date, self.expiration_date))
307
308#########################################################################################################################################
309
def offline_cache_generation(args, verify = True):
    """Generate a cache for the vocabulary in args. For each URI the cache is written,
    then read back, and the data of the resulting cache entry are printed for tracing.
    All messages of the caching code are printed on the standard output.

    @param args:   array of vocabulary URIs.
    @param verify: whether the SSL certificate needs to be verified.
    """
    class LocalOption:
        """Stand-in for the options object: it only prints the messages it receives."""
        def __init__(self):
            self.vocab_cache_report = True
            # CachedVocab reads this flag when it finds an existing cache entry, which is
            # the case when the cache is read back below
            self.refresh_vocab_cache = False

        def pr(self, wae, txt, warning_type, context):
            """Print one message, with its type and context when these are given.
            @param wae: label of the message ("Warning", "Info" or "Error")
            @param txt: the text of the message
            @param warning_type: class of the message, or None
            @param context: context of the message, or None
            """
            print( "====" )
            if warning_type is not None: print( warning_type )
            print( wae + ": " + txt )
            if context is not None: print( context )
            print( "====" )

        def add_warning(self, txt, warning_type=None, context=None):
            """Print a warning.
            @param txt: the warning text.
            @keyword warning_type: Warning Class
            @type warning_type: URIRef
            @keyword context: possible context of the warning
            @type context: URIRef or String
            """
            self.pr("Warning",txt,warning_type,context)

        def add_info(self, txt, info_type=None, context=None):
            """Print an informational comment.
            @param txt: the information text.
            @keyword info_type: Info Class
            @type info_type: URIRef
            @keyword context: possible context of the comment
            @type context: URIRef or String
            """
            self.pr("Info",txt,info_type,context)

        def add_error(self, txt, err_type=None, context=None):
            """Print an error.
            @param txt: the error text.
            @keyword err_type: Error Class
            @type err_type: URIRef
            @keyword context: possible context of the error
            @type context: URIRef or String
            """
            self.pr("Error",txt,err_type,context)

    for uri in args:
        # This should write the cache
        print( ">>>>> Writing Cache <<<<<" )
        _writ = CachedVocab(uri, options = LocalOption(), verify = verify)
        # Now read it back and print the content for tracing
        print( ">>>>> Reading Cache <<<<<" )
        rd = CachedVocab(uri, options = LocalOption(), verify = verify)
        print( "URI: " + uri )
        # Only attributes that CachedVocab sets are printed
        print( "cache file: %s" % rd.filename )
        print( "created: %s expires: %s" % (rd.creation_date, rd.expiration_date) )
        print( "triples: %s" % (len(rd.graph) if rd.graph is not None else 0) )
xml_application_media_type = re.compile('application/[a-zA-Z0-9]+\\+xml')
class CachedVocabIndex:
 60class CachedVocabIndex:
 61    """
 62    Class to manage the cache index. Takes care of finding the vocab directory, and manages the index
 63    to the individual vocab data.
 64
 65    The vocab directory is set to a platform specific area, unless an environment variable
 66    sets it explicitly. The environment variable is "PyRdfaCacheDir"
 67
 68    Every time the index is changed, the index is put back (via pickle) to the directory.
 69
 70    @ivar app_data_dir: directory for the vocabulary cache directory
 71    @ivar index_fname: the full path of the index file on the disc
 72    @ivar indeces: the in-memory version of the index (a directory mapping URI-s to tuples)
 73    @ivar options: the error handler (option) object to send warnings to
 74    @type options: L{options.Options}
 75    @ivar report: whether details on the caching should be reported
 76    @type report: Boolean
 77    @cvar vocabs: File name used for the index in the cache directory
 78    @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix)
 79    @type preference_path: directory, keyed by "mac", "win", and "unix"
 80    @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix"
 81    @type architectures: directory, mapping architectures to "mac", "win", or "unix"
 82    """
 83    # File Name used for the index in the cache directory
 84    vocabs = "cache_index"
 85    # Cache directories for the three major platforms...
 86    preference_path = {
 87        "mac":  "Library/Application Support/pyRdfa-cache",
 88        "win":  "pyRdfa-cache",
 89        "unix": ".pyRdfa-cache"
 90    }
 91    # various architectures as returned by the python call, and their mapping on platorm. If an architecture is not here, it is considered as unix
 92    architectures = {
 93        "darwin": "mac",
 94        "nt":     "win",
 95        "win32":  "win",
 96        "cygwin": "win"
 97    }
 98    def __init__(self, options = None):
 99        """
100        @param options: the error handler (option) object to send warnings to
101        @type options: L{options.Options}
102        """
103        self.options = options
104        self.report = options is not None and options.vocab_cache_report
105
106        # This is where the cache files should be
107        self.app_data_dir = self._give_preference_path()
108        self.index_fname = os.path.join(self.app_data_dir, self.vocabs)
109        self.indeces = {}
110
111        # Check whether that directory exists.
112        if not os.path.isdir(self.app_data_dir):
113            try:
114                os.mkdir(self.app_data_dir)
115            except Exception:
116                _t, value, _traceback = sys.exc_info()
117                if self.report: options.add_info("Could not create the vocab cache area %s" % value, VocabCachingInfo)
118                return
119        else:
120            # check whether it is at least readable
121            if not os.access(self.app_data_dir, os.R_OK):
122                if self.report: options.add_info("Vocab cache directory is not readable", VocabCachingInfo)
123                return
124            if not os.access(self.app_data_dir, os.W_OK):
125                if self.report: options.add_info("Vocab cache directory is not writeable, but readable", VocabCachingInfo)
126                return
127
128        if os.path.exists(self.index_fname):
129            if os.access(self.index_fname, os.R_OK):
130                self.indeces = _load(self.index_fname)
131            else:
132                if self.report: options.add_info("Vocab cache index not readable", VocabCachingInfo)
133        else:
134            # This is the very initial phase, creation
135            # of a a new index
136            if os.access(self.app_data_dir, os.W_OK):
137                # This is then put into a pickle file to put the stake in the ground...
138                try:
139                    _dump(self.indeces, self.index_fname)
140                except Exception:
141                    _t, value, _traceback = sys.exc_info()
142                    if self.report: options.add_info("Could not create the vocabulary index %s" % value, VocabCachingInfo)
143            else:
144                if self.report: options.add_info("Vocabulary cache directory is not writeable", VocabCachingInfo)
145                self.cache_writeable = False
146
147    def add_ref(self, uri, vocab_reference):
148        """
149        Add a new entry to the index, possibly removing the previous one.
150
151        @param uri: the URI that serves as a key in the index directory
152        @param vocab_reference: tuple consisting of file name, modification date, and expiration date
153        """
154        # Store the index right away
155        self.indeces[uri] = vocab_reference
156        try:
157            _dump(self.indeces, self.index_fname)
158        except Exception:
159            _t, value, _traceback = sys.exc_info()
160            if self.report: self.options.add_info("Could not store the cache index %s" % value, VocabCachingInfo)
161
162    def get_ref(self, uri):
163        """
164        Get an index entry, if available, None otherwise.
165        The return value is a tuple: file name, modification date, and expiration date
166
167        @param uri: the URI that serves as a key in the index directory
168        """
169        if uri in self.indeces:
170            return tuple(self.indeces[uri])
171        else:
172            return None
173
174    def _give_preference_path(self):
175        """
176        Find the vocab cache directory.
177        """
178        from ...pyRdfa import CACHE_DIR_VAR
179        if CACHE_DIR_VAR in os.environ:
180            return os.environ[CACHE_DIR_VAR]
181        else:
182            # find the preference path on the architecture
183            platform = sys.platform
184            if platform in self.architectures:
185                system = self.architectures[platform]
186            else:
187                system = "unix"
188
189            if system == "win":
190                # there is a user variable set for that purpose
191                app_data = os.path.expandvars("%APPDATA%")
192                return os.path.join(app_data, self.preference_path[system])
193            else:
194                return os.path.join(os.path.expanduser('~'), self.preference_path[system])

Class to manage the cache index. Takes care of finding the vocab directory, and manages the index to the individual vocab data.

The vocab directory is set to a platform specific area, unless an environment variable sets it explicitly. The environment variable is "PyRdfaCacheDir"

Every time the index is changed, the index is put back (via pickle) to the directory.

@ivar app_data_dir: directory for the vocabulary cache directory @ivar index_fname: the full path of the index file on the disc @ivar indeces: the in-memory version of the index (a directory mapping URI-s to tuples) @ivar options: the error handler (option) object to send warnings to @type options: L{options.Options} @ivar report: whether details on the caching should be reported @type report: Boolean @cvar vocabs: File name used for the index in the cache directory @cvar preference_path: Cache directories for the three major platforms (ie, mac, windows, unix) @type preference_path: directory, keyed by "mac", "win", and "unix" @cvar architectures: Various 'architectures' as returned by the python call, and their mapping on one of the major platforms. If an architecture is missing, it is considered to be "unix" @type architectures: directory, mapping architectures to "mac", "win", or "unix"

CachedVocabIndex(options=None)
 98    def __init__(self, options = None):
 99        """
100        @param options: the error handler (option) object to send warnings to
101        @type options: L{options.Options}
102        """
103        self.options = options
104        self.report = options is not None and options.vocab_cache_report
105
106        # This is where the cache files should be
107        self.app_data_dir = self._give_preference_path()
108        self.index_fname = os.path.join(self.app_data_dir, self.vocabs)
109        self.indeces = {}
110
111        # Check whether that directory exists.
112        if not os.path.isdir(self.app_data_dir):
113            try:
114                os.mkdir(self.app_data_dir)
115            except Exception:
116                _t, value, _traceback = sys.exc_info()
117                if self.report: options.add_info("Could not create the vocab cache area %s" % value, VocabCachingInfo)
118                return
119        else:
120            # check whether it is at least readable
121            if not os.access(self.app_data_dir, os.R_OK):
122                if self.report: options.add_info("Vocab cache directory is not readable", VocabCachingInfo)
123                return
124            if not os.access(self.app_data_dir, os.W_OK):
125                if self.report: options.add_info("Vocab cache directory is not writeable, but readable", VocabCachingInfo)
126                return
127
128        if os.path.exists(self.index_fname):
129            if os.access(self.index_fname, os.R_OK):
130                self.indeces = _load(self.index_fname)
131            else:
132                if self.report: options.add_info("Vocab cache index not readable", VocabCachingInfo)
133        else:
134            # This is the very initial phase, creation
135            # of a a new index
136            if os.access(self.app_data_dir, os.W_OK):
137                # This is then put into a pickle file to put the stake in the ground...
138                try:
139                    _dump(self.indeces, self.index_fname)
140                except Exception:
141                    _t, value, _traceback = sys.exc_info()
142                    if self.report: options.add_info("Could not create the vocabulary index %s" % value, VocabCachingInfo)
143            else:
144                if self.report: options.add_info("Vocabulary cache directory is not writeable", VocabCachingInfo)
145                self.cache_writeable = False

@param options: the error handler (option) object to send warnings to @type options: L{options.Options}

vocabs = 'cache_index'
preference_path = {'mac': 'Library/Application Support/pyRdfa-cache', 'win': 'pyRdfa-cache', 'unix': '.pyRdfa-cache'}
architectures = {'darwin': 'mac', 'nt': 'win', 'win32': 'win', 'cygwin': 'win'}
options
report
app_data_dir
index_fname
indeces
def add_ref(self, uri, vocab_reference):
147    def add_ref(self, uri, vocab_reference):
148        """
149        Add a new entry to the index, possibly removing the previous one.
150
151        @param uri: the URI that serves as a key in the index directory
152        @param vocab_reference: tuple consisting of file name, modification date, and expiration date
153        """
154        # Store the index right away
155        self.indeces[uri] = vocab_reference
156        try:
157            _dump(self.indeces, self.index_fname)
158        except Exception:
159            _t, value, _traceback = sys.exc_info()
160            if self.report: self.options.add_info("Could not store the cache index %s" % value, VocabCachingInfo)

Add a new entry to the index, possibly removing the previous one.

@param uri: the URI that serves as a key in the index directory @param vocab_reference: tuple consisting of file name, modification date, and expiration date

def get_ref(self, uri):
162    def get_ref(self, uri):
163        """
164        Get an index entry, if available, None otherwise.
165        The return value is a tuple: file name, modification date, and expiration date
166
167        @param uri: the URI that serves as a key in the index directory
168        """
169        if uri in self.indeces:
170            return tuple(self.indeces[uri])
171        else:
172            return None

Get an index entry, if available, None otherwise. The return value is a tuple: file name, modification date, and expiration date

@param uri: the URI that serves as a key in the index directory

class CachedVocab(CachedVocabIndex):
class CachedVocab(CachedVocabIndex):
    """
    Cache for a specific vocab. The content of the cache is the graph, which is also what is stored
    on disk (in pickled form).

    @ivar graph: the RDF graph
    @ivar uri: vocabulary URI
    @ivar filename: file name (not the complete path) of the cached version
    @ivar creation_date: creation date of the cache
    @type creation_date: datetime
    @ivar expiration_date: expiration date of the cache
    @type expiration_date: datetime
    @ivar caching: False when the cache area could not be accessed, True otherwise
    @type caching: bool
    """
    def __init__(self, URI, options = None, verify = True):
        """
        Set up the cache for one vocabulary, retrieving or refreshing it as needed.

        If the index has no entry for the URI, the vocabulary is retrieved and (when caching is
        possible) stored. If an entry exists and has not expired, the pickled graph is loaded from
        the cache directory. Otherwise a refresh is attempted; when that fails, the old cached graph
        is reused and its expiration date is set to one hour from now.

        @param URI: real URI for the vocabulary file
        @param options: the error handler (option) object to send warnings to; may be None
        @type options: L{options.Options}
        @param verify: whether the SSL certificate needs to be verified.
        @type verify: bool
        """
        self.uri = URI
        (self.filename, self.creation_date, self.expiration_date) = ("", None, None)
        self.graph = Graph()

        try:
            CachedVocabIndex.__init__(self, options)
            vocab_reference = self.get_ref(URI)
            self.caching = True
        except Exception:
            # what this means is that the caching becomes impossible through some system error...
            # NOTE(review): this relies on the base constructor having set self.report before failing
            _t, value, _traceback = sys.exc_info()
            if self.report: options.add_info("Could not access the vocabulary cache area %s" % value, VocabCachingInfo, URI)
            vocab_reference = None
            self.caching = False

        if vocab_reference is None:
            # This has never been cached before
            if self.report: options.add_info("No cache exists for %s, generating one" % URI, VocabCachingInfo)

            # Store all the cache data unless caching proves to be impossible
            if self._get_vocab_data(verify, newCache = True) and self.caching:
                self.filename = create_file_name(self.uri)
                self._store_caches()
                if self.report:
                    options.add_info("Generated a cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)
            return

        (self.filename, self.creation_date, self.expiration_date) = vocab_reference
        if self.report: options.add_info("Found a cache for %s, expiring on %s" % (URI,self.expiration_date), VocabCachingInfo)

        # 'options' defaults to None and an option object may lack the flag altogether; both cases
        # mean "no forced refresh" instead of an AttributeError
        force_refresh = bool(getattr(options, "refresh_vocab_cache", False))
        fname = os.path.join(self.app_data_dir, self.filename)

        # Check if the expiration date is still away
        if not force_refresh and datetime.datetime.utcnow() <= self.expiration_date:
            # We are fine, we can just extract the data from the cache and we're done
            if self.report: options.add_info("Cache for %s is still valid; extracting the data" % URI, VocabCachingInfo)
            try:
                self.graph = _load(fname)
            except Exception:
                # the cached file is unusable; the traceback is shown and the problem reported
                t, value, traceback = sys.exc_info()
                sys.excepthook(t, value, traceback)
                if self.report: options.add_info("Could not access the vocab cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
            return

        if self.report:
            if force_refresh:
                options.add_info("Time check is bypassed; refreshing the cache for %s" % URI, VocabCachingInfo)
            else:
                options.add_info("Cache timeout; refreshing the cache for %s" % URI, VocabCachingInfo)

        # we have to refresh the graph
        if not self._get_vocab_data(verify, newCache = False):
            # the cache could not be refreshed: use the current one, and set the cache artificially
            # to be valid for the coming hour, hoping that the access issues will be resolved by then...
            if self.report:
                options.add_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, VocabCachingInfo, URI)
            try:
                self.graph = _load(fname)
                self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
            except Exception:
                t, value, traceback = sys.exc_info()
                sys.excepthook(t, value, traceback)
                if self.report: options.add_info("Could not access the vocabulary cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
                # Neither a fresh nor the old graph is available. Nothing is written back, so that
                # an empty graph (and whatever expiration value the failed retrieval left behind)
                # does not end up in the index and break the date comparison on the next run.
                return

        self.creation_date = datetime.datetime.utcnow()
        if self.report:
            options.add_info("Generated a new cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)

        self._store_caches()

    def _get_vocab_data(self, verify, newCache = True):
        """
        Retrieve the vocabulary graph and its expiration date through C{return_graph}.

        @param verify: whether the SSL certificate needs to be verified.
        @param newCache: passed on to C{return_graph}; True when no cache exists yet for the vocabulary
        @return: True if a graph has been obtained, False otherwise
        @rtype: bool
        """
        # Imported locally, as in the original code (presumably to avoid a circular import)
        from .process import return_graph
        self.graph, self.expiration_date = return_graph(self.uri, self.options, newCache, verify)
        return self.graph is not None

    def _store_caches(self):
        """Called if the creation date, etc, have been refreshed or new, and
        all content must be put into a cache file. A failure to write the file is
        only reported; the index is updated in any case.
        """
        # Store the cached version of the vocabulary file
        fname = os.path.join(self.app_data_dir, self.filename)
        try:
            _dump(self.graph, fname)
        except Exception:
            _t, value, _traceback = sys.exc_info()
            if self.report:
                # The message must be formatted with '%': passing the tuple as a separate argument
                # would shift every following positional argument of add_info
                self.options.add_info("Could not write cache file %s (%s)" % (fname, value), VocabCachingInfo, self.uri)
        # Update the index
        self.add_ref(self.uri,(self.filename, self.creation_date, self.expiration_date))

Cache for a specific vocabulary. The content of the cache is the RDF graph, which is also what gets stored on disk (in pickled form).

@ivar graph: the RDF graph @ivar URI: vocabulary URI @ivar filename: file name (not the complete path) of the cached version @ivar creation_date: creation date of the cache @type creation_date: datetime @ivar expiration_date: expiration date of the cache @type expiration_date: datetime @cvar runtime_cache : a run time cache for already 'seen' vocabulary files. Apart from (marginally) speeding up processing, this also prevents recursion @type runtime_cache : dictionary

CachedVocab(URI, options=None, verify=True)
212    def __init__(self, URI, options = None, verify = True):
213        """
214        @param URI: real URI for the vocabulary file
215        @param options: the error handler (option) object to send warnings to
216        @type options: L{options.Options}
217        @param verify: whether the SSL certificate needs to be verified.
218        @type verify: bool
219        """
220        # First see if this particular vocab has been handled before. If yes, it is extracted and everything
221        # else can be forgotten. 
222        self.uri                                                    = URI
223        (self.filename, self.creation_date, self.expiration_date)    = ("",None,None)
224        self.graph                                                    = Graph()
225
226        try:
227            CachedVocabIndex.__init__(self, options)
228            vocab_reference = self.get_ref(URI)
229            self.caching = True
230        except Exception:
231            # what this means is that the caching becomes impossible through some system error...
232            _t, value, _traceback = sys.exc_info()
233            if self.report: options.add_info("Could not access the vocabulary cache area %s" % value, VocabCachingInfo, URI)
234            vocab_reference = None
235            self.caching = False
236
237        if vocab_reference == None:
238            # This has never been cached before
239            if self.report: options.add_info("No cache exists for %s, generating one" % URI, VocabCachingInfo)
240
241            # Store all the cache data unless caching proves to be impossible
242            if self._get_vocab_data(verify, newCache = True) and self.caching:
243                self.filename = create_file_name(self.uri)
244                self._store_caches()
245                if self.report:
246                    options.add_info("Generated a cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)
247        else:
248            (self.filename, self.creation_date, self.expiration_date) = vocab_reference
249            if self.report: options.add_info("Found a cache for %s, expiring on %s" % (URI,self.expiration_date), VocabCachingInfo)
250            # Check if the expiration date is still away
251            if options.refresh_vocab_cache == False and datetime.datetime.utcnow() <= self.expiration_date:
252                # We are fine, we can just extract the data from the cache and we're done
253                if self.report: options.add_info("Cache for %s is still valid; extracting the data" % URI, VocabCachingInfo)
254                fname = os.path.join(self.app_data_dir, self.filename)
255                try:
256                    self.graph = _load(fname)
257                except Exception:
258                    # what this means is that the caching becomes impossible VocabCachingInfo
259                    t, value, traceback = sys.exc_info()
260                    sys.excepthook(t, value, traceback)
261                    if self.report: options.add_info("Could not access the vocab cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
262            else:
263                if self.report:
264                    if options.refresh_vocab_cache == True:
265                        options.add_info("Time check is bypassed; refreshing the cache for %s" % URI, VocabCachingInfo)
266                    else:
267                        options.add_info("Cache timeout; refreshing the cache for %s" % URI, VocabCachingInfo)
268                # we have to refresh the graph
269                if self._get_vocab_data(verify, newCache = False) == False:
270                    # bugger; the cache could not be refreshed, using the current one, and setting the cache artificially
271                    # to be valid for the coming hour, hoping that the access issues will be resolved by then...
272                    if self.report:
273                        options.add_info("Could not refresh vocabulary cache for %s, using the old cache, extended its expiration time by an hour (network problems?)" % URI, VocabCachingInfo, URI)
274                    fname = os.path.join(self.app_data_dir, self.filename)
275                    try:
276                        self.graph = _load(fname)
277                        self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
278                    except Exception:
279                        # what this means is that the caching becomes impossible VocabCachingInfo
280                        t, value, traceback = sys.exc_info()
281                        sys.excepthook(t, value, traceback)
282                        if self.report: options.add_info("Could not access the vocabulary cache %s (%s)" % (value,fname), VocabCachingInfo, URI)
283                self.creation_date = datetime.datetime.utcnow()
284                if self.report:
285                    options.add_info("Generated a new cache for %s, with an expiration date of %s" % (URI,self.expiration_date), VocabCachingInfo, URI)
286
287                self._store_caches()

@param URI: real URI for the vocabulary file @param options: the error handler (option) object to send warnings to @type options: L{options.Options} @param verify: whether the SSL certificate needs to be verified. @type verify: bool

uri
graph
def offline_cache_generation(args, verify=True):
def offline_cache_generation(args, verify = True):
    """Generate a cache for the vocabulary in args.

    For every URI the cache is written by creating a L{CachedVocab}, then read back by creating
    a second one; all reports, as well as a short summary of the cached data, are printed on
    the standard output.

    @param args:   array of vocabulary URIs.
    @param verify: whether the SSL certificate needs to be verified.
    """
    class LocalOption:
        """Minimal stand-in for the option object: every message is simply printed."""
        def __init__(self):
            self.vocab_cache_report = True
            # Read by CachedVocab when a cache entry already exists; False means that the
            # expiration date of the cache decides whether a refresh is needed
            self.refresh_vocab_cache = False

        def pr(self, wae, txt, warning_type, context):
            """Print one message, framed by separator lines.
            @param wae: message label ("Warning", "Info" or "Error")
            @param txt: the message text
            @param warning_type: message class, printed only if not None
            @param context: message context, printed only if not None
            """
            print( "====" )
            if warning_type is not None: print( warning_type )
            print( wae + ": " + txt )
            if context is not None: print( context )
            print( "====" )

        def add_warning(self, txt, warning_type=None, context=None):
            """Print a warning.
            @param txt: the warning text.
            @keyword warning_type: Warning Class
            @type warning_type: URIRef
            @keyword context: possible context of the warning
            @type context: URIRef or String
            """
            self.pr("Warning",txt,warning_type,context)

        def add_info(self, txt, info_type=None, context=None):
            """Print an informational comment.
            @param txt: the information text.
            @keyword info_type: Info Class
            @type info_type: URIRef
            @keyword context: possible context of the comment
            @type context: URIRef or String
            """
            self.pr("Info",txt,info_type,context)

        def add_error(self, txt, err_type=None, context=None):
            """Print an error.
            @param txt: the error text.
            @keyword err_type: Error Class
            @type err_type: URIRef
            @keyword context: possible context of the error
            @type context: URIRef or String
            """
            self.pr("Error",txt,err_type,context)

    for uri in args:
        # This should write the cache
        print( ">>>>> Writing Cache <<<<<" )
        _writ = CachedVocab(uri, options = LocalOption(), verify = verify)
        # Now read it back and print the content for tracing
        print( ">>>>> Reading Cache <<<<<" )
        rd = CachedVocab(uri, options = LocalOption(), verify = verify)
        print( "URI: " + uri )
        # Only attributes that CachedVocab really sets are reported
        print( "cache file: %s" % rd.filename )
        print( "created: %s expires: %s" % (rd.creation_date, rd.expiration_date) )
        # The graph is None when the vocabulary could not be retrieved
        if rd.graph is not None:
            print( "triples: %s" % len(rd.graph) )

Generate a cache for the vocabulary in args.

@param args: array of vocabulary URIs. @param verify: whether the SSL certificate needs to be verified.