pyRdfa.rdfs.process

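@organization: U{World Wide Web Consortium<http://www.w3.org>} @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>} @license: This software is available for use under the U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}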

View Source

  1# -*- coding: utf-8 -*-
  2"""
  3@organization: U{World Wide Web Consortium<http://www.w3.org>}
  4@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
  5@license: This software is available for use under the
  6U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
  7
  8"""
  9
 10"""
 11$Id: process.py,v 1.7 2012-03-23 14:06:38 ivan Exp $ $Date: 2012-03-23 14:06:38 $
 12
 13"""
 14
 15import sys
 16
 17from rdflib import Namespace
 18from rdflib import RDF  as ns_rdf
 19from rdflib import RDFS as ns_rdfs
 20from rdflib import Graph
 21
 22ns_owl = Namespace("http://www.w3.org/2002/07/owl#")
 23
 24from ..host import MediaTypes
 25
 26from ..utils import URIOpener
 27
 28from . import err_outdated_cache
 29from . import err_unreachable_vocab
 30from . import err_unparsable_Turtle_vocab
 31from . import err_unparsable_ntriples_vocab
 32from . import err_unparsable_rdfa_vocab
 33from . import err_unrecognised_vocab_type
 34
 35from .. import VocabReferenceError
 36
 37from .cache import CachedVocab, xml_application_media_type
 38from .. import HTTPError, RDFaError
 39
 40#############################################################################################################
 41
 42def return_graph(uri, options, newCache = False, verify = True):
 43    """Parse a file, and return an RDFLib Graph. The URI's content type is checked and either one of
 44    RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N Triple cases) or a separate RDFa processing is invoked
 45    on the RDFa content.
 46
 47    The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML and then HTML (RDFa), in case content negotiation is used.
 48
 49    This function is used to retreive the vocabulary file and turn it into an RDFLib graph.
 50
 51    @param uri: URI for the graph
 52    @param options: used as a place where warnings can be sent
 53    @param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text
 54    @param verify: whether the SSL certificate should be verified
 55    @return: A tuple consisting of an RDFLib Graph instance and an expiration date); None if the dereferencing or the parsing was unsuccessful
 56    """
 57    def return_to_cache(msg):
 58        if newCache:
 59            options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
 60        else:
 61            options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)
 62
 63    retval =           None
 64    _expiration_date = None
 65    content =          None
 66
 67    try:
 68        form = {'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'}
 69        content = URIOpener(uri, form, verify)
 70    except HTTPError:
 71        _t, value, _traceback = sys.exc_info()
 72        return_to_cache(value)
 73        return (None,None)
 74    except RDFaError:
 75        _t, value, _traceback = sys.exc_info()
 76        return_to_cache(value)
 77        return (None,None)
 78    except Exception:
 79        _t, value, _traceback = sys.exc_info()
 80        return_to_cache(value)
 81        return (None,None)
 82
 83    # Store the expiration date of the newly accessed data
 84    expiration_date = content.expiration_date
 85
 86    if content.content_type == MediaTypes.turtle:
 87        try:
 88            retval = Graph()
 89            retval.parse(content.data, format="n3")
 90        except:
 91            _t, value, _traceback = sys.exc_info()
 92            options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
 93    elif content.content_type == MediaTypes.rdfxml:
 94        try:
 95            retval = Graph()
 96            retval.parse(content.data)
 97        except:
 98            _type, value, _traceback = sys.exc_info()
 99            options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
100    elif content.content_type == MediaTypes.nt:
101        try:
102            retval = Graph()
103            retval.parse(content.data, format="nt")
104        except:
105            _type, value, _traceback = sys.exc_info()
106            options.add_warning(err_unparsable_ntriples_vocab % (uri,value))
107    elif content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml] or xml_application_media_type.match(content.content_type) != None:
108        try:
109            from .. import pyRdfa
110            from .. import Options
111            options = Options()
112            retval = pyRdfa(options).graph_from_source(content.data)
113        except:
114            _type, value, _traceback = sys.exc_info()
115            options.add_warning(err_unparsable_rdfa_vocab % (uri,value))
116    else:
117        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))
118
119    return (retval, expiration_date)
120
121############################################################################################
# RDF, RDFS and OWL terms referred to by the MiniOWL entailment rules
rdftype            = ns_rdf.type
Property           = ns_rdf.Property
Class              = ns_rdfs.Class
subClassOf         = ns_rdfs.subClassOf
subPropertyOf      = ns_rdfs.subPropertyOf
equivalentProperty = ns_owl.equivalentProperty
equivalentClass    = ns_owl.equivalentClass
129
class MiniOWL:
    """
    Minimal OWL RL reasoner, covering what RDFa needs for the management of vocabulary files. The
    reasoning is a forward chaining process (see the L{closure} method) built on a handful of rules
    taken from the RDF and the OWL Semantics specifications.

    @ivar graph: the graph that has to be expanded
    @ivar added_triples: the triples collected during one cycle, to be added to the graph at the end of that cycle
    @type added_triples: a set, so that each new triple is recorded only once
    @ivar schema_semantics: if True, the rules chain the schema statements themselves (sub/equivalent
    property and class) instead of applying them to the data
    """
    def __init__(self, graph, schema_semantics = False):
        self.schema_semantics = schema_semantics
        self.added_triples = None
        self.graph = graph

    def closure(self):
        """
        Generate the closure of the graph. This is the real 'core'.

        Each cycle runs the rules over every triple of the graph; the rules record new triples
        through L{store_triple}, i.e., in the L{added_triples} set. The cycles are repeated until
        one of them leaves that set empty.
        """
        while True:
            # New triples are collected separately, so that the graph is not modified while
            # it is being iterated over
            self.added_triples = set()

            for triple in self.graph:
                self.rules(triple)

            # Nothing new has been derived: the closure is complete
            if not self.added_triples:
                break

            for triple in self.added_triples:
                self.graph.add(triple)

    def store_triple(self, t):
        """
        Record a triple for addition to the graph. Nothing is added to the graph here: the triple
        is only put into the L{added_triples} set (a set, because the rules may generate the same
        triple several times), and only if the graph does not contain it yet.

        The set is reset at the start of every cycle of L{closure}, and its content is added to
        the graph at the end of that cycle; an empty set at that point stops the processing.

        @param t: the triple to be added to the graph, unless it is already there
        @type t: a 3-element tuple of (s,p,o)
        """
        if t in self.graph:
            return
        self.added_triples.add(t)

    def rules(self, t):
        """
        Apply the OWL-RL entailment rules prp-spo1, prp-eqp1, prp-eqp2, cax-sco, cax-eqc1, and cax-eqc2
        to one triple; with schema semantics, chain the schema statements instead.
        @param t: a triple (in the form of a tuple)
        """
        subj, pred, obj = t

        if self.schema_semantics:
            # Extra reasoning on the vocabulary only: pre-computing the chains reduces the
            # expected number of chaining cycles on the real data
            for relation in (subPropertyOf, subClassOf):
                if pred == relation:
                    for _s, _p, upper in self.graph.triples((obj, relation, None)):
                        self.store_triple((subj, relation, upper))
                    return
            for relation in (equivalentProperty, equivalentClass):
                if pred == relation:
                    for _s, _p, other in self.graph.triples((obj, relation, None)):
                        self.store_triple((subj, relation, other))
                    for other, _p, _o in self.graph.triples((None, relation, subj)):
                        self.store_triple((other, relation, obj))
                    return
            return

        if pred == subPropertyOf or pred == equivalentProperty:
            # prp-spo1 and prp-eqp1 are the same rule
            for node, _p, value in self.graph.triples((None, subj, None)):
                self.store_triple((node, obj, value))
            if pred == equivalentProperty:
                # prp-eqp2
                for node, _p, value in self.graph.triples((None, obj, None)):
                    self.store_triple((node, subj, value))
        elif pred == subClassOf or pred == equivalentClass:
            # cax-sco and cax-eqc1 are the same rule
            for instance, _p, _o in self.graph.triples((None, rdftype, subj)):
                self.store_triple((instance, rdftype, obj))
            if pred == equivalentClass:
                # cax-eqc2
                for instance, _p, _o in self.graph.triples((None, rdftype, obj)):
                    self.store_triple((instance, rdftype, subj))
239
240########################################################################################################
241
def process_rdfa_sem(graph, options):
    """
    Expand the graph through the minimal RDFS and OWL rules defined for RDFa.

    The expansion is done in several steps:
     1. the vocabularies are retrieved from the incoming graph (there are RDFa triples generated for that)
     2. all vocabularies are merged into a separate vocabulary graph
     3. the RDFS/OWL expansion is done on the vocabulary graph, to take care of all the subproperty, subclass, etc, chains
     4. the (expanded) vocabulary graph content is added to the incoming graph
     5. the incoming graph is expanded
     6. the triples appearing in the vocabulary graph are removed from the incoming graph, to avoid unnecessary extra triples from the data

    The steps are executed even if the graph refers to no vocabulary at all; in that case only
    step 5 can have an effect.

    @param graph: an RDFLib Graph instance, to be expanded
    @param options: options as defined for the RDFa run; used to generate warnings
    @type options: L{pyRdfa.Options}
    @return: the incoming graph, expanded in place
    """
    # Imported from the parent package, like the other "from .. import" statements of this module;
    # going one level further up ("...pyRdfa") fails when pyRdfa itself is the top-level package
    from .. import RDFA_VOCAB

    # 1. collect the vocab URI-s
    vocabs = {str(v) for _s, _p, v in graph.triples((None, RDFA_VOCAB, None))}

    # 2. get all the vocab graphs
    vocab_graph = Graph()
    for uri in vocabs:
        if options.vocab_cache:
            v_graph = CachedVocab(uri, options).graph
        else:
            v_graph, _exp_date = return_graph(uri, options)
        # A vocabulary that could not be retrieved or parsed is simply left out
        if v_graph is not None:
            for t in v_graph:
                vocab_graph.add(t)

    # 3. Get the closure of the vocab graph; this will take care of local subproperty, etc, statements
    # Strictly speaking this is not necessary, but will speed up processing, because it may save chaining cycles on the
    # real graph
    MiniOWL(vocab_graph, schema_semantics = True).closure()

    # 4. Now get the vocab graph content added to the default graph
    for t in vocab_graph:
        graph.add(t)

    # 5. get the graph expanded through RDFS
    MiniOWL(graph).closure()

    # 6. clean up the graph by removing the schema triples
    for t in vocab_graph:
        graph.remove(t)

    # That was it...
    return graph

ns_owl = Namespace('http://www.w3.org/2002/07/owl#')

def return_graph(uri, options, newCache=False, verify=True): View Source

 43def return_graph(uri, options, newCache = False, verify = True):
 44    """Parse a file, and return an RDFLib Graph. The URI's content type is checked and either one of
 45    RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N Triple cases) or a separate RDFa processing is invoked
 46    on the RDFa content.
 47
 48    The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML and then HTML (RDFa), in case content negotiation is used.
 49
 50    This function is used to retreive the vocabulary file and turn it into an RDFLib graph.
 51
 52    @param uri: URI for the graph
 53    @param options: used as a place where warnings can be sent
 54    @param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text
 55    @param verify: whether the SSL certificate should be verified
 56    @return: A tuple consisting of an RDFLib Graph instance and an expiration date); None if the dereferencing or the parsing was unsuccessful
 57    """
 58    def return_to_cache(msg):
 59        if newCache:
 60            options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
 61        else:
 62            options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)
 63
 64    retval =           None
 65    _expiration_date = None
 66    content =          None
 67
 68    try:
 69        form = {'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'}
 70        content = URIOpener(uri, form, verify)
 71    except HTTPError:
 72        _t, value, _traceback = sys.exc_info()
 73        return_to_cache(value)
 74        return (None,None)
 75    except RDFaError:
 76        _t, value, _traceback = sys.exc_info()
 77        return_to_cache(value)
 78        return (None,None)
 79    except Exception:
 80        _t, value, _traceback = sys.exc_info()
 81        return_to_cache(value)
 82        return (None,None)
 83
 84    # Store the expiration date of the newly accessed data
 85    expiration_date = content.expiration_date
 86
 87    if content.content_type == MediaTypes.turtle:
 88        try:
 89            retval = Graph()
 90            retval.parse(content.data, format="n3")
 91        except:
 92            _t, value, _traceback = sys.exc_info()
 93            options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
 94    elif content.content_type == MediaTypes.rdfxml:
 95        try:
 96            retval = Graph()
 97            retval.parse(content.data)
 98        except:
 99            _type, value, _traceback = sys.exc_info()
100            options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
101    elif content.content_type == MediaTypes.nt:
102        try:
103            retval = Graph()
104            retval.parse(content.data, format="nt")
105        except:
106            _type, value, _traceback = sys.exc_info()
107            options.add_warning(err_unparsable_ntriples_vocab % (uri,value))
108    elif content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml] or xml_application_media_type.match(content.content_type) != None:
109        try:
110            from .. import pyRdfa
111            from .. import Options
112            options = Options()
113            retval = pyRdfa(options).graph_from_source(content.data)
114        except:
115            _type, value, _traceback = sys.exc_info()
116            options.add_warning(err_unparsable_rdfa_vocab % (uri,value))
117    else:
118        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))
119
120    return (retval, expiration_date)

Parse a file, and return an RDFLib Graph. The URI's content type is checked and either one of RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N Triple cases) or a separate RDFa processing is invoked on the RDFa content.

The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML and then HTML (RDFa), in case content negotiation is used.

This function is used to retrieve the vocabulary file and turn it into an RDFLib graph.

@param uri: URI for the graph @param options: used as a place where warnings can be sent @param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text @param verify: whether the SSL certificate should be verified @return: A tuple consisting of an RDFLib Graph instance and an expiration date; None if the dereferencing or the parsing was unsuccessful

rdftype = rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')

Property = rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#Property')

Class = rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Class')

subClassOf = rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf')

subPropertyOf = rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subPropertyOf')

equivalentProperty = rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentProperty')

equivalentClass = rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass')

class MiniOWL: View Source

class MiniOWL:
    """
    Minimal OWL RL reasoner, covering what RDFa needs for the management of vocabulary files. The
    reasoning is a forward chaining process (see the L{closure} method) built on a handful of rules
    taken from the RDF and the OWL Semantics specifications.

    @ivar graph: the graph that has to be expanded
    @ivar added_triples: the triples collected during one cycle, to be added to the graph at the end of that cycle
    @type added_triples: a set, so that each new triple is recorded only once
    @ivar schema_semantics: if True, the rules chain the schema statements themselves (sub/equivalent
    property and class) instead of applying them to the data
    """
    def __init__(self, graph, schema_semantics = False):
        self.schema_semantics = schema_semantics
        self.added_triples = None
        self.graph = graph

    def closure(self):
        """
        Generate the closure of the graph. This is the real 'core'.

        Each cycle runs the rules over every triple of the graph; the rules record new triples
        through L{store_triple}, i.e., in the L{added_triples} set. The cycles are repeated until
        one of them leaves that set empty.
        """
        while True:
            # New triples are collected separately, so that the graph is not modified while
            # it is being iterated over
            self.added_triples = set()

            for triple in self.graph:
                self.rules(triple)

            # Nothing new has been derived: the closure is complete
            if not self.added_triples:
                break

            for triple in self.added_triples:
                self.graph.add(triple)

    def store_triple(self, t):
        """
        Record a triple for addition to the graph. Nothing is added to the graph here: the triple
        is only put into the L{added_triples} set (a set, because the rules may generate the same
        triple several times), and only if the graph does not contain it yet.

        The set is reset at the start of every cycle of L{closure}, and its content is added to
        the graph at the end of that cycle; an empty set at that point stops the processing.

        @param t: the triple to be added to the graph, unless it is already there
        @type t: a 3-element tuple of (s,p,o)
        """
        if t in self.graph:
            return
        self.added_triples.add(t)

    def rules(self, t):
        """
        Apply the OWL-RL entailment rules prp-spo1, prp-eqp1, prp-eqp2, cax-sco, cax-eqc1, and cax-eqc2
        to one triple; with schema semantics, chain the schema statements instead.
        @param t: a triple (in the form of a tuple)
        """
        subj, pred, obj = t

        if self.schema_semantics:
            # Extra reasoning on the vocabulary only: pre-computing the chains reduces the
            # expected number of chaining cycles on the real data
            for relation in (subPropertyOf, subClassOf):
                if pred == relation:
                    for _s, _p, upper in self.graph.triples((obj, relation, None)):
                        self.store_triple((subj, relation, upper))
                    return
            for relation in (equivalentProperty, equivalentClass):
                if pred == relation:
                    for _s, _p, other in self.graph.triples((obj, relation, None)):
                        self.store_triple((subj, relation, other))
                    for other, _p, _o in self.graph.triples((None, relation, subj)):
                        self.store_triple((other, relation, obj))
                    return
            return

        if pred == subPropertyOf or pred == equivalentProperty:
            # prp-spo1 and prp-eqp1 are the same rule
            for node, _p, value in self.graph.triples((None, subj, None)):
                self.store_triple((node, obj, value))
            if pred == equivalentProperty:
                # prp-eqp2
                for node, _p, value in self.graph.triples((None, obj, None)):
                    self.store_triple((node, subj, value))
        elif pred == subClassOf or pred == equivalentClass:
            # cax-sco and cax-eqc1 are the same rule
            for instance, _p, _o in self.graph.triples((None, rdftype, subj)):
                self.store_triple((instance, rdftype, obj))
            if pred == equivalentClass:
                # cax-eqc2
                for instance, _p, _o in self.graph.triples((None, rdftype, obj)):
                    self.store_triple((instance, rdftype, subj))

Class implementing the simple OWL RL Reasoning required by RDFa in managing vocabulary files. This is done via a forward chaining process (in the L{closure} method) using a few simple rules as defined by the RDF and the OWL Semantics specifications.

@ivar graph: the graph that has to be expanded @ivar added_triples: each cycle collects the triples that are to be added to the graph eventually. @type added_triples: a set, to ensure the uniqueness of triples being added

MiniOWL(graph, schema_semantics=False) View Source

141    def __init__(self, graph, schema_semantics = False):
142        self.graph = graph
143        self.added_triples = None
144        self.schema_semantics = schema_semantics

graph

added_triples

schema_semantics

def closure(self): View Source

146    def closure(self):
147        """
148           Generate the closure the graph. This is the real 'core'.
149
150           The processing rules store new triples via the L{separate method<store_triple>} which stores
151           them in the L{added_triples<added_triples>} array. If that array is emtpy at the end of a cycle,
152           it means that the whole process can be stopped.
153        """
154
155        # Go cyclically through all rules until no change happens
156        new_cycle = True
157        cycle_num = 0
158        while new_cycle:
159            # yes, there was a change, let us go again
160            cycle_num += 1
161
162            # go through all rules, and collect the replies (to see whether any change has been done)
163            # the new triples to be added are collected separately not to interfere with
164            # the current graph yet
165            self.added_triples = set()
166
167            # Execute all the rules; these might fill up the added triples array
168            for t in self.graph: self.rules(t)
169
170            # Add the tuples to the graph (if necessary, that is). If any new triple has been generated, a new cycle
171            # will be necessary...
172            new_cycle = len(self.added_triples) > 0
173
174            for t in self.added_triples: self.graph.add(t)

Generate the closure of the graph. This is the real 'core'.

The processing rules store new triples via the L{separate method} which stores them in the L{added_triples} set. If that set is empty at the end of a cycle, it means that the whole process can be stopped.

def store_triple(self, t): View Source

176    def store_triple(self, t):
177        """
178        In contrast to its name, this does not yet add anything to the graph itself, it just stores the tuple in an
179        L{internal set<added_triples>}. (It is important for this to be a set: some of the rules in the various closures may
180        generate the same tuples several times.) Before adding the tuple to the set, the method checks whether
181        the tuple is in the final graph already (if yes, it is not added to the set).
182
183        The set itself is emptied at the start of every processing cycle; the triples are then effectively added to the
184        graph at the end of such a cycle. If the set is
185        actually empty at that point, this means that the cycle has not added any new triple, and the full processing can stop.
186
187        @param t: the triple to be added to the graph, unless it is already there
188        @type t: a 3-element tuple of (s,p,o)
189        """
190        if t not in self.graph:
191            self.added_triples.add(t)

In contrast to its name, this does not yet add anything to the graph itself, it just stores the tuple in an L{internal set}. (It is important for this to be a set: some of the rules in the various closures may generate the same tuples several times.) Before adding the tuple to the set, the method checks whether the tuple is in the final graph already (if yes, it is not added to the set).

The set itself is emptied at the start of every processing cycle; the triples are then effectively added to the graph at the end of such a cycle. If the set is actually empty at that point, this means that the cycle has not added any new triple, and the full processing can stop.

@param t: the triple to be added to the graph, unless it is already there @type t: a 3-element tuple of (s,p,o)

def rules(self, t): View Source

193    def rules(self, t):
194        """
195            Go through the OWL-RL entailement rules prp-spo1, prp-eqp1, prp-eqp2, cax-sco, cax-eqc1, and cax-eqc2 by extending the graph.
196            @param t: a triple (in the form of a tuple)
197        """
198        s, p, o = t
199        if self.schema_semantics:
200            # extra resonings on the vocabulary only to reduce the overall load by reducing the expected number of chaining cycles
201            if p == subPropertyOf:
202                for _z, _y, xxx in self.graph.triples((o, subPropertyOf, None)):
203                    self.store_triple((s, subPropertyOf, xxx))
204            elif p == equivalentProperty:
205                for _z, _y, xxx in self.graph.triples((o, equivalentProperty, None)):
206                    self.store_triple((s, equivalentProperty, xxx))
207                for xxx, _y, _z in self.graph.triples((None, equivalentProperty, s)):
208                    self.store_triple((xxx, equivalentProperty, o))
209            elif p == subClassOf:
210                for _z, _y, xxx in self.graph.triples((o, subClassOf, None)):
211                    self.store_triple((s, subClassOf, xxx))
212            elif p == equivalentClass:
213                for _z, _y, xxx in self.graph.triples((o, equivalentClass, None)):
214                    self.store_triple((s, equivalentClass, xxx))
215                for xxx, _y, _z in self.graph.triples((None, equivalentClass, s)):
216                    self.store_triple((xxx, equivalentClass, o))
217        else:
218            if p == subPropertyOf:
219                # prp-spo1
220                for zzz, _z, www in self.graph.triples((None, s, None)):
221                    self.store_triple((zzz, o, www))
222            elif p == equivalentProperty:
223                # prp-eqp1
224                for zzz, _z, www in self.graph.triples((None, s, None)):
225                    self.store_triple((zzz, o, www))
226                # prp-eqp2
227                for zzz, _z, www in self.graph.triples((None, o, None)):
228                    self.store_triple((zzz, s, www))
229            elif p == subClassOf:
230                # cax-sco
231                for vvv, _y, _z in self.graph.triples((None, rdftype, s)):
232                    self.store_triple((vvv, rdftype, o))
233            elif p == equivalentClass:
234                # cax-eqc1
235                for vvv, _y, _z in self.graph.triples((None, rdftype, s)):
236                    self.store_triple((vvv, rdftype, o))
237                # cax-eqc2
238                for vvv, _y, _z in self.graph.triples((None, rdftype, o)):
239                    self.store_triple((vvv, rdftype, s))

Go through the OWL-RL entailment rules prp-spo1, prp-eqp1, prp-eqp2, cax-sco, cax-eqc1, and cax-eqc2 by extending the graph. @param t: a triple (in the form of a tuple)

def process_rdfa_sem(graph, options): View Source

def process_rdfa_sem(graph, options):
    """
    Expand the graph through the minimal RDFS and OWL rules defined for RDFa.

    The expansion is done in several steps:
     1. the vocabularies are retrieved from the incoming graph (there are RDFa triples generated for that)
     2. all vocabularies are merged into a separate vocabulary graph
     3. the RDFS/OWL expansion is done on the vocabulary graph, to take care of all the subproperty, subclass, etc, chains
     4. the (expanded) vocabulary graph content is added to the incoming graph
     5. the incoming graph is expanded
     6. the triples appearing in the vocabulary graph are removed from the incoming graph, to avoid unnecessary extra triples from the data

    The steps are executed even if the graph refers to no vocabulary at all; in that case only
    step 5 can have an effect.

    @param graph: an RDFLib Graph instance, to be expanded
    @param options: options as defined for the RDFa run; used to generate warnings
    @type options: L{pyRdfa.Options}
    @return: the incoming graph, expanded in place
    """
    # Imported from the parent package, like the other "from .. import" statements of this module;
    # going one level further up ("...pyRdfa") fails when pyRdfa itself is the top-level package
    from .. import RDFA_VOCAB

    # 1. collect the vocab URI-s
    vocabs = {str(v) for _s, _p, v in graph.triples((None, RDFA_VOCAB, None))}

    # 2. get all the vocab graphs
    vocab_graph = Graph()
    for uri in vocabs:
        if options.vocab_cache:
            v_graph = CachedVocab(uri, options).graph
        else:
            v_graph, _exp_date = return_graph(uri, options)
        # A vocabulary that could not be retrieved or parsed is simply left out
        if v_graph is not None:
            for t in v_graph:
                vocab_graph.add(t)

    # 3. Get the closure of the vocab graph; this will take care of local subproperty, etc, statements
    # Strictly speaking this is not necessary, but will speed up processing, because it may save chaining cycles on the
    # real graph
    MiniOWL(vocab_graph, schema_semantics = True).closure()

    # 4. Now get the vocab graph content added to the default graph
    for t in vocab_graph:
        graph.add(t)

    # 5. get the graph expanded through RDFS
    MiniOWL(graph).closure()

    # 6. clean up the graph by removing the schema triples
    for t in vocab_graph:
        graph.remove(t)

    # That was it...
    return graph

Expand the graph through the minimal RDFS and OWL rules defined for RDFa.

The expansion is done in several steps:

the vocabularies are retrieved from the incoming graph (there are RDFa triples generated for that)
all vocabularies are merged into a separate vocabulary graph
the RDFS/OWL expansion is done on the vocabulary graph, to take care of all the subproperty, subclass, etc, chains
the (expanded) vocabulary graph content is added to the incoming graph
the incoming graph is expanded
the triples appearing in the vocabulary graph are removed from the incoming graph, to avoid unnecessary extra triples from the data

@param graph: an RDFLib Graph instance, to be expanded @param options: options as defined for the RDFa run; used to generate warnings @type options: L{pyRdfa.Options}