pyRdfa.embeddedRDF

Extracts possible embedded RDF/XML content from the file and parses it separately into the Graph. This is used, for example, by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.

@author: U{Ivan Herman}
@license: This software is available for use under the U{W3C® SOFTWARE NOTICE AND LICENSE}
@contact: Ivan Herman, ivan@w3.org
@version: $Id: embeddedRDF.py,v 1.15 2012/11/16 17:51:53 ivan Exp $

 1# -*- coding: utf-8 -*-
 2"""
 3Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example
 4by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
 5
 6@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
 7@license: This software is available for use under the
 8U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
 9@contact: Ivan Herman, ivan@w3.org
10@version: $Id: embeddedRDF.py,v 1.15 2012/11/16 17:51:53 ivan Exp $
11"""
12
13from io import StringIO
14
15from .host import accept_embedded_rdf_xml, accept_embedded_turtle
16from .utils import return_XML
17import sys
18
def handle_embeddedRDF(node, graph, state):
    """
    Handles embedded RDF. This feature is optional: nothing is done unless
    C{state.options.embedded_rdf} is set. There are two possibilities:

     - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
     - the file is HTML and there is a turtle portion in the C{<script>} element with type text/turtle.

    Parsing problems are not raised to the caller; they are reported through
    C{state.options.add_error} and the embedded content is ignored.

    @param node: a DOM node for the top level element
    @param graph: target rdf graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc)
    @type state: L{state.ExecutionContext}
    @return: whether an RDF/XML or turtle content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. Note that, for a host language accepting embedded turtle, TRUE is returned for every C{<script>} element, whatever its type.
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get the full text: the concatenated data of the direct text and CDATA children.
        @param Pnode: DOM Node
        @return: string
        """
        text = "".join(
            child.data
            for child in Pnode.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
        # Sigh... the HTML5 parser does not recognize the CDATA escapes, ie, it just passes on the <![CDATA[ and ]]> strings:-(
        return text.replace("<![CDATA[", "").replace("]]>", "")

    # This feature is optional!
    if not state.options.embedded_rdf:
        return False

    host_language = state.options.host_language

    # Embedded turtle, per the latest Turtle draft
    if host_language in accept_embedded_turtle and node.nodeName.lower() == "script":
        if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle":
            # The script content is parsed as is: the prefixes of the RDFa
            # context are not prepended to it.
            rdf = StringIO(_get_literal(node))
            try:
                graph.parse(rdf, format="n3", publicID=state.base)
            except Exception as value:
                # Only genuine errors are reported; KeyboardInterrupt and
                # SystemExit are left to propagate.
                state.options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % value)
            else:
                state.options.add_info("The output graph includes triples coming from an embedded Turtle script")
        # A script element is never processed for RDFa, whatever its type
        return True

    if (host_language in accept_embedded_rdf_xml
            and node.localName == "RDF"
            and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"):
        rdf = StringIO(return_XML(state, node))
        try:
            # The format is given explicitly: the default format of
            # Graph.parse is not RDF/XML in every rdflib release.
            graph.parse(rdf, format="xml")
        except Exception as value:
            state.options.add_error("Embedded RDF/XML content could not be parsed (problems with %s?); ignored" % value)
        else:
            state.options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
        return True

    return False
def handle_embeddedRDF(node, graph, state):
    """
    Handles embedded RDF. This feature is optional: nothing is done unless
    C{state.options.embedded_rdf} is set. There are two possibilities:

     - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
     - the file is HTML and there is a turtle portion in the C{<script>} element with type text/turtle.

    Parsing problems are not raised to the caller; they are reported through
    C{state.options.add_error} and the embedded content is ignored.

    @param node: a DOM node for the top level element
    @param graph: target rdf graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc)
    @type state: L{state.ExecutionContext}
    @return: whether an RDF/XML or turtle content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. Note that, for a host language accepting embedded turtle, TRUE is returned for every C{<script>} element, whatever its type.
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get the full text: the concatenated data of the direct text and CDATA children.
        @param Pnode: DOM Node
        @return: string
        """
        text = "".join(
            child.data
            for child in Pnode.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
        # Sigh... the HTML5 parser does not recognize the CDATA escapes, ie, it just passes on the <![CDATA[ and ]]> strings:-(
        return text.replace("<![CDATA[", "").replace("]]>", "")

    # This feature is optional!
    if not state.options.embedded_rdf:
        return False

    host_language = state.options.host_language

    # Embedded turtle, per the latest Turtle draft
    if host_language in accept_embedded_turtle and node.nodeName.lower() == "script":
        if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle":
            # The script content is parsed as is: the prefixes of the RDFa
            # context are not prepended to it.
            rdf = StringIO(_get_literal(node))
            try:
                graph.parse(rdf, format="n3", publicID=state.base)
            except Exception as value:
                # Only genuine errors are reported; KeyboardInterrupt and
                # SystemExit are left to propagate.
                state.options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % value)
            else:
                state.options.add_info("The output graph includes triples coming from an embedded Turtle script")
        # A script element is never processed for RDFa, whatever its type
        return True

    if (host_language in accept_embedded_rdf_xml
            and node.localName == "RDF"
            and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"):
        rdf = StringIO(return_XML(state, node))
        try:
            # The format is given explicitly: the default format of
            # Graph.parse is not RDF/XML in every rdflib release.
            graph.parse(rdf, format="xml")
        except Exception as value:
            state.options.add_error("Embedded RDF/XML content could not be parsed (problems with %s?); ignored" % value)
        else:
            state.options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
        return True

    return False

Handles embedded RDF. There are two possibilities:

  • the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
  • the file is HTML and there is a turtle portion in the C{