pyRdfa.embeddedRDF
Extract possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example, by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
@author: U{Ivan Herman}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
# -*- coding: utf-8 -*-
"""
Extract possible embedded RDF/XML or Turtle content from the file and parse it separately into the Graph.
This is used, for example, by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.

@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
@contact: Ivan Herman, ivan@w3.org
@version: $Id: embeddedRDF.py,v 1.15 2012/11/16 17:51:53 ivan Exp $
"""

import sys
from io import StringIO

from .host import accept_embedded_rdf_xml, accept_embedded_turtle
from .utils import return_XML


def handle_embeddedRDF(node, graph, state):
    """
    Handle RDF content embedded in the host document. There are two possibilities:

     - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See
       L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
     - the file is HTML and there is a Turtle portion in a C{<script>} element with type text/turtle.

    Parse failures are never propagated: they are recorded through C{state.options.add_error} and the
    embedded content is ignored.

    @param node: a DOM node for the top level element
    @param graph: target rdf graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc)
    @type state: L{state.ExecutionContext}
    @return: whether an RDF/XML or Turtle content has been detected or not. If True, the RDFa processing
    should not occur on the node and its descendants.
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get the full text of a node: the concatenated data of its direct text and CDATA children.
        @param Pnode: DOM Node
        @return: string
        """
        text = "".join(
            child.data
            for child in Pnode.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
        # The HTML5 parser does not recognize the CDATA escapes, i.e. it just passes on the
        # "<![CDATA[" and "]]>" strings as ordinary text, so they are stripped by hand.
        return text.replace("<![CDATA[", "").replace("]]>", "")

    options = state.options

    # This feature is optional!
    if not options.embedded_rdf:
        return False

    # Embedded Turtle, per the latest Turtle draft
    if options.host_language in accept_embedded_turtle and node.nodeName.lower() == "script":
        if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle":
            # NOTE: the prefixes in scope in the RDFa context are not prepended as @prefix
            # declarations; the script content is handed to the parser as it is.
            rdf = StringIO(_get_literal(node))
            try:
                graph.parse(rdf, format="n3", publicID=state.base)
                options.add_info("The output graph includes triples coming from an embedded Turtle script")
            except Exception as exc:
                # Only ordinary errors are reported and ignored; SystemExit and
                # KeyboardInterrupt must not be swallowed here.
                options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % (exc,))
        # NOTE(review): the listing this code was recovered from had lost its indentation. The
        # return is placed at the <script> level (every script element is skipped by the RDFa
        # processing), parallel to the RDF/XML branch below -- confirm against upstream.
        return True

    if (options.host_language in accept_embedded_rdf_xml
            and node.localName == "RDF"
            and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"):
        rdf = StringIO(return_XML(state, node))
        try:
            graph.parse(rdf)
            options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
        except Exception as exc:
            options.add_error("Embedded RDF/XML content could not be parsed (problems with %s?); ignored" % (exc,))
        return True

    return False
def
handle_embeddedRDF(node, graph, state):
def handle_embeddedRDF(node, graph, state):
    """
    Handle RDF content embedded in the host document. There are two possibilities:

     - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See
       L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
     - the file is HTML and there is a Turtle portion in a C{<script>} element with type text/turtle.

    Parse failures are never propagated: they are recorded through C{state.options.add_error} and the
    embedded content is ignored.

    @param node: a DOM node for the top level element
    @param graph: target rdf graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc)
    @type state: L{state.ExecutionContext}
    @return: whether an RDF/XML or Turtle content has been detected or not. If True, the RDFa processing
    should not occur on the node and its descendants.
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get the full text of a node: the concatenated data of its direct text and CDATA children.
        @param Pnode: DOM Node
        @return: string
        """
        text = "".join(
            child.data
            for child in Pnode.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
        # The HTML5 parser does not recognize the CDATA escapes, i.e. it just passes on the
        # "<![CDATA[" and "]]>" strings as ordinary text, so they are stripped by hand.
        return text.replace("<![CDATA[", "").replace("]]>", "")

    options = state.options

    # This feature is optional!
    if not options.embedded_rdf:
        return False

    # Embedded Turtle, per the latest Turtle draft
    if options.host_language in accept_embedded_turtle and node.nodeName.lower() == "script":
        if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle":
            # NOTE: the prefixes in scope in the RDFa context are not prepended as @prefix
            # declarations; the script content is handed to the parser as it is.
            rdf = StringIO(_get_literal(node))
            try:
                graph.parse(rdf, format="n3", publicID=state.base)
                options.add_info("The output graph includes triples coming from an embedded Turtle script")
            except Exception as exc:
                # Only ordinary errors are reported and ignored; SystemExit and
                # KeyboardInterrupt must not be swallowed here.
                options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % (exc,))
        # NOTE(review): the listing this code was recovered from had lost its indentation. The
        # return is placed at the <script> level (every script element is skipped by the RDFa
        # processing), parallel to the RDF/XML branch below -- confirm against upstream.
        return True

    if (options.host_language in accept_embedded_rdf_xml
            and node.localName == "RDF"
            and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"):
        rdf = StringIO(return_XML(state, node))
        try:
            graph.parse(rdf)
            options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
        except Exception as exc:
            options.add_error("Embedded RDF/XML content could not be parsed (problems with %s?); ignored" % (exc,))
        return True

    return False
Handles embedded RDF. There are two possibilities:
- the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
- the file is HTML and there is a turtle portion in the C{