pyRdfa.transform.DublinCore

Transformer: handles the Dublin Core recommendation for XHTML for adding DC values. What this means is that:

  • DC namespaces are defined via C{<link rel="schema.XX" href="...."/>}
  • The 'XX.term' is used much like QNames in C{<link>} and C{<meta>} elements. For the latter, the namespaced names are added to a C{@property} attribute.

This transformer adds "real" namespaces and changes the DC references in link and meta elements to abide by the RDFa namespace syntax.

@summary: Dublin Core transformer
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
@contact: Ivan Herman, ivan@w3.org

 1# -*- coding: utf-8 -*-
 2"""
 3Transformer: handles the Dublin Core recommendation for XHTML for adding DC values. What this means is that:
 4
 5 - DC namespaces are defined via C{<link rel="schema.XX" href="...."/>}
 6 - The 'XX.term' is used much like QNames in C{<link>} and C{<meta>} elements. For the latter, the namespaced names are added to a C{@property} attribute.
 7
 8This transformer adds "real" namespaces and changes the DC references in link and meta elements to abide by the
 9RDFa namespace syntax.
10
11@summary: Dublin Core transformer
12@requires: U{RDFLib package<http://rdflib.net>}
13@organization: U{World Wide Web Consortium<http://www.w3.org>}
14@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
15@license: This software is available for use under the
16U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
17@contact: Ivan Herman, ivan@w3.org
18"""
19
20"""
21@version: $Id: DublinCore.py,v 1.4 2012-01-18 14:16:44 ivan Exp $
22$Date: 2012-01-18 14:16:44 $
23"""
24
def _dc_to_curies(value, prefixes):
    """
    Rewrite the Dublin Core style tokens of a whitespace separated attribute value.

    A token of the form C{XX.term} becomes C{XX:term} when C{XX} is one of the
    declared prefixes and C{term} is not empty; every other token is kept as is.

    @param value: the raw attribute value (e.g. the content of C{@rel} or C{@name})
    @param prefixes: the known DC prefixes (only membership is tested)
    @return: the rewritten tokens, joined by single spaces
    """
    rewritten = []
    for token in value.split():
        # only the first '.' separates the prefix from the term
        key, dot, lname = token.partition(".")
        if dot and lname and key in prefixes:
            rewritten.append(key + ":" + lname)
        else:
            rewritten.append(token)
    return " ".join(rewritten)


def DC_transform(html, options, state):
    """
    Turn the Dublin Core conventions used in C{<link>} and C{<meta>} elements into
    RDFa namespace syntax. The DOM tree is modified in place; nothing is returned.

     - every C{<link rel="schema.XX" href="..."/>} with a non-empty C{href} and a
       non-empty C{XX} adds an C{xmlns:XX} attribute to the (first) C{head} element
     - the C{@rel} value of every C{<link>} is rewritten, C{XX.term} becoming C{XX:term}
     - for every C{<meta>} with a C{@name}, the rewritten value of C{@name} is stored
       in C{@property} (replacing a previous value of that attribute, if any)

    Nothing happens if the host language is not one of XHTML, HTML5 or XHTML5, or
    if there is no C{head} element.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.options>}
    @param state: top level execution state (not used by this transformer)
    @type state: L{State<pyRdfa.state>}
    """
    from ..host import HostLanguage
    if options.host_language not in (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5):
        return

    # the head element is necessary; to be sure, the namespaces are set
    # on that level only
    try:
        head = html.getElementsByTagName("head")[0]
    except IndexError:
        # no head....
        return

    # At first, the DC namespaces must be found
    dcprefixes = {}
    for link in html.getElementsByTagName("link"):
        if not link.hasAttribute("rel"):
            continue
        rel = link.getAttribute("rel")
        uri = link.getAttribute("href")
        # a missing @href is reported as an empty string by the DOM, not as None;
        # such a declaration cannot define a usable namespace
        if not uri or not rel.startswith("schema."):
            continue
        # the startswith test guarantees that there are at least two parts
        localname = rel.split(".")[1]
        if not localname:
            # 'schema.' on its own: there is no prefix to declare
            continue
        head.setAttributeNS("", "xmlns:" + localname, uri)
        dcprefixes[localname] = uri

    # get the link elements now to find the dc elements
    for link in html.getElementsByTagName("link"):
        if link.hasAttribute("rel"):
            link.setAttribute("rel", _dc_to_curies(link.getAttribute("rel"), dcprefixes))

    # do almost the same with the meta elements: the result goes to @property,
    # and @name itself is left untouched
    for meta in html.getElementsByTagName("meta"):
        if meta.hasAttribute("name"):
            meta.setAttribute("property", _dc_to_curies(meta.getAttribute("name"), dcprefixes))
def _dc_to_curies(value, prefixes):
    """
    Rewrite the Dublin Core style tokens of a whitespace separated attribute value.

    A token of the form C{XX.term} becomes C{XX:term} when C{XX} is one of the
    declared prefixes and C{term} is not empty; every other token is kept as is.

    @param value: the raw attribute value (e.g. the content of C{@rel} or C{@name})
    @param prefixes: the known DC prefixes (only membership is tested)
    @return: the rewritten tokens, joined by single spaces
    """
    rewritten = []
    for token in value.split():
        # only the first '.' separates the prefix from the term
        key, dot, lname = token.partition(".")
        if dot and lname and key in prefixes:
            rewritten.append(key + ":" + lname)
        else:
            rewritten.append(token)
    return " ".join(rewritten)


def DC_transform(html, options, state):
    """
    Turn the Dublin Core conventions used in C{<link>} and C{<meta>} elements into
    RDFa namespace syntax. The DOM tree is modified in place; nothing is returned.

     - every C{<link rel="schema.XX" href="..."/>} with a non-empty C{href} and a
       non-empty C{XX} adds an C{xmlns:XX} attribute to the (first) C{head} element
     - the C{@rel} value of every C{<link>} is rewritten, C{XX.term} becoming C{XX:term}
     - for every C{<meta>} with a C{@name}, the rewritten value of C{@name} is stored
       in C{@property} (replacing a previous value of that attribute, if any)

    Nothing happens if the host language is not one of XHTML, HTML5 or XHTML5, or
    if there is no C{head} element.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.options>}
    @param state: top level execution state (not used by this transformer)
    @type state: L{State<pyRdfa.state>}
    """
    from ..host import HostLanguage
    if options.host_language not in (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5):
        return

    # the head element is necessary; to be sure, the namespaces are set
    # on that level only
    try:
        head = html.getElementsByTagName("head")[0]
    except IndexError:
        # no head....
        return

    # At first, the DC namespaces must be found
    dcprefixes = {}
    for link in html.getElementsByTagName("link"):
        if not link.hasAttribute("rel"):
            continue
        rel = link.getAttribute("rel")
        uri = link.getAttribute("href")
        # a missing @href is reported as an empty string by the DOM, not as None;
        # such a declaration cannot define a usable namespace
        if not uri or not rel.startswith("schema."):
            continue
        # the startswith test guarantees that there are at least two parts
        localname = rel.split(".")[1]
        if not localname:
            # 'schema.' on its own: there is no prefix to declare
            continue
        head.setAttributeNS("", "xmlns:" + localname, uri)
        dcprefixes[localname] = uri

    # get the link elements now to find the dc elements
    for link in html.getElementsByTagName("link"):
        if link.hasAttribute("rel"):
            link.setAttribute("rel", _dc_to_curies(link.getAttribute("rel"), dcprefixes))

    # do almost the same with the meta elements: the result goes to @property,
    # and @name itself is left untouched
    for meta in html.getElementsByTagName("meta"):
        if meta.hasAttribute("name"):
            meta.setAttribute("property", _dc_to_curies(meta.getAttribute("name"), dcprefixes))

@param html: a DOM node for the top level html element
@param options: invocation options
@type options: L{Options<pyRdfa.options>}
@param state: top level execution state
@type state: L{State<pyRdfa.state>}