Submitted by: Xi Ruoyao
Date: 2025-09-21
Initial Package Version: 2.0.7
Upstream Status: Under review (the upstream seems almost inactive)
Origin: https://github.com/itstool/itstool/pull/51
(it was a sed in the book, but I don't want to keep
an explicit sed that exists solely for conflict
resolution),
https://github.com/itstool/itstool/pull/57
Description: Use lxml for handling XML files instead of the
deprecated libxml2 Python module.
From 32c7d07664dc37765100285d1202d488cd6a27e8 Mon Sep 17 00:00:00 2001
From: Nils Philippsen
Date: Mon, 9 Oct 2023 14:26:43 +0200
Subject: [PATCH] Fix insufficiently quoted regular expressions
These went under the radar until Python 3.12 started warning about them.
Signed-off-by: Nils Philippsen
---
itstool.in | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/itstool.in b/itstool.in
index c21ad4b..4452616 100755
--- a/itstool.in
+++ b/itstool.in
@@ -220,7 +220,7 @@ class Message (object):
if not isinstance(text, ustr_type):
text = ustr(text, 'utf-8')
self._message[-1] += text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
- if re.sub('\s+', ' ', text).strip() != '':
+ if re.sub(r'\s+', ' ', text).strip() != '':
self._empty = False
def add_entity_ref (self, name):
@@ -318,7 +318,7 @@ class Message (object):
message += '<_:%s-%i/>' % (msg.name, placeholder)
placeholder += 1
if not self._preserve:
- message = re.sub('\s+', ' ', message).strip()
+ message = re.sub(r'\s+', ' ', message).strip()
return message
def get_preserve_space (self):
@@ -456,9 +456,9 @@ class LocNote (object):
if self._preserve_space:
return self.locnote
else:
- return re.sub('\s+', ' ', self.locnote).strip()
+ return re.sub(r'\s+', ' ', self.locnote).strip()
elif self.locnoteref is not None:
- return '(itstool) link: ' + re.sub('\s+', ' ', self.locnoteref).strip()
+ return '(itstool) link: ' + re.sub(r'\s+', ' ', self.locnoteref).strip()
return ''
@@ -889,7 +889,7 @@ class Document (object):
trans = translations.ugettext('_\x04translator-credits')
if trans is None or trans == 'translator-credits':
return
- regex = re.compile('(.*) \<(.*)\>, (.*)')
+ regex = re.compile(r'(.*) \<(.*)\>, (.*)')
for credit in trans.split('\n'):
match = regex.match(credit)
if not match:
@@ -924,7 +924,7 @@ class Document (object):
prevnode = None
if node.prev is not None and node.prev.type == 'text':
prevtext = node.prev.content
- if re.sub('\s+', '', prevtext) == '':
+ if re.sub(r'\s+', '', prevtext) == '':
prevnode = node.prev
for lang in sorted(list(translations.keys()), reverse=True):
locale = self.get_its_locale_filter(node)
@@ -1468,7 +1468,7 @@ def match_locale(extrange, locale):
localei += 1
return True
-_locale_pattern = re.compile('([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')
+_locale_pattern = re.compile(r'([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')
def convert_locale (locale):
# Automatically convert POSIX-style locales to BCP47
match = _locale_pattern.match(locale)
From 15027b5391e3d2c45846524721abbe978ca73def Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Tue, 15 Apr 2025 02:51:25 +0200
Subject: [PATCH 1/5] Switch from libxml2 to lxml
Most of the transition is straight-forward, but some issues turned up.
- lxml doesn't seem to expose the prefixes of attributes, requiring
an XPath evaluation as work-around.
- Serializing the internal subset is a bit hacky.
- lxml doesn't support attribute nodes, so we have to emulate them.
- lxml doesn't support attributes as XPath context nodes, so some use
cases aren't supported. Using a `.` expression on an attribute
works, though.
Changes to expected test results are mostly cosmetic.
- Whitespace before and after the document element is processed more
faithfully.
- lxml removes some superfluous namespace prefixes.
- There's one superfluous namespace declaration which isn't removed
anymore.
Fixes #10.
---
configure.ac | 2 +-
itstool.in | 1012 +++++++++++------------
tests/IT-join-1.joined.xml | 6 +-
tests/IT-prefixes-1.ll.xml | 2 +-
tests/Translate/Translate1.ll.xml | 5 +-
tests/Translate/Translate2.ll.xml | 3 +-
tests/Translate/Translate3.ll.wrong.xml | 3 +-
tests/Translate/Translate3.ll.xml | 3 +-
tests/Translate/Translate4.ll.xml | 3 +-
tests/Translate/Translate5.ll.xml | 3 +-
tests/Translate/Translate6.ll.xml | 3 +-
tests/Translate/TranslateGlobal.ll.xml | 3 +-
tests/WithinText1.ll.xml | 3 +-
tests/WithinText2.ll.xml | 3 +-
tests/run_tests.py | 2 -
15 files changed, 479 insertions(+), 577 deletions(-)
diff --git a/configure.ac b/configure.ac
index 9d04372..d94bead 100644
--- a/configure.ac
+++ b/configure.ac
@@ -12,7 +12,7 @@ AC_SUBST([DATADIR])
AM_PATH_PYTHON([2.6])
-py_module=libxml2
+py_module=lxml
AC_MSG_CHECKING(for python module $py_module)
echo "import $py_module" | $PYTHON - &>/dev/null
if test $? -ne 0; then
diff --git a/itstool.in b/itstool.in
index 4452616..052255e 100755
--- a/itstool.in
+++ b/itstool.in
@@ -24,7 +24,8 @@ DATADIR="@DATADIR@"
import gettext
import hashlib
-import libxml2
+from copy import deepcopy
+from lxml import etree
import optparse
import os
import os.path
@@ -190,7 +191,7 @@ class Comment (object):
class Placeholder (object):
def __init__ (self, node):
self.node = node
- self.name = ustr(node.name, 'utf-8')
+ self.name = ustr(xml_localname(node), 'utf-8')
class Message (object):
@@ -243,32 +244,30 @@ class Message (object):
def add_start_tag (self, node):
if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
self._message.append('')
- if node.ns() is not None and node.ns().name is not None:
- self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
- else:
- self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
- for prop in xml_attr_iter(node):
- name = prop.name
- if prop.ns() is not None:
- name = prop.ns().name + ':' + name
- atval = prop.content
+ self._message[-1] += ('<%s' % ustr(xml_qname(node), 'utf-8'))
+ for name, atval in node.items():
+ qname = etree.QName(name)
+ if qname.namespace is not None:
+ # lxml doesn't expose the prefix of attributes, so we use
+ # an XPath expression to get the attribute's prefixed name.
+ # This is horribly inefficient.
+ expr = 'name(@*[local-name()="%s" and namespace-uri()="%s"])' % (
+ qname.localname, qname.namespace)
+ name = node.xpath(expr)
if not isinstance(atval, ustr_type):
atval = ustr(atval, 'utf-8')
atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
self._message += " %s=\"%s\"" % (name, atval)
- if node.children is not None:
+ if len(node) > 0 or node.text:
self._message[-1] += '>'
else:
self._message[-1] += '/>'
def add_end_tag (self, node):
- if node.children is not None:
+ if len(node) > 0 or node.text:
if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
self._message.append('')
- if node.ns() is not None and node.ns().name is not None:
- self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
- else:
- self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))
+ self._message[-1] += ('</%s>' % ustr(xml_qname(node), 'utf-8'))
def is_empty (self):
return self._empty
@@ -379,67 +378,84 @@ class Message (object):
return ret
-def xml_child_iter (node):
- child = node.children
- while child is not None:
- yield child
- child = child.next
-
-def xml_attr_iter (node):
- attr = node.get_properties()
- while attr is not None:
- yield attr
- attr = attr.next
-
-def xml_is_ns_name (node, ns, name):
- if node.type != 'element':
- return False
- return node.name == name and node.ns() is not None and node.ns().content == ns
+def xml_localname (node):
+ return etree.QName(node.tag).localname
+
+def xml_qname (node):
+ qname = etree.QName(node.tag).localname
+ if node.prefix is not None:
+ qname = node.prefix + ':' + qname
+ return qname
+
+def xml_content (node):
+ if isinstance(node, string_types):
+ return node
+ if isinstance(node, XMLAttr):
+ return node.parent.get(node.tag)
+ return etree.tostring(node, method='text', encoding='unicode')
+
+def xml_delete_node (node):
+ parent = node.getparent()
+ prev = node.getprevious()
+ tail = node.tail
+ if parent is not None:
+ parent.remove(node)
+ if prev is not None:
+ if prev.tail is None or re.fullmatch(r'\s+', prev.tail):
+ prev.tail = tail
+ else:
+ prev.tail += tail
+ elif parent is not None:
+ if parent.text is None or re.fullmatch(r'\s+', parent.text):
+ parent.text = tail
+ else:
+ parent.text += tail
def xml_get_node_path(node):
# The built-in nodePath() method only does numeric indexes
# when necessary for disambiguation. For various reasons,
# we prefer always using indexes.
- name = node.name
- if node.ns() is not None and node.ns().name is not None:
- name = node.ns().name + ':' + name
- if node.type == 'attribute':
+ name = xml_qname(node)
+ if isinstance(node, XMLAttr):
name = '@' + name
name = '/' + name
- if node.type == 'element' and node.parent.type == 'element':
+ if node.getparent() is not None:
count = 1
- prev = node.previousElementSibling()
+ prev = node.getprevious()
while prev is not None:
- if prev.name == node.name:
- if prev.ns() is None:
- if node.ns() is None:
- count += 1
- else:
- if node.ns() is not None:
- if prev.ns().name == node.ns().name:
- count += 1
- prev = prev.previousElementSibling()
+ if prev.tag == node.tag:
+ count += 1
+ prev = prev.getprevious()
name = '%s[%i]' % (name, count)
- if node.parent.type == 'element':
- name = xml_get_node_path(node.parent) + name
+ name = xml_get_node_path(node.getparent()) + name
return name
-def xml_error_catcher(doc, error):
- doc._xml_err += " %s" % error
-def fix_node_ns (node, nsdefs):
- childnsdefs = nsdefs.copy()
- nsdef = node.nsDefs()
- while nsdef is not None:
- nextnsdef = nsdef.next
- if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content:
- node.removeNsDef(nsdef.content)
- else:
- childnsdefs[nsdef.name] = nsdef.content
- nsdef = nextnsdef
- for child in xml_child_iter(node):
- if child.type == 'element':
- fix_node_ns(child, childnsdefs)
+# lxml doesn't support attribute nodes, so we have to emulate them.
+class XMLAttr (object):
+ def __init__(self, element, tag):
+ self.parent = element
+ self.tag = tag
+ self.attrib = {}
+ self.sourceline = element.sourceline
+
+ def __repr__(self):
+ return '%s@%s' % (repr(self.parent), self.tag)
+
+ def __eq__(self, other):
+ return other and self.parent == other.parent and self.tag == other.tag
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ def getparent(self):
+ return self.parent
+
+ def get(self, default=None):
+ return default
class LocNote (object):
@@ -464,82 +480,51 @@ class LocNote (object):
class Document (object):
def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
- self._xml_err = ''
- libxml2.registerErrorHandler(xml_error_catcher, self)
- try:
- ctxt = libxml2.createFileParserCtxt(filename)
- except:
- sys.stderr.write('Error: cannot open XML file %s\n' % filename)
- sys.exit(1)
- ctxt.lineNumbers(1)
self._load_dtd = load_dtd
self._keep_entities = keep_entities
- if load_dtd:
- ctxt.loadSubset(1)
- if keep_entities:
- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
- ctxt.replaceEntities(1)
- ctxt.parseDocument()
+ parser = etree.XMLParser(load_dtd = load_dtd or keep_entities,
+ resolve_entities = not(keep_entities))
+ doc = etree.parse(filename, parser)
+ doc.xinclude()
self._filename = filename
- self._doc = ctxt.doc()
+ self._doc = doc
self._localrules = []
- def pre_process (node):
- for child in xml_child_iter(node):
- if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
- if child.nsProp('parse', None) == 'text':
- child.xincludeProcessTree()
- elif xml_is_ns_name(child, NS_ITS, 'rules'):
- if child.hasNsProp('href', NS_XLINK):
- href = child.nsProp('href', NS_XLINK)
- fileref = os.path.join(os.path.dirname(filename), href)
- if not os.path.exists(fileref):
- if opts.itspath is not None:
- for pathdir in opts.itspath:
- fileref = os.path.join(pathdir, href)
- if os.path.exists(fileref):
- break
- if not os.path.exists(fileref):
- sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
- sys.exit(1)
- hctxt = libxml2.createFileParserCtxt(fileref)
- hctxt.replaceEntities(1)
- hctxt.parseDocument()
- root = hctxt.doc().getRootElement()
- version = None
- if root.hasNsProp('version', None):
- version = root.nsProp('version', None)
- else:
- sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
- os.path.basename(href))
- if version is not None and version not in ('1.0', '2.0'):
- sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
- (os.path.basename(href), root.nsProp('version', None)))
- else:
- self._localrules.append(root)
+ for child in doc.iter():
+ if child.tag == '{' + NS_ITS + '}rules':
+ href = child.get('{' + NS_XLINK + '}href')
+ if href is not None:
+ fileref = os.path.join(os.path.dirname(filename), href)
+ if not os.path.exists(fileref):
+ if opts.itspath is not None:
+ for pathdir in opts.itspath:
+ fileref = os.path.join(pathdir, href)
+ if os.path.exists(fileref):
+ break
+ if not os.path.exists(fileref):
+ sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
+ sys.exit(1)
+ root = etree.parse(fileref).getroot()
version = None
- if child.hasNsProp('version', None):
- version = child.nsProp('version', None)
+ version = root.get('version')
+ if version is None:
+ sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
+ os.path.basename(href))
+ elif version not in ('1.0', '2.0'):
+ sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
+ (os.path.basename(href), root.get('version')))
else:
- root = child.doc.getRootElement()
- if root.hasNsProp('version', NS_ITS):
- version = root.nsProp('version', NS_ITS)
- else:
- sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
- if version is not None and version not in ('1.0', '2.0'):
- sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
- version)
- else:
- self._localrules.append(child)
- pre_process(child)
- pre_process(self._doc)
- try:
- self._check_errors()
- except libxml2.parserError as e:
- sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
- sys.exit(1)
+ self._localrules.append(root)
+ version = child.get('version')
+ if version is None:
+ root = child.getroottree()
+ version = root.get('{' + NS_ITS + '}version')
+ if version is None:
+ sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
+ elif version not in ('1.0', '2.0'):
+ sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
+ version)
+ else:
+ self._localrules.append(child)
self._msgs = messages
self._its_translate_nodes = {}
self._its_within_text_nodes = {}
@@ -556,13 +541,6 @@ class Document (object):
self._clear_cache()
- def __del__ (self):
- self._doc.freeDoc()
-
- def _check_errors(self):
- if self._xml_err:
- raise libxml2.parserError(self._xml_err)
-
def _clear_cache(self):
self._its_translate_nodes_cache = {}
self._its_locale_filters_cache = {}
@@ -570,123 +548,107 @@ class Document (object):
def get_its_params(self, rules):
params = {}
- for child in xml_child_iter(rules):
- if xml_is_ns_name(child, NS_ITS, 'param'):
- params[child.nsProp('name', None)] = child.getContent()
+ for child in rules.iterchildren():
+ if child.tag == '{' + NS_ITS + '}param':
+ params[child.get('name')] = xml_content(child)
return params
- def register_its_params(self, xpath, params, userparams={}):
- for param in params:
- if param in userparams:
- xpath.xpathRegisterVariable(name, None, userparams[param])
+ def register_its_params(self, var, params, userparams={}):
+ for name in params:
+ if name in userparams:
+ var[name] = userparams[name]
else:
- xpath.xpathRegisterVariable(name, None, params[param])
+ var[name] = params[name]
def apply_its_rule(self, rule, xpath):
self._clear_cache()
- if rule.type != 'element':
- return
- if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- self._its_translate_nodes[node] = rule.nsProp('translate', None)
- elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
- elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- val = rule.nsProp('preserveSpace', None)
+ if rule.tag == '{' + NS_ITS + '}translateRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ self._its_translate_nodes[node] = rule.get('translate')
+ elif rule.tag == '{' + NS_ITS + '}withinTextRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ self._its_within_text_nodes[node] = rule.get('withinText')
+ elif rule.tag == '{' + NS_ITST + '}preserveSpaceRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ val = rule.get('preserveSpace')
if val == 'yes':
self._its_preserve_space_nodes[node] = 'preserve'
- elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
- elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
- if rule.nsProp('selector', None) is not None:
- if rule.hasNsProp('localeFilterList', None):
- lst = rule.nsProp('localeFilterList', None)
- else:
- lst = '*'
- if rule.hasNsProp('localeFilterType', None):
- typ = rule.nsProp('localeFilterType', None)
- else:
- typ = 'include'
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+ elif rule.tag == '{' + NS_ITS + '}preserveSpaceRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ self._its_preserve_space_nodes[node] = rule.get('space')
+ elif rule.tag == '{' + NS_ITS + '}localeFilterRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ lst = rule.get('localeFilterList', '*')
+ typ = rule.get('localeFilterType', 'include')
+ for node in self._try_xpath_eval(xpath, sel):
self._its_locale_filters[node] = (lst, typ)
- elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- self._itst_drop_nodes[node] = rule.nsProp('drop', None)
- elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
- sel = rule.nsProp('selector', None)
- idv = rule.nsProp('idValue', None)
+ elif rule.tag == '{' + NS_ITST + '}dropRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ self._itst_drop_nodes[node] = rule.get('drop')
+ elif rule.tag == '{' + NS_ITS + '}idValueRule':
+ sel = rule.get('selector')
+ idv = rule.get('idValue')
if sel is not None and idv is not None:
for node in self._try_xpath_eval(xpath, sel):
- try:
- oldnode = xpath.contextNode()
- except:
- oldnode = None
- xpath.setContextNode(node)
- idvalue = self._try_xpath_eval(xpath, idv)
+ idvalue = self._try_xpath_eval(xpath, idv, node=node)
if isinstance(idvalue, string_types):
self._its_id_values[node] = idvalue
else:
for val in idvalue:
- self._its_id_values[node] = val.content
+ self._its_id_values[node] = xml_content(val)
break
- xpath.setContextNode(oldnode)
pass
- elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- if rule.hasNsProp('context', None):
- self._itst_contexts[node] = rule.nsProp('context', None)
- elif rule.hasNsProp('contextPointer', None):
- try:
- oldnode = xpath.contextNode()
- except:
- oldnode = None
- xpath.setContextNode(node)
- ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
+ elif rule.tag == '{' + NS_ITST + '}contextRule':
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ ctxt = rule.get('context')
+ cp = rule.get('contextPointer')
+ if ctxt is not None:
+ self._itst_contexts[node] = ctxt
+ elif cp is not None:
+ ctxt = self._try_xpath_eval(xpath, cp, node=node)
if isinstance(ctxt, string_types):
self._itst_contexts[node] = ctxt
else:
for ctxt in ctxt:
- self._itst_contexts[node] = ctxt.content
+ self._itst_contexts[node] = xml_content(ctxt)
break
- xpath.setContextNode(oldnode)
- elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
+ elif rule.tag == '{' + NS_ITS + '}locNoteRule':
locnote = None
- notetype = rule.nsProp('locNoteType', None)
- for child in xml_child_iter(rule):
- if xml_is_ns_name(child, NS_ITS, 'locNote'):
- locnote = LocNote(locnote=child.content, locnotetype=notetype)
- break
+ notetype = rule.get('locNoteType')
+ for child in rule.iterchildren('{' + NS_ITS + '}locNote'):
+ locnote = LocNote(locnote=xml_content(child), locnotetype=notetype)
+ break
if locnote is None:
- if rule.hasNsProp('locNoteRef', None):
- locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
- if rule.nsProp('selector', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+ if 'locNoteRef' in rule.attrib:
+ locnote = LocNote(locnoteref=rule.get('locNoteRef'), locnotetype=notetype)
+ sel = rule.get('selector')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
if locnote is not None:
self._its_loc_notes.setdefault(node, []).append(locnote)
else:
- if rule.hasNsProp('locNotePointer', None):
- sel = rule.nsProp('locNotePointer', None)
+ if 'locNotePointer' in rule.attrib:
+ sel = rule.get('locNotePointer')
ref = False
- elif rule.hasNsProp('locNoteRefPointer', None):
- sel = rule.nsProp('locNoteRefPointer', None)
+ elif 'locNoteRefPointer' in rule.attrib:
+ sel = rule.get('locNoteRefPointer')
ref = True
else:
continue
- try:
- oldnode = xpath.contextNode()
- except:
- oldnode = None
- xpath.setContextNode(node)
- note = self._try_xpath_eval(xpath, sel)
+ note = self._try_xpath_eval(xpath, sel, node=node)
if isinstance(note, string_types):
if ref:
nodenote = LocNote(locnoteref=note, locnotetype=notetype)
@@ -695,55 +657,46 @@ class Document (object):
self._its_loc_notes.setdefault(node, []).append(nodenote)
else:
for note in note:
+ text = xml_content(note)
if ref:
- nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
+ nodenote = LocNote(locnoteref=text, locnotetype=notetype)
else:
- nodenote = LocNote(locnote=note.content, locnotetype=notetype,
+ nodenote = LocNote(locnote=text, locnotetype=notetype,
space=self.get_preserve_space(note))
self._its_loc_notes.setdefault(node, []).append(nodenote)
break
- xpath.setContextNode(oldnode)
- elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
- if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
- try:
- oldnode = xpath.contextNode()
- except:
- oldnode = None
- xpath.setContextNode(node)
- res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
+ elif rule.tag == '{' + NS_ITS + '}langRule':
+ sel = rule.get('selector')
+ lp = rule.get('langPointer')
+ if sel is not None and lp is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+ res = self._try_xpath_eval(xpath, lp, node=node)
if len(res) > 0:
- self._its_lang[node] = res[0].content
+ self._its_lang[node] = xml_content(res[0])
# We need to construct language attributes, not just read
# language information. Technically, langPointer could be
# any XPath expression. But if it looks like an attribute
# accessor, just use the attribute name.
- if rule.nsProp('langPointer', None)[0] == '@':
- self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
- xpath.setContextNode(oldnode)
- elif xml_is_ns_name(rule, NS_ITST, 'credits'):
- if rule.nsProp('appendTo', None) is not None:
- for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
+ if lp[0] == '@':
+ self._itst_lang_attr[node] = lp[1:]
+ elif rule.tag == '{' + NS_ITST + '}credits':
+ sel = rule.get('appendTo')
+ if sel is not None:
+ for node in self._try_xpath_eval(xpath, sel):
self._itst_credits = (node, rule)
break
- elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
- xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
- sel = rule.nsProp('selector', None)
- if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
- ptr = rule.nsProp('externalResourceRefPointer', None)
+ elif (rule.tag == '{' + NS_ITS + '}externalResourceRefRule' or
+ rule.tag == '{' + NS_ITST + '}externalRefRule'):
+ sel = rule.get('selector')
+ if rule.tag == '{' + NS_ITS + '}externalResourceRefRule':
+ ptr = rule.get('externalResourceRefPointer')
else:
- ptr = rule.nsProp('refPointer', None)
+ ptr = rule.get('refPointer')
if sel is not None and ptr is not None:
for node in self._try_xpath_eval(xpath, sel):
- try:
- oldnode = xpath.contextNode()
- except:
- oldnode = None
- xpath.setContextNode(node)
- res = self._try_xpath_eval(xpath, ptr)
+ res = self._try_xpath_eval(xpath, ptr, node=node)
if len(res) > 0:
- self._its_externals[node] = res[0].content
- xpath.setContextNode(oldnode)
+ self._its_externals[node] = xml_content(res[0])
def apply_its_rules(self, builtins, userparams={}):
self._clear_cache()
@@ -773,94 +726,59 @@ class Document (object):
def apply_its_file(self, filename, userparams={}):
self._clear_cache()
- doc = libxml2.parseFile(filename)
- root = doc.getRootElement()
- if not xml_is_ns_name(root, NS_ITS, 'rules'):
+ parser = etree.XMLParser(resolve_entities = False)
+ root = etree.parse(filename, parser).getroot()
+ if root.tag != '{' + NS_ITS + '}rules':
return
- version = None
- if root.hasNsProp('version', None):
- version = root.nsProp('version', None)
- else:
+ version = root.get('version')
+ if version is None:
sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
os.path.basename(filename))
- if version is not None and version not in ('1.0', '2.0'):
+ elif version not in ('1.0', '2.0'):
sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
- (os.path.basename(filename), root.nsProp('version', None)))
+ (os.path.basename(filename), root.get('version')))
return
matched = True
- for match in xml_child_iter(root):
- if xml_is_ns_name(match, NS_ITST, 'match'):
+ for match in root.iterchildren():
+ if match.tag == '{' + NS_ITST + '}match':
matched = False
- xpath = self._doc.xpathNewContext()
- par = match
- nss = {}
- while par is not None:
- nsdef = par.nsDefs()
- while nsdef is not None:
- if nsdef.name is not None:
- if nsdef.name not in nss:
- nss[nsdef.name] = nsdef.content
- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
- nsdef = nsdef.next
- par = par.parent
- if match.hasNsProp('selector', None):
- if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
+ sel = match.get('selector')
+ if sel is not None:
+ ns = { k: v for k, v in match.nsmap.items() if k is not None }
+ xpath = (ns, {})
+ if len(self._try_xpath_eval(xpath, sel)) > 0:
matched = True
break
if matched == False:
return
+ ns = { k: v for k, v in match.nsmap.items() if k is not None }
+ var = {}
params = self.get_its_params(root)
- for rule in xml_child_iter(root):
- xpath = self._doc.xpathNewContext()
- par = match
- nss = {}
- while par is not None:
- nsdef = par.nsDefs()
- while nsdef is not None:
- if nsdef.name is not None:
- if nsdef.name not in nss:
- nss[nsdef.name] = nsdef.content
- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
- nsdef = nsdef.next
- par = par.parent
- self.register_its_params(xpath, params, userparams=userparams)
+ self.register_its_params(var, params, userparams=userparams)
+ xpath = (ns, var)
+ for rule in root.iterchildren():
self.apply_its_rule(rule, xpath)
def apply_local_its_rules(self, userparams={}):
self._clear_cache()
for rules in self._localrules:
- def reg_ns(xpath, node):
- if node.parent is not None:
- reg_ns(xpath, node.parent)
- nsdef = node.nsDefs()
- while nsdef is not None:
- if nsdef.name is not None:
- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
- nsdef = nsdef.next
- xpath = self._doc.xpathNewContext()
- reg_ns(xpath, rules)
+ var = {}
params = self.get_its_params(rules)
- self.register_its_params(xpath, params, userparams=userparams)
- for rule in xml_child_iter(rules):
- if rule.type != 'element':
- continue
- if rule.nsDefs() is not None:
- rule_xpath = self._doc.xpathNewContext()
- reg_ns(rule_xpath, rule)
- self.register_its_params(rule_xpath, params, userparams=userparams)
- else:
- rule_xpath = xpath
+ self.register_its_params(var, params, userparams=userparams)
+ for rule in rules.iterchildren():
+ ns = { k: v for k, v in rule.nsmap.items() if k is not None }
+ rule_xpath = (ns, var)
self.apply_its_rule(rule, rule_xpath)
def _append_credits(self, parent, node, trdata):
- if xml_is_ns_name(node, NS_ITST, 'for-each'):
- select = node.nsProp('select', None)
+ if node.tag == '{' + NS_ITST + '}for-each':
+ select = node.get('select')
if select == 'years':
for year in trdata[2].split(','):
- for child in xml_child_iter(node):
+ for child in node.iterchildren():
self._append_credits(parent, child, trdata + (year.strip(),))
- elif xml_is_ns_name(node, NS_ITST, 'value-of'):
- select = node.nsProp('select', None)
+ elif node.tag == '{' + NS_ITST + '}value-of':
+ select = node.get('select')
val = None
if select == 'name':
val = trdata[0]
@@ -875,9 +793,9 @@ class Document (object):
val = val.encode('utf-8')
parent.addContent(val)
else:
- newnode = node.copyNode(2)
- parent.addChild(newnode)
- for child in xml_child_iter(node):
+ newnode = parent.makeelement(node.tag, node.attrib)
+ parent.append(newnode)
+ for child in node.iterchildren():
self._append_credits(newnode, child, trdata)
def merge_credits(self, translations, language, node):
@@ -895,7 +813,7 @@ class Document (object):
if not match:
continue
trdata = match.groups()
- for node in xml_child_iter(self._itst_credits[1]):
+ for node in self._itst_credits[1].iterchildren():
self._append_credits(self._itst_credits[0], node, trdata)
def join_translations(self, translations, node=None, strict=False):
@@ -903,29 +821,30 @@ class Document (object):
if node is None:
is_root = True
self.generate_messages(comments=False)
- node = self._doc.getRootElement()
- if node is None or node.type != 'element':
+ node = self._doc.getroot()
+ if node is None:
return
if self.get_itst_drop(node) == 'yes':
- prev = node.prev
- node.unlinkNode()
- node.freeNode()
- if prev is not None and prev.isBlankNode():
- prev.unlinkNode()
- prev.freeNode()
+ xml_delete_node(node)
return
msg = self._msgs.get_message_by_node(node)
if msg is None:
- self.translate_attrs(node, node)
- children = [child for child in xml_child_iter(node)]
- for child in children:
+ #self.translate_attrs(node, node)
+ for child in node.iterchildren():
self.join_translations(translations, node=child, strict=strict)
else:
- prevnode = None
- if node.prev is not None and node.prev.type == 'text':
- prevtext = node.prev.content
- if re.sub(r'\s+', '', prevtext) == '':
- prevnode = node.prev
+ prevtext = None
+ prev = node.getprevious()
+ if prev is None:
+ parent = node.getparent()
+ if parent is not None:
+ prevtext = parent.text
+ else:
+ prevtext = prev.tail
+ if prevtext is not None:
+ if not re.fullmatch(r'\s+', prevtext):
+ prevtext = None
+ i = 0
for lang in sorted(list(translations.keys()), reverse=True):
locale = self.get_its_locale_filter(node)
lmatch = match_locale_list(locale[0], lang)
@@ -933,24 +852,26 @@ class Document (object):
continue
newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
if newnode != node:
- newnode.setProp('xml:lang', lang)
- node.addNextSibling(newnode)
- if prevnode is not None:
- node.addNextSibling(prevnode.copyNode(0))
- if is_root:
- # Because of the way we create nodes and rewrite the document,
- # we end up with lots of redundant namespace definitions. We
- # kill them off in one fell swoop at the end.
- fix_node_ns(node, {})
- self._check_errors()
+ newnode.set('{' + NS_XML + '}lang', lang)
+ node.addnext(newnode)
+ if i == 0:
+ # Move tail to first new node
+ newnode.tail = node.tail
+ if prevtext is not None:
+ node.tail = prevtext
+ else:
+ if prevtext is not None:
+ newnode.tail = prevtext
+ i += 1
+
def merge_translations(self, translations, language, node=None, strict=False):
is_root = False
if node is None:
is_root = True
self.generate_messages(comments=False)
- node = self._doc.getRootElement()
- if node is None or node.type != 'element':
+ node = self._doc.getroot()
+ if node is None:
return
drop = False
locale = self.get_its_locale_filter(node)
@@ -962,26 +883,23 @@ class Document (object):
if match_locale_list(locale[0], language):
drop = True
if self.get_itst_drop(node) == 'yes' or drop:
- prev = node.prev
- node.unlinkNode()
- node.freeNode()
- if prev is not None and prev.isBlankNode():
- prev.unlinkNode()
- prev.freeNode()
+ xml_delete_node(node)
return
if is_root:
self.merge_credits(translations, language, node)
msg = self._msgs.get_message_by_node(node)
if msg is None:
self.translate_attrs(node, node)
- children = [child for child in xml_child_iter(node)]
- for child in children:
+ for child in node.iterchildren():
self.merge_translations(translations, language, node=child, strict=strict)
else:
newnode = self.get_translated(node, translations, strict=strict, lang=language)
if newnode != node:
self.translate_attrs(node, newnode)
- node.replaceNode(newnode)
+ newnode.tail = node.tail
+ parent = node.getparent()
+ if parent is not None:
+ parent.replace(node, newnode)
if is_root:
# Apply language attributes to untranslated nodes. We don't do
# this before processing, because then these attributes would
@@ -998,31 +916,27 @@ class Document (object):
origlang = self._its_lang.get(lcpar)
if origlang is not None:
break
- lcpar = lcpar.parent
+ lcpar = lcpar.getparent()
if origlang is not None:
- lcnode.setProp(attr, origlang)
+ lcnode.set(attr, origlang)
# And then set the language attribute on the root node.
if language is not None:
attr = self._itst_lang_attr.get(node)
if attr is not None:
- node.setProp(attr, language)
- # Because of the way we create nodes and rewrite the document,
- # we end up with lots of redundant namespace definitions. We
- # kill them off in one fell swoop at the end.
- fix_node_ns(node, {})
- self._check_errors()
+ node.set(attr, language)
def translate_attrs(self, oldnode, newnode):
- trans_attrs = [attr for attr in xml_attr_iter(oldnode) if self._its_translate_nodes.get(attr, 'no') == 'yes']
- for attr in trans_attrs:
- srccontent = attr.get_content()
+ for attrname, srccontent in oldnode.items():
+ attr = XMLAttr(oldnode, attrname)
+ if self._its_translate_nodes.get(attr, 'no') != 'yes':
+ continue
if not PY3:
srccontent = srccontent.decode('utf-8')
newcontent = translations.ugettext(srccontent)
if newcontent:
if not PY3:
newcontent = newcontent.encode('utf-8')
- newnode.setProp(attr.name, newcontent)
+ newnode.set(attrname, newcontent)
def get_translated (self, node, translations, strict=False, lang=None):
msg = self._msgs.get_message_by_node(node)
@@ -1037,106 +951,84 @@ class Document (object):
trans = translations.ugettext(msgstr)
if trans is None:
return node
- nss = {}
- def reg_ns(node, nss):
- if node.parent is not None:
- reg_ns(node.parent, nss)
- nsdef = node.nsDefs()
- while nsdef is not None:
- nss[nsdef.name] = nsdef.content
- nsdef = nsdef.next
- reg_ns(node, nss)
- nss['_'] = NS_BLANK
- try:
- blurb = node.doc.intSubset().serialize('utf-8')
- except Exception:
- blurb = ''
- blurb += '<' + ustr(node.name, 'utf-8')
- for nsname in list(nss.keys()):
+ blurb = ''
+ doc = node.getroottree()
+ if doc.docinfo.internalDTD:
+ # This is an ugly hack to serialize the DTD. We copy the
+ # document, replace the document element, serialize the
+ # document and remove the last line which contains the
+ # document element, leaving only the DTD.
+ copy = deepcopy(doc)
+ root = copy.getroot()
+ newroot = root.makeelement(root.tag)
+ copy._setroot(newroot)
+ blurb = re.sub('.*$', '', etree.tostring(copy, encoding='unicode'))
+ localname = ustr(xml_localname(node), 'utf-8')
+ blurb += '<' + localname
+ blurb += ' xmlns:_="%s"' % NS_BLANK
+ for nsname, nsuri in node.nsmap.items():
if nsname is None:
- blurb += ' xmlns="%s"' % nss[nsname]
+ blurb += ' xmlns="%s"' % nsuri
else:
- blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
- blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8'))
- if not PY3:
- blurb = blurb.encode('utf-8')
- ctxt = libxml2.createDocParserCtxt(blurb)
- if self._load_dtd:
- ctxt.loadSubset(1)
- if self._keep_entities:
- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
- ctxt.replaceEntities(1)
- ctxt.parseDocument()
- trnode = ctxt.doc().getRootElement()
+ blurb += ' xmlns:%s="%s"' % (nsname, nsuri)
+ blurb += '>%s</%s>' % (trans, localname)
+ parser = etree.XMLParser(load_dtd = self._load_dtd or self._keep_entities,
+ resolve_entities = not(self._keep_entities))
try:
- self._check_errors()
- except libxml2.parserError:
+ trnode = etree.fromstring(blurb, parser)
+ except:
if strict:
raise
else:
sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
- (lang + ' ') if lang is not None else '',
- msgstr.encode('utf-8')))
- self._xml_err = ''
+ (lang + ' ') if lang is not None else '',
+ msgstr.encode('utf-8')))
return node
- def scan_node(node):
- children = [child for child in xml_child_iter(node)]
- for child in children:
- if child.type != 'element':
+ try:
+ for child in trnode.iterdescendants():
+ if isinstance(child, (etree._Entity, etree._Comment, etree._ProcessingInstruction)):
continue
- if child.ns() is not None and child.ns().content == NS_BLANK:
- ph_node = msg.get_placeholder(child.name).node
- if self.has_child_elements(ph_node):
+ qname = etree.QName(child.tag)
+ if qname.namespace == NS_BLANK:
+ ph_node = msg.get_placeholder(qname.localname).node
+ if len(ph_node):
self.merge_translations(translations, None, ph_node, strict=strict)
- newnode = ph_node.copyNode(1)
- newnode.setTreeDoc(self._doc)
- child.replaceNode(newnode)
+ newnode = deepcopy(ph_node)
+ newnode.tail = child.tail
+ child.getparent().replace(child, newnode)
else:
repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
- child.replaceNode(repl)
- scan_node(child)
- try:
- scan_node(trnode)
+ repl.tail = child.tail
+ child.getparent().replace(child, repl)
except:
+ raise
if strict:
raise
else:
sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
(lang + ' ') if lang is not None else '',
msgstr.encode('utf-8')))
- self._xml_err = ''
- ctxt.doc().freeDoc()
return node
- retnode = node.copyNode(2)
- retnode.setTreeDoc(self._doc)
- for child in xml_child_iter(trnode):
- newnode = child.copyNode(1)
- newnode.setTreeDoc(self._doc)
- retnode.addChild(newnode)
-
- ctxt.doc().freeDoc()
+ retnode = self._doc.getroot().makeelement(node.tag, node.attrib, node.nsmap)
+ retnode.text = trnode.text
+ for child in trnode.iterchildren():
+ retnode.append(child)
+
return retnode
def generate_messages(self, comments=True):
if self._itst_credits is not None:
self._msgs.add_credits()
- for child in xml_child_iter(self._doc):
- if child.type == 'element':
- self.generate_message(child, None, comments=comments)
- break
+ if self._doc is not None:
+ self.generate_message(self._doc.getroot(), None, comments=comments)
def generate_message(self, node, msg, comments=True, path=None):
- if node.type in ('text', 'cdata') and msg is not None:
- msg.add_text(node.content)
+ if isinstance(node, etree._Entity):
+ msg.add_entity_ref(node.name)
return
- if node.type == 'entity_ref':
- msg.add_entity_ref(node.name);
- if node.type != 'element':
+ if isinstance(node, XMLAttr):
return
- if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
+ if node.get('{' + NS_ITST + '}drop', 'no') == 'yes':
return
if self._itst_drop_nodes.get(node, 'no') == 'yes':
return
@@ -1158,9 +1050,7 @@ class Document (object):
if msg is not None:
msg.add_placeholder(node)
msg = Message()
- ctxt = None
- if node.hasNsProp('context', NS_ITST):
- ctxt = node.nsProp('context', NS_ITST)
+ ctxt = node.get('{' + NS_ITST + '}context')
if ctxt is None:
ctxt = self._itst_contexts.get(node)
if ctxt is not None:
@@ -1173,27 +1063,38 @@ class Document (object):
msg.set_preserve_space()
if self.get_its_locale_filter(node) != ('*', 'include'):
msg.set_locale_filter(self.get_its_locale_filter(node))
- msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
- msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8')))
+ msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
+ parent = node.getparent()
+ if parent is None:
+ ptag = '#root'
+ else:
+ ptag = xml_localname(parent)
+ msg.add_marker('%s/%s' % (ustr(ptag, 'utf-8'), ustr(xml_localname(node), 'utf-8')))
else:
withinText = True
msg.add_start_tag(node)
if not withinText:
# Add msg for translatable node attributes
- for attr in xml_attr_iter(node):
+ for attrname, attrval in node.items():
+ attr = XMLAttr(node, attrname)
if self._its_translate_nodes.get(attr, 'no') == 'yes':
attr_msg = Message()
if self.get_preserve_space(attr):
attr_msg.set_preserve_space()
- attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
- attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name))
- attr_msg.add_text(attr.content)
+ attr_msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
+ attr_msg.add_marker('%s/%s@%s' % (
+ xml_localname(node.getparent()),
+ xml_localname(node),
+ etree.QName(attrname).localname))
+ attr_msg.add_text(attrval)
if comments:
for locnote in self.get_its_loc_notes(attr):
comment = Comment(locnote)
comment.add_marker ('%s/%s@%s' % (
- node.parent.name, node.name, attr.name))
+ xml_localname(node.getparent()),
+ xml_localname(node),
+ etree.QName(attrname).localname))
attr_msg.add_comment(comment)
self._msgs.add_message(attr_msg, attr)
@@ -1204,15 +1105,16 @@ class Document (object):
for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)):
comment = Comment(locnote)
if withinText:
- comment.add_marker('.%s/%s' % (path, cnode.name))
+ comment.add_marker('.%s/%s' % (path, xml_localname(cnode)))
msg.add_comment(comment)
hasnote = True
if hasnote or not is_unit:
break
- cnode = cnode.parent
+ cnode = cnode.getparent()
self.generate_external_resource_message(node)
- for attr in xml_attr_iter(node):
+ for attrname in node.keys():
+ attr = XMLAttr(node, attrname)
self.generate_external_resource_message(attr)
idvalue = self.get_its_id_value(attr)
if idvalue is not None:
@@ -1220,9 +1122,13 @@ class Document (object):
msg.add_id_value(basename + '#' + idvalue)
if withinText:
- path = path + '/' + node.name
- for child in xml_child_iter(node):
+ path = path + '/' + node.tag
+ if node.text is not None and msg is not None:
+ msg.add_text(node.text)
+ for child in node.iterchildren():
self.generate_message(child, msg, comments=comments, path=path)
+ if child.tail is not None and msg is not None:
+ msg.add_text(child.tail)
if translate:
if is_unit and not msg.is_empty():
@@ -1234,12 +1140,17 @@ class Document (object):
if node not in self._its_externals:
return
resref = self._its_externals[node]
- if node.type == 'element':
- translate = self.get_its_translate(node)
- marker = '%s/%s' % (node.parent.name, node.name)
+ if isinstance(node, XMLAttr):
+ elem = node.getparent()
+ translate = self.get_its_translate(elem)
+ marker = '%s/%s/@%s' % (
+ xml_localname(elem.getparent()),
+ xml_localname(elem),
+ xml_localname(node))
else:
- translate = self.get_its_translate(node.parent)
- marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name)
+ translate = self.get_its_translate(node)
+ marker = '%s/%s' % (xml_localname(node.getparent()),
+ xml_localname(node))
if translate == 'no':
return
msg = Message()
@@ -1253,7 +1164,7 @@ class Document (object):
txt = "external ref='%s' md5='%s'" % (resref, filemd5)
msg.set_context('_')
msg.add_text(txt)
- msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
+ msg.add_source('%s:%i' % (self._doc.docinfo.URL, node.sourceline))
msg.add_marker(marker)
msg.add_comment(Comment('This is a reference to an external file such as an image or'
' video. When the file changes, the md5 hash will change to'
@@ -1265,44 +1176,41 @@ class Document (object):
def is_translation_unit (self, node):
return self.get_its_within_text(node) != 'yes'
- def has_child_elements(self, node):
- return len([child for child in xml_child_iter(node) if child.type=='element'])
-
def get_preserve_space (self, node):
- while node.type in ('attribute', 'element'):
- if node.getSpacePreserve() == 1:
+ while node is not None:
+ if node.get('{' + NS_XML + '}space') == 'preserve':
return True
if node in self._its_preserve_space_nodes:
return (self._its_preserve_space_nodes[node] == 'preserve')
- node = node.parent
+ node = node.getparent()
return False
def get_its_translate(self, node):
if node in self._its_translate_nodes_cache:
return self._its_translate_nodes_cache[node]
val = None
- if node.hasNsProp('translate', NS_ITS):
- val = node.nsProp('translate', NS_ITS)
- elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None):
- val = node.nsProp('translate', None)
+ if '{' + NS_ITS + '}translate' in node.attrib:
+ val = node.get('{' + NS_ITS + '}translate')
+ elif node.tag == '{' + NS_ITS + '}span' and 'translate' in node.attrib:
+ val = node.get('translate')
elif node in self._its_translate_nodes:
val = self._its_translate_nodes[node]
if val is not None:
self._its_translate_nodes_cache[node] = val
return val
- if node.type == 'attribute':
+ if isinstance(node, XMLAttr):
return 'no'
- if node.parent.type == 'element':
- parval = self.get_its_translate(node.parent)
+ if node.getparent() is not None:
+ parval = self.get_its_translate(node.getparent())
self._its_translate_nodes_cache[node] = parval
return parval
return 'yes'
def get_its_within_text(self, node):
- if node.hasNsProp('withinText', NS_ITS):
- val = node.nsProp('withinText', NS_ITS)
- elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None):
- val = node.nsProp('withinText', None)
+ if '{' + NS_ITS + '}withinText' in node.attrib:
+ val = node.get('{' + NS_ITS + '}withinText')
+ elif node.tag == '{' + NS_ITS + '}span' and 'withinText' in node.attrib:
+ val = node.get('withinText')
else:
return self._its_within_text_nodes.get(node, 'no')
if val in ('yes', 'nested'):
@@ -1312,73 +1220,63 @@ class Document (object):
def get_its_locale_filter(self, node):
if node in self._its_locale_filters_cache:
return self._its_locale_filters_cache[node]
- if node.hasNsProp('localeFilterList', NS_ITS) or node.hasNsProp('localeFilterType', NS_ITS):
- if node.hasNsProp('localeFilterList', NS_ITS):
- lst = node.nsProp('localeFilterList', NS_ITS)
- else:
- lst = '*'
- if node.hasNsProp('localeFilterType', NS_ITS):
- typ = node.nsProp('localeFilterType', NS_ITS)
- else:
- typ = 'include'
+ if ('{' + NS_ITS + '}localeFilterList' in node.attrib or
+ '{' + NS_ITS + '}localeFilterType' in node.attrib):
+ lst = node.get('{' + NS_ITS + '}localeFilterList', '*')
+ typ = node.get('{' + NS_ITS + '}localeFilterType', 'include')
return (lst, typ)
- if (xml_is_ns_name(node, NS_ITS, 'span') and
- (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))):
- if node.hasNsProp('localeFilterList', None):
- lst = node.nsProp('localeFilterList', None)
- else:
- lst = '*'
- if node.hasNsProp('localeFilterType', None):
- typ = node.nsProp('localeFilterType', None)
- else:
- typ = 'include'
+ if (node.tag == '{' + NS_ITS + '}span' and
+ ('localeFilterList' in node.attrib or 'localeFilterType' in node.attrib)):
+ lst = node.get('localeFilterList', '*')
+ typ = node.get('localeFilterType', 'include')
return (lst, typ)
if node in self._its_locale_filters:
return self._its_locale_filters[node]
- if node.parent.type == 'element':
- parval = self.get_its_locale_filter(node.parent)
+ if node.getparent() is not None:
+ parval = self.get_its_locale_filter(node.getparent())
self._its_locale_filters_cache[node] = parval
return parval
return ('*', 'include')
def get_itst_drop(self, node):
- if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
+ if node.get('{' + NS_ITST + '}drop') == 'yes':
return 'yes'
if self._itst_drop_nodes.get(node, 'no') == 'yes':
return 'yes'
return 'no'
def get_its_id_value(self, node):
- if node.hasNsProp('id', NS_XML):
- return node.nsProp('id', NS_XML)
+ if '{' + NS_XML + '}id' in node.attrib:
+ return node.get('{' + NS_XML + '}id')
return self._its_id_values.get(node, None)
def get_its_loc_notes(self, node, inherit=True):
if node in self._its_loc_notes_cache:
return self._its_loc_notes_cache[node]
ret = []
- if ( node.hasNsProp('locNote', NS_ITS) or
- node.hasNsProp('locNoteRef', NS_ITS) or
- node.hasNsProp('locNoteType', NS_ITS) ):
- notetype = node.nsProp('locNoteType', NS_ITS)
- if node.hasNsProp('locNote', NS_ITS):
- ret.append(LocNote(locnote=node.nsProp('locNote', NS_ITS), locnotetype=notetype))
- elif node.hasNsProp('locNoteRef', NS_ITS):
- ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', NS_ITS), locnotetype=notetype))
- elif xml_is_ns_name(node, NS_ITS, 'span'):
- if ( node.hasNsProp('locNote', None) or
- node.hasNsProp('locNoteRef', None) or
- node.hasNsProp('locNoteType', None) ):
- notetype = node.nsProp('locNoteType', None)
- if node.hasNsProp('locNote', None):
- ret.append(LocNote(locnote=node.nsProp('locNote', None), locnotetype=notetype))
- elif node.hasNsProp('locNoteRef', None):
- ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', None), locnotetype=notetype))
+ if ( '{' + NS_ITS + '}locNote' in node.attrib or
+ '{' + NS_ITS + '}locNoteRef' in node.attrib or
+ '{' + NS_ITS + '}locNoteType' in node.attrib ):
+ notetype = node.get('{' + NS_ITS + '}locNoteType')
+ if '{' + NS_ITS + '}locNote' in node.attrib:
+ ret.append(LocNote(locnote=node.get('{' + NS_ITS + '}locNote'), locnotetype=notetype))
+ elif '{' + NS_ITS + '}locNoteRef' in node.attrib:
+ ret.append(LocNote(locnoteref=node.get('{' + NS_ITS + '}locNoteRef'), locnotetype=notetype))
+ elif node.tag == '{' + NS_ITS + '}span':
+ if ( 'locNote' in node.attrib or
+ 'locNoteRef' in node.attrib or
+ 'locNoteType' in node.attrib ):
+ notetype = node.get('locNoteType')
+ if 'locNote' in node.attrib:
+ ret.append(LocNote(locnote=node.get('locNote'), locnotetype=notetype))
+ elif 'locNoteRef' in node.attrib:
+ ret.append(LocNote(locnoteref=node.get('locNoteRef'), locnotetype=notetype))
for locnote in reversed(self._its_loc_notes.get(node, [])):
ret.append(locnote)
if (len(ret) == 0 and inherit and
- node.type != 'attribute' and node.parent is not None and node.parent.type == 'element'):
- parval = self.get_its_loc_notes(node.parent)
+ not isinstance(node, XMLAttr) and
+ node.getparent() is not None):
+ parval = self.get_its_loc_notes(node.getparent())
self._its_loc_notes_cache[node] = parval
return parval
self._its_loc_notes_cache[node] = ret
@@ -1386,12 +1284,12 @@ class Document (object):
def output_test_data(self, category, out, node=None):
if node is None:
- node = self._doc.getRootElement()
+ node = self._doc.getroot()
compval = ''
if category == 'translate':
compval = 'translate="%s"' % self.get_its_translate(node)
elif category == 'withinText':
- if node.type != 'attribute':
+ if not isinstance(node, XMLAttr):
compval = 'withinText="%s"' % self.get_its_within_text(node)
elif category == 'localeFilter':
compval = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node)
@@ -1422,16 +1320,32 @@ class Document (object):
out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval))
else:
out.write('%s\r\n' % (xml_get_node_path(node)))
- for attr in sorted(xml_attr_iter(node), key=ustr):
+ for attrname in sorted(node.keys(), key=ustr):
+ attr = XMLAttr(node, attrname)
self.output_test_data(category, out, attr)
- for child in xml_child_iter(node):
- if child.type == 'element':
- self.output_test_data(category, out, child)
+ for child in node.iterchildren():
+ self.output_test_data(category, out, child)
- @staticmethod
- def _try_xpath_eval (xpath, expr):
+ def _try_xpath_eval (self, xpath, expr, node=None):
+ if node is None:
+ node = self._doc
+ elif isinstance(node, XMLAttr):
+ # lxml doesn't support attributes as XPath context nodes.
+ if expr == '.':
+ return [ node ]
+ sys.stderr.write('Warning: Unsupported XPath on attribute: %s\n' % expr)
+ return []
try:
- return xpath.xpathEval(expr)
+ result = node.xpath(expr, namespaces=xpath[0], **xpath[1])
+ if not isinstance(result, str):
+ for i in range(len(result)):
+ val = result[i]
+ # Use lxml's "smart string" feature to determine
+ # the attribute node.
+ if (isinstance(val, etree._ElementUnicodeResult) and
+ val.is_attribute):
+ result[i] = XMLAttr(val.getparent(), val.attrname)
+ return result
except:
sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
return []
@@ -1636,11 +1550,11 @@ if __name__ == '__main__':
raise
sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e))
sys.exit(1)
- serialized = doc._doc.serialize('utf-8')
- if PY3:
- # For some reason, under py3, our serialized data is returns as a str.
- # Let's encode it to bytes
- serialized = serialized.encode('utf-8')
+ # lxml generates XML declarations with single quotes.
+ serialized = (
+ b'<?xml version="1.0" encoding="utf-8"?>\n' +
+ etree.tostring(doc._doc, encoding='utf-8') +
+ b'\n')
fout = out
fout_is_str = isinstance(fout, string_types)
if fout_is_str:
@@ -1675,11 +1589,11 @@ if __name__ == '__main__':
for itsfile in opts.itsfile:
doc.apply_its_file(itsfile, userparams=userparams)
doc.join_translations(translations, strict=opts.strict)
- serialized = doc._doc.serialize('utf-8')
- if PY3:
- # For some reason, under py3, our serialized data is returns as a str.
- # Let's encode it to bytes
- serialized = serialized.encode('utf-8')
+ # lxml generates XML declarations with single quotes.
+ serialized = (
+ b'<?xml version="1.0" encoding="utf-8"?>\n' +
+ etree.tostring(doc._doc, encoding='utf-8') +
+ b'\n')
out.write(serialized)
out.flush()
diff --git a/tests/IT-join-1.joined.xml b/tests/IT-join-1.joined.xml
index f1cd11e..a2cdd15 100644
--- a/tests/IT-join-1.joined.xml
+++ b/tests/IT-join-1.joined.xml
@@ -8,9 +8,9 @@
Dies ist eine einfache Nachricht.
Ceci est un message clair.
This message uses a prefix.
- Tato zpráva ma předpona.
- Diese Nachricht nutzt einen Präfix.
- Ce message utilise un préfixe.
+ Tato zpráva ma předpona.
+ Diese Nachricht nutzt einen Präfix.
+ Ce message utilise un préfixe.
This message is in another namespace.
Tato zpráva ma jiný jmenný prostor.
Diese Nachricht ist in einem anderen Namensraum.
diff --git a/tests/IT-prefixes-1.ll.xml b/tests/IT-prefixes-1.ll.xml
index 6dd1f30..ec0292c 100644
--- a/tests/IT-prefixes-1.ll.xml
+++ b/tests/IT-prefixes-1.ll.xml
@@ -13,7 +13,7 @@
Faites glisser des pièces de la gauche vers la droite.
- Maintenez la touche Ctrl.
+ Maintenez la touche Ctrl.
diff --git a/tests/Translate/Translate1.ll.xml b/tests/Translate/Translate1.ll.xml
index 72b30fe..8e94592 100644
--- a/tests/Translate/Translate1.ll.xml
+++ b/tests/Translate/Translate1.ll.xml
@@ -26,7 +26,7 @@
Pippin regarda depuis l'abri du manteau de Gandalf.
-
+
@@ -35,5 +35,4 @@
C'est le premier paragraphe. Il contient du texte un-translatable code
.
Tout commença alors que Zebulon découvrit qu'il avait un doppelgänger qui était un aficionado sérieux de baseball.
Tout commença alors que Zebulon découvrit qu'il avait un doppelgänger qui était un aficionado sérieux de baseball.