# -*- coding: utf-8 -*-
################################################################################
# #
# This file is part of the Bibolamazi Project. #
# Copyright (C) 2013 by Philippe Faist #
# philippe.faist@bluewin.ch #
# #
# Bibolamazi is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# Bibolamazi is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with Bibolamazi. If not, see <http://www.gnu.org/licenses/>. #
# #
################################################################################
"""
Various utilities for use within all of the Bibolamazi Project.
"""
import re
import types
import math
import datetime
import logging
from pylatexenc import latex2text
import bibolamazi.init
from . import version
logger = logging.getLogger(__name__)
def get_version():
    """
    Return the Bibolamazi version string.

    The value is :py:data:`~core.version.version_str`, passed through without any
    processing.
    """
    version_string = version.version_str
    return version_string
# Cache for get_version_split(); filled in on the first call.
_theversionsplit = None


def get_version_split():
    """
    Return a 4-tuple `(maj, min, rel, suffix)` resulting from parsing the version
    string :py:data:`version.version_str`.

    The elements are the matched substrings (i.e. strings, not integers).  Any
    component which is not present in the version string is `None`.  Note that the
    suffix can only be captured when all three numeric components are present.

    The tuple is computed once and cached at module level for subsequent calls.

    Raises :py:exc:`ValueError` if the version string does not start with a number.
    """
    # The cache is a module-level variable which is assigned to below; without this
    # declaration the name would be local and the `is None` test would raise
    # UnboundLocalError.
    global _theversionsplit

    if _theversionsplit is None:
        m = re.match(r'^(\d+)(?:\.(\d+)(?:\.(\d+)(.+)?)?)?', version.version_str)
        if m is None:
            raise ValueError("Cannot parse version string: %r" % (version.version_str,))
        _theversionsplit = (m.group(1), m.group(2), m.group(3), m.group(4))

    return _theversionsplit
def get_copyrightyear():
    """
    Return the copyright year of the project.

    The value is :py:data:`~core.version.copyright_year`, passed through without any
    processing.
    """
    year = version.copyright_year
    return year
# ------------------------------------------------------------------------------
class BibolamaziError(Exception):
    """
    Base class of the bibolamazi exceptions.

    The exception message is `msg`; if a location `where` is given, it is appended
    to the message on a new line (prefixed by ``"\\t@: "``).  The location is also
    stored, as given, in the attribute `where`.

    See also :py:class:`~core.bibfilter.BibFilterError` and
    :py:class:`~core.bibusercache.BibUserCacheError`.
    """
    def __init__(self, msg, where=None):
        self.where = where
        if where is None:
            fullmsg = msg
        else:
            fullmsg = msg + "\n\t@: " + where
        super().__init__(fullmsg)
# ------------------------------------------------------------------------------
def getbool(x):
    """
    Parse a value representing a boolean.

    Anything which can be converted by `int()` (integers, booleans, numeric strings,
    ...) is converted to an integer, and the result is `True` if and only if that
    integer is nonzero.

    Otherwise, if `x` is a string, it is compared (ignoring case as well as leading
    and trailing whitespace) to 't(rue)?', '1', 'y(es)?', 'on' which yield `True`,
    and to 'f(alse)?', '0', 'n(o)?', 'off' which yield `False`.

    A :py:exc:`ValueError` is raised if the value cannot be interpreted.
    """
    try:
        numeric = int(x)
    except (TypeError, ValueError):
        # not a number; try the textual forms below
        pass
    else:
        return numeric != 0

    if isinstance(x, str):
        spellings = (
            (r'^\s*(t(?:rue)?|1|y(?:es)?|on)\s*$', True),
            (r'^\s*(f(?:alse)?|0|n(?:o)?|off)\s*$', False),
        )
        for pattern, result in spellings:
            if re.match(pattern, x, re.IGNORECASE):
                return result

    raise ValueError("Can't parse boolean value: %r" % x)
def resolve_type(typename, in_module=None):
    """
    Return the type object designated by the string `typename`.

    If `in_module` is given and has an attribute called `typename` in its
    `__dict__`, that attribute is returned.  Otherwise, `typename` must be one of
    'str', 'bool', 'int', 'float', 'bytes' or 'complex', and the corresponding
    built-in type is returned (or the legacy alias from the `types` module, if that
    module provides one).

    Raises :py:exc:`ValueError` for any other type name.
    """
    if in_module is not None:
        logger.longdebug("Resolving type %s in module %s", typename, in_module)
        module_namespace = in_module.__dict__
        if typename in module_namespace:
            return module_namespace.get(typename)

    logger.longdebug("Resolving type %s (no module)", typename)

    # (type name, legacy attribute of the `types` module, built-in fallback).  The
    # legacy attributes (types.StringType etc.) do not exist on Python 3, in which
    # case the built-in type is used.
    known_types = (
        ('str', 'StringType', str),
        ('bool', 'BooleanType', bool),
        ('int', 'IntType', int),
        ('float', 'FloatType', float),
        ('bytes', 'StringType', bytes),
        ('complex', 'ComplexType', complex),
    )
    for name, legacy_attr, builtin_type in known_types:
        if typename == name:
            return getattr(types, legacy_attr, builtin_type)

    raise ValueError("Unknown type name: %s"%(typename))
# Matches values made up exclusively of characters which need no quoting.
_rx_quotearg_oknames = re.compile(r'^[-\w./:~%#]+$')


def quotearg(x):
    r"""
    Quote the value `x` if necessary.

    A value consisting only of non-special characters is returned unchanged.  The
    non-special characters are the word characters (alphanumerical characters and
    underscore) as well as hyphen, dot, slash, colon, tilde, percent and hash.

    Any other value is enclosed in double quotes, with each double quote and each
    backslash it contains preceded by a backslash.

    An empty (or otherwise false) value yields the empty string.

    For example:

    >>> print(quotearg('kosher_name_clean'))
    kosher_name_clean
    >>> print(quotearg('dirty name with spaces'))
    "dirty name with spaces"
    >>> print(quotearg(r'really\dirty"name'))
    "really\\dirty\"name"
    """
    if not x:
        return ""

    if _rx_quotearg_oknames.match(x) is not None:
        # nothing to protect
        return x

    # prefix every double quote and backslash with a backslash
    escaped = re.sub(r'("|\\)', r'\\\1', x)
    return '"' + escaped + '"'
def guess_encoding_decode(dat, encoding=None):
    """
    Decode `dat` to a string.

    If `dat` is already a string, it is returned as is.  If `encoding` is given
    (and non-empty), `dat` is decoded using that encoding.  Otherwise decoding is
    attempted as UTF-8, falling back to Latin-1 if the data is not valid UTF-8.
    """
    if isinstance(dat, str):
        # nothing to decode
        return dat

    if encoding:
        return dat.decode(encoding)

    try:
        decoded = dat.decode('utf-8')
    except UnicodeDecodeError:
        # every byte value is valid in latin1, so this is the last resort
        decoded = dat.decode('latin1')
    return decoded
def call_with_args(fn, *args, **kwargs):
    """
    Utility to call a function `fn` with `*args` and `**kwargs`.

    `fn(*args)` must be an acceptable function call; beyond that, additional keyword
    arguments which the function accepts will be provided from `**kwargs`.

    This function is meant to be essentially `fn(*args, **kwargs)`, but without
    raising an error if there are arguments in `kwargs` which the function doesn't
    accept (in which case, those arguments are ignored).

    `fn` may be any callable (function, bound method, object with a `__call__`
    method, ...).  If `fn` itself accepts arbitrary keyword arguments (`**kw`), all
    of `kwargs` are passed on.  If the signature of `fn` cannot be determined, the
    call is attempted with all of `kwargs`.

    Returns whatever `fn` returns.
    """
    # local import: this module does not import `inspect` at the top level
    import inspect

    try:
        parameters = inspect.signature(fn).parameters.values()
    except (TypeError, ValueError):
        # No introspectable signature (e.g. some built-in callables): there is no
        # way to filter, so just attempt the plain call.
        return fn(*args, **kwargs)

    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters):
        # fn takes **kw itself, nothing needs to be filtered out
        return fn(*args, **kwargs)

    # only parameters of these kinds may be specified by keyword
    keyword_kinds = (inspect.Parameter.POSITIONAL_OR_KEYWORD,
                     inspect.Parameter.KEYWORD_ONLY)
    accepted = {p.name for p in parameters if p.kind in keyword_kinds}

    return fn(*args, **{k: v for (k, v) in kwargs.items() if k in accepted})
# One "<number><unit>" part of a time interval specification.  Note that the unit
# extends over all word characters following the number.
_rx_timedelta_part = re.compile(r'(?P<value>\d+(?:\.\d*)?|\d*\.\d+)(?P<unit>\w+)', flags=re.IGNORECASE)


def parse_timedelta(in_s):
    """
    Parse a time interval such as ``"3days"``, ``"1h 30m"`` or ``"1.5h"`` into a
    :py:class:`datetime.timedelta`.

    The string `in_s` is searched for parts of the form ``<number><unit>``, where
    the number may have a fractional part, and where the unit is any nonempty prefix
    (case insensitive) of 'weeks', 'days', 'hours', 'minutes' or 'seconds'.  Since
    the unit comprises all word characters following the number, successive parts
    must be separated by a non-word character such as a space or a comma.  The
    values of all parts are added up; text which is not part of such a pair is
    ignored.

    Fractional values are handed to :py:class:`datetime.timedelta` directly, which
    resolves them down to the microsecond.

    Note: only positive timedelta accepted.

    Raises :py:exc:`ValueError` if a unit is not recognized.
    """
    # all-lowercase, please
    units = ("weeks", "days", "hours", "minutes", "seconds")

    totals = dict.fromkeys(units, 0.0)

    for m in _rx_timedelta_part.finditer(in_s):
        unit = m.group('unit').lower()
        candidates = [name for name in units if name.startswith(unit)]
        if not candidates:
            raise ValueError("Unknown unit for timedelta: %s" %(unit))
        if len(candidates) > 1:
            raise ValueError("Ambiguous unit for timedelta: %s" %(unit)) # should never happen

        # timedelta() accepts fractional arguments for every unit, so there is no
        # need to split the fractional part into smaller units by hand (which is
        # prone to floating-point truncation, e.g. 2.3s becoming 2.299s).
        totals[candidates[0]] += float(m.group('value'))

    return datetime.timedelta(**totals)
def warn_deprecated(classname, oldname, newname, modulename=None, explanation=None):
    """
    Emit a warning that `oldname` is deprecated and that `newname` should be used
    instead.

    The names are prefixed in the message by ``modulename:`` and ``classname.`` if
    these are given (and non-empty); the text `explanation`, if any, is appended to
    the message.  The message ends with an entry of the current call stack.

    The warning is sent to the logger named `modulename` if that argument is not
    `None`, and to this module's logger otherwise.
    """
    import traceback

    warnlogger = logger if modulename is None else logging.getLogger(modulename)

    fields = {
        'classnamedot': (classname+'.' if classname else ''),
        'modulenamecolon': (modulename+':' if modulename else ''),
        'oldname': oldname,
        'newname': newname,
        'explanationspace': (explanation+' ' if explanation else ''),
        # first of the (at most) three innermost stack entries
        'stack': traceback.format_stack(limit=3)[0],
    }

    template = (
        "%(modulenamecolon)s%(classnamedot)s%(oldname)s is deprecated. Please use "
        "%(modulenamecolon)s%(classnamedot)s%(newname)s instead. %(explanationspace)s"
        "at:\n"
        "%(stack)s"
    )

    warnlogger.warning(template % fields)
# ------------------------------------------------------------------------------
# _latex2text_default_text_replacements = (
# ("~", " "),
# ("``", '"'),
# ("''", '"'),
# #
# # do NOT replace tabular alignment symbol '&', because most often it's used
# # in names perhaps unescaped, like in "Taylor & Francis"
# )
# LaTeX context used for converting LaTeX to text: pylatexenc's default context,
# plus a category of overrides for some "specials" (registered with prepend=True;
# presumably so that these take precedence over the default definitions -- see
# pylatexenc's documentation).
latex2text_latex_context = latex2text.get_default_latex_context_db()

# "~" becomes a plain space and the LaTeX quotes ``...'' become straight double
# quotes; the dashes "--", "---" and the ligatures "!`", "?`" are kept as they are.
latex2text_latex_context.add_context_category(
    'override-nonascii-specials',
    macros=[],
    environments=[],
    specials=[
        latex2text.SpecialsTextSpec(specials_chars, replacement)
        for (specials_chars, replacement) in (
            ('~', u" "),
            ('``', u"\""),
            ("''", u"\""),
            ("--", u"--"),
            ("---", u"---"),
            ("!`", u"!`"),
            ("?`", u"?`"),
        )
    ],
    prepend=True,
)

# Converter object shared by all calls to latex_to_text().
_l2t = latex2text.LatexNodes2Text(
    latex_context=latex2text_latex_context,
    strict_latex_spaces=True,
)
def latex_to_text(x):
    """
    Convert the LaTeX code `x` to text.

    The conversion is carried out by this module's `LatexNodes2Text` converter
    object (which uses the context :py:data:`latex2text_latex_context`), with
    `tolerant_parsing` enabled.
    """
    text = _l2t.latex_to_text(x, tolerant_parsing=True)
    return text