Source code for formulas.functions.text

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2016-2023 European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl

"""
Python equivalents of text Excel functions.
"""
import json
import regex
import functools
import numpy as np
import schedula as sh
from . import (
    wrap_ufunc, Error, replace_empty, XlError, value_return, flatten, wrap_func,
    is_not_empty, raise_errors, _text2num
)

FUNCTIONS = {}


def _str(text):
    if isinstance(text, bool):
        return str(text).upper()
    if isinstance(text, float) and text.is_integer():
        return '%d' % text
    return str(text)


[docs] def xfind(find_text, within_text, start_num=1): i = int(start_num or 0) - 1 res = i >= 0 and _str(within_text).find(_str(find_text), i) + 1 or 0 return res or Error.errors['#VALUE!']
_kw0 = { 'input_parser': lambda *a: a, 'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a) } FUNCTIONS['FIND'] = wrap_ufunc(xfind, **_kw0)
[docs] def xleft(from_str, num_chars): i = int(num_chars or 0) if i >= 0: return _str(from_str)[:i] return Error.errors['#VALUE!']
FUNCTIONS['LEFT'] = wrap_ufunc(xleft, **_kw0) _kw1 = { 'input_parser': lambda text: [_str(text)], 'return_func': value_return, 'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a) } FUNCTIONS['LEN'] = wrap_ufunc(str.__len__, **_kw1) FUNCTIONS['LOWER'] = wrap_ufunc(str.lower, **_kw1)
[docs] def xmid(from_str, start_num, num_chars): i = j = int(start_num or 0) - 1 j += int(num_chars or 0) if 0 <= i <= j: return _str(from_str)[i:j] return Error.errors['#VALUE!']
FUNCTIONS['MID'] = wrap_ufunc(xmid, **_kw0)
[docs] def xreplace(old_text, start_num, num_chars, new_text): old_text, new_text = _str(old_text), _str(new_text) i = j = int(start_num or 0) - 1 j += int(num_chars or 0) if 0 <= i <= j: return old_text[:i] + new_text + old_text[j:] return Error.errors['#VALUE!']
FUNCTIONS['REPLACE'] = wrap_ufunc(xreplace, **_kw0)
[docs] def xright(from_str, num_chars): res = xleft(_str(from_str)[::-1], num_chars) return res if isinstance(res, XlError) else res[::-1]
FUNCTIONS['RIGHT'] = wrap_ufunc(xright, **_kw0) FUNCTIONS['TRIM'] = wrap_ufunc(str.strip, **_kw1) FUNCTIONS['UPPER'] = wrap_ufunc(str.upper, **_kw1)
[docs] def xsearch(find_text, within_text, start_num=1): n = int(start_num - 1) n = str(within_text).lower().find(str(find_text).lower(), n) if n < 0: return Error.errors['#VALUE!'] return n + 1
FUNCTIONS['SEARCH'] = wrap_ufunc(xsearch, **_kw0)
[docs] def xconcat(text, *args): it = list(flatten((text,) + args, is_not_empty)) raise_errors(it) return ''.join(map(_str, it))
FUNCTIONS['_XLFN.CONCAT'] = FUNCTIONS['CONCAT'] = wrap_func(xconcat) FUNCTIONS['CONCATENATE'] = wrap_ufunc(xconcat, return_func=value_return, **_kw0) _re_format_code = regex.compile( r'(?P<text>"[^"]*")|' r'(?P<percentage>\%)|' r'(?P<thousand>(?<=[#0]),(?=[#0]))|' r'(?P<time>[Aa][mM]/[Pp][mM]|[Aa]/[Pp]|(?<=[hH])[mM]{1,2}(?![mM])|(?<![mM])[mM]{1,2}(?=[sS])|[hH]+|[sS]+)|' r'(?P<date>[mM]+|[Yy]+|[dD]+)|' r'(?P<exp>E[+-]?)|' r'(?P<wrong>[e"])|' r'(?P<number>[#0])|' r'(?P<decimal>\.)|' r'(?P<skip>(?<=[#0]),)|' r'(?P<condition>;?\[[^\]]+\]|;)|' r'(?P<extra>$)' ) _re_sub_condition = regex.compile(r'[\[\];]+|^\s+|\s+$') @functools.lru_cache() def _parse_format_code(format_code): formats = [] codes = [] types = {} code = [] str_index = 0 for match in _re_format_code.finditer(format_code): # noinspection PyUnresolvedReferences span = match.span() if str_index != span[0]: v = format_code[str_index:span[0]] sh.get_nested_dicts(types, 'extra', default=list).append(len(codes)) codes.append(v) str_index = span[1] # noinspection PyUnresolvedReferences for k, v in match.groupdict().items(): if v is not None: if k == 'decimal' and k in types: k = 'extra' elif k == 'number' and 'exp' in types: k = 'exp' elif k == 'number' and 'decimal' in types: k = 'decimal' elif k == 'thousand' and (k in types or 'decimal' in types): k = 'skip' elif k == 'exp' and k in types or k == 'wrong': raise elif k == 'condition': code.extend(codes) code.append(v) codes = [] types = {} formats.append((v, codes, types)) break sh.get_nested_dicts(types, k, default=list).append(len(codes)) codes.append(v) break code.extend(codes) assert ''.join(code) == format_code if not formats: formats = [('', codes, types)] conditions = [] for condition, codes, types in formats: condition = _re_sub_condition.sub('', condition) if condition: from .operators import LOGIC_OPERATORS operator = '=' for k in LOGIC_OPERATORS: if condition.startswith(k) and condition != k: operator, condition = k, condition[len(k):] break check = functools.partial( LOGIC_OPERATORS[operator], y=_text2num(condition) ) else: check = lambda value: True factor = 1 thousand = False if 'number' in types: if 'date' in types or 'time' in types: raise ValueError decimals = len(types.get('decimal', [None])) - 1 if 'exp' in types: type = 'E' else: type = 'f' factor = (100 ** len(types.get('percentage', ()))) thousand = 'thousand' in types and ',' or '' fotmat_string = f"{thousand}.{decimals}{type}" else: fotmat_string = '' for i in types.get('text', []): codes[i] = codes[i][1:-1] for i in types.get('date', []): codes[i] = codes[i].lower() for i in types.get('time', []): if '/' not in codes[i] or len(codes[i]) == 5: codes[i] = codes[i].upper() for i in types.get('skip', []) + types.get('thousand', []): codes[i] = '' for k in ('number', 'decimal', 'exp'): for i in types.get(k, []): if codes[i] == '#': codes[i] = '' if thousand: for i in types.get('number', [])[::-3]: if codes[i] == '0': codes[i] = '0,' conditions.append((check, codes, types, fotmat_string, factor)) return conditions def _format_datetime(value, codes, types): codes = codes.copy() from datetime import datetime from .date import _int2date, _n2time value = datetime(*(_int2date(value) + _n2time(value))) parts = json.loads(format( value, '{"yyy":"%Y","yy":"%y","mm":"%m","mmm":"%b","mmmm":"%B",' '"dd":"%d","ddd":"%a","dddd":"%A","HH":"%H","MM":"%M",' '"SS":"%S","AM/PM":"%p"}' )) for k in 'mdHSM': parts[k] = parts[f'{k}{k}'].strip('0') parts['mmmmm'] = parts['mmm'][0] parts['m+'] = parts['mmmm'] parts['d+'] = parts['dddd'] parts['y'] = parts['yy'] parts['y+'] = parts['yyy'] parts['H+'] = parts['HH'] parts['S+'] = parts['SS'] parts['A/P'] = parts['AM/PM'].replace('M', '') parts['a/P'] = parts['A/P'].replace('A', 'a') parts['A/p'] = parts['A/P'].replace('P', 'p') parts['a/p'] = parts['A/P'].lower() for k in ('date', 'time'): for i in types.get(k, ()): v = codes[i] try: v = parts[v] except KeyError: v = parts[f'{v[0]}+'] codes[i] = v return codes _re_format_number = regex.compile( r'(?P<sign>[\+\-])?(?P<number>\d[\d,]*)(?P<decimal>\.\d+)?' r'(?>(?P<exp_sign>E[\+\-])0*(?P<exp>\d+))?' ) _re_split_number = regex.compile(r'(?=,\d)|(?<!,)(?=\d)') def _format_number(value, codes, types, fstr, mul): codes = codes.copy() parts = _re_format_number.match(format(value * mul, fstr)).groupdict('') # noinspection PyTypeChecker parts['number'] = _re_split_number.split(parts['number'].lstrip('0'))[1:] it = ( (types.get('number', ())[::-1], iter(parts['number'][::-1]), True), (types.get('decimal', ()), iter(parts['decimal'].rstrip('0')), False), (types.get('exp', ())[:0:-1], iter(parts['exp'][::-1]), True) ) for index, values, reverse in it: for i in index[:-1]: codes[i] = next(values, codes[i]) for i in index[-1:]: v = tuple(values) v = ''.join(v[::-1] if reverse else v) codes[i] = v or codes[i] for i in types.get('exp', ())[:1]: if codes[i] == 'E-' and parts['exp_sign'] == 'E+': codes[i] = 'E' else: codes[i] = parts['exp_sign'] return [parts['sign']] + codes
[docs] def xtext(value, format_code): it = _parse_format_code(str(format_code)) if isinstance(value, (np.bool_, bool)): return str(value).upper() try: value = xvalue(value) except (ValueError, TypeError, AssertionError): return value for check, codes, types, fstr, mul in it: if not check(value): continue if 'date' in types or 'time' in types: codes = _format_datetime(value, codes, types) else: codes = _format_number(value, codes, types, fstr, mul) return ''.join(codes) raise
FUNCTIONS['TEXT'] = wrap_ufunc( xtext, return_func=value_return, input_parser=lambda *a: a )
[docs] def xvalue(value): if not isinstance(value, Error) and isinstance(value, str): try: return float(value) except (ValueError, TypeError): from .date import xdate, _text2datetime, xtime value = _text2datetime(value) return xdate(*value[:3]) + xtime(*value[3:]) elif isinstance(value, (np.bool_, bool)): raise ValueError return float(value)
FUNCTIONS['VALUE'] = wrap_ufunc( xvalue, return_func=value_return, input_parser=lambda *a: a )