#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2016-2025 European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl
"""
Python equivalents of text Excel functions.
"""
import json
import regex
import functools
import numpy as np
import schedula as sh
from . import (
wrap_ufunc, Error, replace_empty, XlError, flatten, wrap_func, is_not_empty,
raise_errors, _text2num
)
FUNCTIONS = {}
codes = {
9: "\t",
10: "\n",
32: " ",
33: "!",
34: '"',
35: "#",
36: "$",
37: "%",
38: "&",
39: "'",
40: "(",
41: ")",
42: "*",
43: "+",
44: ",",
45: "-",
46: ".",
47: "/",
48: "0",
49: "1",
50: "2",
51: "3",
52: "4",
53: "5",
54: "6",
55: "7",
56: "8",
57: "9",
58: ":",
59: ";",
60: "<",
61: "=",
62: ">",
63: "?",
64: "@",
65: "A",
66: "B",
67: "C",
68: "D",
69: "E",
70: "F",
71: "G",
72: "H",
73: "I",
74: "J",
75: "K",
76: "L",
77: "M",
78: "N",
79: "O",
80: "P",
81: "Q",
82: "R",
83: "S",
84: "T",
85: "U",
86: "V",
87: "W",
88: "X",
89: "Y",
90: "Z",
91: "[",
92: "\\",
93: "]",
94: "^",
95: "_",
96: "`",
97: "a",
98: "b",
99: "c",
100: "d",
101: "e",
102: "f",
103: "g",
104: "h",
105: "i",
106: "j",
107: "k",
108: "l",
109: "m",
110: "n",
111: "o",
112: "p",
113: "q",
114: "r",
115: "s",
116: "t",
117: "u",
118: "v",
119: "w",
120: "x",
121: "y",
122: "z",
123: "{",
124: "|",
125: "}",
126: "~",
127: "",
128: "Ä",
129: "Å",
130: "Ç",
131: "É",
132: "Ñ",
133: "Ö",
134: "Ü",
135: "á",
136: "à",
137: "â",
138: "ä",
139: "ã",
140: "å",
141: "ç",
142: "é",
143: "è",
144: "ê",
145: "ë",
146: "í",
147: "ì",
148: "î",
149: "ï",
150: "ñ",
151: "ó",
152: "ò",
153: "ô",
154: "ö",
155: "õ",
156: "ú",
157: "ù",
158: "û",
159: "ü",
160: "†",
161: "°",
162: "¢",
163: "£",
164: "§",
165: "•",
166: "¶",
167: "ß",
168: "®",
169: "©",
170: "™",
171: "´",
172: "¨",
173: "≠",
174: "Æ",
175: "Ø",
176: "∞",
177: "±",
178: "≤",
179: "≥",
180: "¥",
181: "µ",
182: "∂",
183: "∑",
184: "∏",
185: "π",
186: "∫",
187: "ª",
188: "º",
189: "Ω",
190: "æ",
191: "ø",
192: "¿",
193: "¡",
194: "¬",
195: "√",
196: "ƒ",
197: "≈",
198: "∆",
199: "«",
200: "»",
201: "…",
202: " ",
203: "À",
204: "Ã",
205: "Õ",
206: "Œ",
207: "œ",
208: "–",
209: "—",
210: "“",
211: "”",
212: "‘",
213: "’",
214: "÷",
215: "◊",
216: "ÿ",
217: "Ÿ",
218: "⁄",
219: "€",
220: "‹",
221: "›",
222: "fi",
223: "fl",
224: "‡",
225: "·",
226: "‚",
227: "„",
228: "‰",
229: "Â",
230: "Ê",
231: "Á",
232: "Ë",
233: "È",
234: "Í",
235: "Î",
236: "Ï",
237: "Ì",
238: "Ó",
239: "Ô",
240: "",
241: "Ò",
242: "Ú",
243: "Û",
244: "Ù",
245: "ı",
246: "ˆ",
247: "˜",
248: "¯",
249: "˘",
250: "˙",
251: "˚",
252: "¸",
253: "˝",
254: "˛",
255: "ˇ"
}
inverse_codes = {v: k for k, v in codes.items()}
[docs]
class HexValue(str):
pass
_re_hex = regex.compile("^_x([0-9A-Z]{4})_$")
[docs]
def xchar(number):
number = int(number)
if 0 < number <= 255:
if number in codes:
return codes[number]
return HexValue(f'_x{hex(number)[2:].upper():0>4}_')
return Error.errors['#VALUE!']
FUNCTIONS['CHAR'] = wrap_ufunc(xchar)
[docs]
def xcode(character) -> int:
"""
Returns the code of a character based on codes dictionary. Input must be a singular character
"""
if isinstance(character, HexValue):
try:
number = int(f'0{character[1:-1]}', 16)
if 0 < number <= 255:
return number
except ValueError:
pass
elif isinstance(character, sh.Token):
raise ValueError
return inverse_codes.get(str(character)[0], None)
FUNCTIONS["CODE"] = wrap_ufunc(
xcode, args_parser=lambda *a: a, input_parser=lambda *a: a
)
def _str(text):
if isinstance(text, bool):
return str(text).upper()
if isinstance(text, float) and text.is_integer():
return '%d' % text
return str(text)
[docs]
def xfind(find_text, within_text, start_num=1):
i = int(start_num or 0) - 1
res = i >= 0 and _str(within_text).find(_str(find_text), i) + 1 or 0
return res or Error.errors['#VALUE!']
_kw0 = {
'input_parser': lambda *a: a,
'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a)
}
FUNCTIONS['FIND'] = wrap_ufunc(xfind, **_kw0)
[docs]
def xleft(from_str, num_chars=1):
i = int(num_chars or 0)
if i >= 0:
return _str(from_str)[:i]
return Error.errors['#VALUE!']
FUNCTIONS['LEFT'] = wrap_ufunc(xleft, **_kw0)
_kw1 = {
'input_parser': lambda text: [_str(text)],
'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a)
}
FUNCTIONS['LEN'] = wrap_ufunc(str.__len__, **_kw1)
FUNCTIONS['LOWER'] = wrap_ufunc(str.lower, **_kw1)
[docs]
def xmid(from_str, start_num, num_chars):
i = j = int(start_num or 0) - 1
j += int(num_chars or 0)
if 0 <= i <= j:
return _str(from_str)[i:j]
return Error.errors['#VALUE!']
FUNCTIONS['MID'] = wrap_ufunc(xmid, **_kw0)
[docs]
def xreplace(old_text, start_num, num_chars, new_text):
old_text, new_text = _str(old_text), _str(new_text)
i = j = int(start_num or 0) - 1
j += int(num_chars or 0)
if 0 <= i <= j:
return old_text[:i] + new_text + old_text[j:]
return Error.errors['#VALUE!']
FUNCTIONS['REPLACE'] = wrap_ufunc(xreplace, **_kw0)
[docs]
def xright(from_str, num_chars=1):
res = xleft(_str(from_str)[::-1], num_chars)
return res if isinstance(res, XlError) else res[::-1]
FUNCTIONS['RIGHT'] = wrap_ufunc(xright, **_kw0)
FUNCTIONS['TRIM'] = wrap_ufunc(str.strip, **_kw1)
FUNCTIONS['UPPER'] = wrap_ufunc(str.upper, **_kw1)
[docs]
def xsearch(find_text, within_text, start_num=1):
n = int(start_num - 1)
n = str(within_text).lower().find(str(find_text).lower(), n)
if n < 0:
return Error.errors['#VALUE!']
return n + 1
FUNCTIONS['SEARCH'] = wrap_ufunc(xsearch, **_kw0)
[docs]
def xsubstitute(text, old_text, new_text, instance_num=None):
text, old_text, new_text = tuple(map(_str, (text, old_text, new_text)))
if instance_num is None:
return text.replace(old_text, new_text)
elif isinstance(instance_num, (
bool, np.bool_, str, np.str_
)) or instance_num < 1:
return Error.errors['#VALUE!']
parts = text.split(old_text)
instance_num = int(instance_num)
if instance_num > len(parts) - 1:
return text
return old_text.join(parts[:instance_num]) + new_text + old_text.join(
parts[instance_num:]
)
FUNCTIONS['SUBSTITUTE'] = wrap_ufunc(
xsubstitute, input_parser=lambda *a: a,
args_parser=lambda *a: (replace_empty(v, '') for v in a)
)
[docs]
def xconcat(text, *args):
it = list(flatten((text,) + args, is_not_empty))
raise_errors(it)
return ''.join(map(_str, it))
FUNCTIONS['_XLFN.CONCAT'] = FUNCTIONS['CONCAT'] = wrap_func(xconcat)
FUNCTIONS['_XLFN.CONCATENATE'] = FUNCTIONS['CONCATENATE'] = wrap_ufunc(
xconcat, **_kw0
)
[docs]
def xtextjoin(delimiter, ignore_empty, text, *args):
raise_errors(delimiter, ignore_empty, text, *args)
if ignore_empty:
it = (flatten((text,) + args, is_not_empty))
else:
it = (replace_empty(v, '') for v in flatten((text,) + args, None))
return _str(next(flatten(delimiter, None))).join(map(_str, it))
FUNCTIONS['_XLFN.TEXTJOIN'] = FUNCTIONS['TEXTJOIN'] = wrap_func(xtextjoin)
_re_format_code = regex.compile(
r'(?P<text>"[^"]*")|'
r'(?P<escape>\\.)|'
r'(?P<percentage>\%)|'
r'(?P<thousand>(?<=[#0]),(?=[#0]))|'
r'(?P<time>[Aa][mM]/[Pp][mM]|[Aa]/[Pp]|[hH]+|[sS]+)|'
r'(?P<date>[Yy]+|[dD]+)|'
r'(?P<months_minutes>[mM]+)|'
r'(?P<exp>E[+-]?)|'
r'(?P<wrong>[e"])|'
r'(?P<number>[#0])|'
r'(?P<decimal>\.)|'
r'(?P<skip>(?<=[#0]),)|'
r'(?P<condition>;?\[[^\]]+\]|;)|'
r'(?P<extra>$)'
)
_re_sub_condition = regex.compile(r'[\[\];]+|^\s+|\s+$')
@functools.lru_cache()
def _parse_format_code(format_code):
formats = []
codes = []
types = {}
code = []
str_index = 0
minutes = None
suspended = []
for match in _re_format_code.finditer(format_code):
# noinspection PyUnresolvedReferences
span = match.span()
if str_index != span[0]:
v = format_code[str_index:span[0]]
sh.get_nested_dicts(types, 'extra', default=list).append(len(codes))
codes.append(v)
str_index = span[1]
# noinspection PyUnresolvedReferences
for k, v in match.groupdict().items():
if v:
if k == 'decimal' and k in types:
k = 'extra'
elif k == 'number' and 'exp' in types:
k = 'exp'
elif k == 'number' and 'decimal' in types:
k = 'decimal'
elif k == 'thousand' and (k in types or 'decimal' in types):
k = 'skip'
elif k == 'exp' and k in types or k == 'wrong':
raise
elif k == 'condition':
code.extend(codes)
code.append(v)
codes = []
types = {}
formats.append((v, codes, types))
break
elif k == 'time':
if suspended:
sh.get_nested_dicts(
types, 's' in v.lower() and k or 'date',
default=list
).append(suspended.pop())
if minutes is None or 'h' in v.lower():
minutes = True
elif k == 'date':
if suspended:
sh.get_nested_dicts(types, k, default=list).append(
suspended.pop()
)
elif k == 'months_minutes':
if len(v) <= 2:
if minutes:
k = 'time'
minutes = False
elif minutes is None:
k = 'date'
elif len(suspended) > 0:
raise
else:
suspended.append(len(codes))
codes.append(v)
break
else:
k = 'date'
sh.get_nested_dicts(types, k, default=list).append(len(codes))
codes.append(v)
break
if len(suspended) == 1:
sh.get_nested_dicts(types, 'date', default=list).append(
suspended.pop()
)
code.extend(codes)
assert ''.join(code) == format_code
if not formats:
formats = [('', codes, types)]
conditions = []
for condition, codes, types in formats:
condition = _re_sub_condition.sub('', condition)
if condition:
from .operators import LOGIC_OPERATORS
operator = '='
for k in LOGIC_OPERATORS:
if condition.startswith(k) and condition != k:
operator, condition = k, condition[len(k):]
break
check = functools.partial(
LOGIC_OPERATORS[operator], y=_text2num(condition)
)
else:
check = lambda value: True
factor = 1
thousand = False
if 'number' in types:
if 'date' in types or 'time' in types:
raise ValueError
decimals = len(types.get('decimal', [None])) - 1
if 'exp' in types:
type = 'E'
else:
type = 'f'
factor = (100 ** len(types.get('percentage', ())))
thousand = 'thousand' in types and ',' or ''
fotmat_string = f"{thousand}.{decimals}{type}"
else:
fotmat_string = ''
for i in types.get('text', []):
codes[i] = codes[i][1:-1]
for i in types.get('escape', []):
codes[i] = codes[i][1]
for i in types.get('date', []):
codes[i] = codes[i].lower()
for i in types.get('time', []):
if '/' not in codes[i] or len(codes[i]) == 5:
codes[i] = codes[i].upper()
for i in types.get('skip', []) + types.get('thousand', []):
codes[i] = ''
for k in ('number', 'decimal', 'exp'):
for i in types.get(k, []):
if codes[i] == '#':
codes[i] = ''
if thousand:
for i in types.get('number', [])[::-3]:
if codes[i] == '0':
codes[i] = '0,'
conditions.append((check, codes, types, fotmat_string, factor))
return conditions
def _format_datetime(value, codes, types):
codes = codes.copy()
from datetime import datetime
from .date import _int2date, _n2time
value = datetime(*(_int2date(value) + _n2time(value)))
parts = json.loads(format(
value, '{"yyy":"%Y","yy":"%y","mm":"%m","mmm":"%b","mmmm":"%B",'
'"dd":"%d","ddd":"%a","dddd":"%A","HH":"%H","MM":"%M",'
'"SS":"%S","AM/PM":"%p"}'
))
for k in 'mdHSM':
parts[k] = parts[f'{k}{k}'].strip('0')
parts['mmmmm'] = parts['mmm'][0]
parts['m+'] = parts['mmmm']
parts['d+'] = parts['dddd']
parts['y'] = parts['yy']
parts['y+'] = parts['yyy']
parts['H+'] = parts['HH']
parts['S+'] = parts['SS']
parts['A/P'] = parts['AM/PM'].replace('M', '')
parts['a/P'] = parts['A/P'].replace('A', 'a')
parts['A/p'] = parts['A/P'].replace('P', 'p')
parts['a/p'] = parts['A/P'].lower()
for k in ('date', 'time'):
for i in types.get(k, ()):
v = codes[i]
try:
v = parts[v]
except KeyError:
v = parts[f'{v[0]}+']
codes[i] = v
return codes
_re_format_number = regex.compile(
r'(?P<sign>[\+\-])?(?P<number>\d[\d,]*)(?P<decimal>\.\d+)?'
r'(?>(?P<exp_sign>E[\+\-])0*(?P<exp>\d+))?'
)
_re_split_number = regex.compile(r'(?=,\d)|(?<!,)(?=\d)')
def _format_number(value, codes, types, fstr, mul):
codes = codes.copy()
parts = _re_format_number.match(format(value * mul, fstr)).groupdict('')
# noinspection PyTypeChecker
parts['number'] = _re_split_number.split(parts['number'].lstrip('0'))[1:]
it = (
(types.get('number', ())[::-1], iter(parts['number'][::-1]), True),
(types.get('decimal', ()), iter(parts['decimal'].rstrip('0')), False),
(types.get('exp', ())[:0:-1], iter(parts['exp'][::-1]), True)
)
for index, values, reverse in it:
for i in index[:-1]:
codes[i] = next(values, codes[i])
for i in index[-1:]:
v = tuple(values)
v = ''.join(v[::-1] if reverse else v)
codes[i] = v or codes[i]
for i in types.get('exp', ())[:1]:
if codes[i] == 'E-' and parts['exp_sign'] == 'E+':
codes[i] = 'E'
else:
codes[i] = parts['exp_sign']
return [parts['sign']] + codes
[docs]
def xt(value):
value = np.asarray(value, dtype=object).ravel()[0]
raise_errors(value)
if isinstance(value, (np.str_, str)):
return value
return ''
FUNCTIONS['T'] = wrap_func(xt)
[docs]
def xtext(value, format_code):
it = _parse_format_code(str(format_code))
if isinstance(value, (np.bool_, bool)):
return str(value).upper()
try:
value = xvalue(value)
except (ValueError, TypeError, AssertionError):
return value
for check, codes, types, fstr, mul in it:
if not check(value):
continue
if 'date' in types or 'time' in types:
codes = _format_datetime(value, codes, types)
else:
codes = _format_number(value, codes, types, fstr, mul)
return ''.join(codes)
raise
FUNCTIONS['TEXT'] = wrap_ufunc(xtext, input_parser=lambda *a: a)
[docs]
def xvalue(value):
if not isinstance(value, Error) and isinstance(value, str):
try:
return float(value)
except (ValueError, TypeError):
from .date import xdate, _text2datetime, xtime
value = _text2datetime(value)
return xdate(*value[:3]) + xtime(*value[3:])
elif isinstance(value, (np.bool_, bool)):
raise ValueError
return float(value)
FUNCTIONS['VALUE'] = wrap_ufunc(xvalue, input_parser=lambda *a: a)