Source code for formulas.functions.text

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2016-2026 European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl

"""
Python equivalents of text Excel functions.
"""
import re
import json
import regex
import functools
import unicodedata
import numpy as np
import schedula as sh
from ..errors import FoundError
from . import (
    wrap_ufunc, Error, replace_empty, XlError, flatten, wrap_func, is_not_empty,
    raise_errors, _text2num, Array, get_error, return_2d_func
)

FUNCTIONS = {}
_kw0 = {
    'input_parser': lambda *a: a,
    'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a)
}
codes = {
    1: "\x01",
    2: "\x02",
    3: "\x03",
    4: "\x04",
    5: "\x05",
    6: "\x06",
    7: "\x07",
    8: "\x08",
    9: "\t",
    10: "\n",
    11: "\x0B",
    12: "\x0C",
    13: "\x0D",
    14: "\x0E",
    15: "\x0F",
    16: "\x10",
    17: "\x11",
    18: "\x12",
    19: "\x13",
    20: "\x14",
    21: "\x15",
    22: "\x16",
    23: "\x17",
    24: "\x18",
    25: "\x19",
    26: "\x1A",
    27: "\x1B",
    28: "\x1C",
    29: "\x1D",
    30: "\x1E",
    31: "\x1F",
    32: " ",
    33: "!",
    34: '"',
    35: "#",
    36: "$",
    37: "%",
    38: "&",
    39: "'",
    40: "(",
    41: ")",
    42: "*",
    43: "+",
    44: ",",
    45: "-",
    46: ".",
    47: "/",
    48: "0",
    49: "1",
    50: "2",
    51: "3",
    52: "4",
    53: "5",
    54: "6",
    55: "7",
    56: "8",
    57: "9",
    58: ":",
    59: ";",
    60: "<",
    61: "=",
    62: ">",
    63: "?",
    64: "@",
    65: "A",
    66: "B",
    67: "C",
    68: "D",
    69: "E",
    70: "F",
    71: "G",
    72: "H",
    73: "I",
    74: "J",
    75: "K",
    76: "L",
    77: "M",
    78: "N",
    79: "O",
    80: "P",
    81: "Q",
    82: "R",
    83: "S",
    84: "T",
    85: "U",
    86: "V",
    87: "W",
    88: "X",
    89: "Y",
    90: "Z",
    91: "[",
    92: "\\",
    93: "]",
    94: "^",
    95: "_",
    96: "`",
    97: "a",
    98: "b",
    99: "c",
    100: "d",
    101: "e",
    102: "f",
    103: "g",
    104: "h",
    105: "i",
    106: "j",
    107: "k",
    108: "l",
    109: "m",
    110: "n",
    111: "o",
    112: "p",
    113: "q",
    114: "r",
    115: "s",
    116: "t",
    117: "u",
    118: "v",
    119: "w",
    120: "x",
    121: "y",
    122: "z",
    123: "{",
    124: "|",
    125: "}",
    126: "~",
    127: "",
    128: "Ä",
    129: "Å",
    130: "Ç",
    131: "É",
    132: "Ñ",
    133: "Ö",
    134: "Ü",
    135: "á",
    136: "à",
    137: "â",
    138: "ä",
    139: "ã",
    140: "å",
    141: "ç",
    142: "é",
    143: "è",
    144: "ê",
    145: "ë",
    146: "í",
    147: "ì",
    148: "î",
    149: "ï",
    150: "ñ",
    151: "ó",
    152: "ò",
    153: "ô",
    154: "ö",
    155: "õ",
    156: "ú",
    157: "ù",
    158: "û",
    159: "ü",
    160: "†",
    161: "°",
    162: "¢",
    163: "£",
    164: "§",
    165: "•",
    166: "¶",
    167: "ß",
    168: "®",
    169: "©",
    170: "™",
    171: "´",
    172: "¨",
    173: "≠",
    174: "Æ",
    175: "Ø",
    176: "∞",
    177: "±",
    178: "≤",
    179: "≥",
    180: "¥",
    181: "µ",
    182: "∂",
    183: "∑",
    184: "∏",
    185: "π",
    186: "∫",
    187: "ª",
    188: "º",
    189: "Ω",
    190: "æ",
    191: "ø",
    192: "¿",
    193: "¡",
    194: "¬",
    195: "√",
    196: "ƒ",
    197: "≈",
    198: "∆",
    199: "«",
    200: "»",
    201: "…",
    202: " ",
    203: "À",
    204: "Ã",
    205: "Õ",
    206: "Œ",
    207: "œ",
    208: "–",
    209: "—",
    210: "“",
    211: "”",
    212: "‘",
    213: "’",
    214: "÷",
    215: "◊",
    216: "ÿ",
    217: "Ÿ",
    218: "⁄",
    219: "€",
    220: "‹",
    221: "›",
    222: "fi",
    223: "fl",
    224: "‡",
    225: "·",
    226: "‚",
    227: "„",
    228: "‰",
    229: "Â",
    230: "Ê",
    231: "Á",
    232: "Ë",
    233: "È",
    234: "Í",
    235: "Î",
    236: "Ï",
    237: "Ì",
    238: "Ó",
    239: "Ô",
    240: "",
    241: "Ò",
    242: "Ú",
    243: "Û",
    244: "Ù",
    245: "ı",
    246: "ˆ",
    247: "˜",
    248: "¯",
    249: "˘",
    250: "˙",
    251: "˚",
    252: "¸",
    253: "˝",
    254: "˛",
    255: "ˇ"
}
inverse_codes = {v: k for k, v in codes.items()}

# Full-width ASCII block → ASCII (plus ideographic space)
_FULLWIDTH_ASCII = {chr(cp): chr(cp - 0xFEE0) for cp in range(0xFF01, 0xFF5F)}
_FULLWIDTH_ASCII['\u3000'] = ' '  # ideographic space → space

# Base Katakana → Halfwidth Katakana
_KATAKANA_BASE_TO_HALF = {
    'ァ': 'ァ', 'ア': 'ア', 'ィ': 'ィ', 'イ': 'イ', 'ゥ': 'ゥ', 'ウ': 'ウ', 'ェ': 'ェ',
    'エ': 'エ', 'ォ': 'ォ', 'オ': 'オ',
    'カ': 'カ', 'キ': 'キ', 'ク': 'ク', 'ケ': 'ケ', 'コ': 'コ', 'サ': 'サ', 'シ': 'シ',
    'ス': 'ス', 'セ': 'セ', 'ソ': 'ソ',
    'タ': 'タ', 'チ': 'チ', 'ツ': 'ツ', 'テ': 'テ', 'ト': 'ト', 'ナ': 'ナ', 'ニ': 'ニ',
    'ヌ': 'ヌ', 'ネ': 'ネ', 'ノ': 'ノ',
    'ハ': 'ハ', 'ヒ': 'ヒ', 'フ': 'フ', 'ヘ': 'ヘ', 'ホ': 'ホ', 'マ': 'マ', 'ミ': 'ミ',
    'ム': 'ム', 'メ': 'メ', 'モ': 'モ',
    'ヤ': 'ヤ', 'ユ': 'ユ', 'ヨ': 'ヨ', 'ラ': 'ラ', 'リ': 'リ', 'ル': 'ル', 'レ': 'レ',
    'ロ': 'ロ', 'ワ': 'ワ', 'ヲ': 'ヲ', 'ン': 'ン',
    'ャ': 'ャ', 'ュ': 'ュ', 'ョ': 'ョ', 'ッ': 'ッ',
    # Small kana & specials
    'ヵ': 'カ', 'ヶ': 'ケ',
    '・': '・', 'ー': 'ー',  # punctuation
    '。': '。', '、': '、', '「': '「', '」': '」',
}

# Combining voice marks → Halfwidth voice marks
_COMB_DAKUTEN = '\u3099'  # ◌゙
_COMB_HANDAKUTEN = '\u309A'  # ◌゚
_HALF_DAKUTEN = '゙'  # FF9E
_HALF_HANDAKUTEN = '゚'  # FF9F


[docs] def xasc(value): """ Excel-like ASC: convert full-width ASCII & Katakana to half-width. Leaves hiragana/kanji unchanged. """ s = _str(value) out = [] for ch in s: # Fast path: full-width ASCII / punctuation if ch in _FULLWIDTH_ASCII: out.append(_FULLWIDTH_ASCII[ch]) continue # Decompose to catch precomposed voiced kana (e.g., ガ = カ + ◌゙) decomp = unicodedata.normalize('NFD', ch) # If no change after NFD, decomp is typically just ch for d in decomp: if d in _KATAKANA_BASE_TO_HALF: out.append(_KATAKANA_BASE_TO_HALF[d]) elif d == _COMB_DAKUTEN: out.append(_HALF_DAKUTEN) elif d == _COMB_HANDAKUTEN: out.append(_HALF_HANDAKUTEN) else: # Not a target for conversion: keep as-is out.append(d) return ''.join(out)
FUNCTIONS['ASC'] = wrap_ufunc(xasc, **_kw0)
[docs] def xbahttext(value): from bahttext import bahttext return bahttext(value)
FUNCTIONS['BAHTTEXT'] = wrap_ufunc(xbahttext) _CLEAN_TRANS = {cp: None for cp in range(0, 32)}
[docs] def xclean(value): return _str(value).translate(_CLEAN_TRANS)
FUNCTIONS['CLEAN'] = wrap_ufunc(xclean, **_kw0) _re_hex = regex.compile("_x([0-9A-Z]{4})_")
[docs] def xchar(number): return codes.get(int(number), Error.errors['#VALUE!'])
FUNCTIONS['CHAR'] = wrap_ufunc(xchar)
[docs] def xunichar(number): number = int(number) if number: return chr(number) return Error.errors['#VALUE!']
FUNCTIONS['UNICHAR'] = FUNCTIONS['_XLFN.UNICHAR'] = wrap_ufunc(xunichar)
[docs] def xcode(character): if isinstance(character, sh.Token): raise ValueError return inverse_codes.get(str(character)[0], None)
FUNCTIONS["CODE"] = wrap_ufunc( xcode, args_parser=lambda *a: a, input_parser=lambda *a: a )
[docs] def xunicode(character): if isinstance(character, sh.Token): raise ValueError return ord(str(character)[0])
FUNCTIONS["UNICODE"] = FUNCTIONS["_XLFN.UNICODE"] = wrap_ufunc( xunicode, args_parser=lambda *a: a, input_parser=lambda *a: a ) def _str(text): if isinstance(text, bool): return str(text).upper() if isinstance(text, float) and text.is_integer(): return '%d' % text return str(text)
[docs] def xexact(text1, text2): return _str(text1) == _str(text2)
FUNCTIONS['EXACT'] = wrap_ufunc(xexact, **_kw0)
[docs] def xfind(find_text, within_text, start_num=1): i = int(start_num or 0) - 1 res = i >= 0 and _str(within_text).find(_str(find_text), i) + 1 or 0 return res or Error.errors['#VALUE!']
FUNCTIONS['FIND'] = FUNCTIONS['FINDB'] = wrap_ufunc(xfind, **_kw0)
[docs] def xleft(from_str, num_chars=1): i = int(num_chars or 0) if i >= 0: return _str(from_str)[:i] return Error.errors['#VALUE!']
FUNCTIONS['LEFT'] = FUNCTIONS['LEFTB'] = wrap_ufunc(xleft, **_kw0) _kw1 = { 'input_parser': lambda text: [_str(text)], 'args_parser': lambda *a: map(functools.partial(replace_empty, empty=''), a) } FUNCTIONS['LEN'] = FUNCTIONS['LENB'] = wrap_ufunc(str.__len__, **_kw1) FUNCTIONS['LOWER'] = wrap_ufunc(str.lower, **_kw1)
[docs] def xmid(from_str, start_num, num_chars): i = j = int(start_num or 0) - 1 j += int(num_chars or 0) if 0 <= i <= j: return _str(from_str)[i:j] return Error.errors['#VALUE!']
FUNCTIONS['MID'] = FUNCTIONS['MIDB'] = wrap_ufunc(xmid, **_kw0)
[docs] def xnumbervalue(text, decimal_sep=None, group_sep=None): text = _str(replace_empty(text, '0')) g = ',' if group_sep is None else replace_empty(group_sep)[0] d = '.' if decimal_sep is None else replace_empty(decimal_sep)[0] if g == d and group_sep is not None and decimal_sep is not None: return Error.errors['#VALUE!'] g_esc = re.escape(g) d_esc = re.escape(d) number = rf'\s*[\d{g_esc}]+(?:{d_esc}[\s\d]*)?(?:\s*[eE]\s*[+-]\s*\d[\s\d]*)?[\s%]*' if {'(', ')'}.intersection({g, d}): m = re.match(f'^\\s*[+-]?{number}$', text) else: m = re.match( f'(?:^\\s*[+-]?{number}$)|(?:^\\s*\\({number}\\)[\\s%]*$)', text ) if not m: return Error.errors['#VALUE!'] return float(text.translate(str.maketrans({ ' ': '', '%': '', '(': '-', ')': '', g: '', d: '.' }))) * 0.01 ** text.count('%')
FUNCTIONS['NUMBERVALUE'] = FUNCTIONS['_XLFN.NUMBERVALUE'] = wrap_ufunc( xnumbervalue, args_parser=lambda *a: a, input_parser=lambda *a: a ) FUNCTIONS['PROPER'] = wrap_ufunc(str.title, **_kw1) def _xregex(text, pattern, case_sensitivity=0): case_sensitivity = int(float(replace_empty(case_sensitivity))) if case_sensitivity not in (0, 1): raise FoundError(err=Error.errors['#VALUE!']) rx = re.compile( _str(pattern), re.IGNORECASE if case_sensitivity == 1 else 0 ) return rx, _str(text) _get_first = np.vectorize( lambda x: x[0] if isinstance(x, list) else x, otypes=[object] ) def _xregexextract_return_func(res, *args): if not res.shape: res = res.item() res = np.asarray(res, object).view(Array) elif res.shape == (1, 1): res = res[0, 0] res = np.asarray(res, object).view(Array) else: res = _get_first(res) return res
[docs] def xregexextract(text, pattern, return_mode=0, case_sensitivity=0): rx, text = _xregex(text, pattern, case_sensitivity) return_mode = int(float(replace_empty(return_mode))) if return_mode == 0: m = rx.search(text) r = [m.group(0) if m else None] elif return_mode == 1: r = [m.group(0) for m in rx.finditer(text)] or [None] elif return_mode == 2: m = rx.search(text) r = list(m.groups() or [Error.errors['#VALUE!']]) if m else [None] else: return Error.errors['#VALUE!'] return [Error.errors['#N/A'] if v is None else v for v in r]
[docs] def xregexreplace(text, pattern, replacement, occurrence=0, case_sensitivity=0): rx, text = _xregex(text, pattern, case_sensitivity) return rx.sub( _str(replacement), text, count=int(float(replace_empty(occurrence))) )
[docs] def xregextest(text, pattern, case_sensitivity=0): rx, text = _xregex(text, pattern, case_sensitivity) return rx.match(text) is not None
FUNCTIONS['REGEXEXTRACT'] = FUNCTIONS['_XLFN.REGEXEXTRACT'] = wrap_ufunc( xregexextract, return_func=_xregexextract_return_func, check_nan=False, **_kw0 ) FUNCTIONS['REGEXREPLACE'] = FUNCTIONS['_XLFN.REGEXREPLACE'] = wrap_ufunc( xregexreplace, check_nan=False, **_kw0 ) FUNCTIONS['REGEXTEST'] = FUNCTIONS['_XLFN.REGEXTEST'] = wrap_ufunc( xregextest, check_nan=False, **_kw0 )
[docs] def xreplace(old_text, start_num, num_chars, new_text): old_text, new_text = _str(old_text), _str(new_text) i = j = int(start_num or 0) - 1 j += int(num_chars or 0) if 0 <= i <= j: return old_text[:i] + new_text + old_text[j:] return Error.errors['#VALUE!']
FUNCTIONS['REPLACE'] = FUNCTIONS['REPLACEB'] = wrap_ufunc(xreplace, **_kw0)
[docs] def xrept(text, number_times): return _str(text) * int(float(replace_empty(number_times)))
FUNCTIONS['REPT'] = wrap_ufunc(xrept, **_kw0)
[docs] def xright(from_str, num_chars=1): res = xleft(_str(from_str)[::-1], num_chars) return res if isinstance(res, XlError) else res[::-1]
FUNCTIONS['RIGHT'] = FUNCTIONS['RIGHTB'] = wrap_ufunc(xright, **_kw0) FUNCTIONS['TRIM'] = wrap_ufunc(str.strip, **_kw1) FUNCTIONS['UPPER'] = wrap_ufunc(str.upper, **_kw1)
[docs] def xsearch(find_text, within_text, start_num=1): n = int(start_num - 1) n = str(within_text).lower().find(str(find_text).lower(), n) if n < 0: return Error.errors['#VALUE!'] return n + 1
FUNCTIONS['SEARCH'] = FUNCTIONS['SEARCHB'] = wrap_ufunc(xsearch, **_kw0)
[docs] def xsubstitute(text, old_text, new_text, instance_num=None): text, old_text, new_text = tuple(map(_str, (text, old_text, new_text))) if instance_num is None: return text.replace(old_text, new_text) elif isinstance(instance_num, ( bool, np.bool_, str, np.str_ )) or instance_num < 1: return Error.errors['#VALUE!'] parts = text.split(old_text) instance_num = int(instance_num) if instance_num > len(parts) - 1: return text return old_text.join(parts[:instance_num]) + new_text + old_text.join( parts[instance_num:] )
FUNCTIONS['SUBSTITUTE'] = wrap_ufunc( xsubstitute, input_parser=lambda *a: a, args_parser=lambda *a: (replace_empty(v, '') for v in a) )
[docs] def xconcat(text, *args): it = list(flatten((text,) + args, is_not_empty)) raise_errors(it) return ''.join(map(_str, it))
FUNCTIONS['_XLFN.CONCAT'] = FUNCTIONS['CONCAT'] = wrap_func(xconcat) FUNCTIONS['_XLFN.CONCATENATE'] = FUNCTIONS['CONCATENATE'] = wrap_ufunc( xconcat, **_kw0 )
[docs] def xtextjoin(delimiter, ignore_empty, text, *args): raise_errors(delimiter, ignore_empty, text, *args) if ignore_empty: it = (flatten((text,) + args, is_not_empty)) else: it = (replace_empty(v, '') for v in flatten((text,) + args, None)) return _str(next(flatten(delimiter, None))).join(map(_str, it))
FUNCTIONS['_XLFN.TEXTJOIN'] = FUNCTIONS['TEXTJOIN'] = wrap_func(xtextjoin) _re_format_code = regex.compile( r'(?P<text>"[^"]*")|' r'(?P<escape>\\.)|' r'(?P<percentage>\%)|' r'(?P<thousand>(?<=[#0]),(?=[#0]))|' r'(?P<time>[Aa][mM]/[Pp][mM]|[Aa]/[Pp]|[hH]+|[sS]+)|' r'(?P<date>[Yy]+|[dD]+)|' r'(?P<months_minutes>[mM]+)|' r'(?P<exp>E[+-]?)|' r'(?P<wrong>[e"])|' r'(?P<number>[#0])|' r'(?P<decimal>\.)|' r'(?P<skip>(?<=[#0]),)|' r'(?P<condition>;?\[[^\]]+\]|;)|' r'(?P<extra>$)' ) _re_sub_condition = regex.compile(r'[\[\];]+|^\s+|\s+$') @functools.lru_cache() def _parse_format_code(format_code): formats = [] codes = [] types = {} code = [] str_index = 0 minutes = None suspended = [] for match in _re_format_code.finditer(format_code): # noinspection PyUnresolvedReferences span = match.span() if str_index != span[0]: v = format_code[str_index:span[0]] sh.get_nested_dicts(types, 'extra', default=list).append(len(codes)) codes.append(v) str_index = span[1] # noinspection PyUnresolvedReferences for k, v in match.groupdict().items(): if v: if k == 'decimal' and k in types: k = 'extra' elif k == 'number' and 'exp' in types: k = 'exp' elif k == 'number' and 'decimal' in types: k = 'decimal' elif k == 'thousand' and (k in types or 'decimal' in types): k = 'skip' elif k == 'exp' and k in types or k == 'wrong': raise elif k == 'condition': code.extend(codes) code.append(v) codes = [] types = {} formats.append((v, codes, types)) break elif k == 'time': if suspended: sh.get_nested_dicts( types, 's' in v.lower() and k or 'date', default=list ).append(suspended.pop()) if minutes is None or 'h' in v.lower(): minutes = True elif k == 'date': if suspended: sh.get_nested_dicts(types, k, default=list).append( suspended.pop() ) elif k == 'months_minutes': if len(v) <= 2: if minutes: k = 'time' minutes = False elif minutes is None: k = 'date' elif len(suspended) > 0: raise else: suspended.append(len(codes)) codes.append(v) break else: k = 'date' sh.get_nested_dicts(types, k, default=list).append(len(codes)) codes.append(v) break if len(suspended) == 1: sh.get_nested_dicts(types, 'date', default=list).append( suspended.pop() ) code.extend(codes) assert ''.join(code) == format_code if not formats: formats = [('', codes, types)] conditions = [] for condition, codes, types in formats: condition = _re_sub_condition.sub('', condition) if condition: from .operators import LOGIC_OPERATORS operator = '=' for k in LOGIC_OPERATORS: if condition.startswith(k) and condition != k: operator, condition = k, condition[len(k):] break check = functools.partial( LOGIC_OPERATORS[operator], y=_text2num(condition) ) else: check = lambda value: True factor = 1 thousand = False if 'number' in types: if 'date' in types or 'time' in types: raise ValueError decimals = len(types.get('decimal', [None])) - 1 if 'exp' in types: type = 'E' else: type = 'f' factor = (100 ** len(types.get('percentage', ()))) thousand = 'thousand' in types and ',' or '' fotmat_string = f"{thousand}.{decimals}{type}" else: fotmat_string = '' for i in types.get('text', []): codes[i] = codes[i][1:-1] for i in types.get('escape', []): codes[i] = codes[i][1] for i in types.get('date', []): codes[i] = codes[i].lower() for i in types.get('time', []): if '/' not in codes[i] or len(codes[i]) == 5: codes[i] = codes[i].upper() for i in types.get('skip', []) + types.get('thousand', []): codes[i] = '' for k in ('number', 'decimal', 'exp'): for i in types.get(k, []): if codes[i] == '#': codes[i] = '' if thousand: for i in types.get('number', [])[::-3]: if codes[i] == '0': codes[i] = '0,' conditions.append((check, codes, types, fotmat_string, factor)) return conditions def _format_datetime(value, codes, types): codes = codes.copy() from datetime import datetime from .date import _int2date, _n2time value = datetime(*(_int2date(value) + _n2time(value))) parts = json.loads(format( value, '{"yyy":"%Y","yy":"%y","mm":"%m","mmm":"%b","mmmm":"%B",' '"dd":"%d","ddd":"%a","dddd":"%A","HH":"%H","MM":"%M",' '"SS":"%S","AM/PM":"%p"}' )) for k in 'mdHSM': parts[k] = parts[f'{k}{k}'].strip('0') parts['mmmmm'] = parts['mmm'][0] parts['m+'] = parts['mmmm'] parts['d+'] = parts['dddd'] parts['y'] = parts['yy'] parts['y+'] = parts['yyy'] parts['H+'] = parts['HH'] parts['S+'] = parts['SS'] parts['A/P'] = parts['AM/PM'].replace('M', '') parts['a/P'] = parts['A/P'].replace('A', 'a') parts['A/p'] = parts['A/P'].replace('P', 'p') parts['a/p'] = parts['A/P'].lower() for k in ('date', 'time'): for i in types.get(k, ()): v = codes[i] try: v = parts[v] except KeyError: v = parts[f'{v[0]}+'] codes[i] = v return codes _re_format_number = regex.compile( r'(?P<sign>[\+\-])?(?P<number>\d[\d,]*)(?P<decimal>\.\d+)?' r'(?>(?P<exp_sign>E[\+\-])0*(?P<exp>\d+))?' ) _re_split_number = regex.compile(r'(?=,\d)|(?<!,)(?=\d)') def _format_number(value, codes, types, fstr, mul): codes = codes.copy() parts = _re_format_number.match(format(value * mul, fstr)).groupdict('') # noinspection PyTypeChecker parts['number'] = _re_split_number.split(parts['number'].lstrip('0'))[1:] it = ( (types.get('number', ())[::-1], iter(parts['number'][::-1]), True), (types.get('decimal', ()), iter(parts['decimal'].rstrip('0')), False), (types.get('exp', ())[:0:-1], iter(parts['exp'][::-1]), True) ) for index, values, reverse in it: for i in index[:-1]: codes[i] = next(values, codes[i]) for i in index[-1:]: v = tuple(values) v = ''.join(v[::-1] if reverse else v) codes[i] = v or codes[i] for i in types.get('exp', ())[:1]: if codes[i] == 'E-' and parts['exp_sign'] == 'E+': codes[i] = 'E' else: codes[i] = parts['exp_sign'] return [parts['sign']] + codes
[docs] def xt(value): value = np.asarray(value, dtype=object).ravel()[0] raise_errors(value) if isinstance(value, (np.str_, str)): return replace_empty(value, '') return ''
FUNCTIONS['T'] = wrap_func(xt)
[docs] def xtext(value, format_code): it = _parse_format_code(str(format_code)) if isinstance(value, (np.bool_, bool)): return str(value).upper() try: value = xvalue(value) except (ValueError, TypeError, AssertionError): return value for check, codes, types, fstr, mul in it: if not check(value): continue if 'date' in types or 'time' in types: codes = _format_datetime(value, codes, types) else: codes = _format_number(value, codes, types, fstr, mul) return ''.join(codes) raise
FUNCTIONS['TEXT'] = wrap_ufunc(xtext, input_parser=lambda *a: a)
[docs] def xvalue(value): if not isinstance(value, Error) and isinstance(value, str): try: return float(value) except (ValueError, TypeError): from .date import xdate, _text2datetime, xtime value = _text2datetime(value) return xdate(*value[:3]) + xtime(*value[3:]) elif isinstance(value, (np.bool_, bool)): raise ValueError return float(value)
FUNCTIONS['VALUE'] = wrap_ufunc(xvalue, input_parser=lambda *a: a) def _xvaluetotext(text, format_type): if isinstance(text, sh.Token): if text is sh.EMPTY: return '' return _str(text) r = _str(text) if format_type and isinstance(text, str): return f'"{r}"' return r
[docs] def xvaluetotext(text, format_type=0): format_type = int(float(replace_empty(format_type))) if format_type not in (0, 1): return Error.errors['#VALUE!'] return _xvaluetotext(text, format_type)
FUNCTIONS['VALUETOTEXT'] = FUNCTIONS['_XLFN.VALUETOTEXT'] = wrap_ufunc( xvaluetotext, input_parser=lambda *a: a, args_parser=lambda *a: a, check_error=lambda a, format_type=0: get_error(format_type) )
[docs] def xarraytotext(array, format_type=0): format_type = int(float(replace_empty(format_type))) array = np.asarray(array, object) if format_type == 0: return ', '.join( _xvaluetotext(v, format_type) for v in flatten(array, None) ) elif format_type == 1: array = np.atleast_2d(array) if len(array.shape) == 2: r = ';'.join( ','.join(_xvaluetotext(v, format_type) for v in row) for row in array ) return f'{{{r}}}' if r else Error.errors['#VALUE!'] return Error.errors['#VALUE!']
FUNCTIONS['ARRAYTOTEXT'] = FUNCTIONS['_XLFN.ARRAYTOTEXT'] = wrap_ufunc( xarraytotext, input_parser=lambda *a: a, args_parser=lambda *a: a, check_error=lambda a, format_type=0: get_error(format_type), excluded={0} ) def _parse_bool(value): if isinstance(value, str): if value.upper() == "TRUE": value = True elif value.upper() == "FALSE": value = False else: raise FoundError(err=Error.errors['#VALUE!']) else: value = bool(value) return value
[docs] def xfixed(number, decimals=2, no_commas=False): number = float(replace_empty(number)) decimals = int(float(replace_empty(decimals))) no_commas = _parse_bool(replace_empty(no_commas, False)) if decimals > 127: return Error.errors['#VALUE!'] fmt = f"{',' if not no_commas else ''}.{max(decimals, 0)}f" from decimal import Decimal, ROUND_DOWN, ROUND_HALF_UP, localcontext number = Decimal(number) prec = len(number.as_tuple().digits) + decimals if prec <= 0: r = 0 else: if number >= 1e16: quant = Decimal(1).scaleb(number.adjusted() - (15 - 1)) number = number.quantize(quant, rounding=ROUND_DOWN) q = Decimal(1).scaleb(-decimals) with localcontext() as ctx: ctx.rounding = ROUND_HALF_UP ctx.prec = prec r = number.quantize(q) if r.is_zero(): r = abs(r) return format(r, fmt)
FUNCTIONS['FIXED'] = wrap_ufunc( xfixed, input_parser=lambda *a: a, args_parser=lambda *a: a )
[docs] def xtextsplit( text, col_delimiter, row_delimiter=None, ignore_empty=False, match_mode=0, pad_with=Error.errors['#N/A'] ): ignore_empty = _parse_bool(replace_empty(ignore_empty, False)) match_mode = int(float(replace_empty(match_mode))) if match_mode == 1: flags = re.IGNORECASE elif match_mode == 0: flags = 0 else: return Error.errors['#VALUE!'] text = replace_empty(text, Error.errors['#VALUE!']) raise_errors(text) text = _str(text) col_delimiter = tuple(map(_str, flatten(col_delimiter, None, True))) if not col_delimiter or any(not v for v in col_delimiter): return Error.errors['#VALUE!'] re_col = re.compile('|'.join(map(re.escape, col_delimiter)), flags=flags) if row_delimiter is None: rows = [text] else: row_delimiter = tuple(map(_str, flatten(row_delimiter, None, True))) if not row_delimiter or any(not v for v in row_delimiter): return Error.errors['#VALUE!'] rows = re.split('|'.join(map( re.escape, row_delimiter )), text, flags=flags) rows = [re_col.split(row) for row in rows] if ignore_empty: rows = [[v for v in row if v] for row in rows] rows = [row for row in rows if row] maxcols = max(map(len, rows)) for row in rows: if len(row) < maxcols: row.extend([pad_with] * (maxcols - len(row))) return rows
FUNCTIONS['TEXTSPLIT'] = FUNCTIONS['_XLFN.TEXTSPLIT'] = wrap_ufunc( xtextsplit, input_parser=lambda *a: a, args_parser=lambda *a: a, excluded={1, 2, 5}, return_func=return_2d_func, check_nan=False )
[docs] def xtextafterbefore( after, text, delimiter, instance_num=1, match_mode=0, match_end=0, if_not_found=Error.errors['#N/A'] ): instance_num = int(float(replace_empty(instance_num))) match_mode = int(float(replace_empty(match_mode))) match_end = int(float(replace_empty(match_end))) if match_mode == 1: flags = re.IGNORECASE elif match_mode == 0: flags = 0 else: return Error.errors['#VALUE!'] text = replace_empty(text, Error.errors['#VALUE!']) raise_errors(text) text = _str(text) if match_end not in (0, 1) or instance_num == 0 or instance_num > len(text): return Error.errors['#VALUE!'] delimiter = tuple(map(_str, flatten(delimiter, None, True))) if not delimiter or any(not v for v in delimiter): return Error.errors['#VALUE!'] if match_end: text = text + delimiter[0] r = list(re.finditer('|'.join(map( re.escape, delimiter )), text, flags=flags)) try: m = r[instance_num - 1 if instance_num > 0 else instance_num] return text[m.end():] if after else text[:m.start()] except IndexError: return if_not_found
FUNCTIONS['TEXTAFTER'] = FUNCTIONS['_XLFN.TEXTAFTER'] = wrap_ufunc( functools.partial(xtextafterbefore, True), input_parser=lambda *a: a, args_parser=lambda *a: a, excluded={1, 5}, check_nan=False ) FUNCTIONS['TEXTBEFORE'] = FUNCTIONS['_XLFN.TEXTBEFORE'] = wrap_ufunc( functools.partial(xtextafterbefore, False), input_parser=lambda *a: a, args_parser=lambda *a: a, excluded={1, 5}, check_nan=False )