Source code for formulas.excel.xlreader

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2016-2025 European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl

"""
It provides a custom Excel Reader class.
"""
from openpyxl.reader.excel import ExcelReader
from openpyxl.xml.constants import SHARED_STRINGS
from openpyxl.cell.text import Text
from openpyxl.xml.functions import iterparse
from openpyxl.xml.constants import SHEET_MAIN_NS
from ..functions.text import _re_hex


[docs] def replace_hex(match): return chr(int(match.group(1), 16))
[docs] def read_string_table(_raw_data, xml_source): """Read in all shared strings in the table""" strings = [] STRING_TAG = '{%s}si' % SHEET_MAIN_NS for _, node in iterparse(xml_source): if node.tag == STRING_TAG: text = Text.from_tree(node).content if not _raw_data and '_x' in text: text = _re_hex.sub(replace_hex, text) text = text.replace('x005F_', '') node.clear() strings.append(text) return strings
[docs] class XlReader(ExcelReader):
[docs] def __init__(self, *args, raw_date=True, _raw_data=False, **kwargs): super(XlReader, self).__init__(*args, **kwargs) self.raw_date, self._date_formats = raw_date, set() self._raw_data = _raw_data
[docs] def read_worksheets(self): if self.raw_date: self._date_formats = self.wb._date_formats self.wb._date_formats = set() super(XlReader, self).read_worksheets()
[docs] def read_strings(self): ct = self.package.find(SHARED_STRINGS) reader = read_string_table if ct is not None: strings_path = ct.PartName[1:] with self.archive.open(strings_path, ) as src: self.shared_strings = reader(self._raw_data, src)
[docs] def load_workbook(filename, _raw_data=False, **kw): if isinstance(filename, str) and filename.endswith('.ods'): from .ods_reader import ods_to_xlsx return ods_to_xlsx(filename, **kw) reader = XlReader(filename, _raw_data=_raw_data, **kw) reader.read() return reader.wb