From 6f13b6d4f4586070de604b81a2f29c82bacaac73 Mon Sep 17 00:00:00 2001 From: Robert Yokota Date: Fri, 13 Mar 2026 18:44:48 -0700 Subject: [PATCH 1/2] Minor perf improvements --- src/jsonata/datetimeutils.py | 8 +++---- src/jsonata/functions.py | 44 +++++++++++++++--------------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/jsonata/datetimeutils.py b/src/jsonata/datetimeutils.py index b32659a..5e35214 100644 --- a/src/jsonata/datetimeutils.py +++ b/src/jsonata/datetimeutils.py @@ -637,14 +637,14 @@ def format_datetime(millis: int, picture: Optional[str], timezone: Optional[str] offset_millis = (60 * offset_hours + offset_minutes) * 60 * 1000 date_time = datetime.datetime.fromtimestamp((millis + offset_millis) / 1000.0, datetime.timezone.utc) - result = "" + parts = [] for part in format_spec.parts: if part.type == "literal": - result += part.value + parts.append(part.value) else: - result += DateTimeUtils._format_component(date_time, part, offset_hours, offset_minutes) + parts.append(DateTimeUtils._format_component(date_time, part, offset_hours, offset_minutes)) - return result + return ''.join(parts) @staticmethod def _format_component(date: datetime.datetime, marker_spec: SpecPart, offset_hours: int, diff --git a/src/jsonata/functions.py b/src/jsonata/functions.py index c47ff9e..c3f29ad 100644 --- a/src/jsonata/functions.py +++ b/src/jsonata/functions.py @@ -474,11 +474,7 @@ def left_pad(string: Optional[str], size: Optional[int], pad_str: Optional[str]) pads = size - str_len if pads <= 0: return string - padding = "" - i = 0 - while i < pads + 1: - padding += pad_str - i += 1 + padding = pad_str * (pads // len(pad_str) + 1) return Functions.substr(padding, 0, pads) + string # Source: Jsonata4Java PadFunction @@ -501,11 +497,7 @@ def right_pad(string: Optional[str], size: Optional[int], pad_str: Optional[str] pads = size - str_len if pads <= 0: return string - padding = "" - i = 0 - while i < pads + 1: - padding += pad_str - i += 1 + padding = pad_str * (pads // len(pad_str) + 1) return string + Functions.substr(padding, 0, pads) @dataclass @@ -767,23 +759,23 @@ def replace(string: Optional[str], pattern: Union[str, re.Pattern], replacement: raise jexception.JException("Fourth argument of replace function must evaluate to a positive number", 0) def string_replacer(match): - result = '' + parts = [] position = 0 repl = str(replacement) while position < len(repl): index = repl.find('$', position) if index == -1: - result += repl[position:] + parts.append(repl[position:]) break - result += repl[position:index] + parts.append(repl[position:index]) position = index + 1 if position < len(repl): dollar_val = repl[position] if dollar_val == '$': - result += '$' + parts.append('$') position += 1 elif dollar_val == '0': - result += match.group(0) + parts.append(match.group(0)) position += 1 else: max_digits = len(str(len(match.groups()))) @@ -791,15 +783,15 @@ def string_replacer(match): if group_num.isdigit(): group_index = int(group_num) if 0 < group_index <= len(match.groups()): - result += match.group(group_index) or '' + parts.append(match.group(group_index) or '') position += len(group_num) else: - result += '$' + parts.append('$') else: - result += '$' + parts.append('$') else: - result += '$' - return result + parts.append('$') + return ''.join(parts) if callable(replacement): replacer = lambda m: replacement(m.groupdict()) @@ -810,23 +802,23 @@ def string_replacer(match): if isinstance(pattern, str): # Use string methods for literal string patterns - result = '' + parts = [] position = 0 count = 0 while True: if limit is not None and count >= limit: - result += string[position:] + parts.append(string[position:]) break index = string.find(pattern, position) if index == -1: - result += string[position:] + parts.append(string[position:]) break - result += string[position:index] + parts.append(string[position:index]) match = re.match(re.escape(pattern), string[index:]) - result += replacer(match) + parts.append(replacer(match)) position = index + len(pattern) count += 1 - return result + return ''.join(parts) else: # Use regex for pattern objects if limit is None: From 5d56b3635d2814e6c5602a1856bd3f00863a222e Mon Sep 17 00:00:00 2001 From: Robert Yokota Date: Fri, 13 Mar 2026 18:56:15 -0700 Subject: [PATCH 2/2] Optimize regex usage --- src/jsonata/tokenizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/jsonata/tokenizer.py b/src/jsonata/tokenizer.py index d0952e9..62027b7 100644 --- a/src/jsonata/tokenizer.py +++ b/src/jsonata/tokenizer.py @@ -32,6 +32,8 @@ from jsonata import jexception, utils +_NUMBER_PATTERN = re.compile(r"^-?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][-+]?[0-9]+)?") + class Tokenizer: operators = { @@ -264,8 +266,7 @@ def next(self, prefix: bool) -> Optional[Token]: self.position += 1 raise jexception.JException("S0101", self.position) # test for numbers - numregex = re.compile("^-?(0|([1-9][0-9]*))(\\.[0-9]+)?([Ee][-+]?[0-9]+)?") - match_ = numregex.search(self.path[self.position:]) + match_ = _NUMBER_PATTERN.search(self.path[self.position:]) if match_ is not None: num = float(match_.group(0)) if not math.isnan(num) and math.isfinite(num):