This commit is contained in:
Ignacio Serantes
2026-05-10 16:37:46 +02:00
parent 6207cab27a
commit af21672b1c
3 changed files with 166 additions and 72 deletions

View File

@@ -33,31 +33,42 @@ def expression_contains_tags(text):
class EvaluateExpression:
def __init__(self):
    """Build the expression grammar once and keep it on the instance."""
    # The grammar is stored as ``self.grammar`` so that compile() can reuse
    # the same parser object instead of rebuilding it per expression.
    self.grammar = self._build_grammar()
def _compare_single(self, l_val, op, r_val):
"""
Atomic comparison logic for individual values.
Handles numeric conversion and standard operators.
"""
# Numeric conversion for mathematical operators
# 1. CASE SENSITIVE (Strict)
if op == "==":
return str(l_val) == str(r_val)
# 2. NUMERIC LOGIC
if op in (">", "<", ">=", "<="):
try:
# Attempt to treat both sides as floats
# We use float for numeric magnitude
curr_l, curr_r = float(l_val), float(r_val)
if op == ">":
return curr_l > curr_r
if op == "<":
return curr_l < curr_r
if op == ">=":
return curr_l >= curr_r
if op == "<=":
return curr_l <= curr_r
except (ValueError, TypeError):
# Fallback to string comparison if conversion fails
curr_l, curr_r = str(l_val), str(r_val)
else:
# Default to string representation for other operators
curr_l, curr_r = str(l_val), str(r_val)
# Fallback to case-insensitive string if not numeric
pass
# 3. CASE INSENSITIVE (Default for =, !=, :)
curr_l = str(l_val).lower()
curr_r = str(r_val).lower()
# Standard operator logic
if op == "=":
return l_val == r_val
return curr_l == curr_r
if op == "!=":
return l_val != r_val
return curr_l != curr_r
if op == ":":
return curr_r in curr_l
# String fallback for magnitude if numeric failed
if op == ">":
return curr_l > curr_r
if op == "<":
@@ -66,44 +77,39 @@ class EvaluateExpression:
return curr_l >= curr_r
if op == "<=":
return curr_l <= curr_r
if op == ":":
return str(r_val).lower() in str(l_val).lower()
return False
def _compare(self, data, left_key, op, right_val):
    """
    Route a comparison to _compare_single, fanning out over list fields.

    Args:
        data: Mapping of field name to value for the item being tested.
        left_key: Field name, looked up case-insensitively. When no such
            field exists, ``left_key`` itself is used as the literal value.
        op: Operator token, forwarded unchanged to _compare_single.
        right_val: Literal value, or the name of another field in ``data``
            (matched case-insensitively) whose value is used instead.

    Returns:
        For a list-valued left side, True when at least one element
        satisfies the comparison (False for an empty list); otherwise the
        result of the single comparison.
    """
    # Only the KEYS are lower-cased for lookup. Values are left intact so
    # the strict "==" operator still sees the original casing.
    normalized_data = {str(k).lower(): v for k, v in data.items()}

    # Either side falls back to its own text when it does not name a field.
    l_val = normalized_data.get(str(left_key).lower(), left_key)
    r_val = normalized_data.get(str(right_val).lower(), right_val)

    if isinstance(l_val, list):
        # A list field matches when ANY of its items satisfies the condition.
        return any(self._compare_single(item, op, r_val) for item in l_val)

    return self._compare_single(l_val, op, r_val)
def _build_grammar(self):
"""
Defines the pyparsing grammar for the expression engine.
"""
operators = one_of(">= <= != = > < :")
# CRITICAL: '==' must come BEFORE '=' in the list
# We use a list to ensure explicit priority in the parser
operators = one_of(["==", ">=", "<=", "!=", "=", ">", "<", ":"])
identifier = Word(alphanums + "_./\\")
quoted_string = QuotedString("'") | QuotedString('"')
operand = quoted_string | identifier
# Define basic condition (e.g., "width > 100" or "word")
condition = Group((operand + operators + operand) | operand)
# Attach the parse action to convert tokens into executable functions (lambdas)
condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))
return infix_notation(
@@ -119,25 +125,16 @@ class EvaluateExpression:
)
def _create_evaluator_func(self, tokens):
    """
    Turn one parsed condition into a function that awaits the data dict.

    Args:
        tokens: Either a single term, or a (key, operator, value) triplet.

    Returns:
        A callable taking ``data`` and returning the result of _compare.
        A single term is evaluated as "the 'path' field contains term".

    Raises:
        ValueError: If ``tokens`` holds neither one nor three items. This
            surfaces while the expression is being built rather than on
            every later evaluation.
    """
    if len(tokens) == 1:
        # Bare term: shorthand for a containment test against 'path'.
        term = tokens[0]
        return lambda data: self._compare(data, 'path', ':', term)

    # Unpack eagerly so the closure holds plain values and a malformed
    # token group fails here instead of when the evaluator is called.
    left_key, op, right_val = tokens
    return lambda data: self._compare(data, left_key, op, right_val)
def compile(self, expression):
    """
    Parse ``expression`` once and return the reusable evaluator it yields.

    Args:
        expression: Text of the expression to parse with ``self.grammar``.

    Returns:
        The first element of the parse result. If parsing (or indexing the
        result) raises, a "Compilation Error" line is printed and a
        function that returns False for every input is returned instead.
    """
    try:
        parsed = self.grammar.parse_string(expression, parse_all=True)
        return parsed[0]
    except Exception as e:
        # Deliberate best-effort boundary: a bad expression must not abort
        # the caller, so it degrades to an evaluator that never matches.
        print(f"Compilation Error: {e}")
        return lambda data: False
@@ -250,7 +247,7 @@ class BagheeraSearcher:
self.ids_processed.add(file_id)
if exclude_evaluator:
file_info = {'path': item["path"]}
file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):
@@ -325,7 +322,7 @@ class BagheeraSearcher:
self.ids_processed.add(file_id)
if exclude_evaluator:
file_info = {'path': item["path"]}
file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):