v1.1.0

2026-05-10 16:37:46 +02:00
parent 6207cab27a
commit af21672b1c
3 changed files with 166 additions and 72 deletions
--- a/bagheera_search_lib/bagheera_search.py
+++ b/bagheera_search_lib/bagheera_search.py
@@ -33,31 +33,42 @@ def expression_contains_tags(text):

 class EvaluateExpression:
    def __init__(self):
-        # Pre-define the grammar structure during initialization
        self.grammar = self._build_grammar()

    def _compare_single(self, l_val, op, r_val):
-        """
-        Atomic comparison logic for individual values.
-        Handles numeric conversion and standard operators.
-        """
-        # Numeric conversion for mathematical operators
+        # 1. CASE SENSITIVE (Strict): '==' compares both sides as exact strings
+        if op == "==":
+            return str(l_val) == str(r_val)
+
+        # 2. NUMERIC LOGIC
        if op in (">", "<", ">=", "<="):
            try:
-                # Attempt to treat both sides as floats
+                # We use float for numeric magnitude
                curr_l, curr_r = float(l_val), float(r_val)
+                if op == ">":
+                    return curr_l > curr_r
+                if op == "<":
+                    return curr_l < curr_r
+                if op == ">=":
+                    return curr_l >= curr_r
+                if op == "<=":
+                    return curr_l <= curr_r
            except (ValueError, TypeError):
-                # Fallback to string comparison if conversion fails
-                curr_l, curr_r = str(l_val), str(r_val)
-        else:
-            # Default to string representation for other operators
-            curr_l, curr_r = str(l_val), str(r_val)
+                # Not numeric: fall through to the case-insensitive string comparison below
+                pass
+
+        # 3. CASE INSENSITIVE (Default for =, !=, :)
+        curr_l = str(l_val).lower()
+        curr_r = str(r_val).lower()

-        # Standard operator logic
        if op == "=":
-            return l_val == r_val
+            return curr_l == curr_r
        if op == "!=":
-            return l_val != r_val
+            return curr_l != curr_r
+        if op == ":":
+            return curr_r in curr_l
+
+        # String fallback for magnitude if numeric failed
        if op == ">":
            return curr_l > curr_r
        if op == "<":
@@ -66,44 +77,39 @@ class EvaluateExpression:
            return curr_l >= curr_r
        if op == "<=":
            return curr_l <= curr_r
-        if op == ":":
-            return str(r_val).lower() in str(l_val).lower()
+
        return False

    def _compare(self, data, left_key, op, right_val):
-        """
-        Main comparison router. Checks if the field is a list or a single value.
-        """
-        # Normalize data keys to lowercase for case-insensitive lookup
+        # Normalizing keys for lookup, but KEEPING the values intact
        normalized_data = {k.lower(): v for k, v in data.items()}

-        # Extract the left-hand value (the field from the JSON)
+        # Look up the left operand as a field name; if no such field exists, use it as a literal
        l_val = normalized_data.get(left_key.lower(), left_key)

-        # Extract the right-hand value (check if it's a literal or another field)
-        r_val = normalized_data.get(str(right_val).lower(), right_val)
+        # Resolve right value: if it's a key in data, use its value.
+        # Important: use lower() only for the KEY lookup, not the value itself.
+        r_key_lookup = str(right_val).lower()
+        if r_key_lookup in normalized_data:
+            r_val = normalized_data[r_key_lookup]
+        else:
+            r_val = right_val

-        # IF THE FIELD VALUE IS A LIST
        if isinstance(l_val, list):
-            # Return True if ANY item in the list satisfies the condition
            return any(self._compare_single(item, op, r_val) for item in l_val)

-        # IF THE FIELD VALUE IS A SINGLE DATA POINT
        return self._compare_single(l_val, op, r_val)

    def _build_grammar(self):
-        """
-        Defines the pyparsing grammar for the expression engine.
-        """
-        operators = one_of(">= <= != = > < :")
+        # '==' is listed ahead of '=' for readability. NOTE: pyparsing's one_of() is
+        # documented to test longer alternatives first regardless of input order.
+        operators = one_of(["==", ">=", "<=", "!=", "=", ">", "<", ":"])
+
        identifier = Word(alphanums + "_./\\")
        quoted_string = QuotedString("'") | QuotedString('"')
        operand = quoted_string | identifier

-        # Define basic condition (e.g., "width > 100" or "word")
        condition = Group((operand + operators + operand) | operand)
-
-        # Attach the parse action to convert tokens into executable functions (lambdas)
        condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))

        return infix_notation(
@@ -119,25 +125,16 @@ class EvaluateExpression:
        )

    def _create_evaluator_func(self, tokens):
-        """
-        Creates a closure that captures tokens and waits for the data dictionary.
-        """
        if len(tokens) == 1:
-            # Rule: Single term -> path CONTAINS term
            return lambda data: self._compare(data, 'path', ':', tokens[0])
        else:
-            # Rule: Explicit triplet (key, operator, value)
            return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])

    def compile(self, expression):
-        """
-        Parses the expression once and returns a reusable function.
-        """
        try:
            return self.grammar.parse_string(expression, parse_all=True)[0]
        except Exception as e:
            print(f"Compilation Error: {e}")
-            # Fallback: return a function that always fails gracefully
            return lambda data: False


@@ -250,7 +247,7 @@ class BagheeraSearcher:
            self.ids_processed.add(file_id)

            if exclude_evaluator:
-                file_info = {'path': item["path"]}
+                file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
                if exclude_sources.get('properties'):
                    file_info = file_info | get_info(file_id)
                if exclude_sources.get('tags'):
@@ -325,7 +322,7 @@ class BagheeraSearcher:
            self.ids_processed.add(file_id)

            if exclude_evaluator:
-                file_info = {'path': item["path"]}
+                file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
                if exclude_sources.get('properties'):
                    file_info = file_info | get_info(file_id)
                if exclude_sources.get('tags'):