v1.1.0
This commit is contained in:
@@ -1,7 +1,14 @@
|
||||
from .bagheera_search import BagheeraSearcher
|
||||
from .bagheera_search import (
|
||||
BagheeraSearcher, EvaluateExpression
|
||||
)
|
||||
|
||||
|
||||
def search(query):
    """Simplified entry point for the library.

    Creates a fresh ``BagheeraSearcher`` and returns whatever its
    ``search`` method returns for *query*.
    """
    return BagheeraSearcher().search(query)
|
||||
|
||||
|
||||
def create_evaluator(expression):
    """Compile *expression* with a new ``EvaluateExpression``.

    Returns the value produced by ``EvaluateExpression.compile``.
    """
    return EvaluateExpression().compile(expression)
|
||||
|
||||
@@ -10,9 +10,136 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Iterator, Optional, Union
|
||||
|
||||
from baloo_tools import get_resolution
|
||||
from baloo_tools import (get_info, get_tags)
|
||||
from bagheera_query_parser_lib import parse_date
|
||||
|
||||
from pyparsing import (
|
||||
alphanums, one_of, infix_notation,
|
||||
Group, opAssoc, ParserElement, QuotedString, Word
|
||||
)
|
||||
|
||||
ParserElement.enable_packrat()
|
||||
|
||||
|
||||
def expression_contains_property(text):
    """Tell whether *text* compares any field other than ``tags``.

    A match is a word (anything except the whole word ``tags``, checked
    case-insensitively) optionally followed by spaces/tabs and then one of
    the operators ``>= <= != = > < :``.
    """
    field_comparison = re.compile(
        r"\b(?!tags\b)\w+[ \t]*(?:>=|<=|!=|=|>|<|:)", re.IGNORECASE
    )
    return field_comparison.search(text) is not None
|
||||
|
||||
|
||||
def expression_contains_tags(text):
    """Tell whether *text* contains a comparison on the ``tags`` field.

    A match is the whole word ``tags`` (case-insensitive), optionally
    followed by spaces/tabs and then one of ``>= <= != = > < :``.
    """
    tags_comparison = re.compile(
        r"\btags\b[ \t]*(?:>=|<=|!=|=|>|<|:)", re.IGNORECASE
    )
    return tags_comparison.search(text) is not None
|
||||
|
||||
|
||||
class EvaluateExpression:
    """Compile boolean filter expressions into reusable predicates.

    An expression is built from conditions combined with ``NOT``, ``AND``,
    ``OR`` and parentheses.  A condition is either ``<operand> <op> <operand>``
    (``op`` being one of ``>= <= != = > < :``) or a single term, which is
    treated as "``path`` contains term".  ``compile`` returns a callable that
    takes a dictionary of file data and returns a boolean.
    """

    def __init__(self):
        # Build the grammar once; compile() reuses it for every expression.
        self.grammar = self._build_grammar()

    def _compare_single(self, l_val, op, r_val):
        """
        Compare two individual values with the given operator.

        * ``:``  - case-insensitive substring test (``r_val`` in ``l_val``).
        * ``=`` / ``!=`` - equal when the case-folded string forms match or,
          failing that, when both sides convert to the same float.  This
          lets ``width = 100`` match an integer field and keeps text
          equality case-insensitive.
        * ``> < >= <=`` - numeric when both sides convert to float,
          otherwise a plain string comparison.

        Returns False for an unknown operator.
        """
        if op == ":":
            return str(r_val).lower() in str(l_val).lower()

        if op in ("=", "!="):
            equal = str(l_val).casefold() == str(r_val).casefold()
            if not equal:
                try:
                    equal = float(l_val) == float(r_val)
                except (ValueError, TypeError, OverflowError):
                    # Not both numeric: the text comparison above stands.
                    pass
            return equal if op == "=" else not equal

        if op in (">", "<", ">=", "<="):
            try:
                # Attempt to treat both sides as floats
                curr_l, curr_r = float(l_val), float(r_val)
            except (ValueError, TypeError, OverflowError):
                # Fallback to string comparison if conversion fails
                curr_l, curr_r = str(l_val), str(r_val)

            if op == ">":
                return curr_l > curr_r
            if op == "<":
                return curr_l < curr_r
            if op == ">=":
                return curr_l >= curr_r
            return curr_l <= curr_r

        return False

    def _compare(self, data, left_key, op, right_val):
        """
        Resolve both operands against ``data`` and compare them.

        Field names are looked up case-insensitively.  When the left operand
        is not a key of ``data`` its own text is used as the value; the right
        operand is likewise replaced by a field value when it names one, so
        two fields can be compared (``width > height``).  If the left value
        is a list the result is True when ANY item satisfies the condition.
        """
        # Normalize data keys to lowercase for case-insensitive lookup
        normalized_data = {k.lower(): v for k, v in data.items()}

        # Left-hand value: the field content, or the literal text if absent
        l_val = normalized_data.get(left_key.lower(), left_key)

        # Right-hand value: another field if it names one, else the literal
        r_val = normalized_data.get(str(right_val).lower(), right_val)

        if isinstance(l_val, list):
            return any(self._compare_single(item, op, r_val) for item in l_val)

        return self._compare_single(l_val, op, r_val)

    def _build_grammar(self):
        """
        Define the pyparsing grammar for the expression engine.

        The logical operators are ``Keyword`` elements rather than plain
        strings: a plain string becomes a ``Literal``, which would also match
        the first letters of a longer word and parse a term such as
        ``NOTES`` as ``NOT ES``.
        """
        # Local import: Keyword is not among the names the module imports
        # from pyparsing at the top of the file.
        from pyparsing import Keyword

        operators = one_of(">= <= != = > < :")
        identifier = Word(alphanums + "_./\\")
        quoted_string = QuotedString("'") | QuotedString('"')
        operand = quoted_string | identifier

        # Basic condition (e.g., "width > 100" or "word")
        condition = Group((operand + operators + operand) | operand)

        # Convert the parsed tokens into an executable predicate
        condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))

        return infix_notation(
            condition,
            [
                # t[0] is [operator, operand] for the unary NOT
                (Keyword("NOT"), 1, opAssoc.RIGHT, lambda t: (
                    lambda data: not t[0][1](data))),
                # t[0] alternates operands and operator tokens; only the
                # operands (callables) are evaluated
                (Keyword("AND"), 2, opAssoc.LEFT, lambda t: (
                    lambda data: all(f(data) for f in t[0] if callable(f)))),
                (Keyword("OR"), 2, opAssoc.LEFT, lambda t: (
                    lambda data: any(f(data) for f in t[0] if callable(f)))),
            ],
        )

    def _create_evaluator_func(self, tokens):
        """
        Create a closure that captures tokens and waits for the data dictionary.

        One token means "``path`` contains token"; otherwise the tokens are
        taken as the triplet (key, operator, value).
        """
        if len(tokens) == 1:
            # Rule: Single term -> path CONTAINS term
            return lambda data: self._compare(data, 'path', ':', tokens[0])

        # Rule: Explicit triplet (key, operator, value)
        return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])

    def compile(self, expression):
        """
        Parse the expression once and return a reusable predicate.

        On any parsing failure a diagnostic is written to stderr (so it does
        not mix with results printed on stdout) and a predicate that always
        returns False is returned instead of raising.
        """
        try:
            return self.grammar.parse_string(expression, parse_all=True)[0]
        except Exception as e:
            # Deliberate best effort: a bad expression must not abort a search.
            print(f"Compilation Error: {e}", file=sys.stderr)
            # Fallback: return a function that always fails gracefully
            return lambda data: False
|
||||
|
||||
|
||||
class BagheeraSearcher:
|
||||
"""Class to handle Baloo searches and interact with the C wrapper."""
|
||||
@@ -69,84 +196,8 @@ class BagheeraSearcher:
|
||||
|
||||
return lib
|
||||
|
||||
def check_keywords(
    self, text: str, query: str, file_path: str = "", file_id: int = 0
) -> bool:
    """
    Evaluates if a text meets a logical query.

    Supports: AND, OR, ( ), dimensions (width=height, etc.), and shapes.

    Terms separated by whitespace are implicitly AND-ed.  Every term becomes
    a look-ahead on the lowercased ``text``, so matching is a
    case-insensitive substring test.  When ``file_path`` is non-empty the
    words PORTRAIT / LANDSCAPE / SQUARE and ``width <op> height`` are first
    replaced by true/false markers computed from
    ``get_resolution(file_id)``.

    Args:
        text: Text to test (callers pass the file path).
        query: Logical query.
        file_path: If non-empty, enables the dimension/shape keywords.
        file_id: File id handed to ``get_resolution``.

    Returns:
        True if ``text`` satisfies ``query``; False otherwise, including when
        the generated regular expression is invalid.
    """
    if file_path:
        try:
            w, h = get_resolution(file_id)
        except Exception:
            # Best effort: unknown resolution makes every dimension test false
            w, h = -1, -1

        def replace_dim(match: re.Match) -> str:
            if w <= 0 or h <= 0:
                return "__false__"

            s = match.group(0).upper()
            if "PORTRAIT" in s:
                return "__true__" if w < h else "__false__"
            if "LANDSCAPE" in s:
                return "__true__" if w > h else "__false__"
            if "SQUARE" in s:
                return "__true__" if w == h else "__false__"

            op = match.group(1)
            ops_map = {
                "=": w == h,
                ">": w > h,
                "<": w < h,
                ">=": w >= h,
                "<=": w <= h,
                "!=": w != h,
            }
            return "__true__" if ops_map.get(op, False) else "__false__"

        query = re.sub(
            r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )
        query = re.sub(
            r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )

    text = text.lower()
    # Make the implicit AND between adjacent words explicit
    query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)

    # Split into parentheses and whole whitespace-delimited words.  OR/AND
    # are recognised below by comparing complete tokens; matching them inside
    # the tokenizer regex would cut a term such as "ORANGE" into "OR" +
    # "ANGE" and yield an empty alternative that matches everything.
    tokens = re.findall(r"\(|\)|[^\s()]+", query)
    regex_parts = []

    for t in tokens:
        if t in ("(", ")"):
            regex_parts.append(t)
        elif t == "OR":
            regex_parts.append("|")
        elif t == "AND":
            # Consecutive look-aheads already behave as AND
            continue
        elif t == "__true__":
            regex_parts.append("(?=.*)")
        elif t == "__false__":
            regex_parts.append("(?!)")
        else:
            regex_parts.append(rf"(?=.*{re.escape(t)})")

    final_regex = "".join(regex_parts).lower()

    try:
        return bool(re.search(f"^{final_regex}.*", text, re.DOTALL))
    except re.error:
        return False
|
||||
|
||||
def get_baloo_info(self, file_path: str) -> Dict[str, str]:
|
||||
"""Retrieves properties for a specific file from Baloo."""
|
||||
"""Extract properties for a specific file directly from file."""
|
||||
result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
|
||||
if not result:
|
||||
return {}
|
||||
@@ -181,6 +232,8 @@ class BagheeraSearcher:
|
||||
options: Dict[str, Any],
|
||||
search_opts: Dict[str, Any],
|
||||
files_count: int,
|
||||
exclude_evaluator: Any,
|
||||
exclude_sources: Dict[str, bool]
|
||||
) -> Iterator[Dict[str, Any]]:
|
||||
"""Executes a recursive search yielded item by item."""
|
||||
options["query"] = query_text
|
||||
@@ -195,15 +248,20 @@ class BagheeraSearcher:
|
||||
continue
|
||||
|
||||
self.ids_processed.add(file_id)
|
||||
rec_exclude = search_opts.get("recursive_exclude")
|
||||
|
||||
if not rec_exclude or not self.check_keywords(
|
||||
item["path"], rec_exclude, item["path"], file_id
|
||||
):
|
||||
if exclude_evaluator:
|
||||
file_info = {'path': item["path"]}
|
||||
if exclude_sources.get('properties'):
|
||||
file_info = file_info | get_info(file_id)
|
||||
if exclude_sources.get('tags'):
|
||||
file_info = file_info | get_tags(file_id)
|
||||
else:
|
||||
file_info = None
|
||||
|
||||
if not file_info or not exclude_evaluator(file_info):
|
||||
if files_count >= search_opts.get("offset", 0):
|
||||
search_opts["limit"] -= 1
|
||||
yield item
|
||||
|
||||
files_count += 1
|
||||
|
||||
def search(
|
||||
@@ -215,6 +273,30 @@ class BagheeraSearcher:
|
||||
"""
|
||||
Main search generator. Yields file dictionaries.
|
||||
"""
|
||||
if search_opts['exclude']:
|
||||
ee = EvaluateExpression()
|
||||
exclude_evaluator = ee.compile(search_opts['exclude'])
|
||||
exclude_sources = {}
|
||||
if expression_contains_property(search_opts['exclude']):
|
||||
exclude_sources['properties'] = True
|
||||
if expression_contains_tags(search_opts['exclude']):
|
||||
exclude_sources['tags'] = True
|
||||
else:
|
||||
exclude_evaluator = None
|
||||
exclude_sources = {}
|
||||
|
||||
if search_opts['recursive_exclude']:
|
||||
ee = EvaluateExpression()
|
||||
recurse_exclude_evaluator = ee.compile(search_opts['recursive_exclude'])
|
||||
recurse_exclude_sources = {}
|
||||
if expression_contains_property(search_opts['recursive_exclude']):
|
||||
recurse_exclude_sources['properties'] = True
|
||||
if expression_contains_tags(search_opts['recursive_exclude']):
|
||||
recurse_exclude_sources['tags'] = True
|
||||
else:
|
||||
recurse_exclude_evaluator = None
|
||||
recurse_exclude_sources = {}
|
||||
|
||||
main_options["query"] = parse_date(query_text)
|
||||
files = self._execute_query(main_options)
|
||||
|
||||
@@ -241,15 +323,22 @@ class BagheeraSearcher:
|
||||
continue
|
||||
|
||||
self.ids_processed.add(file_id)
|
||||
exclude_pattern = search_opts.get("exclude")
|
||||
|
||||
if not exclude_pattern or not self.check_keywords(
|
||||
item["path"], exclude_pattern, item["path"], file_id
|
||||
):
|
||||
if exclude_evaluator:
|
||||
file_info = {'path': item["path"]}
|
||||
if exclude_sources.get('properties'):
|
||||
file_info = file_info | get_info(file_id)
|
||||
if exclude_sources.get('tags'):
|
||||
file_info = file_info | get_tags(file_id)
|
||||
else:
|
||||
file_info = None
|
||||
|
||||
if not file_info or not exclude_evaluator(file_info):
|
||||
if is_recursive:
|
||||
main_options["directory"] = item["path"]
|
||||
yield from self.search_recursive(
|
||||
query_text, main_options, search_opts, files_count
|
||||
query_text, main_options, search_opts, files_count,
|
||||
recurse_exclude_evaluator, recurse_exclude_sources
|
||||
)
|
||||
else:
|
||||
yield item
|
||||
|
||||
@@ -5,7 +5,7 @@ Bagheera Search Tool - CLI Client
|
||||
"""
|
||||
|
||||
__appname__ = "BagheeraSearch"
|
||||
__version__ = "1.0"
|
||||
__version__ = "1.1"
|
||||
__author__ = "Ignacio Serantes"
|
||||
__email__ = "kde@aynoa.net"
|
||||
__license__ = "LGPL"
|
||||
@@ -24,9 +24,9 @@ from bagheera_search_lib import BagheeraSearcher
|
||||
# --- CONFIGURATION ---
|
||||
PROG_NAME = "Bagheera Search Tool"
|
||||
PROG_ID = "bagheerasearch"
|
||||
PROG_VERSION = "1.0"
|
||||
PROG_BY = "Ignacio Serantes"
|
||||
PROG_DATE = "2026-03-19"
|
||||
PROG_VERSION = __version__
|
||||
PROG_BY = __author__
|
||||
PROG_DATE = "2026-05-09"
|
||||
|
||||
CONFIG_DIR = Path.home() / ".config" / PROG_ID
|
||||
CONFIG_FILE = CONFIG_DIR / "config.json"
|
||||
@@ -61,9 +61,9 @@ Baloo offers a rich syntax for searching through your files. Certain attributes
|
||||
|
||||
For example 'type' can be used to filter for files based on their general type:
|
||||
|
||||
type:Audio or type:Document
|
||||
type:Audio OR type:Document
|
||||
|
||||
The following comparison operators are supported, but note that 'not equal' operator is not available.
|
||||
The following comparison operators are supported, but note that 'not equal' (!=) operator is not available.
|
||||
· : - contains (only for text comparison)
|
||||
· = - equal
|
||||
· > - greater than
|
||||
@@ -72,7 +72,6 @@ The following comparison operators are supported, but note that 'not equal' oper
|
||||
· <= - less than or equal to
|
||||
|
||||
Currently the following types are supported:
|
||||
|
||||
· Archive
|
||||
· Folder
|
||||
· Audio
|
||||
@@ -83,9 +82,77 @@ Currently the following types are supported:
|
||||
· Presentation
|
||||
· Text
|
||||
|
||||
These expressions can be combined using AND or OR and additional parenthesis, but note that 'NOT' logical operator is not available.
|
||||
These expressions can be combined using logical operators 'AND' or 'OR' and additional parenthesis, but note that 'NOT' logical operator is not available.
|
||||
|
||||
|
||||
The full list of properties which can be searched is listed below. They are grouped by file types.
|
||||
|
||||
All Files
|
||||
· filename
|
||||
· mimetype
|
||||
· modified
|
||||
· rating
|
||||
· tags
|
||||
· userComment
|
||||
|
||||
Audio
|
||||
· Album
|
||||
· AlbumArtist
|
||||
· Artist
|
||||
· BitRate
|
||||
· Channels
|
||||
· Comment
|
||||
· Composer
|
||||
· Duration
|
||||
· Genre
|
||||
· Lyricist
|
||||
· ReleaseYear
|
||||
· SampleRate
|
||||
· TrackNumber
|
||||
|
||||
Documents
|
||||
· Author
|
||||
· Copyright
|
||||
· CreationDate
|
||||
· Generator
|
||||
· Keywords
|
||||
· Language
|
||||
· LineCount
|
||||
· PageCount
|
||||
· Publisher
|
||||
· Subject
|
||||
· Title
|
||||
· WordCount
|
||||
|
||||
Media
|
||||
· AspectRatio
|
||||
· FrameRate
|
||||
· Height
|
||||
· ImageDateTime
|
||||
· ImageMake
|
||||
· ImageModel
|
||||
· ImageOrientation
|
||||
· Images
|
||||
· PhotoApertureValue
|
||||
· PhotoDateTimeOriginal
|
||||
· PhotoExposureBiasValue
|
||||
· PhotoExposureTime
|
||||
· PhotoFlash
|
||||
· PhotoFNumber
|
||||
· PhotoFocalLength
|
||||
· PhotoFocalLengthIn35mmFilm
|
||||
· PhotoGpsAltitude
|
||||
· PhotoGpsLatitude
|
||||
· PhotoGpsLongitude
|
||||
· PhotoISOSpeedRatings
|
||||
· PhotoMeteringMode
|
||||
· PhotoPixelXDimension
|
||||
· PhotoPixelYDimension
|
||||
· PhotoSaturation
|
||||
· PhotoSharpness
|
||||
· PhotoWhiteBalance
|
||||
· Width
|
||||
|
||||
[... omitted for brevity, but includes the full list of searchable properties as in your original script ...]
|
||||
|
||||
{PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine.
|
||||
|
||||
@@ -93,21 +160,18 @@ Supported natural language sentences and patterns for queries are:
|
||||
· MODIFIED TODAY
|
||||
· MODIFIED YESTERDAY
|
||||
· MODIFIED THIS [ DAY | WEEK | MONTH | YEAR ]
|
||||
· LAST <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ]
|
||||
· <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] AGO
|
||||
· MODIFIED LAST <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ]
|
||||
· MODIFIED <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] AGO
|
||||
|
||||
<NUMBER> can be any number or a number text from ONE to TWENTY.
|
||||
|
||||
Remarks: LAST DAY, if used, is interpreted as YESTERDAY.
|
||||
|
||||
Supported expressions for --exclude and --recursive-exclude are:
|
||||
· width<CMP_OP>height - only if file has width and height properties
|
||||
· height<CMP_OP>width - only if file has width and height properties
|
||||
· PORTRAIT - only if file width is greater or equal to height
|
||||
· LANDSCAPE - only if file height is greater or equal to width
|
||||
· SQUARE - only if file width equals to height
|
||||
|
||||
<CMP_OP> can be: != | >= | <= | = | > | <"""
|
||||
The --exclude and --recursive-exclude options allow you to filter files out of the results. The syntax for both options supports parentheses and logical operators (AND, OR, and NOT) to combine multiple patterns.
|
||||
In addition to standard query comparison operators, the not equal (!=) operator is available for comparing properties against specific values. Furthermore, you can compare two properties directly; for example, 'width > height' is a valid expression.
|
||||
Remarks:
|
||||
· All text comparison are case insensitive.
|
||||
· Tags comparisons are performed against both individual full tag string (using the '/' character as a level separator) and each individual level. All individual level values are normalized to lowercase and stripped of accents or diacritics. For example, a file tagged as 'Opera,Person/María Callas,Singer' would match any of the following elements: ['Opera', 'Person/María Callas', 'Singer', 'callas', 'maria', 'opera', 'person', 'singer']."
|
||||
· Only text and numeric data are supported."""
|
||||
print(help_query)
|
||||
|
||||
|
||||
@@ -146,7 +210,7 @@ def main():
|
||||
|
||||
parser.add_argument("--day", type=int, help="day fixed filter, --month is required")
|
||||
parser.add_argument("--month", type=int, help="month fixed filter, --year is required")
|
||||
parser.add_argument("--year", type=int, help="year filter fixed filter")
|
||||
parser.add_argument("--year", type=int, help="year fixed filter")
|
||||
|
||||
parser.add_argument("--help-query", action="store_true", help="show query syntax help")
|
||||
parser.add_argument("--version", action="store_true", help="show version information")
|
||||
@@ -163,7 +227,7 @@ def main():
|
||||
raise ValueError("Missing --month (required when --day is used)")
|
||||
|
||||
if args.month is not None and args.year is None:
|
||||
raise ValueError("Missing --year (requered when --month is used)")
|
||||
raise ValueError("Missing --year (required when --month is used)")
|
||||
|
||||
if args.help_query:
|
||||
print_help_query()
|
||||
|
||||
@@ -1,7 +1,19 @@
|
||||
from .baloo_tools import BalooTools
|
||||
|
||||
|
||||
def get_info(id):
    """Simplified entry point for the library.

    Creates a ``BalooTools`` instance and returns its ``get_info`` result
    for the given file id.
    """
    return BalooTools().get_info(id)
|
||||
|
||||
|
||||
def get_resolution(id):
    """Simplified entry point for the library.

    Creates a ``BalooTools`` instance and returns its ``get_resolution``
    result for the given file id.
    """
    return BalooTools().get_resolution(id)
|
||||
|
||||
|
||||
def get_tags(id):
    """Simplified entry point for the library.

    Creates a ``BalooTools`` instance and returns its ``get_tags`` result
    for the given file id.
    """
    return BalooTools().get_tags(id)
|
||||
|
||||
@@ -8,9 +8,97 @@ Helper functions to interact directly with the Baloo LMDB index.
|
||||
import json
|
||||
import lmdb
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Tuple
|
||||
|
||||
# Maps the string keys of the JSON document read by BalooTools.get_info to
# readable property names.  get_info passes keys that are missing from this
# table through unchanged.
# NOTE(review): the ids presumably mirror the KFileMetaData property
# numbering - confirm against the KFileMetaData version in use.
PROPERTIES_ID_MAP = {
    '0': 'Empty',
    '1': 'BitRate',
    '2': 'Channels',
    '3': 'Duration',
    '4': 'Genre',
    '5': 'SampleRate',
    '6': 'TrackNumber',
    '7': 'ReleaseYear',
    '8': 'Comment',
    '9': 'Artist',
    '10': 'Album',
    '11': 'AlbumArtist',
    '12': 'Composer',
    '13': 'Lyricist',
    '14': 'Author',
    '15': 'Title',
    '16': 'Subject',
    '17': 'Generator',
    '18': 'PageCount',
    '19': 'WordCount',
    '20': 'LineCount',
    '21': 'Language',
    '22': 'Copyright',
    '23': 'Publisher',
    '24': 'CreationDate',
    '25': 'Keywords',
    '26': 'Width',
    '27': 'Height',
    '28': 'AspectRatio',
    '29': 'FrameRate',
    '30': 'Manufacturer',
    '31': 'Model',
    '32': 'ImageDateTime',
    '33': 'ImageOrientation',
    '34': 'PhotoFlash',
    '35': 'PhotoPixelXDimension',
    '36': 'PhotoPixelYDimension',
    '37': 'PhotoDateTimeOriginal',
    '38': 'PhotoFocalLength',
    '39': 'PhotoFocalLengthIn35mmFilm',
    '40': 'PhotoExposureTime',
    '41': 'PhotoFNumber',
    '42': 'PhotoApertureValue',
    '43': 'PhotoExposureBiasValue',
    '44': 'PhotoWhiteBalance',
    '45': 'PhotoMeteringMode',
    '46': 'PhotoISOSpeedRatings',
    '47': 'PhotoSaturation',
    '48': 'PhotoSharpness',
    '49': 'PhotoGpsLatitude',
    '50': 'PhotoGpsLongitude',
    '51': 'PhotoGpsAltitude',
    '52': 'TranslationUnitsTotal',
    '53': 'TranslationUnitsWithTranslation',
    '54': 'TranslationUnitsWithDraftTranslation',
    '55': 'TranslationLastAuthor',
    '56': 'TranslationLastUpDate',
    '57': 'TranslationTemplateDate',
    '58': 'OriginUrl',
    '59': 'OriginEmailSubject',
    '60': 'OriginEmailSender',
    '61': 'OriginEmailMessageId',
    '62': 'DiscNumber',
    '63': 'Location',
    '64': 'Performer',
    '65': 'Ensemble',
    '66': 'Arranger',
    '67': 'Conductor',
    '68': 'Opus',
    '69': 'Label',
    '70': 'Compilation',
    '71': 'License',
    '72': 'Rating',
    '73': 'Lyrics',
    '74': 'ReplayGainAlbumPeak',
    '75': 'ReplayGainAlbumGain',
    '76': 'ReplayGainTrackPeak',
    '77': 'ReplayGainTrackGain',
    '78': 'Description',
    '79': 'VideoCodec',
    '80': 'AudioCodec',
    '81': 'PixelFormat',
    '82': 'ColorSpace',
    '83': 'AssistiveAlternateDescription'
}
|
||||
|
||||
|
||||
class BalooTools:
|
||||
"""Class to interact directly with the Baloo LMDB index."""
|
||||
@@ -21,16 +109,15 @@ class BalooTools:
|
||||
os.path.expanduser("~"), ".local/share/baloo/index"
|
||||
)
|
||||
|
||||
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
|
||||
def get_info(self, file_id: int) -> json:
|
||||
"""
|
||||
Retrieves the width and height of an image/video from the Baloo index.
|
||||
Retrieves file metadata from the Baloo index.
|
||||
|
||||
Args:
|
||||
file_id: The integer ID of the file.
|
||||
sep: Separator used (unused currently, kept for compatibility).
|
||||
|
||||
Returns:
|
||||
A tuple of (width, height) integers. Returns (-1, -1) if not found.
|
||||
A json with all file metadata fields.
|
||||
"""
|
||||
try:
|
||||
# Using context manager ensures the environment is closed properly
|
||||
@@ -58,15 +145,85 @@ class BalooTools:
|
||||
|
||||
try:
|
||||
jvalue = json.loads(value.decode())
|
||||
# Baloo stores width in '26' and height in '27'
|
||||
return jvalue.get('26', -1), jvalue.get('27', -1)
|
||||
return {PROPERTIES_ID_MAP.get(k, k):
|
||||
v for k, v in jvalue.items()}
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
return -1, -1
|
||||
return {}
|
||||
|
||||
except lmdb.Error as e:
|
||||
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
|
||||
|
||||
return -1, -1
|
||||
return {}
|
||||
|
||||
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
    """
    Retrieves the width and height of an image/video from the Baloo index.

    Args:
        file_id: The integer ID of the file.
        sep: Separator used (unused currently, kept for compatibility).

    Returns:
        A tuple of (width, height). Each element is -1 when the
        corresponding property is not present in the file metadata.
    """
    file_info = self.get_info(file_id)
    # get_info() renames the numeric ids through PROPERTIES_ID_MAP, so the
    # dimensions arrive as 'Width'/'Height'.  The raw ids '26'/'27' are
    # still accepted as a fallback for metadata that was not renamed.
    width = file_info.get('Width', file_info.get('26', -1))
    height = file_info.get('Height', file_info.get('27', -1))
    return width, height
|
||||
|
||||
def get_tags(self, file_id: int) -> dict:
    """
    Retrieves the list of file tags from the Baloo index.

    Args:
        file_id: The integer ID of the file.

    Returns:
        A dict ``{'tags': [...]}`` whose value is a list of tag strings, or
        an empty dict when no record with this file id is found or the LMDB
        index cannot be accessed.
    """
    try:
        # Using context manager ensures the environment is closed properly
        with lmdb.Environment(
            self.baloo_db_path,
            subdir=False,
            readonly=True,
            lock=False,
            max_dbs=20
        ) as env:
            document_data_db = env.open_db(b'docxatrrterms')

            with env.begin() as txn:
                cursor = txn.cursor(document_data_db)

                # Convert ID to 8-byte little-endian format
                file_id_bytes = int.to_bytes(
                    file_id, length=8, byteorder='little', signed=False
                )

                if cursor.set_range(file_id_bytes):
                    for key, value in cursor:
                        # Stop as soon as the record belongs to another key
                        if key != file_id_bytes:
                            break

                        text = value.decode('utf-8', errors='replace')
                        # Drop NUL characters unless they are followed by 'T',
                        # then split on the remaining NUL / 0x01 characters
                        text = re.sub(r'\x00(?![T])', '', text)
                        parts = re.split(r'[\x00\x01]', text)

                        tags = []
                        for p in parts:
                            p = p.strip()
                            if p:
                                # Strip a leading 'TAG-' and then a leading 'TA'
                                tag = p.removeprefix('TAG-').removeprefix('TA')
                                tags.append(tag)

                        # NOTE(review): returns from inside the loop, so only
                        # the first matching record is used - confirm intended
                        return {'tags': tags}
                        # return {'tags': ",".join(tags)}

    except lmdb.Error as e:
        print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)

    return {}
|
||||
|
||||
|
||||
# Helper function to maintain compatibility with bagheera_search_lib.py
|
||||
|
||||
39
pyproject.toml
Normal file
39
pyproject.toml
Normal file
@@ -0,0 +1,39 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "bagheerasearch"
|
||||
version = "1.1.0"
|
||||
authors = [
|
||||
{ name="Ignacio Serantes" },
|
||||
]
|
||||
description = "Bagheera Search Tool"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"lmdb",
|
||||
"pyparsing",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
bagheerasearch = "bagheerasearch:main"
|
||||
|
||||
[tool.setuptools]
|
||||
py-modules = ["bagheerasearch"]
|
||||
packages = [
|
||||
"bagheera_query_parser_lib",
|
||||
"bagheera_search_lib",
|
||||
"baloo_tools"
|
||||
]
|
||||
include-package-data = true
|
||||
zip-safe = false
|
||||
|
||||
[tool.setuptools.package-dir]
|
||||
"" = "."
|
||||
"bagheera_query_parser_lib" = "bagheera_query_parser_lib"
|
||||
"bagheera_search_lib" = "bagheera_search_lib"
|
||||
"baloo_tools" = "baloo_tools"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"*" = ["libbaloo_wrapper.so"]
|
||||
193
setup.py
193
setup.py
@@ -6,12 +6,7 @@ from setuptools.command.install import install
|
||||
from setuptools.command.develop import develop
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
|
||||
def compile_wrapper():
|
||||
"""
|
||||
Compila libbaloo_wrapper.so forzando la ruta de inclusión profunda
|
||||
detectada para KFileMetaData en KF6.
|
||||
"""
|
||||
base_path = os.path.abspath(os.path.dirname(__file__))
|
||||
source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
|
||||
output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')
|
||||
@@ -20,37 +15,22 @@ def compile_wrapper():
|
||||
print(f"✘ Error: Source file not found at {source_file}")
|
||||
sys.exit(1)
|
||||
|
||||
# Paquetes para pkg-config (nombres comunes en KF6)
|
||||
packages = [
|
||||
'KF6Baloo',
|
||||
'KF6BalooEngine',
|
||||
'KF6FileMetadata',
|
||||
'KF6CoreAddons',
|
||||
'Qt6Core'
|
||||
]
|
||||
|
||||
packages = ['KF6Baloo', 'KF6BalooEngine', 'KF6FileMetadata', 'KF6CoreAddons', 'Qt6Core']
|
||||
cflags = []
|
||||
libs = []
|
||||
|
||||
print("Detecting KF6 dependencies...")
|
||||
for pkg in packages:
|
||||
try:
|
||||
cf = subprocess.check_output(['pkg-config', '--cflags', pkg],
|
||||
text=True).split()
|
||||
lb = subprocess.check_output(['pkg-config', '--libs', pkg],
|
||||
text=True).split()
|
||||
cf = subprocess.check_output(['pkg-config', '--cflags', pkg], text=True).split()
|
||||
lb = subprocess.check_output(['pkg-config', '--libs', pkg], text=True).split()
|
||||
cflags.extend(cf)
|
||||
libs.extend(lb)
|
||||
print(f" [OK] {pkg}")
|
||||
except subprocess.CalledProcessError:
|
||||
print(f" [!] Warning: pkg-config could not find {pkg}")
|
||||
|
||||
# CONFIGURACIÓN DE RUTAS SEGÚN TU SISTEMA:
|
||||
# Añadimos el nivel intermedio para que <KFileMetaData/ExtractorCollection>
|
||||
# se encuentre en /usr/include/KF6/KFileMetaData/KFileMetaData/
|
||||
extra_includes = [
|
||||
'-I/usr/include/KF6',
|
||||
'-I/usr/include/KF6/KFileMetaData', # Permite resolver KFileMetaData/
|
||||
'-I/usr/include/KF6/KFileMetaData',
|
||||
'-I/usr/include/qt6',
|
||||
'-I/usr/include/qt6/QtCore'
|
||||
]
|
||||
@@ -58,7 +38,6 @@ def compile_wrapper():
|
||||
cflags = list(set(cflags + extra_includes))
|
||||
libs = list(set(libs))
|
||||
|
||||
# Comando de compilación C++17 replicando tu CMakeLists.txt [cite: 1, 2]
|
||||
compile_cmd = [
|
||||
'g++', '-shared', '-o', output_lib,
|
||||
'-fPIC', '-std=c++17',
|
||||
@@ -68,60 +47,158 @@ def compile_wrapper():
|
||||
try:
|
||||
print(f"Executing compilation:\n{' '.join(compile_cmd)}")
|
||||
subprocess.check_call(compile_cmd)
|
||||
|
||||
if os.path.exists(output_lib):
|
||||
print(f"✔ Successfully compiled: {output_lib}")
|
||||
else:
|
||||
raise FileNotFoundError("Compilation finished but .so file is missing.")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"\n✘ Compilation failed (Exit code {e.returncode}).")
|
||||
print(f"\n✘ Compilation failed.")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class CustomInstall(install):
    """``install`` command that compiles the wrapper library first."""

    def run(self):
        """Call ``compile_wrapper`` and then the regular ``install`` step."""
        compile_wrapper()
        install.run(self)
|
||||
|
||||
|
||||
class CustomDevelop(develop):
    """``develop`` command that compiles the wrapper library first."""

    def run(self):
        """Call ``compile_wrapper`` and then the regular ``develop`` step."""
        compile_wrapper()
        develop.run(self)
|
||||
|
||||
|
||||
class CustomBuildExt(build_ext):
    """``build_ext`` command that compiles the wrapper library first."""

    def run(self):
        """Call ``compile_wrapper`` and then the regular ``build_ext`` step."""
        compile_wrapper()
        build_ext.run(self)
|
||||
|
||||
|
||||
# Only cmdclass is kept here; everything else is read from pyproject.toml
|
||||
setup(
|
||||
name="bagheerasearch",
|
||||
version="1.0.0",
|
||||
author="Ignacio Serantes",
|
||||
description="Bagheera Search Tool & Lib (KF6/C++17)",
|
||||
py_modules=["bagheerasearch"],
|
||||
package_dir={
|
||||
"": ".",
|
||||
"bagheera_query_parser_lib": "bagheera_query_parser_lib",
|
||||
"bagheera_search_lib": "bagheera_search_lib",
|
||||
"baloo_tools": "baloo_tools",
|
||||
},
|
||||
packages=[
|
||||
"bagheera_query_parser_lib",
|
||||
"bagheera_search_lib",
|
||||
"baloo_tools"
|
||||
],
|
||||
install_requires=["lmdb"],
|
||||
entry_points={'console_scripts': ['bagheerasearch=bagheerasearch:main']},
|
||||
cmdclass={
|
||||
'install': CustomInstall,
|
||||
'develop': CustomDevelop,
|
||||
'build_ext': CustomBuildExt,
|
||||
},
|
||||
data_files=[('lib', ['libbaloo_wrapper.so'])],
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
}
|
||||
)
|
||||
|
||||
# import os
|
||||
# import subprocess
|
||||
# import sys
|
||||
# from setuptools import setup
|
||||
# from setuptools.command.install import install
|
||||
# from setuptools.command.develop import develop
|
||||
# from setuptools.command.build_ext import build_ext
|
||||
#
|
||||
#
|
||||
# def compile_wrapper():
|
||||
# """
|
||||
# Compila libbaloo_wrapper.so forzando la ruta de inclusión profunda
|
||||
# detectada para KFileMetaData en KF6.
|
||||
# """
|
||||
# base_path = os.path.abspath(os.path.dirname(__file__))
|
||||
# source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
|
||||
# output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')
|
||||
#
|
||||
# if not os.path.exists(source_file):
|
||||
# print(f"✘ Error: Source file not found at {source_file}")
|
||||
# sys.exit(1)
|
||||
#
|
||||
# # Paquetes para pkg-config (nombres comunes en KF6)
|
||||
# packages = [
|
||||
# 'KF6Baloo',
|
||||
# 'KF6BalooEngine',
|
||||
# 'KF6FileMetadata',
|
||||
# 'KF6CoreAddons',
|
||||
# 'Qt6Core'
|
||||
# ]
|
||||
#
|
||||
# cflags = []
|
||||
# libs = []
|
||||
#
|
||||
# print("Detecting KF6 dependencies...")
|
||||
# for pkg in packages:
|
||||
# try:
|
||||
# cf = subprocess.check_output(['pkg-config', '--cflags', pkg],
|
||||
# text=True).split()
|
||||
# lb = subprocess.check_output(['pkg-config', '--libs', pkg],
|
||||
# text=True).split()
|
||||
# cflags.extend(cf)
|
||||
# libs.extend(lb)
|
||||
# print(f" [OK] {pkg}")
|
||||
# except subprocess.CalledProcessError:
|
||||
# print(f" [!] Warning: pkg-config could not find {pkg}")
|
||||
#
|
||||
# # CONFIGURACIÓN DE RUTAS SEGÚN TU SISTEMA:
|
||||
# # Añadimos el nivel intermedio para que <KFileMetaData/ExtractorCollection>
|
||||
# # se encuentre en /usr/include/KF6/KFileMetaData/KFileMetaData/
|
||||
# extra_includes = [
|
||||
# '-I/usr/include/KF6',
|
||||
# '-I/usr/include/KF6/KFileMetaData', # Permite resolver KFileMetaData/
|
||||
# '-I/usr/include/qt6',
|
||||
# '-I/usr/include/qt6/QtCore'
|
||||
# ]
|
||||
#
|
||||
# cflags = list(set(cflags + extra_includes))
|
||||
# libs = list(set(libs))
|
||||
#
|
||||
# # Comando de compilación C++17 replicando tu CMakeLists.txt [cite: 1, 2]
|
||||
# compile_cmd = [
|
||||
# 'g++', '-shared', '-o', output_lib,
|
||||
# '-fPIC', '-std=c++17',
|
||||
# source_file
|
||||
# ] + cflags + libs
|
||||
#
|
||||
# try:
|
||||
# print(f"Executing compilation:\n{' '.join(compile_cmd)}")
|
||||
# subprocess.check_call(compile_cmd)
|
||||
#
|
||||
# if os.path.exists(output_lib):
|
||||
# print(f"✔ Successfully compiled: {output_lib}")
|
||||
# else:
|
||||
# raise FileNotFoundError("Compilation finished but .so file is missing.")
|
||||
#
|
||||
# except subprocess.CalledProcessError as e:
|
||||
# print(f"\n✘ Compilation failed (Exit code {e.returncode}).")
|
||||
# sys.exit(1)
|
||||
#
|
||||
#
|
||||
# class CustomInstall(install):
|
||||
# def run(self):
|
||||
# compile_wrapper()
|
||||
# super().run()
|
||||
#
|
||||
#
|
||||
# class CustomDevelop(develop):
|
||||
# def run(self):
|
||||
# compile_wrapper()
|
||||
# super().run()
|
||||
#
|
||||
#
|
||||
# class CustomBuildExt(build_ext):
|
||||
# def run(self):
|
||||
# compile_wrapper()
|
||||
# super().run()
|
||||
#
|
||||
#
|
||||
# setup(
|
||||
# name="bagheerasearch",
|
||||
# version="1.0.0",
|
||||
# author="Ignacio Serantes",
|
||||
# description="Bagheera Search Tool & Lib (KF6/C++17)",
|
||||
# py_modules=["bagheerasearch"],
|
||||
# package_dir={
|
||||
# "": ".",
|
||||
# "bagheera_query_parser_lib": "bagheera_query_parser_lib",
|
||||
# "bagheera_search_lib": "bagheera_search_lib",
|
||||
# "baloo_tools": "baloo_tools",
|
||||
# },
|
||||
# packages=[
|
||||
# "bagheera_query_parser_lib",
|
||||
# "bagheera_search_lib",
|
||||
# "baloo_tools"
|
||||
# ],
|
||||
# install_requires=["lmdb"],
|
||||
# entry_points={'console_scripts': ['bagheerasearch=bagheerasearch:main']},
|
||||
# cmdclass={
|
||||
# 'install': CustomInstall,
|
||||
# 'develop': CustomDevelop,
|
||||
# 'build_ext': CustomBuildExt,
|
||||
# },
|
||||
# data_files=[('lib', ['libbaloo_wrapper.so'])],
|
||||
# include_package_data=True,
|
||||
# zip_safe=False,
|
||||
# )
|
||||
|
||||
Reference in New Issue
Block a user