First commit
This commit is contained in:
170
bagheera_query_parser_lib/bagheera_query_parser.py.sav
Normal file
170
bagheera_query_parser_lib/bagheera_query_parser.py.sav
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Bagheera Query Parser
|
||||
Converts natural language English date expressions into Baloo-compatible queries.
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class BagheeraQueryParser:
    """Rewrite upper-case English date phrases as Baloo date filters.

    Recognised phrases (matched case-sensitively, upper case only):

    * ``MODIFIED TODAY`` / ``MODIFIED YESTERDAY``
    * ``MODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)``
    * ``MODIFIED LAST <N> (YEAR|MONTH|WEEK|DAY)[S]``
    * ``MODIFIED <N> (YEAR|MONTH|WEEK|DAY)[S] AGO``

    ``<N>`` may be digits or a spelled-out number from ONE to TWENTY.
    Text that matches none of these phrases is passed through, except
    that runs of whitespace are collapsed to single spaces.
    """

    _DATE_FORMAT = '%Y-%m-%d'

    # Spelled-out numbers understood in queries (ONE to TWENTY).
    _NUMBER_WORDS: Dict[str, int] = {
        'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5,
        'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10,
        'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14,
        'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18,
        'NINETEEN': 19, 'TWENTY': 20,
    }

    def __init__(self):
        # parse_date() refreshes this on every call, so a process that stays
        # open for days never works from a stale date.
        self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    def _convert_numbers(self, query: str) -> str:
        """Replace written numbers (ONE to TWENTY) with their digits.

        Only whole, whitespace-delimited, upper-case words are converted.
        The query is split on whitespace and re-joined with single spaces;
        the phrase patterns in parse_date() rely on that normalisation.
        """
        numbers = self._NUMBER_WORDS
        return " ".join(
            str(numbers[word]) if word in numbers else word
            for word in query.split()
        )

    def _get_start_of_unit(self, dt, unit, offset=0):
        """Return the first day of the unit containing *dt*, *offset* units back.

        Args:
            dt: Reference datetime; its time-of-day fields are kept as they are.
            unit: One of 'YEAR', 'MONTH', 'WEEK' or 'DAY'.
            offset: Number of whole units to go back (0 = the current unit).

        Returns:
            A datetime on 1 January (YEAR), the 1st of the month (MONTH),
            Monday (WEEK) or the day itself (DAY).

        Raises:
            ValueError: If *unit* is not supported or the year is out of range.
            OverflowError: If the offset is too large for date arithmetic.
        """
        if unit == 'YEAR':
            return dt.replace(year=dt.year - offset, month=1, day=1)
        if unit == 'MONTH':
            # Work on an absolute month count so divmod crosses any number of
            # year boundaries in one step instead of looping once per year.
            year, month_index = divmod(dt.year * 12 + (dt.month - 1) - offset, 12)
            return dt.replace(year=year, month=month_index + 1, day=1)
        if unit == 'WEEK':
            # weekday() is 0 on Monday, so this lands on the Monday of the week.
            return dt - timedelta(days=dt.weekday() + offset * 7)
        if unit == 'DAY':
            return dt - timedelta(days=offset)
        raise ValueError(f"Unsupported unit: {unit!r}")

    def _subtract_units(self, dt, unit, n):
        """Return *dt* moved *n* units into the past.

        NOTE: for 'MONTH' the result is snapped to the first day of the
        target month, unlike the other units, which keep the day.

        Args:
            dt: Reference datetime.
            unit: One of 'YEAR', 'MONTH', 'WEEK' or 'DAY'.
            n: Number of units to subtract.

        Raises:
            ValueError: If *unit* is not supported or the year is out of range.
            OverflowError: If *n* is too large for date arithmetic.
        """
        if unit == 'YEAR':
            target_year = dt.year - n
            if (dt.month, dt.day) == (2, 29):
                # 29 Feb only exists in leap years. Stepping back one day from
                # 1 March gives 29 Feb when the target year has one and
                # 28 Feb otherwise, instead of raising ValueError.
                return dt.replace(year=target_year, month=3, day=1) - timedelta(days=1)
            return dt.replace(year=target_year)
        if unit == 'MONTH':
            return self._get_start_of_unit(dt, 'MONTH', offset=n)
        if unit == 'WEEK':
            return dt - timedelta(weeks=n)
        if unit == 'DAY':
            return dt - timedelta(days=n)
        raise ValueError(f"Unsupported unit: {unit!r}")

    def _format_range(self, start, end):
        """Render the half-open interval [start, end) as a date filter."""
        fmt = self._DATE_FORMAT
        return f"(modified>={start.strftime(fmt)} AND modified<{end.strftime(fmt)})"

    def _replace_simple(self, m):
        """Expand a ``MODIFIED (LAST|THIS) <UNIT>`` match into a date range."""
        mod, unit = m.groups()
        if mod == "THIS":
            # From the start of the current unit up to and including today.
            start = self._get_start_of_unit(self.today, unit)
            end = self.today + timedelta(days=1)
        else:
            # The whole previous unit; the end bound is exclusive.
            start = self._get_start_of_unit(self.today, unit, offset=1)
            end = self._get_start_of_unit(self.today, unit)
        return self._format_range(start, end)

    def _replace_last_n(self, m):
        """Expand a ``MODIFIED LAST <N> <UNIT>`` match into a date range.

        The range runs from N units before today up to and including today.
        If N is too large to express as a date, the phrase is left as is.
        """
        n, unit = m.groups()
        try:
            start = self._subtract_units(self.today, unit, int(n))
        except (ValueError, OverflowError):
            return m.group(0)
        return self._format_range(start, self.today + timedelta(days=1))

    def _replace_ago(self, m):
        """Expand a ``MODIFIED <N> <UNIT> AGO`` match into a date range.

        The range covers the single whole unit that lies N units before the
        current one, so ``1 DAY AGO`` is yesterday and ``1 WEEK AGO`` is the
        previous Monday-to-Sunday week. If N is too large to express as a
        date, the phrase is left as is.
        """
        n, unit = m.groups()
        try:
            count = int(n)
            # Count back from the start of the *current* unit; starting from
            # the previous unit would shift every result one unit too far.
            base = self._get_start_of_unit(self.today, unit)
            end = self._subtract_units(base, unit, count - 1)
            start = self._subtract_units(base, unit, count)
        except (ValueError, OverflowError):
            return m.group(0)
        return self._format_range(start, end)

    def parse_date(self, query, today=None):
        """Return *query* with recognised date phrases replaced by filters.

        Args:
            query: Search text, possibly containing the phrases listed in
                the class docstring.
            today: Optional datetime to treat as "today"; defaults to the
                current local time. It is truncated to midnight.

        Returns:
            The rewritten query string. Phrases that are not recognised
            (for example lower-case ones) are left unchanged.
        """
        reference = datetime.now() if today is None else today
        self.today = reference.replace(hour=0, minute=0, second=0, microsecond=0)
        fmt = self._DATE_FORMAT

        q = self._convert_numbers(query)

        # 1. TODAY / YESTERDAY become a single-day equality filter.
        q = re.sub(r'\bMODIFIED TODAY\b', f"modified={self.today.strftime(fmt)}", q)
        yesterday = self.today - timedelta(days=1)
        q = re.sub(r'\bMODIFIED YESTERDAY\b', f"modified={yesterday.strftime(fmt)}", q)

        # 2. (LAST|THIS) (YEAR|MONTH|WEEK), wherever it appears in the query.
        q = re.sub(r"\bMODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)\b", self._replace_simple, q)

        # 3. LAST <N> (YEAR|MONTH|WEEK|DAY)
        q = re.sub(r"\bMODIFIED LAST (\d+) (YEAR|MONTH|WEEK|DAY)S?\b",
                   self._replace_last_n, q)

        # 4. <N> (YEAR|MONTH|WEEK|DAY) AGO
        q = re.sub(r"\bMODIFIED (\d+) (YEAR|MONTH|WEEK|DAY)S? AGO\b", self._replace_ago, q)

        return q
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Basic smoke checks for date parsing: print each input next to its
    # parsed output so the result can be inspected by eye.
    well_formed_queries = [
        "MODIFIED TODAY",
        "first MODIFIED YESTERDAY last",
        "MODIFIED ONE DAY AGO",
        "MODIFIED TWO DAYS AGO",
        "MODIFIED THREE DAYS AGO",
        "MODIFIED LAST TWO DAYS",
        "MODIFIED THIS WEEK",
        "MODIFIED LAST WEEK",
        "MODIFIED LAST TWO WEEKS",
        "MODIFIED ONE WEEK AGO",
        "MODIFIED TWO WEEKS AGO",
        "MODIFIED THREE WEEKS AGO",
        "MODIFIED THIS MONTH",
        "MODIFIED LAST MONTH",
        "MODIFIED LAST TWO MONTHS",
        "MODIFIED ONE MONTH AGO",
        "MODIFIED TWO MONTHS AGO",
        "MODIFIED THREE MONTHS AGO",
        "MODIFIED THIS YEAR",
        "MODIFIED LAST YEAR",
        "MODIFIED LAST TWO YEARS",
        "MODIFIED ONE YEAR AGO",
        "MODIFIED TWO YEARS AGO",
        "MODIFIED THREE YEARS AGO",
        "foto MODIFIED LAST 2 YEARS",
    ]

    # Phrases run together without a separator, or written in lower case.
    irregular_queries = [
        "MODIFIED TODAYMODIFIED TODAY",
        "MODIFIED yesterday",
        "MODIFIED THIS MONTHMODIFIED THIS WEEK",
        "MODIFIED LAST YEARMODIFIED YESTERDAY",
        "modified TODAY",
        "modified today",
    ]

    # Each batch gets its own parser instance and its own banner line.
    for batch in (well_formed_queries, irregular_queries):
        query_parser = BagheeraQueryParser()
        print(f"Testing {__file__}:")
        for text in batch:
            print(f" Input: '{text}'")
            print(f" Output: '{query_parser.parse_date(text)}'")
            print("-" * 20)
|
||||
Reference in New Issue
Block a user