171 lines
6.3 KiB
Python
171 lines
6.3 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
Bagheera Query Parser
|
|
Converts natural language English date expressions into Baloo-compatible queries.
|
|
"""
|
|
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict
|
|
|
|
|
|
class BagheeraQueryParser:
    """Rewrite English date phrases in a query as Baloo ``modified`` filters.

    Only uppercase phrases starting with ``MODIFIED`` are recognised, e.g.
    ``MODIFIED TODAY``, ``MODIFIED LAST WEEK``, ``MODIFIED LAST 2 MONTHS`` or
    ``MODIFIED THREE DAYS AGO``. Everything else in the query is kept, except
    that runs of whitespace are collapsed to single spaces.
    """

    # Format used for every date written into the resulting query.
    _DATE_FORMAT = '%Y-%m-%d'

    # Written numbers (ONE to TWENTY) accepted in place of digits.
    _NUMBER_WORDS: Dict[str, int] = {
        'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5,
        'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10,
        'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14,
        'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18,
        'NINETEEN': 19, 'TWENTY': 20,
    }

    def __init__(self):
        # parse_date() refreshes this value on every call, so a process that
        # stays open for days never works with a stale date.
        self.today = self._midnight_today()

    @staticmethod
    def _midnight_today() -> datetime:
        """Return the current local date as a datetime at 00:00:00."""
        return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    def _format_range(self, start: datetime, end: datetime) -> str:
        """Return the half-open range ``[start, end)`` as a query fragment."""
        return (f"(modified>={start.strftime(self._DATE_FORMAT)} "
                f"AND modified<{end.strftime(self._DATE_FORMAT)})")

    def _convert_numbers(self, query: str) -> str:
        """Replace written numbers (ONE to TWENTY) with their digits.

        Only whole, uppercase, whitespace-delimited words are converted.
        The query is split on whitespace and re-joined with single spaces;
        the patterns in parse_date() match single spaces, so this
        normalisation is part of the contract.
        """
        numbers = self._NUMBER_WORDS
        return " ".join(
            str(numbers[word]) if word in numbers else word
            for word in query.split()
        )

    def _get_start_of_unit(self, dt: datetime, unit: str, offset: int = 0) -> datetime:
        """Return the first day of the unit lying ``offset`` units before ``dt``.

        ``unit`` is one of 'YEAR', 'MONTH', 'WEEK' (weeks start on Monday) or
        'DAY'. A negative ``offset`` moves forward in time. The time of day
        of ``dt`` is preserved.

        Raises:
            ValueError: if ``unit`` is unknown or the result is out of range.
        """
        if unit == 'YEAR':
            return dt.replace(year=dt.year - offset, month=1, day=1)
        if unit == 'MONTH':
            # Count months from year 0 so that any offset, positive or
            # negative, wraps across year boundaries correctly.
            year, month_index = divmod(dt.year * 12 + (dt.month - 1) - offset, 12)
            return dt.replace(year=year, month=month_index + 1, day=1)
        if unit == 'WEEK':
            return dt - timedelta(days=dt.weekday() + offset * 7)
        if unit == 'DAY':
            return dt - timedelta(days=offset)
        raise ValueError(f"Unsupported unit: {unit!r}")

    def _subtract_units(self, dt: datetime, unit: str, n: int) -> datetime:
        """Return ``dt`` moved back by ``n`` units.

        For 'MONTH' the result is snapped to the first day of the target
        month. For 'YEAR' a Feb 29 date maps to Feb 28 when the target year
        is not a leap year.

        Raises:
            ValueError: if ``unit`` is unknown or the result is out of range.
        """
        if unit == 'YEAR':
            target_year = dt.year - n
            try:
                return dt.replace(year=target_year)
            except ValueError:
                # Feb 29 does not exist in a non-leap target year.
                if dt.month == 2 and dt.day == 29:
                    return dt.replace(year=target_year, day=28)
                raise
        if unit == 'MONTH':
            return self._get_start_of_unit(dt, 'MONTH', offset=n)
        if unit == 'WEEK':
            return dt - timedelta(weeks=n)
        if unit == 'DAY':
            return dt - timedelta(days=n)
        raise ValueError(f"Unsupported unit: {unit!r}")

    def parse_date(self, query: str) -> str:
        """Return ``query`` with recognised date phrases replaced by filters.

        Supported phrases (uppercase only, numbers as digits or ONE..TWENTY):

        * ``MODIFIED TODAY`` / ``MODIFIED YESTERDAY``
        * ``MODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)``
        * ``MODIFIED LAST <N> (YEAR|MONTH|WEEK|DAY)[S]``
        * ``MODIFIED <N> (YEAR|MONTH|WEEK|DAY)[S] AGO``

        A phrase whose count would produce an out-of-range date is left
        untouched instead of raising.
        """
        self.today = self._midnight_today()
        today = self.today
        tomorrow = today + timedelta(days=1)
        q = self._convert_numbers(query)

        # 1. TODAY / YESTERDAY become a single-day equality filter.
        yesterday = today - timedelta(days=1)
        q = re.sub(r'\bMODIFIED TODAY\b',
                   f"modified={today.strftime(self._DATE_FORMAT)}", q)
        q = re.sub(r'\bMODIFIED YESTERDAY\b',
                   f"modified={yesterday.strftime(self._DATE_FORMAT)}", q)

        # 2. (LAST|THIS) (YEAR|MONTH|WEEK), matched anywhere in the query.
        def replace_simple(m):
            mod, unit = m.groups()
            current_start = self._get_start_of_unit(today, unit)
            if mod == "THIS":
                # From the start of the current unit up to and including today.
                return self._format_range(current_start, tomorrow)
            # The whole previous unit, ending where the current one starts.
            previous_start = self._get_start_of_unit(today, unit, offset=1)
            return self._format_range(previous_start, current_start)

        q = re.sub(r"\bMODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)\b", replace_simple, q)

        # 3. LAST <N> units: from N units back up to and including today.
        def replace_last_n(m):
            n, unit = m.groups()
            try:
                start = self._subtract_units(today, unit, int(n))
            except (ValueError, OverflowError):
                return m.group(0)
            return self._format_range(start, tomorrow)

        q = re.sub(r"\bMODIFIED LAST (\d+) (YEAR|MONTH|WEEK|DAY)S?\b",
                   replace_last_n, q)

        # 4. <N> units AGO: the single whole unit lying N units before the
        #    current one, so "1 WEEK AGO" covers the same span as "LAST WEEK".
        def replace_ago(m):
            n, unit = m.groups()
            try:
                count = int(n)
                current_start = self._get_start_of_unit(today, unit)
                start = self._subtract_units(current_start, unit, count)
                end = self._subtract_units(current_start, unit, count - 1)
            except (ValueError, OverflowError):
                return m.group(0)
            return self._format_range(start, end)

        q = re.sub(r"\bMODIFIED (\d+) (YEAR|MONTH|WEEK|DAY)S? AGO\b", replace_ago, q)

        return q
|
|
|
|
|
|
if __name__ == '__main__':
    # Basic smoke checks for date parsing: every query is printed next to
    # the result of parse_date() so the output can be inspected by eye.

    # Well-formed phrases covering each supported unit and phrase shape.
    recognised_queries = (
        "MODIFIED TODAY",
        "first MODIFIED YESTERDAY last",
        "MODIFIED ONE DAY AGO",
        "MODIFIED TWO DAYS AGO",
        "MODIFIED THREE DAYS AGO",
        "MODIFIED LAST TWO DAYS",
        "MODIFIED THIS WEEK",
        "MODIFIED LAST WEEK",
        "MODIFIED LAST TWO WEEKS",
        "MODIFIED ONE WEEK AGO",
        "MODIFIED TWO WEEKS AGO",
        "MODIFIED THREE WEEKS AGO",
        "MODIFIED THIS MONTH",
        "MODIFIED LAST MONTH",
        "MODIFIED LAST TWO MONTHS",
        "MODIFIED ONE MONTH AGO",
        "MODIFIED TWO MONTHS AGO",
        "MODIFIED THREE MONTHS AGO",
        "MODIFIED THIS YEAR",
        "MODIFIED LAST YEAR",
        "MODIFIED LAST TWO YEARS",
        "MODIFIED ONE YEAR AGO",
        "MODIFIED TWO YEARS AGO",
        "MODIFIED THREE YEARS AGO",
        "foto MODIFIED LAST 2 YEARS",
    )

    # Phrases with fused or lowercase keywords.
    edge_case_queries = (
        "MODIFIED TODAYMODIFIED TODAY",
        "MODIFIED yesterday",
        "MODIFIED THIS MONTHMODIFIED THIS WEEK",
        "MODIFIED LAST YEARMODIFIED YESTERDAY",
        "modified TODAY",
        "modified today",
    )

    for batch in (recognised_queries, edge_case_queries):
        parser = BagheeraQueryParser()
        print(f"Testing {__file__}:")
        for text in batch:
            print(f" Input: '{text}'")
            print(f" Output: '{parser.parse_date(text)}'")
            print("-" * 20)
|