Группа :: Работа с файлами
Пакет: catfish
Главная Изменения Спек Патчи Sources Загрузить Gear Bugs and FR Repocop
Патч: catfish-4.18.0-ALT-searchODF.patch
Скачать
Скачать
diff --git a/catfish/catfish/CatfishSearchEngine.py b/catfish/catfish/CatfishSearchEngine.py
index 4ad19cc..7ac3426 100644
--- a/catfish/catfish/CatfishSearchEngine.py
+++ b/catfish/catfish/CatfishSearchEngine.py
@@ -20,6 +20,7 @@
# pylint: disable=C0114
# pylint: disable=C0116
+from odf.opendocument import load as odfload
import logging
import io
@@ -487,6 +488,12 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
used as a replacement for the 'find' search method, which is difficult to
interrupt and is slower than os.walk."""
+ openers = {
+ '<<UNKNOWN>>': lambda fname: open(fname, "r"),
+ 'vnd.oasis.opendocument': lambda fname: io.StringIO(str(odfload(fname).body)),
+ 'text': lambda fname: open(fname, "r"),
+ }
+
def __init__(self):
"""Initialize the 'fulltext' search method."""
super().__init__("fulltext")
@@ -581,15 +588,10 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
True if still running."""
self.running = True
- find_keywords_backup = []
- if not self.exact:
- # Split the keywords into a list if they are not already.
- if isinstance(keywords, str):
- keywords = keywords.replace(',', ' ').strip().split()
-
- for keyword in keywords:
- if keyword not in find_keywords_backup:
- find_keywords_backup.append(keyword)
+        if isinstance(keywords, str):  # exact => keep the phrase whole; otherwise one entry per word
+            keywords = {keywords} if self.exact else set(keywords.replace(',', ' ').split())
+        else:  # already a sequence of words: exact => re-join into a single phrase
+            keywords = {" ".join(keywords)} if self.exact else set(keywords)
# Start walking the folder structure.
for root, dirs, files in os.walk(path): # pylint: disable=W0612
@@ -601,48 +603,33 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
continue
for filename in files:
+ if self.force_stop: break
+ mime = guess_type(filename)[0] or "<<UNKNOWN>>"
+ fname = os.path.join(root, filename)
+ if not os.path.isfile(fname): continue
try:
- fullpath = os.path.join(root, filename)
-
- # Skip if special file.
- if not os.path.isfile(fullpath):
- continue
- if os.path.getsize(fullpath) == 0:
- continue
- if fullpath.lower().endswith('.pdf'):
- if self.search_pdf(fullpath, keywords):
- yield fullpath
- if zipfile.is_zipfile(fullpath):
- yield fullpath
- # Skip if not text file.
- if not self.is_txt(filename):
- continue
- # Check character encoding, skip if binary.
- charset = self.check_charset(root, filename)
- if charset == 'binary':
- continue
-
- # Check each line. If a keyword is found, yield.
- open_file = open(fullpath, 'r', encoding=charset)
- with open_file as file_text:
- if self.search_text(file_text, keywords):
- yield fullpath
- # Skips on errors, move on to next in list.
- except UnicodeDecodeError:
- continue
- except UnicodeError:
- continue
- except FileNotFoundError:
- continue
- except PermissionError:
- continue
- except OSError:
- continue
+ for submime in self.openers:
+ if submime in mime:
+ opened = self.openers[submime](fname)
+ if self.textsearch(opened, keywords, regex):
+ yield fname
+ except (IOError, UnicodeDecodeError, ValueError):
+ pass
yield True
yield False
self.force_stop = False
self.running = False
+    def textsearch(self, stream, keywords, regex):
+        """Return True once every keyword (a regex if `regex`, else literal text) has matched some line of stream."""
+        pending = [re.compile(word if regex else re.escape(word)) for word in keywords]  # escape literal text
+        for line in stream:
+            if self.force_stop: break
+            pending = [pattern for pattern in pending if not pattern.search(line)]  # keep only the unmatched
+            if not pending:
+                return True
+        return False
+
def stop(self):
"""Stop the running search method."""
self.force_stop = True