Sisyphus repository
Last update: 1 October 2023 | SRPMs: 18631 | Visits: 37517315
en ru br
ALT Linux repos
S:4.18.0-alt1

Group :: File tools
RPM: catfish

 Main   Changelog   Spec   Patches   Sources   Download   Gear   Bugs and FR  Repocop 

Patch: catfish-4.18.0-ALT-searchODF.patch
Download


diff --git a/catfish/catfish/CatfishSearchEngine.py b/catfish/catfish/CatfishSearchEngine.py
index 4ad19cc..7ac3426 100644
--- a/catfish/catfish/CatfishSearchEngine.py
+++ b/catfish/catfish/CatfishSearchEngine.py
@@ -20,6 +20,7 @@
 # pylint: disable=C0114
 # pylint: disable=C0116
 
+from odf.opendocument import load as odfload
 import logging
 
 import io
@@ -487,6 +488,12 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
     used as a replacement for the 'find' search method, which is difficult to
     interrupt and is slower than os.walk."""
 
+    openers = {
+        '<<UNKNOWN>>': lambda fname: open(fname, "r"),
+        'vnd.oasis.opendocument': lambda fname: io.StringIO(str(odfload(fname).body)),
+        'text': lambda fname: open(fname, "r"),
+    }
+
     def __init__(self):
         """Initialize the 'fulltext' search method."""
         super().__init__("fulltext")
@@ -581,15 +588,10 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
         True if still running."""
         self.running = True
 
-        find_keywords_backup = []
-        if not self.exact:
-            # Split the keywords into a list if they are not already.
-            if isinstance(keywords, str):
-                keywords = keywords.replace(',', ' ').strip().split()
-
-            for keyword in keywords:
-                if keyword not in find_keywords_backup:
-                    find_keywords_backup.append(keyword)
+        if isinstance(keywords, str):
+            keywords = set(keywords.replace(',', ' ').split()) if self.exact else {keywords}
+        else:
+            keywords = {" ".join(keywords)} if self.exact else set(keywords)
 
         # Start walking the folder structure.
         for root, dirs, files in os.walk(path):  # pylint: disable=W0612
@@ -601,48 +603,33 @@ class CatfishSearchMethod_Fulltext(CatfishSearchMethod):
                 continue
 
             for filename in files:
+                if self.force_stop: break
+                mime = guess_type(filename)[0] or "<<UNKNOWN>>"
+                fname = os.path.join(root, filename)
+                if not os.path.isfile(fname): continue
                 try:
-                    fullpath = os.path.join(root, filename)
-
-                    # Skip if special file.
-                    if not os.path.isfile(fullpath):
-                        continue
-                    if os.path.getsize(fullpath) == 0:
-                        continue
-                    if fullpath.lower().endswith('.pdf'):
-                        if self.search_pdf(fullpath, keywords):
-                            yield fullpath
-                    if zipfile.is_zipfile(fullpath):
-                        yield fullpath
-                    # Skip if not text file.
-                    if not self.is_txt(filename):
-                        continue
-                    # Check character encoding, skip if binary.
-                    charset = self.check_charset(root, filename)
-                    if charset == 'binary':
-                        continue
-
-                    # Check each line. If a keyword is found, yield.
-                    open_file = open(fullpath, 'r', encoding=charset)
-                    with open_file as file_text:
-                        if self.search_text(file_text, keywords):
-                            yield fullpath
-                # Skips on errors, move on to next in list.
-                except UnicodeDecodeError:
-                    continue
-                except UnicodeError:
-                    continue
-                except FileNotFoundError:
-                    continue
-                except PermissionError:
-                    continue
-                except OSError:
-                    continue
+                    for submime in self.openers:
+                        if submime in mime:
+                            opened = self.openers[submime](fname)
+                            if self.textsearch(opened, keywords, regex):
+                                yield fname
+                except (IOError, UnicodeDecodeError, ValueError):
+                    pass
             yield True
         yield False
         self.force_stop = False
         self.running = False
 
+    def textsearch(self, stream,  keywords, regex):
+        """Internal text search for keywords in string-oriented stream."""
+        tofind, res = re.compile("|".join(keywords)), keywords.copy()
+        for line in stream:
+            if self.force_stop: break
+            res -= set(tofind.findall(line))
+            if not res:
+                return True
+        return False
+
     def stop(self):
         """Stop the running search method."""
         self.force_stop = True
 
design & coding: Vladimir Lettiev aka crux © 2004-2005, Andrew Avramenko aka liks © 2007-2008
current maintainer: Michael Shigorin