


python3+PyQt5 implements a page indexer application that supports multi-threading
This article mainly introduces in detail how python3 PyQt5 implements a page indexer application that supports multi-threading. It has a certain reference value. Interested friends can refer to it.
This article uses Python3 pyqt5 Implemented the page indexer application example of Chapter 19 of python Qt GUI fast programming.
/home/yrd/eric_workspace/chap19/walker_ans.py
#!/usr/bin/env python3 import codecs import html.entities import re import sys from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt) class Walker(QThread): finished = pyqtSignal(bool,int) indexed = pyqtSignal(str,int) COMMON_WORDS_THRESHOLD = 250 MIN_WORD_LEN = 3 MAX_WORD_LEN = 25 INVALID_FIRST_OR_LAST = frozenset("0123456789_") STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE) ENTITY_RE = re.compile(r"&(\w+?);|(\d+?);") SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE) def __init__(self, index, lock, files, filenamesForWords, commonWords, parent=None): super(Walker, self).__init__(parent) self.index = index self.lock = lock self.files = files self.filenamesForWords = filenamesForWords self.commonWords = commonWords self.stopped = False self.mutex = QMutex() self.completed = False def stop(self): try: self.mutex.lock() self.stopped = True finally: self.mutex.unlock() def isStopped(self): try: self.mutex.lock() return self.stopped finally: self.mutex.unlock() def run(self): self.processFiles() self.stop() self.finished.emit(self.completed,self.index) def processFiles(self): def unichrFromEntity(match): text = match.group(match.lastindex) if text.isdigit(): return chr(int(text)) u = html.entities.name2codepoint.get(text) return chr(u) if u is not None else "" for fname in self.files: if self.isStopped(): return words = set() fh = None try: fh = codecs.open(fname, "r", "UTF8", "ignore") text = fh.read() except EnvironmentError as e: sys.stderr.write("Error: {0}\n".format(e)) continue finally: if fh is not None: fh.close() if self.isStopped(): return text = self.STRIPHTML_RE.sub("", text) text = self.ENTITY_RE.sub(unichrFromEntity, text) text = text.lower() for word in self.SPLIT_RE.split(text): if (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN and word[0] not in self.INVALID_FIRST_OR_LAST and word[-1] not in self.INVALID_FIRST_OR_LAST): try: self.lock.lockForRead() new = word not in self.commonWords finally: self.lock.unlock() if new: words.add(word) if self.isStopped(): return for word in words: try: self.lock.lockForWrite() files = self.filenamesForWords[word] if len(files) > self.COMMON_WORDS_THRESHOLD: del self.filenamesForWords[word] self.commonWords.add(word) else: files.add(str(fname)) finally: self.lock.unlock() self.indexed.emit(fname,self.index) self.completed = True /home/yrd/eric_workspace/chap19/pageindexer_ans.pyw #!/usr/bin/env python3 import collections import os import sys from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt) from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame, QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget, QPushButton, QVBoxLayout) import walker_ans as walker def isAlive(qobj): import sip try: sip.unwrapinstance(qobj) except RuntimeError: return False return True class Form(QDialog): def __init__(self, parent=None): super(Form, self).__init__(parent) self.mutex = QMutex() self.fileCount = 0 self.filenamesForWords = collections.defaultdict(set) self.commonWords = set() self.lock = QReadWriteLock() self.path = QDir.homePath() pathLabel = QLabel("Indexing path:") self.pathLabel = QLabel() self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken) self.pathButton = QPushButton("Set &Path...") self.pathButton.setAutoDefault(False) findLabel = QLabel("&Find word:") self.findEdit = QLineEdit() findLabel.setBuddy(self.findEdit) commonWordsLabel = QLabel("&Common words:") self.commonWordsListWidget = QListWidget() commonWordsLabel.setBuddy(self.commonWordsListWidget) filesLabel = QLabel("Files containing the &word:") self.filesListWidget = QListWidget() filesLabel.setBuddy(self.filesListWidget) filesIndexedLabel = QLabel("Files indexed") self.filesIndexedLCD = QLCDNumber() self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat) wordsIndexedLabel = QLabel("Words indexed") self.wordsIndexedLCD = QLCDNumber() self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat) commonWordsLCDLabel = QLabel("Common words") self.commonWordsLCD = QLCDNumber() self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat) self.statusLabel = QLabel("Click the 'Set Path' " "button to start indexing") self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken) topLayout = QHBoxLayout() topLayout.addWidget(pathLabel) topLayout.addWidget(self.pathLabel, 1) topLayout.addWidget(self.pathButton) topLayout.addWidget(findLabel) topLayout.addWidget(self.findEdit, 1) leftLayout = QVBoxLayout() leftLayout.addWidget(filesLabel) leftLayout.addWidget(self.filesListWidget) rightLayout = QVBoxLayout() rightLayout.addWidget(commonWordsLabel) rightLayout.addWidget(self.commonWordsListWidget) middleLayout = QHBoxLayout() middleLayout.addLayout(leftLayout, 1) middleLayout.addLayout(rightLayout) bottomLayout = QHBoxLayout() bottomLayout.addWidget(filesIndexedLabel) bottomLayout.addWidget(self.filesIndexedLCD) bottomLayout.addWidget(wordsIndexedLabel) bottomLayout.addWidget(self.wordsIndexedLCD) bottomLayout.addWidget(commonWordsLCDLabel) bottomLayout.addWidget(self.commonWordsLCD) bottomLayout.addStretch() layout = QVBoxLayout() layout.addLayout(topLayout) layout.addLayout(middleLayout) layout.addLayout(bottomLayout) layout.addWidget(self.statusLabel) self.setLayout(layout) self.walkers = [] self.completed = [] self.pathButton.clicked.connect(self.setPath) self.findEdit.returnPressed.connect(self.find) self.setWindowTitle("Page Indexer") def stopWalkers(self): for walker in self.walkers: if isAlive(walker) and walker.isRunning(): walker.stop() for walker in self.walkers: if isAlive(walker) and walker.isRunning(): walker.wait() self.walkers = [] self.completed = [] def setPath(self): self.stopWalkers() self.pathButton.setEnabled(False) path = QFileDialog.getExistingDirectory(self, "Choose a Path to Index", self.path) if not path: self.statusLabel.setText("Click the 'Set Path' " "button to start indexing") self.pathButton.setEnabled(True) return self.statusLabel.setText("Scanning directories...") QApplication.processEvents() # Needed for Windows self.path = QDir.toNativeSeparators(path) self.findEdit.setFocus() self.pathLabel.setText(self.path) self.statusLabel.clear() self.filesListWidget.clear() self.fileCount = 0 self.filenamesForWords = collections.defaultdict(set) self.commonWords = set() nofilesfound = True files = [] index = 0 for root, dirs, fnames in os.walk(str(self.path)): for name in [name for name in fnames if name.endswith((".htm", ".html"))]: files.append(os.path.join(root, name)) if len(files) == 1000: self.processFiles(index, files[:]) files = [] index += 1 nofilesfound = False if files: self.processFiles(index, files[:]) nofilesfound = False if nofilesfound: self.finishedIndexing() self.statusLabel.setText( "No HTML files found in the given path") def processFiles(self, index, files): thread = walker.Walker(index, self.lock, files, self.filenamesForWords, self.commonWords, self) thread.indexed[str,int].connect(self.indexed) thread.finished[bool,int].connect(self.finished) thread.finished.connect(thread.deleteLater) self.walkers.append(thread) self.completed.append(False) thread.start() thread.wait(300) # Needed for Windows def find(self): word = str(self.findEdit.text()) if not word: try: self.mutex.lock() self.statusLabel.setText("Enter a word to find in files") finally: self.mutex.unlock() return try: self.mutex.lock() self.statusLabel.clear() self.filesListWidget.clear() finally: self.mutex.unlock() word = word.lower() if " " in word: word = word.split()[0] try: self.lock.lockForRead() found = word in self.commonWords finally: self.lock.unlock() if found: try: self.mutex.lock() self.statusLabel.setText("Common words like '{0}' " "are not indexed".format(word)) finally: self.mutex.unlock() return try: self.lock.lockForRead() files = self.filenamesForWords.get(word, set()).copy() finally: self.lock.unlock() if not files: try: self.mutex.lock() self.statusLabel.setText("No indexed file contains " "the word '{0}'".format(word)) finally: self.mutex.unlock() return files = [QDir.toNativeSeparators(name) for name in sorted(files, key=str.lower)] try: self.mutex.lock() self.filesListWidget.addItems(files) self.statusLabel.setText( "{0} indexed files contain the word '{1}'".format( len(files), word)) finally: self.mutex.unlock() def indexed(self, fname, index): try: self.mutex.lock() self.statusLabel.setText(fname) self.fileCount += 1 count = self.fileCount finally: self.mutex.unlock() if count % 25 == 0: try: self.lock.lockForRead() indexedWordCount = len(self.filenamesForWords) commonWordCount = len(self.commonWords) finally: self.lock.unlock() try: self.mutex.lock() self.filesIndexedLCD.display(count) self.wordsIndexedLCD.display(indexedWordCount) self.commonWordsLCD.display(commonWordCount) finally: self.mutex.unlock() elif count % 101 == 0: try: self.lock.lockForRead() words = self.commonWords.copy() finally: self.lock.unlock() try: self.mutex.lock() self.commonWordsListWidget.clear() self.commonWordsListWidget.addItems(sorted(words)) finally: self.mutex.unlock() def finished(self, completed, index): done = False if self.walkers: self.completed[index] = True if all(self.completed): try: self.mutex.lock() self.statusLabel.setText("Finished") done = True finally: self.mutex.unlock() else: try: self.mutex.lock() self.statusLabel.setText("Finished") done = True finally: self.mutex.unlock() if done: self.finishedIndexing() def reject(self): if not all(self.completed): self.stopWalkers() self.finishedIndexing() else: self.accept() def closeEvent(self, event=None): self.stopWalkers() def finishedIndexing(self): self.filesIndexedLCD.display(self.fileCount) self.wordsIndexedLCD.display(len(self.filenamesForWords)) self.commonWordsLCD.display(len(self.commonWords)) self.pathButton.setEnabled(True) QApplication.processEvents() # Needed for Windows app = QApplication(sys.argv) form = Form() form.show() app.exec_()
Running result:
Related recommendations:
python3 PyQt5 Qt Designer implements stacked widgets
python3 PyQt5 Qt Designer implements extended dialog boxes
The above is the detailed content of python3+PyQt5 implements a page indexer application that supports multi-threading. For more information, please follow other related articles on the PHP Chinese website!

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

Video Face Swap
Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Article

Hot Tools

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

Hot Topics

Want to copy a page in Microsoft Word and keep the formatting intact? This is a smart idea because duplicating pages in Word can be a useful time-saving technique when you want to create multiple copies of a specific document layout or format. This guide will walk you through the step-by-step process of copying pages in Word, whether you are creating a template or copying a specific page in a document. These simple instructions are designed to help you easily recreate your page without having to start from scratch. Why copy pages in Microsoft Word? There are several reasons why copying pages in Word is very beneficial: When you have a document with a specific layout or format that you want to copy. Unlike recreating the entire page from scratch

8-core means that the CPU has 8 physical cores, and 16-thread means that the CPU can have up to 16 threads processing tasks at the same time. The number of cores and threads are important performance indicators of a computer CPU. The higher the number of cores of the CPU, the higher the processing speed; the higher the number of threads, the more conducive it is to running multiple programs at the same time, because the number of threads is equivalent to the number of times the CPU can run at the same time at a certain moment. The number of tasks to be processed in parallel. Multi-threading can maximize wide-issue, out-of-order superscalar processing, improve the utilization of processor computing components, and alleviate memory access delays caused by data correlation or cache misses.

Page refresh is very common in our daily network use. When we visit a web page, we sometimes encounter some problems, such as the web page not loading or displaying abnormally, etc. At this time, we usually choose to refresh the page to solve the problem, so how to refresh the page quickly? Let’s discuss the shortcut keys for page refresh. The page refresh shortcut key is a method to quickly refresh the current web page through keyboard operations. In different operating systems and browsers, the shortcut keys for page refresh may be different. Below we use the common W

Standby is a new feature in the iOS 17 update that provides a new and enhanced way to access information when your phone is idle quickly. With StandBy, you can conveniently check the time, view upcoming events, browse your calendar, get weather updates for your location, and more. Once activated, the iPhone will intuitively enter standby mode when set to landscape while charging. This feature is perfect for wireless charging points like your bedside table, or when you're away from your iPhone charging during daily tasks. It allows you to swipe through various widgets displayed in standby to access different sets of information from various applications. However, you may want to modify these widgets or even delete some based on your preferences and the information you need frequently. So let's dive into

"Methods to handle Laravel pages that cannot display CSS correctly, need specific code examples" When using the Laravel framework to develop web applications, sometimes you will encounter the problem that the page cannot display CSS styles correctly, which may cause the page to render abnormal styles. Affect user experience. This article will introduce some methods to deal with the failure of Laravel pages to display CSS correctly, and provide specific code examples to help developers solve this common problem. 1. Check the file path. First check the path of the CSS file.

In iOS, Apple allows you to disable individual home screen pages on your iPhone. It's also possible to rearrange the order of home screen pages and delete pages directly instead of just disabling them. Here's how it works. How to Rearrange Home Screen Pages Touch and hold Space on the Home screen to enter jitter mode. Tap the row of dots that represent Home screen pages. In the Home screen grid that appears, touch and drag a page to rearrange it relative to other pages. Others move in response to your dragging. When you're happy with your new arrangement, tap "Done" in the upper right corner of the screen, then tap "Done" again to exit dither mode. How to Disable or Remove Home Screen Pages Touch and hold Space on the Home screen to enter dither mode. Tap to represent home screen

Title: Implementation method of page jump in 3 seconds: PHP Programming Guide In web development, page jump is a common operation. Generally, we use meta tags in HTML or JavaScript methods to jump to pages. However, in some specific cases, we need to perform page jumps on the server side. This article will introduce how to use PHP programming to implement a function that automatically jumps to a specified page within 3 seconds, and will also give specific code examples. The basic principle of page jump using PHP. PHP is a kind of

As the Internet develops, many websites or applications have gradually become more complex. When users use it, they often encounter error pages, the most common of which is the 404 page. The 404 page means that the page being accessed does not exist and is a common error page. For websites or applications, a beautiful 404 page can greatly improve the user experience. In this article, we will introduce how to use ThinkPHP6 to quickly implement a beautiful 404 page. Create a route First, we need to create an err in the route folder
