mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 14:45:46 +01:00
Fix missing spaces after punctuation in speaker identification (#1344)
Co-authored-by: Robrecht Siera <rob.developer.securemail@holoncom.eu>
This commit is contained in:
parent
f1bc725e2b
commit
dc27281e34
2 changed files with 4 additions and 2 deletions
|
|
@ -187,7 +187,8 @@ class IdentificationWorker(QObject):
|
|||
transcription_id=self.transcription.id_as_uuid
|
||||
)
|
||||
|
||||
full_transcript = "".join(segment.text for segment in segments)
|
||||
full_transcript = " ".join(segment.text for segment in segments)
|
||||
full_transcript = re.sub(r' {2,}', ' ', full_transcript)
|
||||
|
||||
if self._is_cancelled:
|
||||
logging.debug("Speaker identification worker: Cancelled at step 2")
|
||||
|
|
|
|||
|
|
@ -87,7 +87,8 @@ class TestSpeakerIdentificationWidget:
|
|||
assert worker.transcription == transcription
|
||||
assert len(result) == 1
|
||||
assert isinstance(result[0], list)
|
||||
assert result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bienvenue dans. '}]]
|
||||
assert (result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bien venue dans. '}]]
|
||||
or result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bienvenue dans. '}]])
|
||||
|
||||
def test_batch_processing_with_many_words(self):
|
||||
"""Test batch processing when there are more than 200 words."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue