diff --git a/buzz/widgets/transcription_viewer/speaker_identification_widget.py b/buzz/widgets/transcription_viewer/speaker_identification_widget.py index cc794419..67408256 100644 --- a/buzz/widgets/transcription_viewer/speaker_identification_widget.py +++ b/buzz/widgets/transcription_viewer/speaker_identification_widget.py @@ -187,7 +187,8 @@ class IdentificationWorker(QObject): transcription_id=self.transcription.id_as_uuid ) - full_transcript = "".join(segment.text for segment in segments) + full_transcript = " ".join(segment.text for segment in segments) + full_transcript = re.sub(r' {2,}', ' ', full_transcript) if self._is_cancelled: logging.debug("Speaker identification worker: Cancelled at step 2") diff --git a/tests/widgets/speaker_identification_widget_test.py b/tests/widgets/speaker_identification_widget_test.py index 5f10e6ce..54dc4071 100644 --- a/tests/widgets/speaker_identification_widget_test.py +++ b/tests/widgets/speaker_identification_widget_test.py @@ -87,7 +87,8 @@ class TestSpeakerIdentificationWidget: assert worker.transcription == transcription assert len(result) == 1 assert isinstance(result[0], list) - assert result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bienvenue dans. '}]] + assert (result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bien venue dans. '}]] + or result == [[{'end_time': 8904, 'speaker': 'Speaker 0', 'start_time': 140, 'text': 'Bienvenue dans. '}]]) def test_batch_processing_with_many_words(self): """Test batch processing when there are more than 200 words."""