From 33dd3b9bcddb23e6f7d92b6fd61d342e5f63d7a1 Mon Sep 17 00:00:00 2001
From: "Fernando O. Gallego" <f.ortega.gallego@gmail.com>
Date: Fri, 24 Mar 2023 11:56:41 +0100
Subject: [PATCH] Update decoding.py

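Apply the changes from https://github.com/openai/whisper/pull/914/:
in ApplyTimestampRules, mask the logits of timestamp tokens smaller than
the last sampled timestamp so that decoded timestamps cannot decrease.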
---
 whisperx/decoding.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/whisperx/decoding.py b/whisperx/decoding.py
index bb70cc0..ca608ca 100644
--- a/whisperx/decoding.py
+++ b/whisperx/decoding.py
@@ -413,7 +413,8 @@ class ApplyTimestampRules(LogitFilter):
 
         # timestamps have to appear in pairs, except directly before EOT; mask logits accordingly
         for k in range(tokens.shape[0]):
-            seq = [t for t in tokens[k, self.sample_begin :].tolist()]
+            sampled_tokens = tokens[k, self.sample_begin :]
+            seq = [t for t in sampled_tokens.tolist()]
             last_was_timestamp = len(seq) >= 1 and seq[-1] >= self.tokenizer.timestamp_begin
             penultimate_was_timestamp = len(seq) < 2 or seq[-2] >= self.tokenizer.timestamp_begin
 
@@ -422,6 +423,11 @@ class ApplyTimestampRules(LogitFilter):
                     logits[k, self.tokenizer.timestamp_begin :] = -np.inf
                 else:  # cannot be normal text tokens
                     logits[k, : self.tokenizer.eot] = -np.inf
+                    
+            timestamps = sampled_tokens[sampled_tokens.ge(self.tokenizer.timestamp_begin)]
+            if timestamps.numel() > 0:
+                # timestamps shouldn't decrease; forbid timestamp tokens smaller than the last
+                logits[k, self.tokenizer.timestamp_begin : timestamps[-1]] = -np.inf
 
         if tokens.shape[1] == self.sample_begin:
             # suppress generating non-timestamp tokens at the beginning