# Public API of this module: the names exported by a star-import.
__all__ = ['total_seconds',
'total_ms',
'timestamp',
'get_srt_duration',
'remove_dialogue_for_first_ts',
'remove_dialogue_between']
from datetime import timedelta
from functools import reduce
def total_seconds(ts: str) -> str:
    """
    Convert a timestamp to a "<seconds>.<milliseconds>" string.

    The timestamp contains hours, minutes, seconds, and milliseconds (for
    example, in the "HH:mm:ss,SSS" format). The result is the total number
    of whole seconds, a dot, and the millisecond part left-padded with
    zeros to three digits.

    For example, a string like "1:20:32,5" will be returned as "4832.005".

    Supports parsing the following input formats:
        - (H)H:mm:ss,SSS
        - (H)H:mm:ss.SSS
        - (H)H:mm:ss:SSS

    Parsing is delegated to `total_ms`, so a `ValueError` raised there for a
    malformed timestamp propagates to the caller.

    A modified version of the following (great) solution:
        https://stackoverflow.com/a/57610198
    """
    # Split the millisecond total into whole seconds and the remainder.
    seconds, milliseconds = divmod(total_ms(ts), 1000)
    return f'{seconds}.{milliseconds:0>3}'
def total_ms(ts: str) -> int:
    """
    Convert a timestamp to its total number of milliseconds.

    The timestamp contains hours, minutes, seconds, and milliseconds (for
    example, in the "HH:mm:ss,SSS" format).

    For example, a string like "1:20:32,5" will be returned as 4832005.
    Note that the millisecond field is read as a plain integer, so ",5"
    means 5 ms (not 500 ms).

    Supports parsing the following input formats:
        - (H)H:mm:ss,SSS
        - (H)H:mm:ss.SSS
        - (H)H:mm:ss:SSS

    Raises `ValueError` if the timestamp has no separator between the
    seconds and the milliseconds, or if any field is not an integer.

    A modified version of the following (great) solution:
        https://stackoverflow.com/a/57610198
    """
    # Treat '.' and ',' alike as the millisecond separator.
    normalized = ts.replace('.', ',')
    if ',' in normalized:
        h_m_s, milliseconds = normalized.rsplit(',', 1)
    else:
        # No ',' or '.': the last ':'-separated field is the milliseconds.
        # Unpacking raises ValueError when there is no ':' at all.
        h_m_s, milliseconds = ts.rsplit(':', 1)

    # Fold the ':'-separated fields into seconds; each field to the left
    # is worth 60 times the one after it.
    seconds = 0
    for field in h_m_s.split(':'):
        seconds = seconds * 60 + int(field)

    return seconds * 1000 + int(milliseconds)
def timestamp(seconds: float = 0, *,
              hours: float = 0, minutes: float = 0,
              milliseconds: float = 0) -> str:
    """
    Convert a duration (generally specified in seconds) to a formatted string
    in the 'H:mm:ss.SSS' format, for example '2:01:03.150'.

    All four components are added together. The hours field is not
    zero-padded and is not capped at 24, so a duration of a day or more is
    rendered as e.g. '25:00:00' rather than with a "1 day, " prefix. A
    negative duration is rendered with a leading '-'.

    The '.SSS' part is only present when the duration has a non-zero
    sub-second component; it is truncated (not rounded) to milliseconds.
    """
    # Pass the components explicitly rather than via locals(), which would
    # silently pick up any local variable added above the call.
    delta = timedelta(hours=hours, minutes=minutes, seconds=seconds,
                      milliseconds=milliseconds)

    sign = '-' if delta < timedelta(0) else ''
    delta = abs(delta)

    # Fold whole days into the hour count so the output stays in the
    # colon-separated format.
    whole_seconds = delta.days * 86400 + delta.seconds
    hrs, remainder = divmod(whole_seconds, 3600)
    mins, secs = divmod(remainder, 60)

    ts = f'{sign}{hrs}:{mins:02}:{secs:02}'
    if delta.microseconds:
        # Keep only the millisecond digits of the microsecond part.
        ts += f'.{delta.microseconds // 1000:03}'
    return ts
def get_srt_duration(srt_contents: str, default_end_seconds=0.0) -> float:
    """
    Gets the total duration (based on end timestamp) of an SRT file.

    The lines are scanned from the bottom up for the last timing line
    (one containing '-->') that is followed by a non-blank line. The end
    timestamp of that line is returned as a number of seconds. If no such
    line exists, `default_end_seconds` is returned instead.
    """
    following_line = ''

    for line in reversed(srt_contents.split('\n')):
        if '-->' not in line:
            following_line = line
            continue

        # Fix: sometimes the durations will be listed for
        # a blank line (no dialogue); skip those and keep looking.
        if not following_line.strip():
            continue

        # Drop spaces so the end timestamp is everything after the arrow.
        end = line.replace(' ', '').rsplit('-->', 1)[-1]
        return float(total_seconds(end))

    return default_end_seconds
def remove_dialogue_for_first_ts(srt_contents: str, ts: str) -> str:
    """
    Removes dialogue under the first occurrence of a start timestamp
    in an SRT file. If the start timestamp is not found, return
    the `srt_contents` instead.

    The timing line itself is kept; only the lines after it, up to (but not
    including) the next blank line, are dropped. If no blank line follows,
    everything up to the end of the contents is dropped. `ts` is compared
    as an exact string against the text before '-->' with surrounding
    whitespace stripped.
    """
    caption_text = srt_contents.split('\n')

    for i, line in enumerate(caption_text):
        if '-->' not in line:
            continue
        if line.split('-->', 1)[0].strip() != ts:
            continue

        dialogue_start = i + 1
        # The dialogue ends at the next blank line. Default to the end of
        # the file so the last caption, which may have no trailing blank
        # line, still has its dialogue removed.
        dialogue_end = next(
            (j for j in range(dialogue_start, len(caption_text))
             if not caption_text[j].strip()),
            len(caption_text),
        )

        # Return SRT contents with the first dialogue for that timestamp removed
        return '\n'.join(
            caption_text[:dialogue_start] + caption_text[dialogue_end:])

    return srt_contents
def remove_dialogue_between(srt_contents: str, start_ms: int,
                            end_ms: int) -> str:
    """
    Remove all dialogue between `start_ms` and `end_ms`, non-inclusive of any
    dialogue for `end_ms` - note that values are in milliseconds.

    A caption is affected when the start timestamp on its timing line
    (the text before '-->') satisfies `start_ms <= start < end_ms`. Timing
    lines and blank lines are always kept; only the non-blank lines that
    follow an affected timing line, up to the next blank line, are dropped.

    A `ValueError` from `total_ms` propagates if the text before '-->' is
    not a parseable timestamp.
    """
    kept_lines = []
    exclude_dialogue = False

    for line in srt_contents.split('\n'):
        if '-->' in line:
            start_ts = line.split('-->', 1)[0].strip()
            # Re-evaluate on every timing line, so that an out-of-range
            # caption is never dropped just because the previous caption
            # was not terminated by a blank line.
            exclude_dialogue = start_ms <= total_ms(start_ts) < end_ms
        elif exclude_dialogue:
            if line.strip():
                # Dialogue of an excluded caption: drop it.
                continue
            # Found blank line: the excluded caption is over.
            exclude_dialogue = False

        kept_lines.append(line)

    return '\n'.join(kept_lines)