musicpom/utils/get_reorganize_vars.py
2026-05-03 16:08:47 -04:00

110 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from pathlib import Path

from mutagen.id3 import ID3, ID3NoHeaderError
def get_reorganize_vars(filepath: Path) -> tuple[str, str]:
    """
    Takes in a path to an audio file
    returns the (artist, album) as a tuple of strings
    if no artist or album or ID3 tags at all are found,
    function will return ("Unknown Artist", "Unknown Album")

    artist comes from the first value of the TPE1 frame,
    album from the first value of the TALB frame
    """
    # logging.debug(f"getting reorganize vars for {filepath}")
    # TODO: fix this func. id3_remap(get_tags())
    # or is what i have less memory so more better? :shrug:
    try:
        audio = ID3(filepath)
    except ID3NoHeaderError:
        # The file carries no ID3 tag at all; honour the documented fallback
        # instead of letting the loader's exception escape.
        return "Unknown Artist", "Unknown Album"

    def first_value(frame_id: str, fallback: str) -> str:
        """Return the first text value of frame_id, or fallback if absent/empty."""
        try:
            value = str(audio[frame_id].text[0])
        except (KeyError, IndexError):
            # KeyError: frame not present; IndexError: frame has no values.
            return fallback
        return value or fallback

    artist = first_value("TPE1", "Unknown Artist")
    album = first_value("TALB", "Unknown Album")
    return artist, album
def synchsafe_to_int(b):
    """
    Combine the first four items of b into one integer.

    Item 0 is the most significant: the items are shifted left by
    21, 14, 7 and 0 bits respectively and OR-ed together, which is how
    a 4-byte ID3v2 synchsafe size (7 payload bits per byte) is decoded.
    """
    total = 0
    for index, shift in enumerate((21, 14, 7, 0)):
        total |= b[index] << shift
    return total
def decode_text_frame(data):
    """
    Turn the raw payload of an ID3 text frame into a string.

    The first byte of data selects the text encoding
    (0 = iso-8859-1, 1 = utf-16, 2 = utf-16-be, 3 = utf-8) and the
    remaining bytes are the text. Undecodable bytes are replaced rather
    than raising, and trailing NUL characters are stripped.

    returns None if data is empty or the encoding byte is not 0-3
    """
    if not data:
        return None

    codec_by_marker = {
        0: "iso-8859-1",
        1: "utf-16",
        2: "utf-16-be",
        3: "utf-8",
    }
    codec = codec_by_marker.get(data[0])
    if codec is None:
        return None

    payload = data[1:]
    return payload.decode(codec, errors="replace").rstrip("\x00")
def parse_artist_album(filepath) -> tuple[str, str]:
    """
    Read the artist and album straight out of a file's ID3v2 tag,
    without going through a tagging library.

    Walks the frames of the tag at the very start of the file and stops
    as soon as both values are known. Payloads of other frames are
    skipped with a seek rather than read. ID3v2.2 tags (3-character
    frame ids TP1 / TAL) are handled as well as the 4-character
    TPE1 / TALB frames of later versions.
    Unsynchronised, compressed or encrypted frames get no special
    treatment.

    filepath: path of the audio file, anything open() accepts
    returns the (artist, album) as a tuple of strings; a value that is
    missing or empty becomes "Unknown Artist" / "Unknown Album"
    raises ValueError if the file does not start with an ID3v2 header
    """
    with open(filepath, "rb") as f:
        header = f.read(10)
        if len(header) < 10 or header[0:3] != b"ID3":
            raise ValueError("No ID3v2 tag found")

        version_major = header[3]
        tag_flags = header[5]
        # The tag size excludes the 10-byte header just read.
        tag_end = f.tell() + synchsafe_to_int(header[6:10])

        if version_major == 2:
            # ID3v2.2 frame header: 3-char id + 3-byte size, no flag bytes.
            id_len, size_len, header_len = 3, 3, 6
            artist_id, album_id = "TP1", "TAL"
        else:
            # Later versions: 4-char id + 4-byte size + 2 flag bytes.
            id_len, size_len, header_len = 4, 4, 10
            artist_id, album_id = "TPE1", "TALB"
            if tag_flags & 0x40:
                # An extended header sits between the tag header and the
                # first frame; step over it so it is not parsed as a frame.
                ext_size_bytes = f.read(4)
                if len(ext_size_bytes) == 4:
                    if version_major == 4:
                        # v2.4: synchsafe size that counts these 4 bytes too.
                        ext_rest = synchsafe_to_int(ext_size_bytes) - 4
                    else:
                        # v2.3: plain size that excludes these 4 bytes.
                        ext_rest = int.from_bytes(ext_size_bytes, "big")
                    f.seek(max(ext_rest, 0), 1)

        artist = None
        album = None
        while tag_end - f.tell() >= header_len:
            frame_header = f.read(header_len)
            if len(frame_header) < header_len:
                break
            frame_id = frame_header[:id_len].decode("ascii", errors="replace")
            # Stop at padding
            if frame_id.strip("\x00") == "":
                break

            size_bytes = frame_header[id_len:id_len + size_len]
            if version_major == 4:
                frame_size = synchsafe_to_int(size_bytes)
            else:
                frame_size = int.from_bytes(size_bytes, "big")
            # Flag bytes 8-9 (absent in v2.2) were consumed with the header.

            if frame_size > tag_end - f.tell():
                # Frame claims to extend past the tag: corrupt, stop here
                # rather than decode audio data as text.
                break

            if frame_id == artist_id:
                artist = decode_text_frame(f.read(frame_size))
            elif frame_id == album_id:
                album = decode_text_frame(f.read(frame_size))
            else:
                # Not interesting (may be large, e.g. cover art): skip it.
                f.seek(frame_size, 1)

            if artist and album:
                break

    if not artist:
        artist = "Unknown Artist"
    if not album:
        album = "Unknown Album"
    return artist, album