Compare commits
No commits in common. "87137bcd366d5f56bdaa28a4b82ff28c6c95ff3a" and "f4685b4427bb2716ea6fa8e3d4cc2367f7e0cf60" have entirely different histories.
87137bcd36 ... f4685b4427
@@ -12,7 +12,6 @@ class AudioVisualizer(QtWidgets.QWidget):
         self.x_resolution = x_resolution
         self.fft_analyser = FFTAnalyser(self.media_player, self.x_resolution)
         self.fft_analyser.calculatedVisual.connect(self.set_amplitudes)
-        self.fft_analyser.calculatedVisualRs.connect(self.set_rs)
         self.fft_analyser.start()
         self.amps = np.array([])
         self._plot_item = None
@@ -56,9 +55,6 @@ class AudioVisualizer(QtWidgets.QWidget):
     def get_amplitudes(self):
         return self.amps

-    def get_rs(self):
-        return self.rs
-
     def get_decibels(self):
         """Convert amplitude values to decibel scale

@@ -67,7 +63,7 @@ class AudioVisualizer(QtWidgets.QWidget):
         With a noise floor cutoff at around -96dB (for very small values)
         """
         # Avoid log(0) by adding a small epsilon
-        epsilon = 1e-6
+        epsilon = 1e-30
         amplitudes = np.maximum(self.amps, epsilon)
         # Convert to decibels (20*log10 is the standard formula for amplitude to dB)
         db_values = 20 * np.log10(amplitudes)
@@ -75,9 +71,6 @@ class AudioVisualizer(QtWidgets.QWidget):
         db_values = np.maximum(db_values, -96)
         return db_values

-    def set_rs(self, rs):
-        self.rs = np.array(rs)
-
     def set_amplitudes(self, amps):
         """
         This function is hooked into the calculatedVisual signal from FFTAnalyzer() object
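For reference, the conversion in get_decibels above amounts to the following standalone sketch; the helper name amps_to_db, its keyword defaults, and the example values are illustrative, not taken from the repository. With either epsilon, values clamped up from zero still end at the -96 dB floor after the final np.maximum.

import numpy as np

def amps_to_db(amps, floor_db=-96.0, epsilon=1e-30):
    """Convert linear amplitude values to decibels with a noise floor."""
    amps = np.maximum(np.asarray(amps, dtype=float), epsilon)  # avoid log(0)
    db = 20 * np.log10(amps)            # standard amplitude-to-dB formula
    return np.maximum(db, floor_db)     # clamp very quiet values to the floor

# e.g. amps_to_db([1.0, 0.1, 0.0]) -> [  0., -20., -96.]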
BIN  sines up.mp3  (Binary file not shown.)
@@ -13,7 +13,6 @@ class FFTAnalyser(QtCore.QThread):
     """Analyses a song using FFTs."""

     calculatedVisual = QtCore.pyqtSignal(np.ndarray)
-    calculatedVisualRs = QtCore.pyqtSignal(np.ndarray)

     def __init__(self, player, x_resolution): # noqa: F821
         super().__init__()
@@ -27,7 +26,7 @@ class FFTAnalyser(QtCore.QThread):
         # in this case, it takes 5% of the samples at some point in time
         self.sampling_window_length = 0.05
         self.visual_delta_threshold = 1000
-        self.sensitivity = 1
+        self.sensitivity = 10

     def reset_media(self):
         """Resets the media to the currently playing song."""
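The sampling_window_length kept above is the length, in seconds, of the audio slice fed to each FFT; the deleted analyser further down turns it into a sample count using the song's frame rate. A small illustration of that arithmetic (the 44.1 kHz frame rate is an assumption; the real value comes from pydub's AudioSegment):

frame_rate = 44100                                        # samples per second (assumed)
sampling_window_length = 0.05                             # seconds analysed per frame
sample_count = int(frame_rate * sampling_window_length)
print(sample_count)                                       # 2205 samples per FFT window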
@@ -109,7 +108,6 @@ class FFTAnalyser(QtCore.QThread):
         # array (self.points) is so that we can fade out the previous amplitudes from
         # the past
         for n, amp in enumerate(point_samples):
-            amp *= 2
             if self.player.state() in (
                 self.player.PausedState,
                 self.player.StoppedState,
@@ -122,14 +120,13 @@ class FFTAnalyser(QtCore.QThread):
             else:
                 # Rise quickly to new peaks
                 self.points[n] = amp
-                # print(f'amp > points[n] - {amp} > {self.points[n]}')
-            # Set a lower threshold to properly reach zero
-            if self.points[n] < 1:
-                self.points[n] = 1e-5
-            # print(self.points)
+
+            # Set a lower threshold to properly reach zero
+            if self.points[n] < 1e-4:
+                self.points[n] = 0

         # interpolate points
-        rs = gaussian_filter1d(self.points, sigma=2)
+        rs = gaussian_filter1d(self.points, sigma=1)

         # divide by the highest sample in the song to normalise the
         # amps in terms of decimals from 0 -> 1
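The sigma change in the smoothing step above narrows the Gaussian kernel, so energy from one bar bleeds less into its neighbours and isolated peaks stay taller. A quick sketch of the effect (the array size and peak position are arbitrary, chosen only for illustration):

import numpy as np
from scipy.ndimage import gaussian_filter1d

points = np.zeros(20)
points[10] = 1.0                               # a single isolated peak
wide = gaussian_filter1d(points, sigma=2)      # old setting: broader spread
narrow = gaussian_filter1d(points, sigma=1)    # new setting: tighter spread
print(round(wide.max(), 3), round(narrow.max(), 3))  # narrower kernel keeps a taller peak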
@@ -1,117 +0,0 @@
-# Credit
-# https://github.com/ravenkls/MilkPlayer/blob/master/audio/fft_analyser.py
-
-import time
-from PyQt5 import QtCore
-from pydub import AudioSegment
-import numpy as np
-from scipy.ndimage.filters import gaussian_filter1d
-from logging import debug, info
-
-
-class FFTAnalyser(QtCore.QThread):
-    """Analyses a song using FFTs."""
-
-    calculatedVisual = QtCore.pyqtSignal(np.ndarray)
-    calculatedVisualRs = QtCore.pyqtSignal(np.ndarray)
-
-    def __init__(self, player, x_resolution): # noqa: F821
-        super().__init__()
-        self.player = player
-        self.reset_media()
-        self.player.currentMediaChanged.connect(self.reset_media)
-
-        self.resolution = x_resolution
-        # this length is a number, in seconds, of how much audio is sampled to determine the frequencies
-        # of the audio at a specific point in time
-        # in this case, it takes 5% of the samples at some point in time
-        self.sampling_window_length = 0.05
-        self.visual_delta_threshold = 1000
-        self.sensitivity = 0.2
-
-    def reset_media(self):
-        """Resets the media to the currently playing song."""
-        audio_file = self.player.currentMedia().canonicalUrl().path()
-        # if os.name == "nt" and audio_file.startswith("/"):
-        #     audio_file = audio_file[1:]
-        if audio_file:
-            try:
-                self.song = AudioSegment.from_file(audio_file).set_channels(1)
-            except PermissionError:
-                self.start_animate = False
-            else:
-                self.samples = np.array(self.song.get_array_of_samples())
-
-                self.max_sample = self.samples.max()
-                self.points = np.zeros(self.resolution)
-                self.start_animate = True
-        else:
-            self.start_animate = False
-
-    def calculate_amps(self):
-        """Calculates the amplitudes used for visualising the media."""
-
-        sample_count = int(self.song.frame_rate * self.sampling_window_length)
-        start_index = int((self.player.position() / 1000) * self.song.frame_rate)
-        # samples to analyse
-        v_sample = self.samples[start_index : start_index + sample_count]
-
-        # Use a window function to reduce spectral leakage
-        window = np.hanning(len(v_sample))
-        v_sample = v_sample * window
-
-        # use FFTs to analyse frequency and amplitudes
-        fourier = np.fft.fft(v_sample)
-        freq = np.fft.fftfreq(fourier.size, d=1/self.song.frame_rate)
-        amps = np.abs(fourier)[:len(fourier)//2] # Only take positive frequencies
-        freq = freq[:len(fourier)//2] # Match frequencies to amplitudes
-
-        # Define frequency bands (in Hz)
-        bands = np.logspace(np.log10(10), np.log10(23000), self.resolution + 1)
-        point_samples = np.zeros(self.resolution)
-
-        # Calculate average amplitude for each frequency band
-        for i in range(len(bands) - 1):
-            mask = (freq >= bands[i]) & (freq < bands[i+1])
-            if np.any(mask):
-                point_samples[i] = np.mean(amps[mask])
-
-        # Calculate RMS of the sample for dynamic sensitivity
-        rms = np.sqrt(np.mean(np.square(v_sample)))
-        rms_ratio = min(0.2, rms / (0.01 * self.max_sample)) # Smooth transition near silence
-
-        # Normalize and apply sensitivity with RMS-based scaling
-        if np.max(point_samples) > 0:
-            point_samples = point_samples / np.max(point_samples)
-            point_samples = point_samples * self.sensitivity * rms_ratio
-        else:
-            point_samples = np.zeros(self.resolution)
-
-        # Update visualization points with decay
-        for n in range(self.resolution):
-            amp = point_samples[n]
-            if self.player.state() in (self.player.PausedState, self.player.StoppedState):
-                self.points[n] *= 0.95 # Fast decay when paused/stopped
-            elif amp < self.points[n]:
-                # More aggressive decay for very quiet signals
-                decay_factor = 0.7 if rms_ratio < 0.1 else 0.9
-                self.points[n] = max(amp, self.points[n] * decay_factor)
-            else:
-                self.points[n] = amp
-
-        # Apply Gaussian smoothing
-        rs = gaussian_filter1d(self.points, sigma=1)
-
-        # Emit the smoothed data
-        self.calculatedVisual.emit(rs)
-
-    def run(self):
-        """Runs the animate function depending on the song."""
-        while True:
-            if self.start_animate:
-                try:
-                    self.calculate_amps()
-                except ValueError:
-                    self.calculatedVisual.emit(np.zeros(self.resolution))
-                    self.start_animate = False
-            time.sleep(0.033)
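The deleted analyser's calculate_amps boils down to: window a short slice of samples, take an FFT, keep the positive frequencies, and average the magnitudes into log-spaced bands. A condensed standalone sketch of that pipeline under assumed parameters (a 440 Hz test tone, 44.1 kHz rate, 64 bands; none of these values come from the repository):

import numpy as np

frame_rate = 44100
t = np.arange(int(frame_rate * 0.05)) / frame_rate        # 0.05 s analysis window
v_sample = np.sin(2 * np.pi * 440 * t) * np.hanning(t.size)  # windowed test tone

fourier = np.fft.fft(v_sample)
freq = np.fft.fftfreq(fourier.size, d=1 / frame_rate)
amps = np.abs(fourier)[: fourier.size // 2]               # positive frequencies only
freq = freq[: fourier.size // 2]

resolution = 64
bands = np.logspace(np.log10(10), np.log10(23000), resolution + 1)
point_samples = np.zeros(resolution)
for i in range(resolution):
    mask = (freq >= bands[i]) & (freq < bands[i + 1])     # bins falling in this band
    if np.any(mask):
        point_samples[i] = np.mean(amps[mask])

print(int(np.argmax(point_samples)))   # index of the band holding the 440 Hz peak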