From f1c346a634873c7f9f0e5cfcceb039b756c42788 Mon Sep 17 00:00:00 2001
From: Yavin7 <morgan07kelley@gmail.com>
Date: Tue, 16 Sep 2025 20:36:39 -0600
Subject: [PATCH] Renamed decompressed_files/ to out/ Fised a lot of logic Set
 starting bit to 0x800, since that's after a big blank space in files, is the
 method bit we are expecting (0x0B) every time, and works to decomp all files
 (wow!)

---
 .gitignore |   2 +-
 main.py    | 178 ++++++++++++++++++++++++++++-------------------------
 2 files changed, 95 insertions(+), 85 deletions(-)

diff --git a/.gitignore b/.gitignore
index 31c33a6..b68a1ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 # Project Specific
 bin_extract/*
-decompressed_files/*
+out/*
 bzz_mudule.py
 
 # Byte-compiled / optimized / DLL files
diff --git a/main.py b/main.py
index ac21844..461d7a5 100644
--- a/main.py
+++ b/main.py
@@ -1,171 +1,166 @@
+import os
 from bitarray import bitarray
+from pathlib import Path
 
 
 class BZZCompressor:
-    def decompress(self, input_file_path) -> bytes:
+    def decompress(self, input_file_path, input_file, output_folder="out/") -> bytes:
         data = bitarray(endian="big")
-        output_buffer = bitarray(endian="big")
+        output_buffer = bytearray()
+        index = 0x800
 
         # read the input file
         try:
-            with open(input_file_path, "rb") as input_file:
+            with open(f"{input_file_path}/{input_file}", "rb") as input_file:
                 data.fromfile(input_file)
+                data = data.tobytes()
         except IOError:
             print("Could not open input file...")
             raise
 
         if len(data) > 9:
             # Getting our method, this is likely imprecise, since I'm one dealing with one method type, but it gets what I want
-            method = data[0:8] | bitarray("00001010")
+            method = data[index]
             # We move on to the next byte in data
-            del data[0:8]
+            index = index + 1
 
             # Gathering variables based on the method according to https://problemkaputt.de/psxspx-cdrom-file-compression-bzz.htm
             # Note: bin(int)[2:].zfill(8) converts a number to an 8-bit binary string
 
             # `>> 3` is the same as dividing by 8
-            shifter = (method >> 3) & bitarray(bin(0x03)[2:].zfill(8))
-            len_bits = (method & bitarray(bin(0x07)[2:].zfill(8))) ^ bitarray(
-                bin(0x07)[2:].zfill(8)
-            )
+            shifter = (method >> 3) & 0x03
+            len_bits = (method & 0x07) ^ 0x07
 
             # The bin() function only returns the second half of the byte, so we pad the byte
-            len_mask = bitarray(bin((1 << int(len_bits.to01(), 2)) - 1)[2:].zfill(8))
+            len_mask = 1 << len_bits
 
             threshold = len_mask >> 1
-            if int(threshold.to01(), 2) > 0x07:
-                threshold = bitarray(bin(0x13).zfill(8))
+
+            if threshold > 0x07:
+                threshold = 0x13
 
             len_table = []
 
-            for i in range(int(len_mask.to01(), 2)):
-                if i > int(threshold.to01(), 2):
-                    len_table.append(
-                        (i - int(threshold.to01(), 2) << int(shifter.to01(), 2))
-                        + int(threshold.to01(), 2)
-                        + 3
-                    )
+            for i in range(len_mask):
+                if i > threshold:
+                    len_table.append((i - threshold << shifter) + threshold + 3)
                 else:
                     len_table.append(i + 3)
 
-            num_flags = bitarray(bin(int(data[0:24].to01(), 2) + 1)[2:].zfill(24))
-            del data[0:24]
+            temp_flags = ""
 
-            print(f"Method: {method.tobytes()}")
-            print(f"Shifter: {shifter.tobytes()}")
-            print(f"Len Bits: {len_bits.tobytes()}")
-            print(f"Len Mask: {len_mask.tobytes()}")
-            print(f"Threshold: {threshold.tobytes()}")
+            for item in data[index : index + 3]:
+                temp_flags += bin(item)[2:].zfill(8)
+
+            num_flags = int(temp_flags, 2) + 1
+            index = index + 3
+
+            print(f"Method: {hex(method)}")
+            print(f"Shifter: {shifter}")
+            print(f"Len Bits: {bin(len_bits)}")
+            print(f"Len Mask: {bin(len_mask)}")
+            print(f"Threshold: {threshold}")
             print(f"Len Table: {len_table}")
-            print(f"Num Flags: {num_flags.tobytes()}")
+            print(f"Num Flags: {bin(num_flags)}")
 
             # Adding 0x100 here means the bitarray is a length of 9, and the first item is always 1
             # This means that later, when we need to gather more flag bits, we aren't losing any data, or
             # hitting an index out of bounds error
-            flag_bits = bitarray(bin(int(data[0:8].to01(), 2) + 0x100)[2:])
-            del data[0:8]
+            flag_bits = bitarray(bin(data[index] + 0x100)[2:])
+            index = index + 1
 
             print(f"Starting flag_bits: {flag_bits}")
 
-            while int(num_flags.to01(), 2) > 0:
+            while num_flags > 0:
                 carry = flag_bits[-1]
                 flag_bits = flag_bits >> 1
 
-                if len(flag_bits) > 8 and flag_bits[0] == 0:
-                    flag_bits = flag_bits[1:]
-
                 print(f"Carry: {carry}")
                 print(f"Flag Bits: {flag_bits}")
 
                 # if we are down to only 0 bits, we are out of file-driven data
                 # Here we collect more flag bits and re-iterate the loop
-                if len(flag_bits.to01()) == 0:
-                    flag_bits = bitarray(bin(int(data[0:8].to01(), 2) + 0x100)[2:])
-                    del data[0:8]
+                if int(flag_bits.to01(), 2) == 0x00:
+                    flag_bits = bitarray(bin(data[index] + 0x100)[2:])
+                    index = index + 1
                     continue
 
                 # Carry means the next byte is raw data, no weird placement or indexing
                 if carry:
-                    if len(data) == 0:
-                        print("Error processing file. Reached of data stream early.")
+                    try:
+                        output_buffer.append(data[index])
+                        index = index + 1
+                    except IndexError:
+                        print(output_buffer)
+                        print(
+                            f"Error processing file. Reached of data stream early. Index: {index}"
+                        )
                         return
 
-                    output_buffer.append(data[0:8])
-                    del data[0:8]
-
                 # If Carry is 0, then we are doing actual decompression. This is the tricky part
                 else:
                     # This shouldn't happen
-                    if len(data) <= 8:
+                    if len(data) <= index + 1:
                         print("Error processing file. Reached of data stream early.")
                         return
 
                     # This is "temp" in our documentation
-                    distance_data = data[0:16]
-                    del data[0:16]
+                    temp = ""
+                    for item in data[index : index + 2]:
+                        temp = temp + bin(item)[2:].zfill(8)
+
+                    distance_data = int(temp, 2)
+                    index = index + 2
 
                     # length here is the length of the data we are copying.
                     # We multiply by 8 since we are working with bits instead of bytes
-                    length = (
-                        len_table[
-                            int(
-                                (
-                                    distance_data
-                                    & bitarray(
-                                        bin(int(len_mask.to01(), 2))[2:].zfill(16)
-                                    )
-                                ).to01(),
-                                2,
-                            )
-                        ]
-                        * 8
-                    )
+                    length = len_table[(distance_data & len_mask) - 1]
 
                     # Displacement is how far back in the existing output_buffer we are
                     # looking to copy from. We multiply by 8 since we are working with bits and not bytes
-                    displacement = (
-                        int((distance_data >> int(len_bits.to01(), 2)).to01(), 2) * 8
-                    )
+                    displacement = distance_data >> len_bits
 
                     # This shouldn't happen
                     if displacement <= 0:
                         print(
-                            "Error processing file. Displacement was less than or equal to 0"
+                            f"Error processing file. Displacement was less than or equal to 0.\n"
+                            + f"Distance Data: {distance_data}. Displacement: {displacement}. Index: {hex(index)}"
                         )
                         return
 
-                    print(f"Output Buffer Size (in bits): {len(output_buffer)}")
-                    print(f"Distance Data: {distance_data.tobytes()}")
-                    print(f"Displacement (in bits): {displacement}")
-                    print(f"Length (in bits): {length}")
+                    print(f"Output Buffer Size {len(output_buffer)}")
+                    print(f"Distance Data: {distance_data}")
+                    print(f"Displacement: {displacement}")
+                    print(f"Length: {length}")
 
                     # Here we copy bit by bit from earlier in the output buffer.
                     # we use this instead of index slicing since the slice could lead to
                     # data we are currently copying into the buffer
-                    start_index = len(output_buffer) - displacement
+                    copy_index = len(output_buffer) - displacement
 
                     # If start index is less than 0, we'll be checking something like output_buffer[-2]
                     # or smth, which will have an IndexOutOfBounds exception
-                    if start_index < 0:
+                    if copy_index < 0:
+                        print(output_buffer)
                         print("Error decompressing file. Start Index was out of range.")
                         return
 
                     for i in range(length):
-                        output_buffer.append(output_buffer[start_index + i])
+                        output_buffer.append(output_buffer[copy_index + i])
 
-                num_flags = bitarray(bin(int(num_flags.to01(), 2) - 1)[2:].zfill(24))
+                num_flags = num_flags - 1
 
-            if len(data) > 0:
-                output_buffer.append(
-                    bitarray(data.to01() + "0".join("" for i in range(8 - len(data))))
-                )
+            if len(data) > index:
+                for item in data[index:]:
+                    output_buffer.append(item)
 
         else:
             # If the file is less than 9 bits, it's just output
             for i in data:
                 output_buffer.append(i)
 
-        out_data = b"".join(output_buffer.tobytes())
+        # This handoff is so I can change buffer logic without breaking write-out logic
+        out_data = output_buffer
 
         try:
             if "bin_extract" in input_file_path[0:11]:
@@ -174,20 +169,35 @@ class BZZCompressor:
                 output_file_path = input_file_path
 
             if "/" in output_file_path:
-                output_file_name = output_file_path.split("/")[-1]
+                output_file_name = output_file_path.split("/")[-1] + ".file"
             else:
-                output_file_name = output_file_path
+                output_file_name = output_file_path + ".file"
 
             # TODO: Create file path, if it doesn't exist
-            with open(
-                f"decompressed_files/{output_file_name[:4]}.file", "wb"
-            ) as outfile:
+            with open(f"{output_folder}/{output_file_name}", "wb") as outfile:
                 outfile.write(out_data)
                 print(f"File {output_file_name} saved successfully!")
-        except IOError:
-            print(f"Unable to write file for {input_file_path}")
+        except IOError as e:
+            print(
+                f"Unable to write file for {input_file_path}/{input_file}. Error: {e}"
+            )
 
 
-compressor = BZZCompressor()
+if __name__ == "__main__":
+    compressor = BZZCompressor()
 
-compressor.decompress("bin_extract/level/mc/mccave01.bzz")
+    for dirpath, dirnames, filenames in os.walk("./bin_extract"):
+        print(f"{dirpath} | {', '.join(filenames)}")
+
+        for file in filenames:
+            if file[-4:] == ".bzz":
+                output_folder_path = Path(f"out/{'/'.join(dirpath.split("/")[2:])}")
+                output_folder_path.mkdir(parents=True, exist_ok=True)
+
+                try:
+                    compressor.decompress(dirpath, file, output_folder_path)
+                except Exception as e:
+                    print(
+                        f"Error while decompressing {output_folder_path}/{file}. Error: {e}"
+                    )
+                    continue