# msos/optimize_images.py
# (file-viewer metadata: 289 lines, 14 KiB, Python)

import os
import re
import argparse
import shutil
from PIL import Image
# --- Script configuration ---

# Directory containing this script; the script is assumed to live in the
# project root, so every other path is resolved relative to it.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))

# Folder (relative to PROJECT_ROOT) that holds the images to convert.
IMAGES_DIR_NAME = "images"

# Sub-folder of IMAGES_DIR_NAME that receives the untouched originals.
ORIGINAL_IMAGES_SUBDIR_NAME = "original"

# Language folders whose HTML files get their image references rewritten.
HTML_ROOT_DIRS = ["en", "si", "mk"]

# WebP compression quality (0-100); higher means better quality, bigger file.
WEBP_QUALITY = 80

# Longest allowed side (width or height) in pixels. Larger images are scaled
# down with their aspect ratio preserved; 0 disables downscaling.
MAX_IMAGE_DIMENSION = 1920

# File extensions (lower case, with the dot) that are converted.
IMAGE_EXTENSIONS_TO_OPTIMIZE = [
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
]

# File names that are never converted. Add further names here as needed.
EXCLUDE_FILES_FROM_OPTIMIZATION = [
    "1-logo.png",  # logo that should stay as it is
    "13-whitelogo.png",  # white logo that should stay as it is
    "favicon.ico",  # icon file; not a candidate for WebP conversion
]
# --- Helper functions ---
def optimize_image(image_path, output_path, quality, max_dim, dry_run=False):
    """Convert one image to WebP, downscaling it first when it is too large.

    Args:
        image_path: Path of the source image.
        output_path: Path the WebP file is written to.
        quality: WebP quality handed to Pillow's encoder (0-100).
        max_dim: Longest allowed side in pixels; a value <= 0 disables
            downscaling.
        dry_run: When true, the image is opened and (in memory) resized so the
            resulting dimensions can be reported, but nothing is written.

    Returns:
        True when the image was converted (or would have been, in a dry run);
        False when the source file is missing or processing failed. Failures
        are reported on stdout, never raised, so a batch run can continue.
    """
    try:
        with Image.open(image_path) as img:
            # Palette ('P') and grey+alpha ('LA') images are converted
            # explicitly before encoding. Converting them straight to RGB
            # would silently drop transparency, so keep an alpha channel
            # whenever the source carries one.
            if img.mode == 'LA' or (img.mode == 'P' and 'transparency' in img.info):
                img = img.convert("RGBA")
            elif img.mode == 'P':
                img = img.convert("RGB")
            # RGBA is left untouched so the WebP keeps its alpha channel; any
            # other mode is handed to Pillow's WebP writer as-is.
            # NOTE(review): save() below is called without save_all, so only
            # one frame of an animated GIF is written - confirm that is
            # acceptable for this project.

            if max_dim > 0 and (img.width > max_dim or img.height > max_dim):
                print(f" Pomanjšujem sliko {os.path.basename(image_path)} iz {img.width}x{img.height} ...", end="")
                # thumbnail() resizes in place and preserves the aspect ratio.
                img.thumbnail((max_dim, max_dim), Image.LANCZOS)
                print(f" na {img.width}x{img.height}")
            else:
                print(f" Obdelujem sliko {os.path.basename(image_path)} ({img.width}x{img.height})...", end="")

            if dry_run:
                print(f" DRY RUN: Shranil bi v {os.path.basename(output_path)} (kvaliteta: {quality}).")
            else:
                # method=6 is the slowest encoder setting and gives the best
                # compression.
                img.save(output_path, "webp", quality=quality, method=6)
                print(f" Shrani v {os.path.basename(output_path)} (kvaliteta: {quality}).")
            return True
    except FileNotFoundError:
        print(f" Napaka: Izvorna slika '{image_path}' ni najdena.")
        return False
    except Exception as e:
        # Deliberately broad: one unreadable or corrupt image must not abort
        # the whole batch; the caller only needs the success flag.
        print(f" Napaka pri optimizaciji slike '{image_path}': {e}")
        return False
def update_html_references(html_file_path, old_filename_with_ext, new_filename_with_ext, dry_run=False):
    """Rewrite references to one image file inside one HTML file.

    Only references in these contexts are touched:

    * quoted ``src``, ``srcset`` and ``href`` attribute values (this also
      covers attributes ending in those names, e.g. ``data-src``);
    * CSS ``url(...)`` values, quoted or unquoted.

    Inside such a value the old name is replaced only where it forms a whole
    file name: it must start the value or follow ``/``, whitespace or ``,``,
    and must be followed by the end of the value, whitespace, ``,``, ``?`` or
    ``#``. ``photo.jpg`` therefore never rewrites ``big-photo.jpg``, and
    ``srcset`` entries with descriptors (``photo.jpg 2x``) are handled.
    The original quoting and any path prefix are preserved unchanged.

    Args:
        html_file_path: Path of the HTML file to update (read/written as UTF-8).
        old_filename_with_ext: File name to look for, e.g. ``"photo.jpg"``.
        new_filename_with_ext: Replacement file name, e.g. ``"photo.webp"``.
        dry_run: When true, report what would change but write nothing.

    Returns:
        True when at least one reference was (or, in a dry run, would be)
        replaced and the file was written successfully; False when nothing
        matched or when reading, backing up or writing failed.
    """
    try:
        with open(html_file_path, 'r', encoding='utf-8') as f:
            original_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f" Napaka pri branju HTML datoteke '{html_file_path}': {e}")
        return False

    # The old name as a complete file name. Matching stays case-insensitive,
    # as before.
    filename_re = re.compile(
        rf'(?<![^/\s,]){re.escape(old_filename_with_ext)}(?![^\s,?#])',
        re.IGNORECASE,
    )
    # Each pattern captures: 1 = prefix, 2 = quote (may be empty for url()),
    # 3 = the value to search, 4 = closing text. The back-reference \2 makes
    # the value stop at the *matching* quote, so a match can never run across
    # neighbouring attributes.
    value_patterns = (
        re.compile(r'((?:src|srcset|href)\s*=\s*)(["\'])(.*?)(\2)', re.IGNORECASE),
        re.compile(r'(url\(\s*)(["\']?)(.*?)(\2\s*\))', re.IGNORECASE),
    )

    num_replacements = 0

    def _rewrite_value(match):
        """Replace the file name inside one captured attribute/url() value."""
        nonlocal num_replacements
        # A function replacement keeps backslashes in the new name literal.
        new_value, hits = filename_re.subn(lambda _m: new_filename_with_ext, match.group(3))
        if not hits:
            return match.group(0)
        num_replacements += hits
        return f"{match.group(1)}{match.group(2)}{new_value}{match.group(4)}"

    content = original_content
    for pattern in value_patterns:
        content = pattern.sub(_rewrite_value, content)

    if num_replacements == 0:
        return False

    print(f" Zamenjano {num_replacements} referenc za '{old_filename_with_ext}' z '{new_filename_with_ext}' v '{html_file_path}'.")

    if dry_run:
        print(f" DRY RUN: HTML datoteka '{html_file_path}' bi bila posodobljena (reference za '{old_filename_with_ext}' z '{new_filename_with_ext}').")
        return True

    backup_path = html_file_path + ".bak"
    # This function runs once per image for the same HTML file. Overwriting
    # the backup each time would leave a copy that already contains earlier
    # replacements, so an existing backup is kept as the pristine original.
    if not os.path.exists(backup_path):
        try:
            shutil.copy2(html_file_path, backup_path)
        except OSError as e:
            print(f" Napaka pri ustvarjanju varnostne kopije za '{html_file_path}': {e}")
            return False

    try:
        with open(html_file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, UnicodeError) as e:
        print(f" Napaka pri zapisovanju v HTML datoteko '{html_file_path}': {e}")
        return False
    return True
def find_html_files(root_dir):
    """Collect every file ending in ``.html`` anywhere below ``root_dir``.

    The directory tree is walked recursively with ``os.walk``. Each returned
    path is the containing directory joined with the file name, so the paths
    are absolute exactly when ``root_dir`` is. The suffix check is
    case-sensitive. A missing ``root_dir`` yields an empty list.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(root_dir)
        for entry in entries
        if entry.endswith(".html")
    ]
# --- Main function ---
def main():
    """Command-line entry point: convert images to WebP, then fix HTML links.

    Steps:
      1. Parse ``--dry-run``.
      2. Walk the images folder (non-recursively). Every file with a
         configured extension that is not on the exclusion list is converted
         to ``<name>.webp`` next to it, and the original is moved into the
         "original" sub-folder. If a WebP that is newer than the original
         already exists, conversion is skipped but the pair is still
         remembered for step 3.
      3. For every HTML file under the configured language folders, replace
         references to each remembered original with its WebP name.

    With ``--dry-run`` nothing is created, written or moved; the same
    progress messages are printed with a DRY RUN marker.
    """
    cli = argparse.ArgumentParser(description="Skripta za optimizacijo slik (pretvorba v WebP) in posodobitev HTML referenc.")
    cli.add_argument("--dry-run", action="store_true", help="Izvede operacije, vendar ne spreminja datotek in ne premika originalnih slik.")
    args = cli.parse_args()
    dry_run = args.dry_run

    images_dir = os.path.join(PROJECT_ROOT, IMAGES_DIR_NAME)
    originals_dir = os.path.join(images_dir, ORIGINAL_IMAGES_SUBDIR_NAME)

    if not os.path.isdir(images_dir):
        print(f"Napaka: Mapa s slikami '{images_dir}' ne obstaja. Preverite konfiguracijo.")
        return

    if dry_run:
        print(f"DRY RUN: Mapa za originalne slike bi bila '{originals_dir}'.")
    else:
        # Make sure the folder for the untouched originals exists.
        os.makedirs(originals_dir, exist_ok=True)
        print(f"Mapa za originalne slike: '{originals_dir}' (ustvarjena, če ni obstajala).")

    # One dict per image whose references must be rewritten later.
    converted = []

    def _remember(old_name, new_name, old_path, new_path):
        """Queue one original/WebP pair for the HTML update phase."""
        converted.append({
            'old_name': old_name,
            'new_name': new_name,
            'old_full_path': old_path,
            'new_full_path': new_path,
        })

    print("\n--- ZAČETEK OPTIMIZACIJE SLIK ---")
    for filename in os.listdir(images_dir):
        # The folder holding the originals is never processed.
        if filename == ORIGINAL_IMAGES_SUBDIR_NAME:
            continue

        source_path = os.path.join(images_dir, filename)
        if not os.path.isfile(source_path):
            print(f" Preskakujem (ni datoteka): '{filename}'.")
            continue

        if filename in EXCLUDE_FILES_FROM_OPTIMIZATION:
            print(f" Preskakujem izključeno datoteko: '{filename}'.")
            continue

        stem, suffix = os.path.splitext(filename)
        if suffix.lower() not in IMAGE_EXTENSIONS_TO_OPTIMIZE:
            print(f" Preskakujem datoteko (ni slika za optimizacijo): '{filename}'.")
            continue

        webp_name = f"{stem}.webp"
        webp_path = os.path.join(images_dir, webp_name)

        # An existing WebP that is newer than the original is up to date:
        # skip the conversion but still rewrite references to it.
        if os.path.exists(webp_path) and os.path.getmtime(webp_path) > os.path.getmtime(source_path):
            print(f" WebP različica '{webp_name}' že obstaja in je novejša od originala. Preskakujem optimizacijo.")
            _remember(filename, webp_name, source_path, webp_path)
            continue

        if not optimize_image(source_path, webp_path, WEBP_QUALITY, MAX_IMAGE_DIMENSION, dry_run):
            continue
        _remember(filename, webp_name, source_path, webp_path)

        if dry_run:
            print(f" DRY RUN: Originalna slika '{filename}' bi bila premaknjena v '{ORIGINAL_IMAGES_SUBDIR_NAME}/'.")
            continue

        # Move the original out of the way; a failure is reported but does
        # not stop the run.
        try:
            shutil.move(source_path, os.path.join(originals_dir, filename))
            print(f" Originalna slika premaknjena: '{filename}' -> '{ORIGINAL_IMAGES_SUBDIR_NAME}/'.")
        except Exception as e:
            print(f" Napaka pri premikanju originalne slike '{filename}': {e}")

    if not converted:
        print("\nNi slik za optimizacijo ali pa so že optimizirane. Preskakujem posodabljanje HTML referenc.")
        print("\n--- OPTIMIZACIJA SLIK ZAKLJUČENA ---")
        return

    print("\n--- ZAČETEK POSODABLJANJA HTML REFERENC ---")
    html_files = []
    for lang in HTML_ROOT_DIRS:
        lang_dir = os.path.join(PROJECT_ROOT, lang)
        if not os.path.isdir(lang_dir):
            print(f"Opozorilo: Jezikovna mapa '{lang_dir}' ne obstaja. Preskakujem iskanje HTML datotek v njej.")
            continue
        html_files.extend(find_html_files(lang_dir))

    if not html_files:
        print("Ni najdenih HTML datotek za posodobitev referenc.")
        print("\n--- OPTIMIZACIJA SLIK ZAKLJUČENA ---")
        return

    touched_files = 0
    for html_file in html_files:
        # Build the full list first so every image is processed; any() on a
        # generator would stop at the first hit.
        outcomes = [
            update_html_references(html_file, entry['old_name'], entry['new_name'], dry_run)
            for entry in converted
        ]
        if any(outcomes):
            touched_files += 1

    print(f"\nPosodobljenih je bilo {touched_files} HTML datotek (vključno z DRY RUN).")
    print("\n--- OPTIMIZACIJA SLIK ZAKLJUČENA ---")


if __name__ == "__main__":
    main()