# Gists bash


#!/bin/bash
# Absolute path of the directory containing this script.
# NOTE(review): not referenced anywhere else in the visible part of the file.
BASHSCRIPTDIR=$(realpath "$(dirname "$0")")
# Directory the generated JSON is written to. It is a relative path, so it is
# resolved against the working directory the script is started from.
OUTDIR='OUTDIR'

#######################################
# Print the boxed usage/help banner.
# Arguments: none
# Outputs:   writes the 11-line banner to stdout
#######################################
displayusage() {
  # The same separator line is used three times inside the box.
  local border=" ================================================================================= "

  printf '%s\n' \
    "$border" \
    "|    Usage:                                                                       |" \
    "| extract_translations.sh OPTS FILENAME                                           |" \
    "|    available options [OPTS]:                                                    |" \
    "| -a) --data-attribute) the data-attribute to filter  [default=local]             |" \
    "| -h) --help)           print this help                                           |" \
    "| -v) --verbose)        verbose output                                            |" \
    "| [no arguments]        DEFAULT                                                   |" \
    "$border" \
    "| Version 1.0  2025/01/25                                                         |" \
    "$border"
}

# getopts only understands short options, so translate each supported long
# option into its short equivalent. All other arguments pass through
# unchanged and the original argument order is preserved.
normalized_args=()
for arg in "$@"; do
  case "$arg" in
    --data-attribute) normalized_args+=("-a") ;;
    --help)           normalized_args+=("-h") ;;
    --verbose)        normalized_args+=("-v") ;;
    *)                normalized_args+=("$arg") ;;
  esac
done
# Replace the positional parameters with the translated list.
set -- "${normalized_args[@]}"
unset normalized_args

# Parse the short options:
#   -a VALUE  sets ATTRIBUTE
#   -v        sets VERBOSE to "TRUE"
#   -h, -?    print the usage banner and exit with status 0
# NOTE(review): getopts also reports an unknown option as "?", so invalid
# options take the same arm and exit with status 0 as well.
OPTIND=1
while getopts "a:h?v" opt; do
  case "$opt" in
    a)
      ATTRIBUTE="$OPTARG"
      ;;
    v)
      VERBOSE="TRUE"
      ;;
    h | \?)
      displayusage
      exit 0
      ;;
  esac
done

# Drop the parsed options so that $1 is the first operand.
shift "$(( OPTIND - 1 ))"

# Use the default data-attribute name unless -a/--data-attribute supplied one.
if [ -z "${ATTRIBUTE:-}" ]; then
  ATTRIBUTE="local"
fi

# FILENAME is a mandatory operand. Fail early with a clear message instead of
# handing an empty path to realpath.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "Error: no input file given." >&2
  displayusage >&2
  exit 1
fi

# Resolve the input to an absolute path. If realpath itself fails, keep the
# argument as typed so the error message below still names the file.
FILENAME="$(realpath -- "$1" 2>/dev/null)" || FILENAME="$1"

if [ ! -f "$FILENAME" ]; then
  # Diagnostics go to stderr so they never mix with regular output.
  echo "Error: File ${FILENAME} does not exist." >&2
  exit 1
fi

# Output file: <OUTDIR>/<input basename without its last extension>.json
FILENAMEOUT="${OUTDIR}/$(basename "$FILENAME" ."${FILENAME##*.}").json"

if [ "${VERBOSE:-}" == "TRUE" ]; then
  echo "Processing file ${FILENAME}."
fi

#######################################
# Print the values of every data-<ATTRIBUTE>-<suffix> attribute in FILENAME,
# one value per line.
# Globals:   ATTRIBUTE (read), FILENAME (read)
# Arguments: $1 - attribute suffix ("text" or "link")
# Outputs:   attribute values on stdout
#######################################
extract_attribute_values() {
  local attr_name="data-${ATTRIBUTE}-$1"

  # xmllint prints the matches as  name="value"  pairs; grep isolates each
  # pair and cut keeps what is between the first pair of double quotes.
  # xmllint's stderr is discarded.
  xmllint --html --xpath "//*[@${attr_name}]/@${attr_name}" "$FILENAME" 2>/dev/null \
    | grep -o "${attr_name}=\"[^\"]*\"" \
    | cut -d'"' -f2
}

# Parse the file with xmllint to extract text attributes
TEXT_ENTRIES=$(extract_attribute_values text)
# Parse the file with xmllint to extract link attributes
LINK_ENTRIES=$(extract_attribute_values link)

# Build JSON for text: one {"<entry>": "<ENTRY IN UPPER CASE>"} pair per
# extracted value, merged into a single object.
#  - printf instead of echo, so an entry such as "-n" is not taken as an
#    echo option.
#  - select(length > 0) drops the blank line that is emitted when nothing was
#    extracted; otherwise it would become an empty-string key.
#  - "add // {}" yields an empty object instead of null when no entries remain.
TEXT_JSON=$(printf '%s\n' "$TEXT_ENTRIES" \
  | jq -Rn '[inputs | select(length > 0) | {(.): ascii_upcase}] | add // {}')

# Build JSON for links: {"<entry>": {"href": "/", "title": "<ENTRY IN UPPER CASE>"}}.
# jq does the string escaping, so entries containing backslashes or other
# JSON-special characters still produce valid JSON. Entries are used
# verbatim (no whitespace trimming) and upper-cased with ascii_upcase, the
# same way as the text entries above.
LINK_JSON=$(printf '%s\n' "$LINK_ENTRIES" \
  | jq -Rn '[inputs | select(length > 0) | {(.): {href: "/", title: ascii_upcase}}] | add // {}')

# Combine text and links JSON into the final document.
# The document is assembled in memory first, so a jq failure (for example
# invalid TEXT_JSON/LINK_JSON) can never leave a truncated output file behind.
if ! FINAL_JSON=$(jq -n --argjson text "$TEXT_JSON" --argjson links "$LINK_JSON" \
    '{text: $text, links: $links}'); then
  echo "Error: could not assemble JSON for ${FILENAMEOUT}." >&2
  exit 1
fi

# The output directory is not created anywhere else, so make sure it exists
# before redirecting into it.
if ! mkdir -p -- "$(dirname -- "$FILENAMEOUT")"; then
  echo "Error: could not create directory for ${FILENAMEOUT}." >&2
  exit 1
fi

if ! printf '%s\n' "$FINAL_JSON" > "$FILENAMEOUT"; then
  echo "Error: could not write ${FILENAMEOUT}." >&2
  exit 1
fi

# Final sanity check: jq exits non-zero if the written file is not valid JSON.
jq empty "$FILENAMEOUT"