#!/usr/bin/env bash

# DESCRIPTION:
#
#   Provides the content encoding the specified
#   resources are served with.
#
# USAGE:
#
#   httpcompression URL ...
#
# USEFUL LINKS:
#
#   * HTTP/1.1 (RFC 2616) - Content-Encoding
#     https://tools.ietf.org/html/rfc2616#section-14.11
#
#   * SDCH Specification:
#     https://lists.w3.org/Archives/Public/ietf-http-wg/2008JulSep/att-0441/Shared_Dictionary_Compression_over_HTTP.pdf

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

declare -r -a CURL_DEFAULT_OPTIONS=(
    --connect-timeout 30
    --header "Accept-Encoding: gzip, deflate, sdch"
    --header "Cache-Control: no-cache"  # Prevent intermediate proxies
                                        # from caching the response

    --location                          # If the page was moved to a
                                        # different location, redo the
                                        # request
    --max-time 150
    --show-error
    --silent
    --user-agent "Mozilla/5.0 Gecko"    # Send a fake UA string for sites
                                        # that sniff it instead of using
                                        # the Accept-Encoding header
)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Resolves a (possibly relative) URL reference against a base URL.
#
# Arguments:
#   $1 - base URL (the URL the reference was received from)
#   $2 - reference (absolute URL, `//host/path`, `/path`, `path`, or empty)
#
# Outputs:
#   The resolved URL on stdout (no trailing newline). An empty reference
#   yields the base URL unchanged. Dot segments (`../`) are not normalized.

resolve_url() {

    local -r base="$1"
    local -r ref="$2"
    local -r originPattern='^([^/]*//)?[^/]*'
    local origin=""
    local path=""
    local scheme=""

    if [[ -z "$ref" ]]; then
        printf '%s' "$base"
        return 0
    fi

    if [[ "$ref" == http* ]]; then
        printf '%s' "$ref"
        return 0
    fi

    # Split off `scheme://host` (the scheme part is optional, since
    # cURL also accepts URLs such as `example.com/path`)

    if [[ "$base" =~ $originPattern ]]; then
        origin="${BASH_REMATCH[0]}"
        scheme="${BASH_REMATCH[1]%//}"
    fi

    case "$ref" in
        //*)
            # Protocol-relative reference: reuse the scheme of the base
            printf '%s%s' "$scheme" "$ref"
            ;;
        /*)
            # Absolute path: only the origin of the base is kept
            printf '%s%s' "$origin" "$ref"
            ;;
        *)
            # Relative path: resolve against the "directory" of the base
            path="${base#"$origin"}"
            path="${path%%[?#]*}"
            printf '%s%s/%s' "$origin" "${path%/*}" "$ref"
            ;;
    esac

}

# Checks if the server advertises SDCH dictionaries and, if it does,
# repeats the request advertising those dictionaries.
#
# Arguments:
#   $1 - response headers received for the URL
#   $2 - the URL the headers were received from
#
# Outputs:
#   The headers of the repeated request on stdout if dictionaries were
#   advertised and the repeated request produced headers, otherwise the
#   headers passed in as `$1`.

check_for_sdch() {

    local availDicts=""
    local currentHeaders="$1"
    local dict=""
    local dictClientID=""
    local -a dictList=()
    local dicts=""
    local i=""
    local newHeaders=""
    local url="$2"

    # Check if the server advertised any dictionaries
    #
    # (everything after the first colon is kept so that absolute
    # dictionary URLs survive, and the trailing carriage return of
    # the header line is dropped)

    dicts="$(
        printf '%s' "$currentHeaders" \
            | grep -i '^Get-Dictionary:' \
            | cut -d':' -f2- \
            | tr -d '\r' \
            | tr ',\n' '  '
    )"

    # Split into an array without subjecting the values to globbing
    read -r -a dictList <<< "$dicts"

    if (( ${#dictList[@]} == 0 )); then
        printf '%s' "$currentHeaders"
        return 0
    fi

    # If it does, check to see if it really supports SDCH

    for i in "${dictList[@]}"; do

        # Check if the dictionary location is specified as a path,
        # and if it is, construct its URL from the referrer URL

        dict="$(resolve_url "$url" "$i")"

        # Request the dictionaries from the server and
        # construct the `Avail-Dictionary` header value
        #
        # [ The user agent identifier for a dictionary is defined
        #   as the URL-safe base64 encoding (as described in RFC
        #   3548, section 4 [RFC3548]) of the first 48 bits (bits
        #   0..47) of the dictionary's SHA-256 digest ]
        #
        # (48 bits are 8 base64 characters; every `+` and `/` in
        # them has to be translated, not just the first one)

        dictClientID="$(
            curl "${CURL_DEFAULT_OPTIONS[@]}" "$dict" \
                | openssl dgst -sha256 -binary \
                | openssl base64 \
                | cut -c 1-8 \
                | tr '/+' '_-'
        )"

        if [[ -n "$availDicts" ]]; then
            availDicts="$availDicts,$dictClientID"
        else
            availDicts="$dictClientID"
        fi

    done

    # Redo the request (advertising the available dictionaries)
    # and replace the old resulted headers with the new ones
    #
    # (the headers are printed as data, never as a `printf` format,
    # and the old headers are kept if the new request fails)

    if newHeaders="$(
            curl "${CURL_DEFAULT_OPTIONS[@]}" \
                -H "Avail-Dictionary: $availDicts" \
                --dump-header - \
                --output /dev/null \
                "$url"
        )" && [[ -n "$newHeaders" ]]; then
        printf '%s' "$newHeaders"
    else
        printf '%s' "$currentHeaders"
    fi

}

# Prints the content encoding used for the specified URL and for every
# URL in the chain of redirects it leads to.
#
# Arguments:
#   $1 - the URL to check
#
# Outputs:
#   One line per URL on stdout: the URL followed by `[encoding]` (or by
#   nothing if no `Content-Encoding` header was sent).
#
# Returns:
#   The exit status of cURL if the initial request fails, 0 otherwise.

get_content_encoding() {

    local currentHeaders=""
    local encoding=""
    local headers=""
    local indent=""
    local previousUrl=""
    local url="$1"

    headers="$(
        curl "${CURL_DEFAULT_OPTIONS[@]}" \
            --dump-header - \
            --output /dev/null \
            "$url"
    )" || return

    # Iterate over the headers of all redirects

    while [[ -n "$headers" ]]; do

        # Get headers for the "current" URL
        #
        # (the range ends on the status line of the next response, so
        # that line is consumed together with the current chunk)

        currentHeaders="$(printf '%s' "$headers" | sed -n '1,/^HTTP/p')"

        # An empty chunk cannot be removed from `headers`,
        # so bail out instead of looping forever

        [[ -n "$currentHeaders" ]] || break

        # Remove the headers for the "current" URL
        headers="${headers#"$currentHeaders"}"

        currentHeaders="$(check_for_sdch "$currentHeaders" "$url")"

        # Get the value of the `Content-Encoding` header
        # (several header lines are joined with commas)

        encoding="$(
            printf '%s' "$currentHeaders" \
                | grep -i '^Content-Encoding:' \
                | cut -d':' -f2- \
                | tr -d ' \t\r' \
                | paste -sd ',' -
        )"

        # Print the output for the "current" URL
        #
        # (URLs regularly contain `%`, so they must be
        # passed as arguments and not as part of the format)

        if [[ -n "$encoding" ]]; then
            encoding="[$encoding]"
        fi

        if [[ "$url" != "$1" ]]; then
            printf '%s%s %s\n' "$indent" "$url" "$encoding"
            indent=" $indent"
        else
            printf '\n * %s %s\n' "$1" "$encoding"
            indent=" ↳ "
        fi

        # Get the next URL from the series of redirects
        #
        # (header names are case-insensitive, and
        # `Content-Location` must not be picked up)

        previousUrl="$url"
        url="$(
            printf '%s' "$currentHeaders" \
                | grep -i '^Location:' \
                | head -n 1 \
                | cut -d':' -f2- \
                | tr -d '\r' \
                | sed 's/^[[:space:]]*//'
        )"

        # In case the `Location` header is specified as a path
        url="$(resolve_url "$previousUrl" "$url")"

    done

    return 0

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Reports the content encoding for every URL passed as an argument.
#
# Returns:
#   1 if cURL is not available, 0 otherwise.

main() {

    # Check if cURL is installed

    if ! command -v curl > /dev/null 2>&1; then
        printf 'cURL is required, please install it!\n' >&2
        return 1
    fi

    while (( $# > 0 )); do
        get_content_encoding "$1"
        shift
    done

    printf '\n'

}

main "$@"