109 lines
3.2 KiB
Bash
Executable File
109 lines
3.2 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Copyright 2017 The Kubernetes Authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# This script will scan all md (markdown) files for bad references.
|
|
# It will look for strings of the form [...](...) and make sure that
|
|
# the (...) points to either a valid file in the source tree or, in the
|
|
# case of it being an http url, it'll make sure we don't get a 404.
|
|
#
|
|
# Usage: verify-links.sh [ dir | file ... ]
|
|
# default arg is root of our source tree
|
|
|
|
set -o errexit
|
|
set -o nounset
|
|
set -o pipefail
|
|
|
|
REPO_ROOT=$(dirname "${BASH_SOURCE}")/..
|
|
|
|
if [ "$*" != "" ]; then
|
|
args="$*"
|
|
else
|
|
args="${REPO_ROOT}"
|
|
fi
|
|
|
|
mdFiles=$(find "${args}" -name "*.md" | grep -v vendor | grep -v glide)
|
|
|
|
tmp=/tmp/out${RANDOM}
|
|
|
|
rm -f /tmp/$tmp*
|
|
for file in ${mdFiles}; do
|
|
# echo scanning $file
|
|
dir=$(dirname $file)
|
|
|
|
# Replace ) with )\n so that each possible href is on its own line.
|
|
# Then only grab lines that have [..](..) in them - put results in tmp file.
|
|
# If the file doesn't have any lines with [..](..) then skip this file
|
|
sed "s/)/)\n/g" < $file | grep "\[.*\](.*)" > ${tmp}1 || continue
|
|
|
|
# This sed will extract the href portion of the [..](..) - meaning
|
|
# the stuff in the parens.
|
|
sed "s/.*\(\[[^\[\]*\]([^()]*)\)/\1/" < ${tmp}1 > ${tmp}2 || continue
|
|
|
|
# Extract all headings/anchors.
|
|
# And strip off the leading #'s and leading/trailing blanks
|
|
grep "^ *#" < $file | sed "s/ *#* *\(.*\) *$/\1/" > ${tmp}anchors
|
|
|
|
# Now convert the header to what the anchor will look like.
|
|
# - lower case stuff
|
|
# - convert spaces to -'s
|
|
# - remove punctuation marks (only accept 0-9, a-z
|
|
cat ${tmp}anchors | \
|
|
tr '[:upper:]' '[:lower:]' | \
|
|
sed "s/ /-/g" | \
|
|
sed "s/[^-a-zA-Z0-9]//g" > ${tmp}anchors1
|
|
|
|
cat ${tmp}2 | while read line ; do
|
|
# Strip off the leading and trailing parens
|
|
ref=${line#*(}
|
|
ref=${ref%)*}
|
|
|
|
# An external href (ie. starts with http)
|
|
if [ "${ref:0:4}" == "http" ]; then
|
|
if ! curl --connect-timeout 10 -o /dev/null ${ref} > /dev/null 2>&1 ; then
|
|
echo $file: Can\'t load: url ${ref} | tee -a ${tmp}3
|
|
fi
|
|
continue
|
|
fi
|
|
|
|
# Local file href - skip for now.
|
|
# TODO add support for checking these
|
|
if [ "${ref:0:1}" == "#" ]; then
|
|
ref=${ref:1}
|
|
if ! grep "^$ref$" ${tmp}anchors1 > /dev/null 2>&1 ; then
|
|
echo $file: Can\'t find anchor \'\#${ref}\' | tee -a ${tmp}3
|
|
fi
|
|
continue
|
|
fi
|
|
|
|
# Remove everything after # (aka section of page)
|
|
ref=${ref%#*}
|
|
newPath=${dir}/${ref}
|
|
|
|
# And finally make sure the file is there
|
|
# debug line: echo ref: $ref "->" $newPath
|
|
if ! ls "${newPath}" > /dev/null 2>&1 ; then
|
|
echo $file: Can\'t find: ${newPath} | tee -a ${tmp}3
|
|
failed=true
|
|
fi
|
|
done
|
|
done
|
|
rc=0
|
|
if [ -a ${tmp}3 ]; then
|
|
rc=1
|
|
fi
|
|
rm -f ${tmp}*
|
|
exit $rc
|