#-------------------------------------------------------------------------------------------------------
# Copyright (C) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
#-------------------------------------------------------------------------------------------------------

# Usage: check_ascii.sh <file>
#
# Checks <file> for bytes outside the accepted range and, when any are found,
# appends a report to $ERRFILE (relative to the current directory).
# The script itself always finishes with exit status 0; the presence of
# $ERRFILE is what signals that a problem was found.

ERRFILE=check_ascii.sh.err

# Bytes outside 0x09-0x7E are reported. The lower bound is x09 (tab) because
# tabs are used in pal sources, which are not excluded from this check. The
# upper bound leaves out x7F (DEL), the unprintable control character at the
# end of the ASCII range. Note that 0x0A-0x1F also fall inside the accepted range.
NON_ASCII_PATTERN='[^\x09-\x7E]'

# check_ascii FILE
#   FILE     path of the file to inspect
#   Outputs: progress/warning lines on stdout, grep failures on stderr,
#            and the error report appended to $ERRFILE
#   Returns: 0 on every path
check_ascii() {
  local file="$1"
  local match_count
  local grep_status

  if [ -z "$file" ]; then
    # Without this guard grep would be run with no file operand and read stdin.
    echo "WARNING: no file specified"
    return 0
  fi

  # display a helpful message for someone reading the log
  echo "Check ascii > Checking $file"

  if [ ! -e "$file" ]; then # the file wasn't present; not necessarily an error
    echo "WARNING: file not found: $file"
    return 0 # don't report an error but don't run the rest of the check
  fi

  # LC_ALL (rather than LC_CTYPE) is set because LC_ALL takes precedence over
  # every other LC_* variable; this guarantees byte-wise matching even when the
  # caller's environment exports LC_ALL.
  # grep -c yields the number of text lines that contain at least one
  # offending byte, and its exit status tells us whether there were any.
  match_count=$(LC_ALL=C grep -c -P -e "$NON_ASCII_PATTERN" -- "$file")
  grep_status=$?

  case $grep_status in
    0) ;;           # at least one matching line: report it below
    1) return 0 ;;  # no matches: the file is clean
    *)
      # grep itself failed (unreadable file, no -P support, ...); say so
      # instead of silently treating the file as clean.
      echo "WARNING: grep failed (exit status $grep_status) while checking $file" >&2
      return 0
      ;;
  esac

  {
    echo "ERROR: non-ascii characters were introduced in $file"

    # Display a hexdump sample of the lines with non-ascii characters in them.
    # Don't pollute the log with every single matching line: only the first
    # 10 rows of the hexdump are kept.
    echo "Displaying first 10 lines of text where non-ascii characters were found:"
    LC_ALL=C grep -n -P -e "$NON_ASCII_PATTERN" -- "$file" | xxd -g 1 | head -n 10

    # To help the user, display how many lines of text actually contained non-ascii characters.
    echo "Total lines containing non-ascii: $match_count"
    echo "--------------" # same length as '--- ERRORS ---'
  } >> "$ERRFILE"

  return 0
}

check_ascii "$@"