d55c2d7a2c
* many files had no license comments at all * some files had license comments suggesting that they weren't open source (like a terse "all rights reserved") when they actually are open source. * all our files are licensed under apache and should be marked as such
153 lines
4.0 KiB
Bash
Executable File
153 lines
4.0 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Copyright 2012 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# This script reads a list of URLs from the provided file, and
|
|
# fetches them in parallel from a local slurping proxy in a randomized
|
|
# order. Loading times and statuses for them are then output to
|
|
# /tmp/latency-(encoding of settings).txt
|
|
|
|
# Tunables. Each one may be preset in the environment; an unset or empty
# value falls back to the default shown here.

# Number of fetches to do in parallel.
PAR=${PAR:-10}

# Number of times to run.
RUNS=${RUNS:-3}

# How many times to repeat each trace without restarting the workers.
EXP=${EXP:-3}

# Proxy machine. If you specify this, make sure to give an IP address,
# as doing DNS lookups for it can slow things down a lot.
PROXY_HOST=${PROXY_HOST:-127.0.0.1}

# ... and port.
PROXY_PORT=${PROXY_PORT:-8080}

# Extra flags to pass to fetch_all.py.
FLAGS=${FLAGS:-}

# Expands to "--user_agent" only when USER_AGENT is set and non-empty, so the
# flag is omitted entirely when no user agent was requested.
USER_AGENT_FLAG=${USER_AGENT:+--user_agent}
|
|
|
|
# At least one URL file is required; otherwise print usage and exit with
# status 2.
if [ $# -lt 1 ]; then
  echo "Usage: devel/trace_stress_test.sh urls_file ..."
  echo "Shuffles each urls_file in turn, runs through shuffled file using"
  # The repeat count lives in RUNS (RUN is only the loop counter and is
  # still unset at this point).
  echo "$PAR parallel wget jobs. Repeats this process $RUNS times."
  exit 2
fi
|
|
|
|
# Directory containing this script; helper programs are run relative to it.
OUR_PATH=$(dirname "$0")

# Minute-resolution timestamp that makes the report name unique per run.
STAMP=$(date +%Y%m%d-%H%M)

# The report name encodes the settings used for this run.
LATENCY_REPORT=/tmp/latency-$PROXY_HOST-R$RUNS-P$PAR-E$EXP-$STAMP.txt

# Scratch file used by tail_head. Created with mktemp so the name is not
# predictable from the process id.
TAIL_HEAD_TEMP=$(mktemp "${TMPDIR:-/tmp}/tail_head.XXXXXX") || {
  echo "Unable to create a temporary file" >&2
  exit 1
}

# Start the report with its column header.
echo "time status url" > "$LATENCY_REPORT"
|
|
|
|
# Copies a run of consecutive lines from one file into another.
#
# Globals:
#   TAIL_HEAD_TEMP (read) - path of a scratch file; its contents are
#                           overwritten on every call.
# Arguments:
#   $1 - input file
#   $2 - line number (as understood by "tail -n +N") of the first line to copy
#   $3 - number of lines to copy (passed to "head -n")
#   $4 - output file; overwritten
function tail_head {
  local input_file=$1
  local start_pos=$2
  local num_lines=$3
  local outfile=$4

  # We make a temp file because otherwise we (at least Josh) get a lot of
  # "tail: write error" printed out.
  tail -n "+$start_pos" "$input_file" > "$TAIL_HEAD_TEMP"
  head -n "$num_lines" "$TAIL_HEAD_TEMP" > "$outfile"
}
|
|
|
|
# Runs one pass over a single URL file.
#
# The file is shuffled EXP times into one combined list, the list is cut into
# PAR consecutive pieces, and each piece is handed to its own background
# fetch_all.py process. When all of them have finished, their output is
# appended to the latency report and the temporary files are removed.
#
# Globals (all read):
#   EXP, PAR, OUR_PATH, FLAGS, USER_AGENT, PROXY_HOST, PROXY_PORT,
#   LATENCY_REPORT
# Arguments:
#   $1 - file of URLs, one per line
# Returns:
#   1 if PAR is not a positive number or a temp file cannot be created.
function single_run {
  local file=$1
  local shuf_file piece log
  local total chunk cur_chunk pos i
  local -a pieces=() logs=()

  if (( PAR < 1 )); then
    echo "single_run: PAR must be a positive integer (got '$PAR')" >&2
    return 1
  fi

  # Shuffle the log EXP times into one combined list.
  shuf_file=$(mktemp) || return 1
  for (( i = 0; i < EXP; i++ )); do
    shuf "$file" >> "$shuf_file"
  done
  # Reading from stdin makes wc print only the number, with no file name.
  total=$(wc -l < "$shuf_file")

  # Setting chunk size slightly too large balances load a little better, most
  # obvious when the number of lines is smaller than $PAR.
  chunk=$(( 1 + total / PAR ))

  # Feed each chunk to a separate fetcher. "tail -n +N" counts lines from 1,
  # so the first piece must start at line 1 for the pieces to tile the list
  # without overlapping or dropping the final line.
  pos=1
  for (( i = 1; i <= PAR; i++ )); do
    cur_chunk=$chunk
    if (( i == PAR )); then
      # The last worker takes whatever is left, which may be fewer than
      # $chunk lines or nothing at all.
      cur_chunk=$(( total - pos + 1 ))
      if (( cur_chunk < 0 )); then
        cur_chunk=0
      fi
    fi

    piece=$(mktemp) || return 1
    log=$(mktemp) || return 1
    pieces+=("$piece")
    logs+=("$log")

    tail_head "$shuf_file" "$pos" "$cur_chunk" "$piece"

    # $FLAGS is intentionally unquoted so that it can carry several flags.
    # The user agent is passed as a single argument even when it contains
    # spaces, and is omitted completely when USER_AGENT is empty.
    # shellcheck disable=SC2086
    "$OUR_PATH/fetch_all.py" $FLAGS \
      ${USER_AGENT:+--user_agent "$USER_AGENT"} \
      --proxy_host "$PROXY_HOST" --proxy_port "$PROXY_PORT" \
      --urls_file "$piece" &> "$log" &

    pos=$(( pos + chunk ))
  done

  # Wait for all to finish.
  wait

  # Append the workers' output to the report.
  cat "${logs[@]}" >> "$LATENCY_REPORT"

  # Clean up.
  rm -f -- "${pieces[@]}" "${logs[@]}" "$shuf_file"
}
|
|
|
|
# Run the whole workload RUNS times over every file named on the command
# line, timing it with bash's SECONDS counter.
START=$SECONDS

for RUN in $(seq 1 "$RUNS"); do
  echo "Run $RUN"
  for FILE in "$@"; do
    echo "File $FILE"
    single_run "$FILE"
  done
  echo "----------------------------------------------------------------------"
done

STOP=$SECONDS

# Count the report lines that follow the header line.
LINES=$(tail -n +2 "$LATENCY_REPORT" | wc -l)
ELAPSED=$(( STOP - START ))

# SECONDS has whole-second resolution, so a quick run can measure 0 seconds.
# Treat that as one second rather than dividing by zero.
if (( ELAPSED > 0 )); then
  QPS=$(( LINES / ELAPSED ))
else
  QPS=$LINES
fi
echo "QPS estimate (inaccurate for short runs): $QPS requests/sec"
echo
"$OUR_PATH/trace_stress_test_percentiles.sh" "$LATENCY_REPORT" | cut -c 1-80
echo
echo "10 worst latencies:"
# NOTE(review): the -sorted.txt file is presumably written by
# trace_stress_test_percentiles.sh above; confirm against that script.
head -n 10 "${LATENCY_REPORT%.txt}-sorted.txt"
echo
echo "Status statistics:"
# The status is the second space-separated column of the report.
tail -n +2 "$LATENCY_REPORT" | cut -d ' ' -f 2 | sort | uniq -c
echo "Full latency report in: $LATENCY_REPORT"

rm -f -- "$TAIL_HEAD_TEMP"
|
|
|