glocate

faster locate using grep
git clone https://a3nm.net/git/glocate/
Log | Files | Refs | README

commit c42507b2600b1b75b7905eaa8a0a5a14d39ace36
parent 24b232e0df4bcf8ce875f36c1ca6fc4cb268f528
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 10 Mar 2015 03:07:19 +0100

continue

Diffstat:
.gitignore | 2++
bench/bench.sh | 17+++++++++++++++++
bench/glocate | 10++++++++++
bench/glocate.gzip | 10++++++++++
bench/glocate.lz4 | 10++++++++++
bench/glocate.pigz | 10++++++++++
bench/read.sh | 10++++++++++
bench/write.sh | 4++++
glocate | 2+-
old/glocate.gzip | 12------------
old/glocate.lz4 | 12------------
old/glocate.pigz | 12------------
old/read.sh | 10----------
old/write.sh | 4----
14 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 test/*
 workload
+bench/workload
+bench/files*
diff --git a/bench/bench.sh b/bench/bench.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+WORKLOAD="$1"
+
+set -e
+
+for a in 1 2 3; do time sed 's/^/locate /' "$WORKLOAD" | sh > /dev/null; done
+for a in 1 2 3; do time sed 's/^/locate.findutils /' "$WORKLOAD" | sh > /dev/null; done
+for a in 1 2 3; do time sed 's/^/grep /;s/$/ files/' "$WORKLOAD" | sh > /dev/null; done
+for a in 1 2 3; do time sed 's/^/LC_ALL=C grep /;s/$/ files/' "$WORKLOAD" | sh > /dev/null; done
+
+for s in glocate glocate.gzip glocate.pigz glocate.lz4; do
+  for a in 1 2 3; do
+    time sed 's/^/.\/'$s' /;' "$WORKLOAD" | sh > /dev/null;
+  done
+done
+
diff --git a/bench/glocate b/bench/glocate
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+FILE="files"
+
+# http://stackoverflow.com/a/10836225
+ARGS=$(printf " %q" "$@")
+
+# grep is slower with other locales, especially -i, so LC_ALL
+ls ${FILE}.? | parallel 'sh -c "LC_ALL=C grep '$ARGS' {}"'
+
diff --git a/bench/glocate.gzip b/bench/glocate.gzip
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+FILE="files"
+
+# http://stackoverflow.com/a/10836225
+ARGS=$(printf " %q" "$@")
+
+# grep is slower with other locales, especially -i, so LC_ALL
+ls ${FILE}.?.gz | parallel 'gzip -dc {} | sh -c "LC_ALL=C grep '$ARGS'"'
+
diff --git a/bench/glocate.lz4 b/bench/glocate.lz4
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+FILE="files"
+
+# http://stackoverflow.com/a/10836225
+ARGS=$(printf " %q" "$@")
+
+# grep is slower with other locales, especially -i, so LC_ALL
+ls ${FILE}.?.lz4 | parallel 'lz4 -dc {} | sh -c "LC_ALL=C grep '$ARGS'"'
+
diff --git a/bench/glocate.pigz b/bench/glocate.pigz
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+FILE="files"
+
+# http://stackoverflow.com/a/10836225
+ARGS=$(printf " %q" "$@")
+
+# grep is slower with other locales, especially -i, so LC_ALL
+ls ${FILE}.?.gz | parallel 'pigz -dc {} | sh -c "LC_ALL=C grep '$ARGS'"'
+
diff --git a/bench/read.sh b/bench/read.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+
+#
+#FILE="${@: -1}"
+#set -- "${@:1:$(($#-1))}"
+#
+##lz4 -dck $FILE | LC_ALL=C grep "$@" > /dev/null
+#LC_ALL=C grep --line-buffered "$@" "$FILE"
+
diff --git a/bench/write.sh b/bench/write.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+lz4 2>/dev/null > "$FILE"
+
diff --git a/glocate b/glocate
@@ -7,6 +7,6 @@ ARGS=$(printf " %q" "$@")
 
 # grep is slower with other locales, especially -i, so LC_ALL
 # use --line-buffered so it's easier to grep the output of this
-parallel -i \
+/usr/bin/parallel -i \
   bash -c "LC_ALL=C grep $ARGS {}" -- ${FILE}.?
 
diff --git a/old/glocate.gzip b/old/glocate.gzip
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-FILE="/home/files"
-
-# http://stackoverflow.com/a/10836225
-ARGS=$(printf " %q" "$@")
-
-# grep is slower with other locales, especially -i, so LC_ALL
-# use --line-buffered so it's easier to grep the output of this
-parallel -i \
-  bash -c "gunzip -dc {} | LC_ALL=C grep $ARGS" -- ${FILE}.?.gz
-
diff --git a/old/glocate.lz4 b/old/glocate.lz4
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-FILE="/home/files"
-
-# http://stackoverflow.com/a/10836225
-ARGS=$(printf " %q" "$@")
-
-# grep is slower with other locales, especially -i, so LC_ALL
-# use --line-buffered so it's easier to grep the output of this
-parallel -i \
-  bash -c "lz4 -dc {} | LC_ALL=C grep $ARGS" -- ${FILE}.?.lz4
-
diff --git a/old/glocate.pigz b/old/glocate.pigz
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-FILE="/home/files"
-
-# http://stackoverflow.com/a/10836225
-ARGS=$(printf " %q" "$@")
-
-# grep is slower with other locales, especially -i, so LC_ALL
-# use --line-buffered so it's easier to grep the output of this
-parallel -i \
-  bash -c "pigz -dc {} | LC_ALL=C grep $ARGS" -- ${FILE}.?.gz
-
diff --git a/old/read.sh b/old/read.sh
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-
-#
-#FILE="${@: -1}"
-#set -- "${@:1:$(($#-1))}"
-#
-##lz4 -dck $FILE | LC_ALL=C grep "$@" > /dev/null
-#LC_ALL=C grep --line-buffered "$@" "$FILE"
-
diff --git a/old/write.sh b/old/write.sh
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-lz4 2>/dev/null > "$FILE"
-