???????????? grep?? bzip2?? wc?? awk?? sed???????????????????????CPU?????????β????????Щ????
　　要想让Linux命令使用所有的CPU内核，我们需要用到GNU Parallel命令，它让我们所有的CPU内核在单机内做神奇的map-reduce操作，当然，这还要借助很少用到的 --pipe 参数(也叫做 --spreadstdin)。这样，你的负载就会平均分配到各CPU上，真的。
????BZIP2
　　bzip2是比gzip更好的压缩工具，但它很慢!别折腾了，我们有办法解决这问题。
???????????????
????cat bigfile.bin | bzip2 --best > compressedfile.bz2
??????????????
????cat bigfile.bin | parallel --pipe --recend '' -k bzip2 --best > compressedfile.bz2
　　尤其是针对bzip2，GNU parallel在多核CPU上是超级的快。你一不留神，它就执行完成了。
????GREP
???????????????????????????????????????????
????grep pattern bigfile.txt
???????????????????
????cat bigfile.txt | parallel --pipe grep 'pattern'
??????????????
????cat bigfile.txt | parallel --block 10M --pipe grep 'pattern'
　　这第二种用法使用了 --block 10M 参数，这是说每次分派给一个grep进程的数据块大小约为10MB(--block 的单位是字节，不是行数，默认值为1M)——你可以用这个参数来调整每个CPU内核每次处理多少数据。
????AWK
???????????????awk??????????????????????????????
?????????÷???
????cat rands20M.txt | awk '{s+=$1} END {print s}'
??????????????
　　cat rands20M.txt | parallel --pipe awk \'{s+=\$1} END {print s}\' | awk '{s+=$1} END {print s}'
　　这个有点复杂：parallel命令中的 --pipe 参数将cat输出分成多个块分派给awk调用，形成了很多子计算操作。这些子计算经过第二个管道进入了同一个awk命令，从而输出最终结果。第一个awk有三个反斜杠(\' 、\$ 、\')，这是GNU parallel调用awk的需要：parallel会把命令再交给一层shell执行，不转义的话引号和 $1 会被提前解释掉。
????WC
???????????????????????????????
?????????????
????wc -l bigfile.txt
???????????????????
????cat bigfile.txt | parallel --pipe wc -l | awk '{s+=$1} END {print s}'
　　非常的巧妙，先使用parallel命令‘mapping’出大量的wc -l调用，形成子计算，最后通过管道发送给awk进行汇总。
????SED
???????????????????????sed?????????????滻???????
??????????????
????sed s^old^new^g bigfile.txt
??????????????
????cat bigfile.txt | parallel --pipe sed s^old^new^g
　　…然后你可以使用管道把输出存储到指定的文件里。