Commit 31350b43 authored by Jakob Borg's avatar Jakob Borg

Use LevelDB storage backend

parent 4a88d124
......@@ -7,3 +7,4 @@ syncthing.exe
discosrv
.jshintrc
coverage.out
files/pidx
......@@ -32,6 +32,11 @@
"Comment": "null-81",
"Rev": "9cbe983aed9b0dfc73954433fead5e00866342ac"
},
{
"ImportPath": "code.google.com/p/snappy-go/snappy",
"Comment": "null-15",
"Rev": "12e4b4183793ac4b061921e7980845e750679fd0"
},
{
"ImportPath": "github.com/golang/groupcache/lru",
"Rev": "d781998583680cda80cf61e0b37dd0cd8da2eb52"
......@@ -40,6 +45,10 @@
"ImportPath": "github.com/juju/ratelimit",
"Rev": "cbaa435c80a9716e086f25d409344b26c4039358"
},
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "ca1565e5fb6658691d7074d270602c9185a55c79"
},
{
"ImportPath": "github.com/vitrun/qart/coding",
"Rev": "ccb109cf25f0cd24474da73b9fee4e7a3e8a8ce0"
......
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
)
// ErrCorrupt reports that the input is invalid.
var ErrCorrupt = errors.New("snappy: corrupt input")
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n == 0 {
return 0, 0, ErrCorrupt
}
if uint64(int(v)) != v {
return 0, 0, errors.New("snappy: decoded block is too large")
}
return int(v), n, nil
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if len(dst) < dLen {
dst = make([]byte, dLen)
}
var d, offset, length int
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint(src[s] >> 2)
switch {
case x < 60:
s += 1
case x == 60:
s += 2
if s > len(src) {
return nil, ErrCorrupt
}
x = uint(src[s-1])
case x == 61:
s += 3
if s > len(src) {
return nil, ErrCorrupt
}
x = uint(src[s-2]) | uint(src[s-1])<<8
case x == 62:
s += 4
if s > len(src) {
return nil, ErrCorrupt
}
x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
case x == 63:
s += 5
if s > len(src) {
return nil, ErrCorrupt
}
x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
}
length = int(x + 1)
if length <= 0 {
return nil, errors.New("snappy: unsupported literal length")
}
if length > len(dst)-d || length > len(src)-s {
return nil, ErrCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if s > len(src) {
return nil, ErrCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
case tagCopy2:
s += 3
if s > len(src) {
return nil, ErrCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(src[s-2]) | int(src[s-1])<<8
case tagCopy4:
return nil, errors.New("snappy: unsupported COPY_4 tag")
}
end := d + length
if offset > d || end > len(dst) {
return nil, ErrCorrupt
}
for ; d < end; d++ {
dst[d] = dst[d-offset]
}
}
if d != dLen {
return nil, ErrCorrupt
}
return dst[:d], nil
}
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
)
// We limit how far copy back-references can go, the same as the C++ code.
const maxOffset = 1 << 15
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst, lit []byte) int {
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[0] = 60<<2 | tagLiteral
dst[1] = uint8(n)
i = 2
case n < 1<<16:
dst[0] = 61<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
i = 3
case n < 1<<24:
dst[0] = 62<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
dst[3] = uint8(n >> 16)
i = 4
case int64(n) < 1<<32:
dst[0] = 63<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
dst[3] = uint8(n >> 16)
dst[4] = uint8(n >> 24)
i = 5
default:
panic("snappy: source buffer is too long")
}
if copy(dst[i:], lit) != len(lit) {
panic("snappy: destination buffer is too short")
}
return i + len(lit)
}
// emitCopy writes a copy chunk and returns the number of bytes written.
func emitCopy(dst []byte, offset, length int) int {
i := 0
for length > 0 {
x := length - 4
if 0 <= x && x < 1<<3 && offset < 1<<11 {
dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
dst[i+1] = uint8(offset)
i += 2
break
}
x = length
if x > 1<<6 {
x = 1 << 6
}
dst[i+0] = uint8(x-1)<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= x
}
return i
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Encode(dst, src []byte) ([]byte, error) {
if n := MaxEncodedLen(len(src)); len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
// Return early if src is short.
if len(src) <= 4 {
if len(src) != 0 {
d += emitLiteral(dst[d:], src)
}
return dst[:d], nil
}
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
const maxTableSize = 1 << 14
shift, tableSize := uint(32-8), 1<<8
for tableSize < maxTableSize && tableSize < len(src) {
shift--
tableSize *= 2
}
var table [maxTableSize]int
// Iterate over the source bytes.
var (
s int // The iterator position.
t int // The last position with the same hash as s.
lit int // The start position of any pending literal bytes.
)
for s+3 < len(src) {
// Update the hash table.
b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
p := &table[(h*0x1e35a7bd)>>shift]
// We need to to store values in [-1, inf) in table. To save
// some initialization time, (re)use the table's zero value
// and shift the values against this zero: add 1 on writes,
// subtract 1 on reads.
t, *p = *p-1, s+1
// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
s++
continue
}
// Otherwise, we have a match. First, emit any pending literal bytes.
if lit != s {
d += emitLiteral(dst[d:], src[lit:s])
}
// Extend the match to be as long as possible.
s0 := s
s, t = s+4, t+4
for s < len(src) && src[s] == src[t] {
s++
t++
}
// Emit the copied bytes.
d += emitCopy(dst[d:], s-t, s-s0)
lit = s
}
// Emit any final pending literal bytes and return.
if lit != len(src) {
d += emitLiteral(dst[d:], src[lit:])
}
return dst[:d], nil
}
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
func MaxEncodedLen(srcLen int) int {
// Compressed data can be defined as:
// compressed := item* literal*
// item := literal* copy
//
// The trailing literal sequence has a space blowup of at most 62/60
// since a literal of length 60 needs one tag byte + one extra byte
// for length information.
//
// Item blowup is trickier to measure. Suppose the "copy" op copies
// 4 bytes of data. Because of a special check in the encoding code,
// we produce a 4-byte copy only if the offset is < 65536. Therefore
// the copy op takes 3 bytes to encode, and this type of item leads
// to at most the 62/60 blowup for representing literals.
//
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
// enough, it will take 5 bytes to encode the copy op. Therefore the
// worst case here is a one-byte literal followed by a five-byte copy.
// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
//
// This last factor dominates the blowup, so the final estimate is:
return 32 + srcLen + srcLen/6
}
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the snappy block-based compression format.
// It aims for very high speeds and reasonable compression.
//
// The C++ snappy implementation is at http://code.google.com/p/snappy/
package snappy
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer supported.
*/
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"bytes"
"flag"
"fmt"
"io"
"io/ioutil"
"math/rand"
"net/http"
"os"
"path/filepath"
"strings"
"testing"
)
var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
func roundtrip(b, ebuf, dbuf []byte) error {
e, err := Encode(ebuf, b)
if err != nil {
return fmt.Errorf("encoding error: %v", err)
}
d, err := Decode(dbuf, e)
if err != nil {
return fmt.Errorf("decoding error: %v", err)
}
if !bytes.Equal(b, d) {
return fmt.Errorf("roundtrip mismatch:\n\twant %v\n\tgot %v", b, d)
}
return nil
}
func TestEmpty(t *testing.T) {
if err := roundtrip(nil, nil, nil); err != nil {
t.Fatal(err)
}
}
func TestSmallCopy(t *testing.T) {
for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
for i := 0; i < 32; i++ {
s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
}
}
}
}
}
func TestSmallRand(t *testing.T) {
rand.Seed(27354294)
for n := 1; n < 20000; n += 23 {
b := make([]byte, n)
for i, _ := range b {
b[i] = uint8(rand.Uint32())
}
if err := roundtrip(b, nil, nil); err != nil {
t.Fatal(err)
}
}
}
func TestSmallRegular(t *testing.T) {
for n := 1; n < 20000; n += 23 {
b := make([]byte, n)
for i, _ := range b {
b[i] = uint8(i%10 + 'a')
}
if err := roundtrip(b, nil, nil); err != nil {
t.Fatal(err)
}
}
}
func benchDecode(b *testing.B, src []byte) {
encoded, err := Encode(nil, src)
if err != nil {
b.Fatal(err)
}
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Decode(src, encoded)
}
}
func benchEncode(b *testing.B, src []byte) {
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
dst := make([]byte, MaxEncodedLen(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Encode(dst, src)
}
}
func readFile(b *testing.B, filename string) []byte {
src, err := ioutil.ReadFile(filename)
if err != nil {
b.Fatalf("failed reading %s: %s", filename, err)
}
if len(src) == 0 {
b.Fatalf("%s has zero length", filename)
}
return src
}
// expand returns a slice of length n containing repeated copies of src.
func expand(src []byte, n int) []byte {
dst := make([]byte, n)
for x := dst; len(x) > 0; {
i := copy(x, src)
x = x[i:]
}
return dst
}
func benchWords(b *testing.B, n int, decode bool) {
// Note: the file is OS-language dependent so the resulting values are not
// directly comparable for non-US-English OS installations.
data := expand(readFile(b, "/usr/share/dict/words"), n)
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
// testFiles' values are copied directly from
// https://code.google.com/p/snappy/source/browse/trunk/snappy_unittest.cc.
// The label field is unused in snappy-go.
var testFiles = []struct {
label string
filename string
}{
{"html", "html"},
{"urls", "urls.10K"},
{"jpg", "house.jpg"},
{"pdf", "mapreduce-osdi-1.pdf"},
{"html4", "html_x_4"},
{"cp", "cp.html"},
{"c", "fields.c"},
{"lsp", "grammar.lsp"},
{"xls", "kennedy.xls"},
{"txt1", "alice29.txt"},
{"txt2", "asyoulik.txt"},
{"txt3", "lcet10.txt"},
{"txt4", "plrabn12.txt"},
{"bin", "ptt5"},
{"sum", "sum"},
{"man", "xargs.1"},
{"pb", "geo.protodata"},
{"gaviota", "kppkn.gtb"},
}
// The test data files are present at this canonical URL.
const baseURL = "https://snappy.googlecode.com/svn/trunk/testdata/"
func downloadTestdata(basename string) (errRet error) {
filename := filepath.Join("testdata", basename)
f, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to create %s: %s", filename, err)
}
defer f.Close()
defer func() {
if errRet != nil {
os.Remove(filename)
}
}()
resp, err := http.Get(baseURL + basename)
if err != nil {
return fmt.Errorf("failed to download %s: %s", baseURL+basename, err)
}
defer resp.Body.Close()
_, err = io.Copy(f, resp.Body)
if err != nil {
return fmt.Errorf("failed to write %s: %s", filename, err)
}
return nil
}
func benchFile(b *testing.B, n int, decode bool) {
filename := filepath.Join("testdata", testFiles[n].filename)
if stat, err := os.Stat(filename); err != nil || stat.Size() == 0 {
if !*download {
b.Fatal("test data not found; skipping benchmark without the -download flag")
}
// Download the official snappy C++ implementation reference test data
// files for benchmarking.
if err := os.Mkdir("testdata", 0777); err != nil && !os.IsExist(err) {
b.Fatalf("failed to create testdata: %s", err)
}
for _, tf := range testFiles {
if err := downloadTestdata(tf.filename); err != nil {
b.Fatalf("failed to download testdata: %s", err)
}
}
}
data := readFile(b, filename)
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}
// Naming convention is kept similar to what snappy's C++ implementation uses.
func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
func Benchmark_UFlat12(b *testing.B) { benchFile(b, 12, true) }
func Benchmark_UFlat13(b *testing.B) { benchFile(b, 13, true) }
func Benchmark_UFlat14(b *testing.B) { benchFile(b, 14, true) }
func Benchmark_UFlat15(b *testing.B) { benchFile(b, 15, true) }
func Benchmark_UFlat16(b *testing.B) { benchFile(b, 16, true) }
func Benchmark_UFlat17(b *testing.B) { benchFile(b, 17, true) }
func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
func Benchmark_ZFlat12(b *testing.B) { benchFile(b, 12, false) }