fusetoys

various hacky fuse filesystem utilities
git clone https://a3nm.net/git/fusetoys/
Log | Files | Refs | README

commit 64fc5d4825616061467df4d4ccc0dba64338f62b
parent 470c7a0926e72f5eaf1d0d98970341afc2524a31
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 28 Dec 2012 18:31:15 +0100

continue

Diffstat:
cachefs.py | 175++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 130 insertions(+), 45 deletions(-)

diff --git a/cachefs.py b/cachefs.py @@ -1,15 +1,14 @@ #!/usr/bin/python -# inspired by -# https://github.com/terencehonles/fusepy/blob/master/examples/loopback.py - -from fuse import Fuse import fuse import errno import logging import os import shutil import sys +import pickle +import time +import heapq from threading import Lock class __Struct: @@ -24,17 +23,81 @@ class FileTooBigException: fuse.fuse_python_api = (0, 2) -class CacheFS(fuse.Fuse): - #def __init__(self, source, cache, db): - # self.source = source - # self.cache = cache - # self.db = db - # self.rwlock = Lock() +class File: + @property + def size(self): + return os.stat(os.path.join(self.cache.path, self.path)).st_size + + def __init__(self, cache, path): + self.cache = cache + self.path = path + self.deleted = False + self.touch() + + def touch(self): + self.lastAccessed = time.time() + +class Cache: + def __init__(self, maxSize, path): + self.maxSize = maxSize + self.currentSize = 0 + self.heap = [] + self.files = {} + self.path = path + self.discover() + + @property + def freeSize(self): + return self.maxSize - self.currentSize + + def discover(self): + for x in os.walk(self.path): + dirpath, dirnames, filenames = x + for f in filenames: + full = os.path.join(dirpath, f)[len(self.path):] + if full not in self.files.keys(): + self.addFile(full) + + def addFile(self, path): + print "adding %s" % path + self.files[path] = File(self, path) + heapq.heappush(self.heap, (time.time(), self.files[path])) + self.currentSize += self.files[path].size + print "my size be now %d" % self.currentSize + + def resizeFile(self, path, newSize): + f = self.files[path] + self.currentSize -= f.size + self.currentSize += newSize + + def moveFile(self, path, newPath): + self.files[path].path = newPath + # TODO check no overwrite! + self.files[newPath] = self.files[path] + del self.files[path] + + def deleteFile(self, path): + self.currentSize -= self.files[path].size + self.files[path].deleted = True + del self.files[path] + + def oldestFile(self): + while len(self.heap) > 0: + time, f = heapq.heappop(self.heap) + if time != f.lastAccessed: + # stale + heapq.heappush(self.heap, (f.lastAccessed, f)) + if f.deleted: + continue + return f + +class CacheFS(fuse.Fuse): def __init__(self, *args, **kw): fuse.Fuse.__init__(self, *args, **kw) self.rwlock = Lock() + self.cache = None # Initialize a Logger() object to handle logging. self.logger = logging.getLogger('cachefs') @@ -49,48 +112,58 @@ class CacheFS(fuse.Fuse): self.parser.add_option('--size', dest='size', metavar='SIZE', type='int', help="size") + print self + def fsinit(self): options = self.cmdline[0] - self.source = options.source - self.cache = options.cache + self.sourceRoot = options.source + self.cacheRoot = options.cache self.db = options.db self.size = options.size + print "will load" + print self + try: + with open(self.db, 'rb') as f: + self.cache = pickle.load(f) + assert(self.cache != None) + print "loaded" + except Exception as e: + print "a problem occurred, have a fresh cache" + self.cache = Cache(self.size, self.cacheRoot) + print self.cache + print "AHA" + if (self.cache.maxSize > self.size): + self.makeRoom(self.cache.maxSize - self.size) + self.cache.maxSize = self.size + print self.cache.maxSize + + + def fsdestroy(self): + with open(self.db, 'wb+') as f: + pickle.dump(self.cache, f) def sourcePath(self, path): - return os.path.join(self.source, "./"+path) - #TODO - #return self.source+path + return os.path.join(self.sourceRoot, path[1:]) def cachePath(self, path): - #TODO - return os.path.join(self.cache, "./"+path) - #return self.cache+path - - def statvfsCache(self): - return os.statvfs(self.cache) - - def availableCache(self): - # TODO account the files, there is no better solution... - stv = self.statvfsCache() - return stv.f_bsize * stv.f_bavail - - def totalCache(self): - stv = self.statvfsCache() - return stv.f_frsize * stv.f_blocks - - def nextTarget(self): - """return the next cached file to remove""" - return None # TODO + return os.path.join(self.cacheRoot, path[1:]) def makeRoom(self, bytes): - # TODO maybe don't flush all the cache for a big file - if bytes > self.totalCache(): + # TODO maybe don't flush all the cache for a big file even if it fits... + if bytes > self.cache.maxSize: raise FileTooBigException() - while bytes > self.availableCache(): - os.unlink(self.nextTarget()) + print("now current free size is %d and must fit %d" % + (self.cache.freeSize, bytes)) + while bytes > self.cache.freeSize: + f = self.cache.oldestFile() + self.cache.deleteFile(f.path) + print("remove %s" % self.cachePath("/"+f.path)) + os.unlink(self.cachePath("/"+f.path)) + print("now current size is %d" % self.cache.currentSize) def registerHit(self, path): """register a hit for path in the cache""" + self.cache.files[path[1:]].touch() def isCached(self, path): """is a path cached?""" @@ -106,7 +179,6 @@ class CacheFS(fuse.Fuse): def prepare(self, path): #if not flags & os.O_RDONLY: # return self.sourcePath(path) - self.registerHit(path) print "PREPARATION" if not os.path.exists(self.sourcePath(path)): # no such original file, let the source handle it @@ -114,6 +186,7 @@ class CacheFS(fuse.Fuse): return self.sourcePath(path) if self.isCached(path): print "*** already cached" + self.registerHit(path) return self.cachePath(path) statOriginal = os.stat(self.sourcePath(path)) # cache the file and then open it @@ -128,6 +201,7 @@ class CacheFS(fuse.Fuse): print ("*** docopy from %s to %s" % (self.sourcePath(path), self.cachePath(path))) shutil.copy2(self.sourcePath(path), self.cachePath(path)) + self.cache.addFile(path[1:]) return self.cachePath(path) def access(self, path, mode): @@ -170,7 +244,7 @@ class CacheFS(fuse.Fuse): return 0 def read(self, path, size, offset): - f = self.prepare(path) # TODO hmm + f = self.prepare(path) with self.rwlock: fh = os.open(f, os.O_RDONLY) os.lseek(fh, offset, 0) @@ -198,14 +272,22 @@ class CacheFS(fuse.Fuse): wasCached = self.isCached(old) retval = os.rename(self.sourcePath(old), self.sourcePath(new)) if wasCached: + self.cache.moveFile(old[1:], new[1:]) os.rename(self.cachePath(old), self.cachePath(new)) return retval def rmdir(self, path): return os.rmdir(self.sourcePath(path)) - def statfs(self, path): - stv = os.statvfs(self.sourcePath(path)) + def statfs(self): + stv = os.statvfs(self.cacheRoot) + stv = Stat(**dict((key, getattr(stv, key)) for key in ["f_bavail", + "f_bfree", "f_blocks", "f_bsize", "f_favail", "f_ffree", "f_files", + "f_flag", "f_frsize", "f_namemax"])) + stv.f_bfree = (self.cache.maxSize - self.cache.currentSize)/stv.f_bsize + stv.f_bavail = stv.f_bfree + print ("maxsize %d bzide %d" % (self.cache.maxSize, stv.f_bsize)) + stv.f_blocks = self.cache.maxSize/stv.f_bsize return stv def symlink(self, target, source): @@ -219,12 +301,15 @@ class CacheFS(fuse.Fuse): wasCached = self.isCached(path) self.doTruncate(self.sourcePath(path), length) if wasCached: + self.cache.currentSize -= self.cache.files[path[1:]].size self.doTruncate(self.cachePath(path), length) + self.cache.currentSize += self.cache.files[path[1:]].size def unlink(self, path): wasCached = self.isCached(path) retval = os.unlink(self.sourcePath(path)) if wasCached: + self.cache.deleteFile(path[1:]) os.unlink(self.cachePath(path)) return retval @@ -249,15 +334,15 @@ class CacheFS(fuse.Fuse): print "writing to a %s file" % ("cached" if wasCached else "notcached") retval = self.doWrite(self.sourcePath(path), data, offset) if retval > 0 and wasCached: + self.makeRoom(len(data)) + self.cache.currentSize -= self.cache.files[path[1:]].size self.doWrite(self.cachePath(path), data, offset) + self.cache.currentSize += self.cache.files[path[1:]].size return retval - - - if __name__ == "__main__": #if len(sys.argv) != 6: # print("Usage: %s SOURCE CACHE SIZE DB MOUNTPOINT" % sys.argv[0])