[H-GEN] SSHFS - Local caching and syncing

Russell Stuart russell-humbug at stuart.id.au
Wed Jun 1 01:25:11 EDT 2016


On Wed, 2016-06-01 at 13:43 +1000, Gary Curtis wrote:
> They are of the opinion that sshfs does some sort of local caching,
> such that changes to a file at the server end are not reflected at
> the client, whether or not the file is being currently edited.

From the FAQ:

    6. Changes on the server are not immediately visible in the mounted directory.

       By default, sshfs caches things for 20 seconds, use -o cache_timeout=N
       to change the default cache timeout (in seconds) or -o cache=no for
       disabling the cache.

       You can also control cache timeouts for directory listing etc with
       -o cache_stat_timeout=N,
       -o cache_dir_timeout=N, and
       -o cache_link_timeout=N.

And from the feature list of README.md:

    - Caching directory contents

From the source, cache.c, we have the definition of the structures that
hold the cached information:

    struct node {
    	    struct stat stat;
    	    time_t stat_valid;
    	    char **dir;
    	    time_t dir_valid;
    	    char *link;
    	    time_t link_valid;
    	    time_t valid;
    };

    struct fuse_cache_dirhandle {
    	    const char *path;
    	    fuse_dirh_t h;
    	    fuse_dirfil_t filler;
    	    GPtrArray *dir;
    	    uint64_t wrctr;
    };

   Those cache structures appear to hold directory entries only.

From the source, sshfs.c, we have the definition of synchronous
(normal) write:

    static int sshfs_sync_write(struct sshfs_file *sf, const char *wbuf,
    	    	    	        size_t size, off_t offset)
    {
    	int err = 0;
    	    struct buffer *handle = &sf->handle;
    	    struct sshfs_io sio = { .error = 0, .num_reqs = 0 };

    	    pthread_cond_init(&sio.finished, NULL);

    	    while (!err && size) {
    	    	    struct buffer buf;
    	    	    struct iovec iov[2];
    	    	    size_t bsize = size < sshfs.max_write ? size : sshfs.max_write;

    	    	    buf_init(&buf, 0);
    	    	    buf_add_buf(&buf, handle);
    	    	    buf_add_uint64(&buf, offset);
    	    	    buf_add_uint32(&buf, bsize);
    	    	    buf_to_iov(&buf, &iov[0]);
    	    	    iov[1].iov_base = (void *) wbuf;
    	    	    iov[1].iov_len = bsize;
    	    	    err = sftp_request_send(SSH_FXP_WRITE, iov, 2,
    	    	    	    	    	    sshfs_sync_write_begin,
    	    	    	    	    	    sshfs_sync_write_end,
    	    	    	    	    	    0, &sio, NULL);
    	    	    buf_free(&buf);
    	    	    size -= bsize;
    	    	    wbuf += bsize;
    	    	    offset += bsize;
    	    }

    	    pthread_mutex_lock(&sshfs.lock);
    	    while (sio.num_reqs)
    	           pthread_cond_wait(&sio.finished, &sshfs.lock);
    	    pthread_mutex_unlock(&sshfs.lock);

    	    if (!err)
    	    	    err = sio.error;

    	    return err;
    }

    static void sshfs_sync_write_end(struct request *req)
    {
    	    uint32_t serr;
    	    struct sshfs_io *sio = (struct sshfs_io *) req->data;

    	    if (req->error) {
    	    	    sio->error = req->error;
    	    } else if (req->replied) {
    	    	    if (req->reply_type != SSH_FXP_STATUS) {
    	    	    	    fprintf(stderr, "protocol error\n");
    	    	    } else if (buf_get_uint32(&req->reply, &serr) != -1 &&
    	    	    	     serr != SSH_FX_OK) {
    	    	    	    sio->error = -EIO;
    	    	    }
    	    }
    	    sio->num_reqs--;
    	    if (!sio->num_reqs)
    	    	    pthread_cond_broadcast(&sio->finished);
    }

    We see that when a write is done it is immediately sent to the
    server, and the write() waits for the server to say it has received
    it before returning.  Therefore it is no different to a write to a
    local file system (apart from speed).

From the source, sshfs.c, we have the definition of synchronous
(normal) read:

    static int sshfs_sync_read(struct sshfs_file *sf, char *buf, size_t size,
                               off_t offset)
    {
    	    struct read_chunk *chunk;

    	    chunk = sshfs_send_read(sf, size, offset);
    	    return wait_chunk(chunk, buf, size);
    }

    static struct read_chunk *sshfs_send_read(struct sshfs_file *sf, size_t size,
    	    	    	    	    	      off_t offset)
    {
    	    struct read_chunk *chunk = g_new0(struct read_chunk, 1);
    	    struct buffer *handle = &sf->handle;

    	    pthread_cond_init(&chunk->sio.finished, NULL);
    	    list_init(&chunk->reqs);
    	    chunk->size = size;
    	    chunk->offset = offset;
    	    chunk->refs = 1;

    	    while (size) {
    	    	    int err;
    	    	    struct buffer buf;
    	    	    struct iovec iov[1];
    	    	    struct read_req *rreq;
    	    	    size_t bsize = size < sshfs.max_read ? size : sshfs.max_read;

    	    	    rreq = g_new0(struct read_req, 1);
    	    	    rreq->sio = &chunk->sio;
    	    	    rreq->size = bsize;
    	    	    buf_init(&rreq->data, 0);
    	    	    list_add(&rreq->list, &chunk->reqs);

    	    	    buf_init(&buf, 0);
    	    	    buf_add_buf(&buf, handle);
    	    	    buf_add_uint64(&buf, offset);
    	    	    buf_add_uint32(&buf, bsize);
    	    	    buf_to_iov(&buf, &iov[0]);
    	    	    err = sftp_request_send(SSH_FXP_READ, iov, 1,
    	    	    	    	    	    sshfs_read_begin,
    	    	    	    	    	    sshfs_read_end,
    	    	    	    	    	    0, rreq, NULL);

    	    	    buf_free(&buf);
    	    	    if (err)
    	    	    	    break;

    	    	    size -= bsize;
    	    	    offset += bsize;
    	    }

    	    return chunk;
    }

    sshfs never services reads from a cache.  If it is asked to read
    something it always gets fresh data from the server.
 
Finally, a note about kernel caching:

    This is a file system.  The kernel usually caches data for file
    systems.

Conclusion:

    sshfs does not cache file data itself, but the kernel may and if it
    does the data at the client can be out of sync with respect to the
    server.  Since the kernel is doing the caching this can only become
    a problem if a program running on the server is doing multiple
    writes to a file, and a program on the client is reading the same
    file.  This is true for all network file systems of course.  The
    usual way around it is file locking, but sshfs does not support it.

    sshfs does aggressive write caching for directories (default 20
    seconds).  It must be write through because the file must exist on
    the server before you can write it.

> This raises two potential problems...
> 1) The unrecognised changes at the server end may result in a lost
> write.

Correct.  Any attempted write that isn't recognised as a write will
result in lost data.  But this is true for all file systems.

> 2) The local caching at the client end may constitute a copy of the
> file, breaching privacy and/or copyright concerns.

This has nothing to do with sshfs.  If Mr Peeping Tom's ssh login can
access other people's private data or violate copyrights, he can do
that using "cat", "rsync" or a myriad of other programs.  Sshfs adds
nothing to the damage that can already happen.


More information about the General mailing list