Coda File System

Re: backup fails on large volumes?

From: Sean Caron <caron.sean_at_gmail.com>
Date: Wed, 6 Sep 2006 19:26:12 -0400
Jan -- you are splendid! The patch seems to be doing the trick so far. I
haven't gotten a full dump off yet -- it'll take a while to pull it all in --
but it has gotten much further than before; I think it should be all set.

Thanks again,

Sean
scaron_at_umich.edu

On 9/5/06, Jan Harkes <jaharkes_at_cs.cmu.edu> wrote:

> On Tue, Sep 05, 2006 at 02:24:19PM -0400, Sean Caron wrote:
> > I'll take a look at backup.cc when I get back home today and see if I can
> > get it to cooperate with what you've told me here; otherwise I suppose
> > that I will just have to bite the bullet, move my tape drive to a client
> > machine, and give this amanda program a shot.
>
> Actually, I did the same looking around backup.cc today while I was
> trying to come up with a fix. My initial idea involved opening a pipe
> inside of backup.cc to simulate the voldump | gzip sequence.
>
> Here is a preliminary patch,
>
> Jan
>
>     Break the 4GB limit for backup dumps.
>
>     By passing an open filedescriptor and not allowing sftp to seek, we
>     should be able to dump volumes that are larger than 4GB.
>
> diff --git a/coda-src/volutil/backup.cc b/coda-src/volutil/backup.cc
> index b94f79a..140ed4e 100644
> --- a/coda-src/volutil/backup.cc
> +++ b/coda-src/volutil/backup.cc
> @@ -87,27 +87,28 @@ static int Debug = 0;                  /* Global debu
> static int Naptime = 30;          /* Sleep period for PollLWP */
>
> struct hostinfo {
> -    bit32              address;  /* Assume host IP addresses are 32 bits
> */
> +    bit32      address;  /* Assume host IP addresses are 32 bits */
>      RPC2_Handle rpcid;   /* should be -1 if connection is dead. */
>      char       name[36];
> -}  Hosts[N_SERVERIDS];
> +} Hosts[N_SERVERIDS];
>
> #define BADCONNECTION  (RPC2_Handle) -1
>
> bit32 HostAddress[N_SERVERIDS];                /* Need these for macros in
> vrdb.c */
> char *ThisHost;                        /* This machine's hostname */
> -int ThisServerId = -1;         /* this server id, as found in
> ../db/servers */
> +int ThisServerId = -1;         /* this server id, as found in
> ../db/servers */
>
> /* Rock info for communicating with the DumpLWP. */
> #define ROCKTAG 12345
> struct rockInfo {
> -    char dumpfile[MAXPATHLEN];
> -    VolumeId volid;          /* Volume being dumped. */
> -    unsigned long numbytes; /* Number of bytes already written to the
> file. */
> +    int dumpfd;                   /* Open filedescriptor for WriteDump */
>
> +    VolumeId volid;       /* Volume being dumped. */
> +    unsigned int numbytes; /* Number of bytes already written to the
> file.
> +                             (has to wrap around the same way as
> 'offset') */
> } Rock;
>
> -struct hgram DataRate;                 /* Statistics on rate of data
> transfer */
> -struct hgram DataTransferred;          /* and size of dumpfiles. */
> +struct hgram DataRate;         /* Statistics on rate of data transfer */
> +struct hgram DataTransferred;  /* and size of dumpfiles. */
>
> /* Per replica info. */
> #define LOCKED 0x1
> @@ -588,47 +589,48 @@ int dumpVolume(volinfo_t *vol)
>      long rc;
>      RPC2_Unsigned dumplevel = (vol->flags & DUMPLVL_MASK) >>
> DUMPLVL_SHFT;
>      int ndumped = 0;
> -
> +
>      for (int i = 0; i < vol->nReplicas; i++) {
>         if ((Hosts[reps[i].serverNum].rpcid == BADCONNECTION) ||
>             !ISCLONED(reps[i].flags))
> -           continue;
> -
> +           continue;
> +
>         if (ISDUMPED(reps[i].flags)) {
>             ndumped++; /* Count it, but don't need to redo it. */
>             continue;
>         }
> -
> +
>         CODA_ASSERT(reps[i].backupId > 0);
> -
> +
>         /* get the name of the dumpfile. */
>         struct DiskPartition *part = NULL;
>         char buf[MAXPATHLEN];
>
>         part = findBestPartition();
> -       if (vol->flags & REPLICATED)
> +       if (vol->flags & REPLICATED)
>                 sprintf(buf, "%s/%s-%08x.%08x", part->name,
>                         Hosts[reps[i].serverNum].name, volId,
> reps[i].repvolId);
> -           else
> +       else
>                 sprintf(buf, "%s/%s-%08x", part->name,
> Hosts[reps[i].serverNum].name, volId);
> -
>
>         /* Remove the file if it already exists. Since we made the
>          * dump dir it can only exist if we are retrying the
>          * replicated dump even though it succeeded for this replica
>          * last time around. Don't care if it fails.  */
>         unlink(buf);
> -
> +
>         /* Setup the write thread to handle this operation. */
> -       CODA_ASSERT(strlen(buf) < sizeof(Rock.dumpfile));
> -       strcpy(Rock.dumpfile, buf);
> +       Rock.dumpfd = open(buf, O_CREAT | O_WRONLY | O_TRUNC, 0644);
>         Rock.volid = reps[i].backupId;
>         Rock.numbytes = 0;
> -
> +
>         VLog(0, "Dumping %x.%x to %s ...", volId, reps[i].repvolId, buf);
>
>         rc = VolNewDump(Hosts[reps[i].serverNum].rpcid, reps[i].backupId,
>                         &dumplevel);
> +
> +       close(Rock.dumpfd);
> +
>         if (rc != RPC2_SUCCESS) {
>             LogMsg(0,0,stdout, "VolDump (%x) failed on %x with %s\n",
>                    Hosts[reps[i].serverNum].rpcid, /* For debugging. */
> @@ -639,26 +641,26 @@ int dumpVolume(volinfo_t *vol)
>             continue;
>         }
>
> -       /* Incremental can be forced to be full. */
> +       /* Incremental can be forced to be full. */
>         if (dumplevel == 0) {
>                 vol->flags &= ~(INCREMENTAL | DUMPLVL_MASK);
>         }
> -
> +
>         /* Setup a pointer from the dump subtree to the actual dumpfile
>          * if a different partition was used for storage.
>          */
> -
> +
>         char link[66];
>         if (vol->flags & REPLICATED)
>                 sprintf(link,"%s/%08x.%08x",Hosts[reps[i].serverNum].name,
>                         volId, reps[i].repvolId);
>         else
>                 sprintf(link, "%s/%08x", Hosts[reps[i].serverNum].name,
> volId);
> -
> +
>         /* Remove the link if it exists. See comment by previous
>            unlink. */
>         unlink(link);
> -
> +
>         if (symlink(buf, link) == -1) {
>                 if (errno == EEXIST) {  /* Retrying dump. */
>                         if (unlink(link) != -1)
> @@ -666,7 +668,7 @@ int dumpVolume(volinfo_t *vol)
>                                         break;
>                 }
>                 perror("symlink");
> -               unlink(buf);    /* Delete the dump file. */
> +               unlink(buf);    /* Delete the dump file. */
>                 return -1;
>         }
>
> @@ -1291,7 +1293,7 @@ long S_WriteDump(RPC2_Handle rpcid, RPC2
>      struct rockInfo *rockinfo;
>      SE_Descriptor sed;
>      char *rock;
> -
> +
>      CODA_ASSERT(LWP_GetRock(ROCKTAG, &rock) == LWP_SUCCESS);
>      rockinfo = (struct rockInfo *)rock;
>
> @@ -1309,13 +1311,11 @@ long S_WriteDump(RPC2_Handle rpcid, RPC2
>      sed.Tag = SMARTFTP;
>      sed.Value.SmartFTPD.TransmissionDirection = CLIENTTOSERVER;
>      sed.Value.SmartFTPD.ByteQuota = -1;
> -    sed.Value.SmartFTPD.SeekOffset = offset;
> +    sed.Value.SmartFTPD.SeekOffset = -1; /* setting this to 'offset'
> wreaks
> +                                           havoc with dumps > 4GB */
>      sed.Value.SmartFTPD.hashmark = 0;
> -    sed.Value.SmartFTPD.Tag = FILEBYNAME;
> -    sed.Value.SmartFTPD.FileInfo.ByName.ProtectionBits = 0755;
> -    CODA_ASSERT(strlen(rockinfo->dumpfile) <
> -          sizeof( sed.Value.SmartFTPD.FileInfo.ByName.LocalFileName));
> -    strcpy(sed.Value.SmartFTPD.FileInfo.ByName.LocalFileName,
> rockinfo->dumpfile);
> +    sed.Value.SmartFTPD.Tag = FILEBYFD;
> +    sed.Value.SmartFTPD.FileInfo.ByFD.fd = rockinfo->dumpfd;
>
>      struct timeval before, after;
>      gettimeofday(&before, 0);
> diff --git a/coda-src/volutil/volclient.cc b/coda-src/volutil/volclient.cc
> index 0a8fd78..2dc6ccc 100644
> --- a/coda-src/volutil/volclient.cc
> +++ b/coda-src/volutil/volclient.cc
> @@ -127,9 +127,10 @@ static void pokexmem(void);
>
> #define ROCKTAG 12345
> struct rockInfo {
> -    int fd;                /* Open filedescriptor for ReadDump. */
> +    int fd;                /* Open filedescriptor for ReadDump/WriteDump.
> */
>      VolumeId volid;        /* Volume being dumped. */
> -    unsigned long numbytes; /* Number of bytes already written to the
> file. */
> +    unsigned int numbytes;  /* Number of bytes already written to the
> file.
> +                              (has to wrap around the same way as
> 'offset') */
> };
>
> static void V_InitRPC(int timeout);
> @@ -580,18 +581,18 @@ static void dump(void)
>      rock->fd = fileno(outf);
>      rock->volid = volid;
>      rock->numbytes = 0;
> -
> +
>      PROCESS dumpPid;
>      LWP_CreateProcess(VolDumpLWP, 16 * 1024, LWP_NORMAL_PRIORITY,
>                       (void *)rock, "VolDumpLWP", &dumpPid);
> -
> +
>      rc = VolNewDump(rpcid, volid, &Incremental);
>      if (rc != RPC2_SUCCESS) {
>         fprintf(stderr, "\nVolDump failed with %s\n",
> RPC2_ErrorMsg((int)rc));
>         exit(-1);
>      }
>
> -    fprintf(stderr, "\n%sVolDump completed, %lu bytes dumped\n",
> +    fprintf(stderr, "\n%sVolDump completed, %u bytes dumped\n",
>             Incremental ? "Incremental " : "", rock->numbytes);
>      exit(0);
> }
> @@ -673,7 +674,7 @@ long S_WriteDump(RPC2_Handle rpcid, RPC2
>      struct rockInfo *rockinfo;
>      SE_Descriptor sed;
>      char *rock;
> -
> +
>      CODA_ASSERT(LWP_GetRock(ROCKTAG, &rock) == LWP_SUCCESS);
>      rockinfo = (struct rockInfo *)rock;
>
> @@ -684,23 +685,24 @@ long S_WriteDump(RPC2_Handle rpcid, RPC2
>      }
>
>      if (rockinfo->numbytes != offset) {
> -       fprintf(stderr, "Offset %d != rockInfo->numbytes %ld\n",
> +       fprintf(stderr, "Offset %d != rockInfo->numbytes %d\n",
>                 offset, rockinfo->numbytes);
>      }
> -
> +
>      /* fetch the file with volume data */
>      memset(&sed, 0, sizeof(SE_Descriptor));
>      sed.Tag = SMARTFTP;
>      sed.Value.SmartFTPD.TransmissionDirection = CLIENTTOSERVER;
>      sed.Value.SmartFTPD.ByteQuota = -1;
> -    sed.Value.SmartFTPD.SeekOffset = offset;
> +    sed.Value.SmartFTPD.SeekOffset = -1; /* setting this to 'offset'
> wreaks
> +                                           havoc with dumps > 4GB */
>      sed.Value.SmartFTPD.hashmark = 0;
>      sed.Value.SmartFTPD.Tag = FILEBYFD;
>      sed.Value.SmartFTPD.FileInfo.ByFD.fd = rockinfo->fd;
>
>      if ((rc = RPC2_InitSideEffect(rpcid, &sed)) <= RPC2_ELIMIT){
>         fprintf(stderr, "WriteDump: Error %s in InitSideEffect\n",
> RPC2_ErrorMsg((int)rc));
> -    } else if ((rc = RPC2_CheckSideEffect(rpcid, &sed,
> SE_AWAITLOCALSTATUS))
> +    } else if ((rc = RPC2_CheckSideEffect(rpcid, &sed,
> SE_AWAITLOCALSTATUS))
>                <= RPC2_ELIMIT) {
>         fprintf(stderr, "WriteDump: Error %s in CheckSideEffect\n",
> RPC2_ErrorMsg((int)rc));
>      }
> @@ -803,7 +805,6 @@ long S_ReadDump(RPC2_Handle rpcid, RPC2_
>      long rc = 0;
>      struct rockInfo *rockinfo;
>      SE_Descriptor sed;
> -    char *buf;
>      char *rock;
>
>      CODA_ASSERT(LWP_GetRock(ROCKTAG, &rock) == LWP_SUCCESS);
> @@ -818,41 +819,17 @@ long S_ReadDump(RPC2_Handle rpcid, RPC2_
>         exit(-1);
>      }
>
> -    /* Set up a buffer and read in the data from the dump file. */
> -    buf = (char *)malloc((unsigned int)*nbytes);
> -    if (!buf) {
> -       perror("ReadDump: Can't malloc buffer!");
> -       exit(-1);
> -    }
> -
>      CODA_ASSERT(rockinfo->fd != 0); /* Better have been opened by
> restore() */
>
> -    if (lseek(rockinfo->fd, offset, L_SET) == -1) {
> -       perror("ReadDump: lseek");
> -       *nbytes = 0;
> -       free(buf);
> -       return 0;
> -    }
> -
> -    *nbytes = read(rockinfo->fd, buf, (int)*nbytes);
> -    if (*nbytes == -1) {
> -       perror("ReadDump: read");
> -       *nbytes = 0;
> -       free(buf);
> -       return 0;
> -    }
> -
>      /* fetch the file with volume data */
>      memset(&sed, 0, sizeof(SE_Descriptor));
>      sed.Tag = SMARTFTP;
>      sed.Value.SmartFTPD.TransmissionDirection = SERVERTOCLIENT;
> -    sed.Value.SmartFTPD.ByteQuota = -1;
> -    sed.Value.SmartFTPD.SeekOffset = 0;
> +    sed.Value.SmartFTPD.ByteQuota = *nbytes;
> +    sed.Value.SmartFTPD.SeekOffset = -1;
>      sed.Value.SmartFTPD.hashmark = 0;
> -    sed.Value.SmartFTPD.Tag = FILEINVM;
> -    sed.Value.SmartFTPD.FileInfo.ByAddr.vmfile.SeqBody =
> (RPC2_ByteSeq)buf;
> -    sed.Value.SmartFTPD.FileInfo.ByAddr.vmfile.MaxSeqLen =
> -    sed.Value.SmartFTPD.FileInfo.ByAddr.vmfile.SeqLen = *nbytes;
> +    sed.Value.SmartFTPD.Tag = FILEBYFD;
> +    sed.Value.SmartFTPD.FileInfo.ByFD.fd = rockinfo->fd;
>
>      if ((rc = RPC2_InitSideEffect(rpcid, &sed)) <= RPC2_ELIMIT){
>         fprintf(stderr, "ReadDump: Error %s in InitSideEffect\n",
> RPC2_ErrorMsg((int)rc));
> @@ -867,7 +844,6 @@ #else
>      fprintf(stderr, ".");
> #endif
>      rockinfo->numbytes += sed.Value.SmartFTPD.BytesTransferred;
> -    free(buf);
>      return rc;
> }
>
>
Received on 2006-09-06 20:53:34