[PATCH 2 of 2 RFC] c-hglib: hg_log() level 1 function

Giovanni Gherdovich g.gherdovich at gmail.com
Mon Aug 19 07:10:00 CDT 2013


2013/8/18 Iulian Stana <julian.stana at gmail.com>

> # HG changeset patch
> # User Iulian Stana <julian.stana at gmail.com>
> # Date 1376828454 -10800
> #      Sun Aug 18 15:20:54 2013 +0300
> # Node ID 04bf311c99b8f4cf4c3e2096b493ee126f861b99
> # Parent  21be7b973803e02b0bb310465691f88705a2dd53
> c-hglib: hg_log() level 1 function
>
> The revision history could have a huge mass of data. To deal with this
> issue, I
> had created a iterator-like mechanism. In this way I will get the
> changesets in
> chunks or more-like one at the time.
>
> The hg_log function will prepare the command and then will call cmd-server
> for
> changesets. This function will return to the user a iterator structure, to
> be
> used on hg_fetch_log_entry funciton. (The log command will not passing any
> changeset to the user)
>
> The hg_fetch_log_entry function will read changesets from command server
> and
> will pass into the hg_log_entry_t structure in a parse way the changeset.
> The hg_log_entry_t structure will be one of the function parameter.
>
> In the main.c file it can be found an example on how this function can be
> used.
>
> diff --git a/client.c b/client.c
> new file mode 100644
> --- /dev/null
> +++ b/client.c
> @@ -0,0 +1,134 @@
> +#define _GNU_SOURCE
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <signal.h>
> +
> +
> +#include "client.h"
> +#include "utils.h"
> +
> +#define HGPATH "hg"
> +#define CHANGESET "{rev}\\0{node}\\0{tags}\\0{branch}\\0{author}\\0{desc}\
> +                                               \\0{date|isodate}\\0"
> +
> +/**
> + * \brief A helper for building the command arguments.
> + * \param command The name for mercurial command.
> + * \param options An array pointer to command option list.
> + * \param template Template to be used for this command.
> + * \retval new_string An array of pointers, where the command it's almost
> ready
> + *                    to be send to mercurial cmdserver.
> + * */
> +char **cmdbuilder(char *command, char *options[], char *template)
> +{
> +       int cmd_size, size;
> +       char **new_cmd;
> +       char *temp = "--template";
> +
> +       for(size = 0; options[size] != NULL; size++);
> +       cmd_size = size + 1;
> +       new_cmd = malloc(sizeof(char *) * (cmd_size + 3));
> +
> +       new_cmd[0] = command;
> +       memcpy(new_cmd + 1, options, (cmd_size - 1)  * sizeof(char *));
> +       if(template){
> +               new_cmd[cmd_size] = temp;
> +               new_cmd[cmd_size + 1] = template;
> +               new_cmd[cmd_size + 2] = NULL;
> +       }else{
> +               new_cmd[cmd_size] = NULL;
> +       }
> +
> +       return new_cmd;
> +}
> +
> +/**
> + * \brief 'Parse a changeset'. It's more like pointing to the correct
> position.
> + *
> + * The changeset could be found on buff pointer. To not duplicate the
> data I
> + * choose to point every log_entry field to the right position.
> + * \param buff The pointer to the changeset.
> + * \param le   The log_entry_t structure where the changeset will be
> parse.
> + * \retval 0 if successful.
> + * */
> +int parse_changeset(char *buff, hg_log_entry_t *le)
> +{
> +       /* set pointer for revision position */
> +       le->rev = buff;
> +
> +       /* set pointer for node position */
> +       buff = buff + strlen(buff) + 1;
> +       le->node = buff;
> +
> +       /* set pointer for tag position */
> +       buff = buff + strlen(buff) + 1;
> +       le->tags = buff;
> +
> +       /* set pointer for branch position */
> +       buff = buff + strlen(buff) + 1;
> +       le->branch = buff;
> +
> +       /* set pointer for author position */
> +       buff = buff + strlen(buff) + 1;
> +       le->author = buff;
> +
> +       /* set pointer for description position */
> +       buff = buff + strlen(buff) + 1;
> +       le->desc = buff;
> +
> +       /* set pointer for data position */
> +       buff = buff + strlen(buff) + 1;
> +       le->date = buff;
> +
> +       return 0;
> +}
> +
> +/* The high level log command for hglib API. */
> +hg_log_iterator *hg_log(hg_handle *handle, char *option[])
> +{
> +       hg_log_iterator *log_iterator = malloc(sizeof(hg_log_iterator));
> +       log_iterator->handle = handle;
> +
> +       log_iterator->command = cmdbuilder("log", option, CHANGESET);
> +
> +       if(hg_rawcommand(handle, log_iterator->command) < 0){
> +               return NULL;
> +       }
> +
> +       log_iterator->changeset = malloc(0);
> +       return log_iterator;
>


What happens here is that hg_log() does not return a status code
(with the meaning "command successfully sent"), but the hg_log_iterator
struct instead.

A status code would be desirable and idiomatic I think;
the struct to be passed as an argument.



> +}
> +
> +/* The iterator next step. Getting the next changeset. */
> +int hg_fetch_log_entry(hg_log_iterator *log_iterator, hg_log_entry_t *le)
> +{
> +       hg_header head = hg_head(log_iterator->handle);
> +
> +       if(head.channel == 'r'){
> +               free(log_iterator->command);
> +               free(log_iterator->changeset);
> +               int exitcode = hg_exitcode(log_iterator->handle);
> +               free(log_iterator);
> +               return exitcode;
> +       }
> +
> +       log_iterator->changeset = realloc(log_iterator->changeset,
> head.length);
> +
> +       if(read(log_iterator->handle->p_read, log_iterator->changeset,
> +                                       head.length) < 0){
>


Here you are not using hg_rawread(). Please explain why.
Level 1 functions are meant to build on top of level 0 functions.

Also: you are not reading in 4096 B chunks.

Also: you are assuming that all information for a given log entry
(a revision) are retrieved in a "single shot" of the  'o' channel.

You are assuming that a log revision cannot be split across two
iteration of output on the 'o' channel.

Is this assumption safe?

Please ask Idan Kamara (CC the list) and make sure of this.



> +               return -1;
> +       }
> +
> +       parse_changeset(log_iterator->changeset, le);
> +
> +       if(read_header(log_iterator->handle) < 0){
> +               return -1;
> +       }
> +       return head.length;
> +}
> diff --git a/client.h b/client.h
> --- a/client.h
> +++ b/client.h
> @@ -65,6 +65,22 @@
>         int protect;
>  } hg_handle;
>
> +typedef struct hg_log_iterator{
>


I think this name is unfortunate. This is not an iterator, in any possible
sense.
This is the place where you store your data (the "changeset" field of this
structure).

I would call this something like hg_log_rawentry_t, in contrast to the
other structure where the log entry is "parsed" and not "raw".

Please note that your code works as an iterator _not_ because of this struct
(where you are storing a single element of your possibly "infinite" list),
_but_ because it all piggybacks on the syscall read(2), which works as an
iterator itself.



> +       hg_handle *handle;
> +       char **command;
> +       char *changeset;
> +}hg_log_iterator;
> +
> +typedef struct hg_log_entry_t{
> +       char *author;
> +       char *branch;
> +       char *date;
> +       char *desc;
> +       char *node;
> +       char *rev;
> +       char *tags;
> +}hg_log_entry_t;
> +
>  /**
>   * \brief Open the connection with the mercurial command server.
>   *
> @@ -196,6 +212,63 @@
>   * */
>  int hg_exitcode(hg_handle *handle);
>
> +/**
> + * \brief hg_log command for hglib API.
> + *
> + * It's an advance function to get revision history. It's more like the
> start
> + * point of the action, this function will prepare the query question and
> will
> + * send it to the cmd-server.
> + *
> + * Return the revision history of the specified files or the entire
> project.
> + * File history is shown without following rename or copy history of
> files.
> + * Use follow with a filename to follow history across renames and copies.
> + * follow without a filename will only show ancestors or descendants of
> the
> + * starting revision. followfirst only follows the first parent of merge
> + * revisions.
> + *
> + * If revrange isn't specified, the default is "tip:0" unless follow is
> set,
> + * in which case the working directory parent is used as the starting
> + * revision.
> + *
> + * \param handle The handle of the connection, wherewith I want to
> communicate
> + * \param option The option list for mercurial log command.
> + * \retval hg_log_iterator A pointer to hg_log_iterator structure if
> successful
> + * \retval NULL to indicate an error, with errno set appropriately.
> + *
> + * errno can be:
> + *      - hg_rawcommand errors
> + * */
> +hg_log_iterator *hg_log(hg_handle *handle, char *option[]);
> +
> +/**
> + * \brief The iterator step. Getting the next changeset.
> + *
> + * The revision history could have a huge mass of data. You can pass the
> entire
> + * history in one call, so we use an iterator-like mechanism. Calling the
> + * hg_fetch_log_entry, the next changeset will be read from cmd-server,
> parse
> + * and pass to hg_log_entry_t structure.
> + * The log_entry structure will handle a changeset with the following
> string
> + * fields:
> + *         - rev
> + *         - node
> + *         - tags (space delimited)
> + *         - branch
> + *         - author
> + *         - desc
> + *
> + * \param log_iterator The iterator for log command.
> + * \param log_entry The hg_log_entry_t structure where the changeset will
> be
> + *                   pass
> + * \retval number The lenght for the pass changeset.
> + * \retval exitcode To indicate the end of log_command.
> + * \retval   -1 to indicate an error, with errno set appropriately.
> + *
> + * errno can be:
> + *      - EINVAL  - Invalid argument (handle it's set to a null pointer)
> + *      - read(2) command errors
> + *      - read_header error
> + * */
> +int hg_fetch_log_entry(hg_log_iterator *log_iterator, hg_log_entry_t *le);
>
>  #endif
>
> diff --git a/main.c b/main.c
> new file mode 100644
> --- /dev/null
> +++ b/main.c
> @@ -0,0 +1,129 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include <sys/wait.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include "client.h"
> +#include "utils.h"
> +
> +#define INIT_REPO  "init_test_repo"
> +
> +/****** Convenience functions. *******/
> +
> +/**
> + * \brief Create and setup the tmp directory where the acction will
> happends.
> + * */
> +void setup_tmp()
> +{
> +       system("hg init tmp");
> +       chdir("tmp");
> +}
> +
> +/**
> + * \brief Remove the tmp directory and all his files.
> + * */
> +void clean_tmp()
> +{
> +       chdir("..");
> +       system("rm -rf tmp");
> +}
> +
> +/**
> + * \brief Fill the current repository with commits for log command.
> + * */
> +void setup_log()
> +{
> +       system("touch foo ; hg add foo ; hg commit -m 'foo file'");
> +       system("echo baloo > foo ; hg commit -m 'baloo text'");
> +       system("touch voodoo ; hg add voodoo ; hg commit -m voodoo");
> +       system("echo voodoo > voodoo ; hg commit -m 'voodoo text'");
> +}
> +
> +/******* Examples using level 1 implementations. ******/
> +
> +/**
> + * \brief Log command example.
> + *
> + * \param handle The handle of the connection, wherewith I want to
> communicate
> + * \retval exitcode
> + * */
> +int hg_log_example(hg_handle *handle)
> +{
> +       char *option[] = {"-v", NULL};
> +       int nc;
> +
> +       /* hg_log function will a iterator. */
> +       hg_log_iterator *log_iterator = hg_log(handle, option);
> +
> +       /* you need to alloc some space for log_entry_t structure */
> +       hg_log_entry_t *le = malloc(sizeof(hg_log_entry_t));
> +
> +       /* Getting the next changeset using the iterator-like mechanism.
> +          Print the changest from log_entry structure.*/
> +       while(nc = hg_fetch_log_entry(log_iterator, le), nc > 0){
> +               printf("rev = %s \n", le->rev);
> +               printf("node = %s \n", le->node);
> +               printf("tags = %s \n", le->tags);
> +               printf("branch = %s \n", le->branch);
> +               printf("author = %s \n", le->author);
> +               printf("desc = %s \n", le->desc);
> +               printf("date = %s \n", le->date);
> +               printf("\n");
> +       }
> +
> +       free(le);
> +       /* last call for hg_fetch_log_entry will pass the exitcode */
> +       return nc;
> +}
> +
> +/** \brief Printing the welcome message.
> + *
> + * Will print the options that you will have in this example.
> + **/
> +void print_select_case()
> +{
> +       printf("Select test case to run:\n");
> +       printf("1) log \n");
> +       printf("\n");
> +       printf("Your choice: ");
> +}
> +
> +
> +/***** Main function. *******/
> +/**
> + * \brief The main function
> + * */
> +int main(int argc, char **argv)
> +{
> +       int select_case;
> +       hg_handle *handle;
> +
> +       print_select_case();
> +       scanf("%d", &select_case);
> +       if(select_case < 1 || select_case > 1){
> +               printf("Your choice is not an option...\n");
> +               return -1;
> +       }
> +
> +       switch(select_case){
> +               case 1:
> +                       setup_tmp();
> +                       setup_log();
> +                       handle = hg_open(NULL, "");
> +
> +                       hg_log_example(handle);
> +
> +                       hg_close(&handle);
> +                       clean_tmp();
> +                       break;
> +               default:
> +                       break;
> +       }
> +
> +       return 0;
> +}
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20130819/6140fcff/attachment.html>


More information about the Mercurial-devel mailing list