[PATCH 2 of 2 RFC] c-hglib: hg_log() level 1 function

Iulian Stana julian.stana at gmail.com
Mon Aug 19 12:18:08 CDT 2013


2013/8/19 Giovanni Gherdovich <g.gherdovich at gmail.com>

>
>
> 2013/8/18 Iulian Stana <julian.stana at gmail.com>
>
>> # HG changeset patch
>> # User Iulian Stana <julian.stana at gmail.com>
>> # Date 1376828454 -10800
>> #      Sun Aug 18 15:20:54 2013 +0300
>> # Node ID 04bf311c99b8f4cf4c3e2096b493ee126f861b99
>> # Parent  21be7b973803e02b0bb310465691f88705a2dd53
>> c-hglib: hg_log() level 1 function
>>
>> The revision history could have a huge mass of data. To deal with this
>> issue, I
>> had created a iterator-like mechanism. In this way I will get the
>> changesets in
>> chunks or more-like one at the time.
>>
>> The hg_log function will prepare the command and then will call
>> cmd-server for
>> changesets. This function will return to the user a iterator structure,
>> to be
>> used on hg_fetch_log_entry funciton. (The log command will not passing any
>> changeset to the user)
>>
>> The hg_fetch_log_entry function will read changesets from command server
>> and
>> will pass into the hg_log_entry_t structure in a parse way the changeset.
>> The hg_log_entry_t structure will be one of the function parameter.
>>
>> In the main.c file it can be found an example on how this function can be
>> used.
>>
>> diff --git a/client.c b/client.c
>> new file mode 100644
>> --- /dev/null
>> +++ b/client.c
>> @@ -0,0 +1,134 @@
>> +#define _GNU_SOURCE
>> +#include <errno.h>
>> +#include <fcntl.h>
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/types.h>
>> +#include <sys/wait.h>
>> +#include <signal.h>
>> +
>> +
>> +#include "client.h"
>> +#include "utils.h"
>> +
>> +#define HGPATH "hg"
>> +#define CHANGESET
>> "{rev}\\0{node}\\0{tags}\\0{branch}\\0{author}\\0{desc}\
>> +                                               \\0{date|isodate}\\0"
>> +
>> +/**
>> + * \brief A helper for building the command arguments.
>> + * \param command The name for mercurial command.
>> + * \param options An array pointer to command option list.
>> + * \param template Template to be used for this command.
>> + * \retval new_string An array of pointers, where the command it's
>> almost ready
>> + *                    to be send to mercurial cmdserver.
>> + * */
>> +char **cmdbuilder(char *command, char *options[], char *template)
>> +{
>> +       int cmd_size, size;
>> +       char **new_cmd;
>> +       char *temp = "--template";
>> +
>> +       for(size = 0; options[size] != NULL; size++);
>> +       cmd_size = size + 1;
>> +       new_cmd = malloc(sizeof(char *) * (cmd_size + 3));
>> +
>> +       new_cmd[0] = command;
>> +       memcpy(new_cmd + 1, options, (cmd_size - 1)  * sizeof(char *));
>> +       if(template){
>> +               new_cmd[cmd_size] = temp;
>> +               new_cmd[cmd_size + 1] = template;
>> +               new_cmd[cmd_size + 2] = NULL;
>> +       }else{
>> +               new_cmd[cmd_size] = NULL;
>> +       }
>> +
>> +       return new_cmd;
>> +}
>> +
>> +/**
>> + * \brief 'Parse a changeset'. It's more like pointing to the correct
>> position.
>> + *
>> + * The changeset could be found on buff pointer. To not duplicate the
>> data I
>> + * choose to point every log_entry field to the right position.
>> + * \param buff The pointer to the changeset.
>> + * \param le   The log_entry_t structure where the changeset will be
>> parse.
>> + * \retval 0 if successful.
>> + * */
>> +int parse_changeset(char *buff, hg_log_entry_t *le)
>> +{
>> +       /* set pointer for revision position */
>> +       le->rev = buff;
>> +
>> +       /* set pointer for node position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->node = buff;
>> +
>> +       /* set pointer for tag position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->tags = buff;
>> +
>> +       /* set pointer for branch position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->branch = buff;
>> +
>> +       /* set pointer for author position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->author = buff;
>> +
>> +       /* set pointer for description position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->desc = buff;
>> +
>> +       /* set pointer for data position */
>> +       buff = buff + strlen(buff) + 1;
>> +       le->date = buff;
>> +
>> +       return 0;
>> +}
>> +
>> +/* The high level log command for hglib API. */
>> +hg_log_iterator *hg_log(hg_handle *handle, char *option[])
>> +{
>> +       hg_log_iterator *log_iterator = malloc(sizeof(hg_log_iterator));
>> +       log_iterator->handle = handle;
>> +
>> +       log_iterator->command = cmdbuilder("log", option, CHANGESET);
>> +
>> +       if(hg_rawcommand(handle, log_iterator->command) < 0){
>> +               return NULL;
>> +       }
>> +
>> +       log_iterator->changeset = malloc(0);
>> +       return log_iterator;
>>
>
>
> What happens here is that hg_log() does not return a status code
> (with the meaning "command successfully sent"), but the hg_log_iterator
> struct instead.
>
> A status code would be desirable and idiomatic I think;
> the struct to be passed as an argument.
>

I don't really know what is the best way of passing the status code...
but the NULL pointer could mean that an error had occur.


>
>
>
>> +}
>> +
>> +/* The iterator next step. Getting the next changeset. */
>> +int hg_fetch_log_entry(hg_log_iterator *log_iterator, hg_log_entry_t *le)
>> +{
>> +       hg_header head = hg_head(log_iterator->handle);
>> +
>> +       if(head.channel == 'r'){
>> +               free(log_iterator->command);
>> +               free(log_iterator->changeset);
>> +               int exitcode = hg_exitcode(log_iterator->handle);
>> +               free(log_iterator);
>> +               return exitcode;
>> +       }
>> +
>> +       log_iterator->changeset = realloc(log_iterator->changeset,
>> head.length);
>> +
>> +       if(read(log_iterator->handle->p_read, log_iterator->changeset,
>> +                                       head.length) < 0){
>>
>
>
> Here you are not using hg_rawread(). Please explain why.
> Level 1 functions are meant to build on top of level 0 functions.
>
> Also: you are not reading in 4096 B chunks.
>

UPSSSS...Yes I don't know way I choose to not using the hg_rawread()
function... I will change this....

About using chunks of 4k, like I said in the earlier mail I don't know
if this is the best option. In parsing process you will need the entire
revision. This means that you need somehow to concatenate those
chunks. I prefer to read the entire revision instead of making this
expensive operation (concatenate data).

For example:
If you have a revision of 20MB, it's more easy to read in a single
call this data, (if it's possible) instead of performing 20MB/ 4096B
calls and some concatenate operation calls for every call.
concatenate operation(realloc memory and copy the memory)



>
> Also: you are assuming that all information for a given log entry
> (a revision) are retrieved in a "single shot" of the  'o' channel.
>

> You are assuming that a log revision cannot be split across two
> iteration of output on the 'o' channel.
>
> Is this assumption safe?
>
> Please ask Idan Kamara (CC the list) and make sure of this.
>
>
>
>> +               return -1;
>> +       }
>> +
>> +       parse_changeset(log_iterator->changeset, le);
>> +
>> +       if(read_header(log_iterator->handle) < 0){
>> +               return -1;
>> +       }
>> +       return head.length;
>> +}
>> diff --git a/client.h b/client.h
>> --- a/client.h
>> +++ b/client.h
>> @@ -65,6 +65,22 @@
>>         int protect;
>>  } hg_handle;
>>
>> +typedef struct hg_log_iterator{
>>
>
>
> I think this name is unfortunate. This is not an iterator, in any possible
> sense.
> This is the place where you store your data (the "changeset" field of this
> structure).
>
> I would call this something like hg_log_rawentry_t, in contrast to the
> other structure where the log entry is "parsed" and not "raw".
>

Hmmm, agreed with you. Let's talk about this on IRC channel.
Also this iterator-like mechanism probably will be used in other places
too. So a more common name probably will fit better...



>
> Please note that your code works as an iterator _not_ because of this
> struct
> (where you are storing a single element of your possibly "infinite" list),
> _but_ because it all piggybacks on the syscall read(2), which works as an
> iterator itself.
>
>
>
>> +       hg_handle *handle;
>> +       char **command;
>> +       char *changeset;
>> +}hg_log_iterator;
>> +
>> +typedef struct hg_log_entry_t{
>> +       char *author;
>> +       char *branch;
>> +       char *date;
>> +       char *desc;
>> +       char *node;
>> +       char *rev;
>> +       char *tags;
>> +}hg_log_entry_t;
>> +
>>  /**
>>   * \brief Open the connection with the mercurial command server.
>>   *
>> @@ -196,6 +212,63 @@
>>   * */
>>  int hg_exitcode(hg_handle *handle);
>>
>> +/**
>> + * \brief hg_log command for hglib API.
>> + *
>> + * It's an advance function to get revision history. It's more like the
>> start
>> + * point of the action, this function will prepare the query question
>> and will
>> + * send it to the cmd-server.
>> + *
>> + * Return the revision history of the specified files or the entire
>> project.
>> + * File history is shown without following rename or copy history of
>> files.
>> + * Use follow with a filename to follow history across renames and
>> copies.
>> + * follow without a filename will only show ancestors or descendants of
>> the
>> + * starting revision. followfirst only follows the first parent of merge
>> + * revisions.
>> + *
>> + * If revrange isn't specified, the default is "tip:0" unless follow is
>> set,
>> + * in which case the working directory parent is used as the starting
>> + * revision.
>> + *
>> + * \param handle The handle of the connection, wherewith I want to
>> communicate
>> + * \param option The option list for mercurial log command.
>> + * \retval hg_log_iterator A pointer to hg_log_iterator structure if
>> successful
>> + * \retval NULL to indicate an error, with errno set appropriately.
>> + *
>> + * errno can be:
>> + *      - hg_rawcommand errors
>> + * */
>> +hg_log_iterator *hg_log(hg_handle *handle, char *option[]);
>> +
>> +/**
>> + * \brief The iterator step. Getting the next changeset.
>> + *
>> + * The revision history could have a huge mass of data. You can pass the
>> entire
>> + * history in one call, so we use an iterator-like mechanism. Calling the
>> + * hg_fetch_log_entry, the next changeset will be read from cmd-server,
>> parse
>> + * and pass to hg_log_entry_t structure.
>> + * The log_entry structure will handle a changeset with the following
>> string
>> + * fields:
>> + *         - rev
>> + *         - node
>> + *         - tags (space delimited)
>> + *         - branch
>> + *         - author
>> + *         - desc
>> + *
>> + * \param log_iterator The iterator for log command.
>> + * \param log_entry The hg_log_entry_t structure where the changeset
>> will be
>> + *                   pass
>> + * \retval number The lenght for the pass changeset.
>> + * \retval exitcode To indicate the end of log_command.
>> + * \retval   -1 to indicate an error, with errno set appropriately.
>> + *
>> + * errno can be:
>> + *      - EINVAL  - Invalid argument (handle it's set to a null pointer)
>> + *      - read(2) command errors
>> + *      - read_header error
>> + * */
>> +int hg_fetch_log_entry(hg_log_iterator *log_iterator, hg_log_entry_t
>> *le);
>>
>>  #endif
>>
>> diff --git a/main.c b/main.c
>> new file mode 100644
>> --- /dev/null
>> +++ b/main.c
>> @@ -0,0 +1,129 @@
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +
>> +#include <sys/wait.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>> +
>> +#include "client.h"
>> +#include "utils.h"
>> +
>> +#define INIT_REPO  "init_test_repo"
>> +
>> +/****** Convenience functions. *******/
>> +
>> +/**
>> + * \brief Create and setup the tmp directory where the acction will
>> happends.
>> + * */
>> +void setup_tmp()
>> +{
>> +       system("hg init tmp");
>> +       chdir("tmp");
>> +}
>> +
>> +/**
>> + * \brief Remove the tmp directory and all his files.
>> + * */
>> +void clean_tmp()
>> +{
>> +       chdir("..");
>> +       system("rm -rf tmp");
>> +}
>> +
>> +/**
>> + * \brief Fill the current repository with commits for log command.
>> + * */
>> +void setup_log()
>> +{
>> +       system("touch foo ; hg add foo ; hg commit -m 'foo file'");
>> +       system("echo baloo > foo ; hg commit -m 'baloo text'");
>> +       system("touch voodoo ; hg add voodoo ; hg commit -m voodoo");
>> +       system("echo voodoo > voodoo ; hg commit -m 'voodoo text'");
>> +}
>> +
>> +/******* Examples using level 1 implementations. ******/
>> +
>> +/**
>> + * \brief Log command example.
>> + *
>> + * \param handle The handle of the connection, wherewith I want to
>> communicate
>> + * \retval exitcode
>> + * */
>> +int hg_log_example(hg_handle *handle)
>> +{
>> +       char *option[] = {"-v", NULL};
>> +       int nc;
>> +
>> +       /* hg_log function will a iterator. */
>> +       hg_log_iterator *log_iterator = hg_log(handle, option);
>> +
>> +       /* you need to alloc some space for log_entry_t structure */
>> +       hg_log_entry_t *le = malloc(sizeof(hg_log_entry_t));
>> +
>> +       /* Getting the next changeset using the iterator-like mechanism.
>> +          Print the changest from log_entry structure.*/
>> +       while(nc = hg_fetch_log_entry(log_iterator, le), nc > 0){
>> +               printf("rev = %s \n", le->rev);
>> +               printf("node = %s \n", le->node);
>> +               printf("tags = %s \n", le->tags);
>> +               printf("branch = %s \n", le->branch);
>> +               printf("author = %s \n", le->author);
>> +               printf("desc = %s \n", le->desc);
>> +               printf("date = %s \n", le->date);
>> +               printf("\n");
>> +       }
>> +
>> +       free(le);
>> +       /* last call for hg_fetch_log_entry will pass the exitcode */
>> +       return nc;
>> +}
>> +
>> +/** \brief Printing the welcome message.
>> + *
>> + * Will print the options that you will have in this example.
>> + **/
>> +void print_select_case()
>> +{
>> +       printf("Select test case to run:\n");
>> +       printf("1) log \n");
>> +       printf("\n");
>> +       printf("Your choice: ");
>> +}
>> +
>> +
>> +/***** Main function. *******/
>> +/**
>> + * \brief The main function
>> + * */
>> +int main(int argc, char **argv)
>> +{
>> +       int select_case;
>> +       hg_handle *handle;
>> +
>> +       print_select_case();
>> +       scanf("%d", &select_case);
>> +       if(select_case < 1 || select_case > 1){
>> +               printf("Your choice is not an option...\n");
>> +               return -1;
>> +       }
>> +
>> +       switch(select_case){
>> +               case 1:
>> +                       setup_tmp();
>> +                       setup_log();
>> +                       handle = hg_open(NULL, "");
>> +
>> +                       hg_log_example(handle);
>> +
>> +                       hg_close(&handle);
>> +                       clean_tmp();
>> +                       break;
>> +               default:
>> +                       break;
>> +       }
>> +
>> +       return 0;
>> +}
>> _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel at selenic.com
>> http://selenic.com/mailman/listinfo/mercurial-devel
>>
>
>


-- 
O zi buna,
Iulian
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20130819/645860dd/attachment.html>


More information about the Mercurial-devel mailing list