[PATCH 2 of 3 V4 RFC] model (1): c-hglib: hg_log() level 1 function

Iulian Stana julian.stana at gmail.com
Sun Sep 1 13:43:22 CDT 2013


# HG changeset patch
# User Iulian Stana <julian.stana at gmail.com>
# Date 1378056765 -10800
#      Sun Sep 01 20:32:45 2013 +0300
# Node ID 0d7faa1c7ad6bc680fe2bc2cdad0c3becaabd603
# Parent  301aad54936be170f1915a034893911f4e742c93
model (1): c-hglib: hg_log() level 1 function

This mechanism could be called model (1):

(1) Return immediately after having sent the command to the command server,
    just wrapping a call to hg_rawcommand().
    Other API functions are provided to retrieve:
    (a) the data sent in response by the command server, in parsed
        (structured) form
    (b) the exit code, i.e. the content of the 'r' channel after all things
        have happened.


Some commands must handle a huge mass of data. One of those commands is the
"hg log" command, which is built in this commit.

The revision history could contain a huge mass of data. To deal with this
issue, I have created an iterator-like mechanism. In this way I will get the
changesets in chunks, or rather one at a time.

The hg_log function will prepare the command and then ask the command server
for changesets. This function will return an iterator structure to the user, to
be used with the hg_fetch_cset_entry function. (The log command itself will not
pass any changeset to the user.)

The hg_fetch_cset_entry function will read changesets from the command server
and will store each changeset, in parsed form, in the hg_cset_entry structure.
The hg_cset_entry structure is one of the function parameters.

--------
A message can contain one or more changesets, and it can also contain just a
part of a changeset. Because of this I cannot assume how the next changeset
will arrive.
I have created the following mechanism:
 - I use a template so that I know how the cset will be laid out.
 - I read data from the command server until I know that the received data
   contains a complete cset.
 - I parse the cset and pass it to the user.

In this way my changeset pointer will always hold a complete changeset.

Let's say that:
Ci is a byte belonging to changeset number "i".
Then, during a call and afterwards, the buffer field of hg_csetstream_buffer
will contain:
C1C1C1C1\0C2C2\0C3C3C3\0C4

An example of how this function can be used can be found in the main.c file.

diff --git a/client.c b/client.c
new file mode 100644
--- /dev/null
+++ b/client.c
@@ -0,0 +1,175 @@
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+
+#include "client.h"
+#include "utils.h"
+
+#define HGPATH "hg"
+#define CHANGESET "\\0{rev}\\n{node}\\n{tags}\\n{branch}\\n{author}\
+						\\n{date|isodate}\\n{desc}"
+/* CHANGESET: --template value used by hg_log(). NOTE(review): the line splice
+ * is inside the literal, so the second line's tabs are part of the string. */
+
+/**
+ * \brief Split a raw changeset in place into the hg_cset_entry fields.
+ *
+ * No data is copied: the first six "\n" separators found in cset are
+ * overwritten with '\0' and each field of ce is pointed into cset itself.
+ * \param cset The changeset text, with fields in CHANGESET template order.
+ * \param ce   The hg_cset_entry structure whose seven fields are set.
+ * \retval 0 always.
+ * */
+int parse_changeset(char *cset, hg_cset_entry *ce)
+{
+	char *position = cset;
+	/* rev starts at the beginning of cset and ends at the first newline */
+	ce->rev = cset;
+	position = strstr(position, "\n");
+	cset[position - cset] = '\0'; /* same as *position = '\0' */
+
+	/* node starts right after the terminator just written */
+	ce->node = position + 1;
+	position = strstr(position + 1, "\n");
+	cset[position - cset] = '\0';
+
+	/* tags field */
+	ce->tags = position + 1;
+	position = strstr(position + 1, "\n");
+	cset[position - cset] = '\0';
+
+	/* branch field */
+	ce->branch = position + 1;
+	position = strstr(position + 1, "\n");
+	cset[position - cset] = '\0';
+
+	/* author field */
+	ce->author = position + 1;
+	position = strstr(position + 1, "\n");
+	cset[position - cset] = '\0';
+
+	/* date field */
+	ce->date = position + 1;
+	position = strstr(position + 1, "\n");
+	cset[position - cset] = '\0';
+
+	/* desc is the remainder; newlines inside it are left untouched */
+	ce->desc = position + 1;
+	/* NOTE(review): none of the strstr() results is checked against NULL. */
+	return 0;
+}
+
+/* Append ssize + 1 bytes of source to *dest, (re)allocating it; returns 0. */
+int adding_data(char **dest, char *source, int dsize, int ssize)
+{
+	if(*dest == NULL){ /* first chunk: dsize is not used on this path */
+		*dest = malloc(ssize + 1); /* NOTE(review): result not checked */
+		memcpy(*dest, source, ssize + 1); /* also copies source[ssize] */
+	} else {
+		*dest = realloc(*dest, dsize + ssize + 2); /* NOTE(review): old block is lost if this fails */
+		memcpy(*dest + dsize, source, ssize + 1); /* new data lands at offset dsize */
+	}
+	return 0;
+}
+
+/* Drop the first first_cset_size bytes of *cset; returns the remaining size. */
+int erase_cset(char **cset, int buf_size, int first_cset_size)
+{
+	int new_cset_size = buf_size - first_cset_size;
+	char *new_cset = malloc(new_cset_size + 1); /* NOTE(review): result not checked */
+	memcpy(new_cset, *cset + first_cset_size, new_cset_size + 1); /* tail plus one extra byte */
+	free(*cset); /* pointers into the old buffer are invalid from here on */
+	*cset = new_cset;
+	return new_cset_size;
+}
+
+
+/* Send "hg log" with the CHANGESET template; returns the iterator, or NULL. */
+hg_csetstream_buffer *hg_log(hg_handle *handle, char *option[])
+{
+	hg_csetstream_buffer *cbuf = malloc(sizeof(hg_csetstream_buffer));
+	cbuf->handle = handle; /* NOTE(review): malloc result not checked */
+
+	cbuf->command = cmdbuilder("log", option, "--template", CHANGESET,
+							NULL);
+
+	if(hg_rawcommand(handle, cbuf->command) < 0){
+		return NULL; /* NOTE(review): cbuf and cbuf->command are not freed here */
+	}
+	/* No changeset data is read here; hg_fetch_cset_entry() fills the buffer. */
+	cbuf->buffer = NULL;
+	cbuf->buf_size = 0;
+	cbuf->is_send = 0;
+
+	return cbuf;
+}
+
+/* Iterator step: pass the next changeset to centry, or finish the command. */
+int hg_fetch_cset_entry(hg_csetstream_buffer *cbuf, hg_cset_entry *centry)
+{
+	hg_header head = hg_head(cbuf->handle); 
+	int exitcode;
+	char *get_data;
+	int read_size;
+
+	/* Drop the changeset handed out by the previous call; erase_cset() frees
+	 * the old buffer, so earlier centry pointers are invalid from here on. */
+	if(cbuf->is_send && cbuf->buf_size){
+		cbuf->buf_size = erase_cset(&cbuf->buffer, cbuf->buf_size,
+						cbuf->first_cset_size);
+	}
+	while(head.channel != 'r'){
+		/* A '\0' found before the end of the buffered data means the
+		 * first changeset is complete: parse it and return it. */
+		if(cbuf->buffer && strlen(cbuf->buffer + 1) < cbuf->buf_size -1){
+			cbuf->first_cset_size = strlen(cbuf->buffer + 1) + 1;
+			parse_changeset(cbuf->buffer + 1, centry); /* + 1 skips the leading '\0' */
+			cbuf->is_send = 1;
+			return head.length;
+		}
+		else{
+			/* Otherwise read the pending message from the command
+			 * server and append it to the end of the buffer. */
+			get_data = malloc(head.length + 1);
+			if(read_size = hg_rawread(cbuf->handle, get_data, 
+						head.length), read_size < 0){
+				return -1; /* NOTE(review): get_data is not freed on this path */
+			}
+			adding_data(&cbuf->buffer, get_data, cbuf->buf_size, 
+								read_size);
+			cbuf->buf_size += read_size;
+			head = hg_head(cbuf->handle);
+			free(get_data);
+		}
+	}
+	/* Channel 'r' reached, so nothing more is read. If exactly one changeset
+	 * is left in the buffer, return it and mark the buffer as drained. */
+	if(cbuf->buffer && strlen(cbuf->buffer + 1) == cbuf->buf_size -1){
+		cbuf->first_cset_size = strlen(cbuf->buffer + 1) + 1;
+		parse_changeset(cbuf->buffer + 1, centry);
+		cbuf->buf_size = 0;
+		cbuf->is_send = 0;
+		return head.length;
+	/* Several changesets are left: return the first, the next call erases it. */
+	}else if(cbuf->buf_size && cbuf->is_send){
+		cbuf->first_cset_size = strlen(cbuf->buffer + 1) + 1;
+		parse_changeset(cbuf->buffer + 1, centry);
+		cbuf->is_send = 1;
+		return head.length;
+	}
+	/* Nothing left to hand out: fetch the exit code and release the iterator. */
+	exitcode = hg_exitcode(cbuf->handle);
+	free(cbuf->command);
+	free(cbuf->buffer);
+	free(cbuf); /* the caller must not use cbuf after this call returns */
+	return exitcode; /* NOTE(review): shares the int range of the lengths returned above */
+
+}
diff --git a/client.h b/client.h
--- a/client.h
+++ b/client.h
@@ -69,6 +69,24 @@
 	char *out_data;
 } hg_handle;
 
+typedef struct hg_csetstream_buffer{ /* iterator state returned by hg_log() */
+	hg_handle *handle; /* connection the log command was sent on */
+	char **command; /* cmdbuilder() result, freed by the last fetch call */
+	char *buffer; /* changeset bytes received but not yet erased */
+	int buf_size; /* number of bytes accumulated in buffer */
+	int is_send; /* nonzero once the first cset in buffer was handed out */
+	int first_cset_size; /* bytes the next fetch call erases from buffer */
+}hg_csetstream_buffer;
+
+typedef struct hg_cset_entry{ /* one changeset; parse_changeset() sets all fields */
+	char *author; /* {author} template field */
+	char *branch; /* {branch} template field */
+	char *date; /* {date|isodate} template field */
+	char *desc; /* {desc} template field */
+	char *node; /* {node} template field */
+	char *rev; /* {rev} template field */
+	char *tags; /* {tags} template field */
+}hg_cset_entry;
 
 /**
  * \brief Open the connection with the mercurial command server.
@@ -215,4 +233,63 @@
  * */
 int hg_exitcode(hg_handle *handle);
 
+/**
+ * \brief hg_log command for hglib API.
+ *
+ * Starting point of a log query: builds the "hg log" command and sends it to
+ * the cmd-server. Changesets are then fetched with hg_fetch_cset_entry(),
+ * whose final call also frees the returned structure.
+ *
+ * Return the revision history of the specified files or the entire project.
+ * File history is shown without following rename or copy history of files.
+ * Use follow with a filename to follow history across renames and copies.
+ * follow without a filename will only show ancestors or descendants of the
+ * starting revision. followfirst only follows the first parent of merge
+ * revisions.
+ *
+ * If revrange isn't specified, the default is "tip:0" unless follow is set,
+ * in which case the working directory parent is used as the starting
+ * revision.
+ *
+ * \param handle The handle of the connection to communicate on.
+ * \param option The NULL-terminated option list for mercurial log command.
+ * \retval hg_csetstream_buffer A pointer to hg_csetstream_buffer structure if
+ *                              successful
+ * \retval NULL to indicate an error, with errno set appropriately.
+ *
+ * errno can be:
+ *      - hg_rawcommand errors
+ * */
+hg_csetstream_buffer *hg_log(hg_handle *handle, char *option[]);
+
+/**
+ * \brief The iterator step. Getting the next changeset.
+ *
+ * The revision history could have a huge mass of data. You cannot pass the
+ * entire history in one call, so we use an iterator-like mechanism: each call
+ * to hg_fetch_cset_entry reads the next changeset from the cmd-server, parses
+ * it and passes it to the hg_cset_entry structure.
+ * The hg_cset_entry structure holds one changeset as the following string
+ * fields:
+ *         - rev, node
+ *         - tags (space delimited)
+ *         - branch
+ *         - author
+ *         - date
+ *         - desc
+ *
+ * \param cbuf The buffer structure returned by hg_log(), storing cset data.
+ * \param centry The hg_cset_entry structure where the changeset will be stored
+ *               and passed
+ * \retval number The length field of the current message header (a cset was passed).
+ * \retval exitcode To indicate the end of current_command; cbuf has been freed.
+ * \retval   -1 to indicate an error, with errno set appropriately.
+ *
+ * errno can be:
+ *      - EINVAL  - Invalid argument (handle it's set to a null pointer)
+ *      - read(2) command errors
+ *      - read_header error
+ * */
+int hg_fetch_cset_entry(hg_csetstream_buffer *cbuf, hg_cset_entry *centry);
+
 #endif
diff --git a/main.c b/main.c
new file mode 100644
--- /dev/null
+++ b/main.c
@@ -0,0 +1,128 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "client.h"
+#include "utils.h"
+
+#define INIT_REPO  "init_test_repo"
+
+/****** Convenience functions. *******/
+
+/**
+ * \brief Run "hg init tmp" and change the working directory to "tmp".
+ * */
+void setup_tmp()
+{
+	system("hg init tmp"); /* return value is not checked */
+	chdir("tmp"); /* return value is not checked */
+}
+
+/**
+ * \brief Step back to the parent directory and remove "tmp" with "rm -rf".
+ * */
+void clean_tmp()
+{
+	chdir(".."); /* return value is not checked */
+	system("rm -rf tmp");
+}
+
+/**
+ * \brief Run four "hg commit" shell commands so the log command has history.
+ * */
+void setup_log()
+{
+	system("touch foo ; hg add foo ; hg commit -m 'foo file'"); /* add empty foo */
+	system("echo baloo > foo ; hg commit -m 'baloo text'"); /* change foo */
+	system("touch voodoo ; hg add voodoo ; hg commit -m voodoo"); /* add empty voodoo */
+	system("echo voodoo > voodoo ; hg commit -m 'voodoo text'"); /* change voodoo */
+}
+
+/******* Examples using level 1 implementations. ******/
+
+/**
+ * \brief Log command example: print every changeset returned by "hg log -v".
+ *
+ * \param handle The handle of the connection to communicate on.
+ * \retval nc The first non-positive value returned by hg_fetch_cset_entry().
+ * */
+int hg_log_example(hg_handle *handle)
+{
+	char *option[] = {"-v", NULL};
+	int nc;
+
+	/* hg_log returns the iterator. NOTE(review): NULL is not checked. */
+	hg_csetstream_buffer *log_iterator = hg_log(handle, option);
+
+	/* the caller provides the storage for the hg_cset_entry structure */
+	hg_cset_entry *le = malloc(sizeof(hg_cset_entry));
+
+	/* Get the next changeset using the iterator-like mechanism and print
+	   its fields; the loop ends on the first return value <= 0. */
+	while(nc = hg_fetch_cset_entry(log_iterator, le), nc > 0){
+		printf("rev = %s \n", le->rev);
+		printf("node = %s \n", le->node);
+		printf("tags = %s \n", le->tags);
+		printf("branch = %s \n", le->branch);
+		printf("author = %s \n", le->author);
+		printf("date = %s \n", le->date);
+		printf("desc = %s \n", le->desc);
+		printf("\n");
+	}
+
+	free(le);
+	/* the last call to hg_fetch_cset_entry returned the exitcode or -1 */
+	return nc;
+}
+
+/** \brief Print the test-case menu.
+ * 
+ * Writes the list of selectable examples and the input prompt to stdout.
+ **/
+void print_select_case()
+{
+	printf("Select test case to run:\n");
+	printf("1) log \n");
+	printf("\n");
+	printf("Your choice: "); /* no trailing newline: the answer is typed on this line */
+}
+
+
+
+/***** Main function. *******/
+/**
+ * \brief Read a test-case number from stdin and run the matching example.
+ * */
+int main(int argc, char **argv)
+{
+	int select_case;
+	hg_handle *handle;
+
+	print_select_case();
+	scanf("%d", &select_case); /* NOTE(review): result unchecked; select_case stays unset on bad input */
+	if(select_case < 1 || select_case > 1){ /* only case 1 exists */
+		printf("Your choice is not an option...\n");
+		return -1;
+	}
+
+	switch(select_case){
+		case 1:
+			setup_tmp();
+			setup_log();
+			handle = hg_open(NULL, "");
+
+			hg_log_example(handle); /* its return value is ignored */
+
+			hg_close(&handle);
+			clean_tmp();
+			break;
+	}
+
+	return 0;
+}


More information about the Mercurial-devel mailing list