[PATCH 2 of 2] releasenotes: add similarity check function to compare incoming notes

Wed Jul 5 09:44:43 EDT 2017

On Mon, 03 Jul 2017 23:06:31 +0200, Rishabh Madan wrote:
> # HG changeset patch
> # User Rishabh Madan <rishabhmadan96 at gmail.com>
> # Date 1499115687 -7200
> #      Mon Jul 03 23:01:27 2017 +0200
> # Node ID 1fc4f41fadcca56d36edd0747a09e1be6af5f680
> # Parent  6684514628b44e838f499b2125ffebe8d700a204
> releasenotes: add similarity check function to compare incoming notes

Can you think more about how to split this into testable functions?

> @@ -89,7 +91,20 @@
>  
>          This is used to combine multiple sources of release notes together.
>          """
> +
> +        all_points = []
> +
>          for section in other:
> +            for title, paragraphs in self.titledforsection(section):
> +                str = ""
> +                str = converttostring(paragraphs)
> +                all_points.append(str)
> +
> +            for paragraphs in self.nontitledforsection(section):
> +                str = ""
> +                str = converttostring(paragraphs)
> +                all_points.append(str)

 (*1)

> @@ -98,17 +113,58 @@
>                               (title, section))
>                      continue
>  
> -                # TODO perform similarity comparison and try to match against
> -                # existing.
> -                self.addtitleditem(section, title, paragraphs)
> +                str_incoming = converttostring(paragraphs)
> +                if section == 'fix':
> +                    issues = re.findall(RE_ISSUE, str_incoming, re.IGNORECASE)
> +                    if len(issues) > 0:

Nit: If only the first match is needed, re.search() should be enough.

> +                        issuenumber = issues[0]
> +                        issuenumber = "".join(issuenumber.split())
> +                        if any(issuenumber in s for s in all_points):
> +                            ui.write(_("\"%s\" already exists in notes; "
> +                                     "ignoring\n") % issuenumber)
> +                            continue
> +                        else:
> +                            self.addtitleditem(section, title, paragraphs)
> +                            continue
> +
> +                if len(str_incoming.split()) > 10:
> +                    merge = similaritycheck(str_incoming, all_points)
> +
> +                    if not merge:
> +                        ui.write(_("\"%s\" already exists in notes file; "
> +                                 "ignoring\n") % str_incoming)
> +                    else:
> +                        self.addtitleditem(section, title, paragraphs)
> +                else:
> +                    self.addtitleditem(section, title, paragraphs)
>  
>              for paragraphs in other.nontitledforsection(section):
> +                str_incoming = converttostring(paragraphs)
> +                if section == 'fix':
> +                    issues = re.findall(RE_ISSUE, str_incoming, re.IGNORECASE)
> +                    if len(issues) > 0:
> +                        issuenumber = issues[0].lower()
> +                        issuenumber = "".join(issuenumber.split())
> +                        if any(issuenumber in s for s in all_points):
> +                            ui.write(_("\"%s\" already exists in notes; "
> +                                     "ignoring\n") % str_incoming)
> +                            continue
> +                        else:
> +                            self.addnontitleditem(section, paragraphs)
> +                            continue
>                  if paragraphs in self.nontitledforsection(section):
>                      continue
>  
> -                # TODO perform similarily comparison and try to match against
> -                # existing.
> -                self.addnontitleditem(section, paragraphs)
> +                if len(str_incoming.split()) > 10:
> +                    merge = similaritycheck(str_incoming, all_points)
> +
> +                    if not merge:
> +                        ui.write(_("\"%s\" already exists in notes; "
> +                                 "ignoring\n") % str_incoming)
> +                    else:
> +                        self.addnontitleditem(section, paragraphs)
> +                else:
> +                    self.addnontitleditem(section, paragraphs)

For instance, maybe there could be a boolean function that returns whether
the new paragraph should be added or not.

  if f(section, paragraphs):
      self.addtitleditem(section, title, paragraphs)
  ...
  if f(section, paragraphs):
      self.addnontitleditem(section, paragraphs)

Since the function 'f' depends on 'all_points', which has a wider scope (*1),
'f' might be an instance of a class that holds the pre-computed 'all_points'.

  f = x()
  f.addknownparagraphs(paragraphs for _title, paragraphs
                       in self.titledforsection(section))
  ...