[Kazehakase-cvs] CVS update: kazehakase/src/libegg/regex

Back to archive index

Hiroyuki Ikezoe ikezo****@users*****
Fri Mar 16 14:00:28 JST 2007


Index: kazehakase/src/libegg/regex/.cvsignore
diff -u kazehakase/src/libegg/regex/.cvsignore:1.1 kazehakase/src/libegg/regex/.cvsignore:removed
--- kazehakase/src/libegg/regex/.cvsignore:1.1	Fri Aug  6 20:13:18 2004
+++ kazehakase/src/libegg/regex/.cvsignore	Fri Mar 16 14:00:27 2007
@@ -1,11 +0,0 @@
-Makefile
-Makefile.in
-.deps
-.libs
-*.o
-*.lo
-*.la
-*.bak
-*~
-*.rej
-*.orig
Index: kazehakase/src/libegg/regex/Makefile.am
diff -u kazehakase/src/libegg/regex/Makefile.am:1.1 kazehakase/src/libegg/regex/Makefile.am:removed
--- kazehakase/src/libegg/regex/Makefile.am:1.1	Fri Aug  6 20:13:18 2004
+++ kazehakase/src/libegg/regex/Makefile.am	Fri Mar 16 14:00:27 2007
@@ -1,24 +0,0 @@
-SUBDIRS = pcre
-
-INCLUDES = \
-  $(GTK_CFLAGS) \
-  -DEGG_COMPILATION \
-  -DGETTEXT_PACKAGE=\""$(GETTEXT_PACKAGE)"\" \
-  -DGTK_DISABLE_DEPRECATED \
-  -I$(top_srcdir)/src/utils \
-  -DGDK_DISABLE_DEPRECATED \
-  -DG_DISABLE_DEPRECATED
-
-
-noinst_LTLIBRARIES = libeggregex.la
-
-libeggregex_la_SOURCES = \
-  eggregex.c
-
-libeggregex_la_LIBADD = \
-	$(top_builddir)/src/libegg/regex/pcre/libpcre.la 
-
-noinst_HEADERS = \
-  eggregex.h
-
-
Index: kazehakase/src/libegg/regex/README
diff -u kazehakase/src/libegg/regex/README:1.1 kazehakase/src/libegg/regex/README:removed
--- kazehakase/src/libegg/regex/README:1.1	Fri Aug  6 20:13:18 2004
+++ kazehakase/src/libegg/regex/README	Fri Mar 16 14:00:27 2007
@@ -1,11 +0,0 @@
-This directory contains a regular expression API which should eventually 
-land in GLib. We have put it here, since it didn't get enough testing
-to be put into GLib 2.6. 
-
-TODO items:
-- Improve the Unicode support by making PCRE use the GLib Unicode functions.
-- Maybe change the replace functions to replace only one match.
-
-July 2 2004
-
-Matthias
Index: kazehakase/src/libegg/regex/egg-macros.h
diff -u kazehakase/src/libegg/regex/egg-macros.h:1.1 kazehakase/src/libegg/regex/egg-macros.h:removed
--- kazehakase/src/libegg/regex/egg-macros.h:1.1	Fri Aug  6 20:13:18 2004
+++ kazehakase/src/libegg/regex/egg-macros.h	Fri Mar 16 14:00:27 2007
@@ -1,94 +0,0 @@
-/**
- * Useful macros.
- *
- * Author:
- *   Darin Adler <darin****@bents*****>
- *
- * Copyright 2001 Ben Tea Spoons, Inc.
- */
-#ifndef _EGG_MACROS_H_
-#define _EGG_MACROS_H_
-
-#include <glib/gmacros.h>
-
-G_BEGIN_DECLS
-
-/* Macros for defining classes.  Ideas taken from Nautilus and GOB. */
-
-/* Define the boilerplate type stuff to reduce typos and code size.  Defines
- * the get_type method and the parent_class static variable. */
-
-#define EGG_BOILERPLATE(type, type_as_function, corba_type,		\
-			   parent_type, parent_type_macro,		\
-			   register_type_macro)				\
-static void type_as_function ## _class_init    (type ## Class *klass);	\
-static void type_as_function ## _instance_init (type          *object);	\
-static parent_type ## Class *parent_class = NULL;			\
-static void								\
-type_as_function ## _class_init_trampoline (gpointer klass,		\
-					    gpointer data)		\
-{									\
-	parent_class = (parent_type ## Class *)g_type_class_ref (	\
-		parent_type_macro);					\
-	type_as_function ## _class_init ((type ## Class *)klass);	\
-}									\
-GType									\
-type_as_function ## _get_type (void)					\
-{									\
-	static GType object_type = 0;					\
-	if (object_type == 0) {						\
-		static const GTypeInfo object_info = {			\
-		    sizeof (type ## Class),				\
-		    NULL,		/* base_init */			\
-		    NULL,		/* base_finalize */		\
-		    type_as_function ## _class_init_trampoline,		\
-		    NULL,		/* class_finalize */		\
-		    NULL,               /* class_data */		\
-		    sizeof (type),					\
-		    0,                  /* n_preallocs */		\
-		    (GInstanceInitFunc) type_as_function ## _instance_init \
-		};							\
-		object_type = register_type_macro			\
-			(type, type_as_function, corba_type,		\
-			 parent_type, parent_type_macro);		\
-	}								\
-	return object_type;						\
-}
-
-/* Just call the parent handler.  This assumes that there is a variable
- * named parent_class that points to the (duh!) parent class.  Note that
- * this macro is not to be used with things that return something, use
- * the _WITH_DEFAULT version for that */
-#define EGG_CALL_PARENT(parent_class_cast, name, args)		\
-	((parent_class_cast(parent_class)->name != NULL) ?		\
-	 parent_class_cast(parent_class)->name args : (void)0)
-
-/* Same as above, but in case there is no implementation, it evaluates
- * to def_return */
-#define EGG_CALL_PARENT_WITH_DEFAULT(parent_class_cast,		\
-					name, args, def_return)		\
-	((parent_class_cast(parent_class)->name != NULL) ?		\
-	 parent_class_cast(parent_class)->name args : def_return)
-
-/* Call a virtual method */
-#define EGG_CALL_VIRTUAL(object, get_class_cast, method, args) \
-    (get_class_cast (object)->method ? (* get_class_cast (object)->method) args : (void)0)
-
-/* Call a virtual method with default */
-#define EGG_CALL_VIRTUAL_WITH_DEFAULT(object, get_class_cast, method, args, default) \
-    (get_class_cast (object)->method ? (* get_class_cast (object)->method) args : default)
-
-#define EGG_CLASS_BOILERPLATE(type, type_as_function,		\
-				 parent_type, parent_type_macro)	\
-	EGG_BOILERPLATE(type, type_as_function, type,		\
-			   parent_type, parent_type_macro,		\
-			   EGG_REGISTER_TYPE)
-
-#define EGG_REGISTER_TYPE(type, type_as_function, corba_type,		\
-			    parent_type, parent_type_macro)		\
-	g_type_register_static (parent_type_macro, #type, &object_info, 0)
-
-
-G_END_DECLS
-
-#endif /* _EGG_MACROS_H_ */
Index: kazehakase/src/libegg/regex/eggregex.c
diff -u kazehakase/src/libegg/regex/eggregex.c:1.4 kazehakase/src/libegg/regex/eggregex.c:removed
--- kazehakase/src/libegg/regex/eggregex.c:1.4	Mon Jan  8 22:58:41 2007
+++ kazehakase/src/libegg/regex/eggregex.c	Fri Mar 16 14:00:27 2007
@@ -1,1010 +0,0 @@
-/* EggRegex -- regular expression API wrapper around PCRE.
- * Copyright (C) 1999, 2000 Scott Wimer
- * Copyright (C) 2004 Matthias Clasen <mclas****@redha*****>
- *
- * This is basically an ease-of-use wrapper around the functionality of
- * PCRE.
- *
- * With this library, we are, hopefully, drastically reducing the code
- * complexity necessary by making use of a more complex and detailed
- * data structure to store the regex info.  I am hoping to have a regex
- * interface that is almost as easy to use as Perl's.  <fingers crossed>
- *
- * Author: Scott Wimer <scott****@cylan*****>
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
- *
- * This library is free software, you can distribute it or modify it
- * under any of the following terms:
- *  1) The GNU General Public License (GPL)
- *  2) The GNU Library General Public License (LGPL)
- *  3) The Perl Artistic license (Artistic)
- *  4) The BSD license (BSD)
- *
- * In short, you can use this library in any code you desire, so long as
- * the Copyright notice above remains intact.  If you do make changes to
- * it, I would appreciate that you let me know so I can improve this 
- * library for everybody, but I'm not gonna force you to.
- * 
- * Please note that this library is just a wrapper around Philip Hazel's
- * PCRE library.  Please see the file 'LICENSE' in your PCRE distribution.
- * And, if you live in England, please send him a pint of good beer, his
- * library is great.
- *
- */
-
-#include "config.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "eggregex.h"
-#include <glib/gslist.h>
-#include <glib/glist.h>
-#include <glib/gi18n-lib.h>
-#include <glib/gstrfuncs.h>
-#include "pcre/pcre.h"
-
-struct _EggRegex
-{
-  gchar *pattern;       /* the pattern */
-  pcre *regex;		/* compiled form of the pattern */
-  pcre_extra *extra;	/* data stored when egg_regex_optimize() is used */
-  gint matches;		/* number of matching sub patterns */
-  gint pos;		/* position in the string where last match left off */
-  gint *offsets;	/* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
-  gint n_offsets;	/* number of offsets */
-  EggRegexCompileFlags compile_opts;	/* options used at compile time on the pattern */
-  EggRegexMatchFlags match_opts;	/* options used at match time on the regex */
-  gint string_len;	/* length of the string last used against */
-  GSList *delims;	/* delimiter sub strings from split next */
-};
-
-GQuark
-egg_regex_error_quark (void)
-{
-  static GQuark error_quark = 0;
-
-  if (error_quark == 0)
-    error_quark = g_quark_from_static_string ("g-regex-error-quark");
-
-  return error_quark;
-}
-
-/** 
- * egg_regex_new:
- * @pattern: the regular expression
- * @compile_options: compile options for the regular expression
- * @match_options: match options for the regular expression
- * @error: return location for a #GError
- * 
- * Compiles the regular expression to an internal form, and does the initial
- * setup of the #EggRegex structure.  
- * 
- * Returns: a #EggRegex structure
- */
-EggRegex *
-egg_regex_new (const gchar         *pattern, 
- 	     EggRegexCompileFlags   compile_options,
-	     EggRegexMatchFlags     match_options,
-	     GError             **error)
-{
-  EggRegex *regex = g_new0 (EggRegex, 1);
-  const gchar *errmsg;
-  gint erroffset;
-  gint capture_count;
-  
-  /* preset the parts of gregex that need to be set, regardless of the
-   * type of match that will be checked */
-  regex->pattern = g_strdup (pattern);
-  regex->extra = NULL;
-  regex->pos = 0;
-  regex->string_len = -1;	/* not set yet */
-
-  /* set the options */
-  regex->compile_opts = compile_options | PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
-  regex->match_opts = match_options | PCRE_NO_UTF8_CHECK;
-
-  /* compile the pattern */
-  regex->regex = _pcre_compile (pattern, regex->compile_opts,
-				 &errmsg, &erroffset, NULL);
-
-  /* if the compilation failed, set the error member and return 
-   * immediately */
-  if (regex->regex == NULL)
-    {
-      GError *tmp_error = g_error_new (EGG_REGEX_ERROR, 
-				       EGG_REGEX_ERROR_COMPILE,
-				       _("Error while compiling regular "
-					 "expression %s at char %d: %s"),
-				       pattern, erroffset, errmsg);
-      g_propagate_error (error, tmp_error);
-
-      return regex;
-    }
-
-  /* otherwise, find out how many sub patterns exist in this pattern,
-   * and setup the offsets array and n_offsets accordingly */
-  _pcre_fullinfo (regex->regex, regex->extra, 
-		  PCRE_INFO_CAPTURECOUNT, &capture_count);
-  regex->n_offsets = (capture_count + 1) * 3;
-  regex->offsets = g_new0 (gint, regex->n_offsets);
-
-  return regex;
-}
-
-
-/**
- * egg_regex_free:
- * @regex: a #EggRegex structure from egg_regex_new()
- *
- * Frees all the memory associated with the regex structure.
- */
-void
-egg_regex_free (EggRegex *regex)
-{
-  g_free (regex->pattern);
-  g_slist_free (regex->delims);
-  g_free (regex->offsets);
-  if (regex->regex != NULL)
-    g_free (regex->regex);
-  if (regex->extra != NULL)
-    g_free (regex->extra);
-  g_free (regex);
-}
-
-
-/**
- * egg_regex_clear:
- * @regex: a #EggRegex structure
- *
- * Clears out the members of @regex that are holding information about the
- * last set of matches for this pattern.  egg_regex_clear() needs to be
- * called between uses of egg_regex_match() or egg_regex_match_next() against
- * new target strings. 
- */
-void
-egg_regex_clear (EggRegex *regex)
-{
-  regex->matches = -1;
-  regex->string_len = -1;
-  regex->pos = 0;
-
-  /* if the pattern was used with egg_regex_split_next(), it may have
-   * delimiter offsets stored.  Free up those guys as well. */
-  if (regex->delims != NULL)
-    g_slist_free (regex->delims);
-}
-
-/**
- * egg_regex_optimize:
- * @regex: a #EggRegex structure
- * @error: return location for a #GError
- *
- * If the pattern will be used many times, then it may be worth the
- * effort to optimize it to improve the speed of matches.
- */
-void
-egg_regex_optimize (EggRegex  *regex,
-		  GError **error)
-{
-  const gchar *errmsg;
-
-  regex->extra = _pcre_study (regex->regex, 0, &errmsg);
-
-  if (errmsg)
-    {
-      GError *tmp_error = g_error_new (EGG_REGEX_ERROR,
-				       EGG_REGEX_ERROR_OPTIMIZE, 
-				       _("Error while optimizing "
-					 "regular expression %s: %s"),
-				       regex->pattern,
-				       errmsg);
-      g_propagate_error (error, tmp_error);
-    }
-}
-
-/**
- * egg_regex_match:
- * @regex: a #EggRegex structure from egg_regex_new()
- * @string: the string to scan for matches
- * @string_len: the length of @string, or -1 to use strlen()
- * @match_options:  match options
- *
- * Scans for a match in @string for the pattern in @regex. The index just past
- * the end of the match goes into the pos member of the @regex struct. The
- * indexes of the full match and of all sub-matches are stored in the offsets array.
- *
- * The @match_options are combined with the match options specified when the 
- * @regex structure was created, letting you have more flexibility in reusing
- * #EggRegex structures.
- *
- * Returns:  Number of matched substrings + 1, or 1 if the pattern has no
- *           substrings in it.  Returns #GREGEX_NOMATCH if the pattern
- *           did not match.
- */
-gint 
-egg_regex_match (EggRegex          *regex, 
-	       const gchar     *string, 
-	       gssize           string_len,
-	       EggRegexMatchFlags match_options)
-{
-  if (string_len < 0)
-    string_len = strlen (string);
-
-  regex->string_len = string_len;
-
-  /* perform the match */
-  regex->matches = _pcre_exec (regex->regex, regex->extra, 
-			       string, regex->string_len, 0,
-			       regex->match_opts | match_options,
-			       regex->offsets, regex->n_offsets);
-
-  /* if the regex matched, set regex->pos to the character past the 
-   * end of the match.
-   */
-  if (regex->matches > 0)
-    regex->pos = regex->offsets[1];
-
-  return regex->matches;	/* return what pcre_exec() returned */
-}
-
-
-/**
- * egg_regex_match_next:
- * @regex: a #EggRegex structure 
- * @string: the string to scan for matches
- * @string_len: the length of @string, or -1 to use strlen()
- * @match_options: the match options
- *
- * Scans for the next match in @string of the pattern in @regex.  The index
- * just past the end of the match goes into the pos member of the @regex
- * struct.  The indexes of the full match and of all sub-matches are stored
- * in the offsets array.  The match options are ORed with the match options
- * set when the @regex was created.
- *
- * You have to call egg_regex_clear() to reuse the same pattern on a new string.
- * This is especially true for use with egg_regex_match_next().
- *
- * Returns:  Number of matched substrings + 1, or 1 if the pattern has no
- *           substrings in it.  Returns #GREGEX_NOMATCH if the pattern
- *           did not match.
- */
-gint 
-egg_regex_match_next (EggRegex          *regex, 
-		    const gchar     *string, 
-		    gssize           string_len,
-		    EggRegexMatchFlags match_options)
-{
-  /* if this regex hasn't been used on this string before, then we
-   * need to calculate the length of the string, and set pos to the
-   * start of it.  
-   * Knowing if this regex has been used on this string is a bit of 
-   * a challenge.  For now, we require the user to call egg_regex_clear()
-   * in between usages on a new string.  Not perfect, but not such a
-   * bad solution either.
-   */
-  if (regex->string_len == -1)
-    {
-      if (string_len < 0)
-	string_len = strlen (string);
-      
-      regex->string_len = string_len;
-    }
-
-  /* perform the match */
-  regex->matches = _pcre_exec (regex->regex, regex->extra,
-			       string + regex->pos, 
-			       regex->string_len - regex->pos,
-			       0, regex->match_opts | match_options,
-			       regex->offsets, regex->n_offsets);
-
-  /* if the regex matched, adjust the offsets array to take into account
-   * the fact that the string they're out of is shorter than the string
-   * that the caller passed us, by regex->pos to be exact.
-   * Then, update regex->pos to take into account the new starting point.
-   */
-  if (regex->matches > 0)
-    {
-      gint i, pieces;
-      pieces = (regex->matches * 2) - 1;
-
-      for (i = 0; i <= pieces; i++)
-	regex->offsets[i] += regex->pos;
-
-      regex->pos = regex->offsets[1];
-    }
-
-  return regex->matches;
-}
-
-
-/**
- * egg_regex_fetch:
- * @regex: #EggRegex structure used in last match
- * @string: the string on which the last match was made
- * @match_num: number of the sub expression
- *
- * Retrieves the text matching the @match_num<!-- -->'th capturing parentheses.
- * 0 is the full text of the match, 1 is the first paren set, 2 the second,
- * and so on.
- *
- * Returns: The matched substring.  You have to free it yourself.
- */
-gchar *
-egg_regex_fetch (EggRegex      *regex, 
-	       const gchar *string,
-	       gint         match_num)
-{
-  gchar *match;
-
-  /* make sure the sub expression number they're requesting is less than
-   * the total number of sub expressions that were matched. */
-  if (match_num >= regex->matches)
-    return NULL;
-
-  _pcre_get_substring (string, regex->offsets, regex->matches, 
-		       match_num, (const char **)&match);
-
-  return match;
-}
-
-/**
- * egg_regex_fetch_pos:
- * @regex: #EggRegex structure used in last match
- * @string: the string on which the last match was made
- * @match_num: number of the sub expression
- * @start_pos: pointer to location where to store the start position
- * @end_pos: pointer to location where to store the end position
- *
- * Retrieves the position of the @match_num<!-- -->'th capturing parentheses.
- * 0 is the full text of the match, 1 is the first paren set, 2 the second,
- * and so on.
- */
-void
-egg_regex_fetch_pos (EggRegex      *regex, 
-		     const gchar *string,
-		     gint         match_num,
-		     gint        *start_pos,
-		     gint        *end_pos)
-{
-  /* make sure the sub expression number they're requesting is less than
-   * the total number of sub expressions that were matched. */
-  if (match_num >= regex->matches)
-    return;
-
-  if (start_pos)
-    *start_pos = regex->offsets[2 * match_num];
-
-  if (end_pos)
-    *end_pos = regex->offsets[2 * match_num + 1];
-}
-
-/**
- * egg_regex_fetch_named:
- * @regex: #EggRegex structure used in last match
- * @string: the string on which the last match was made
- * @name: name of the subexpression
- *
- * Retrieves the text matching the capturing parentheses named @name.
- *
- * Returns: The matched substring.  You have to free it yourself.
- */
-gchar *
-egg_regex_fetch_named (EggRegex      *regex, 
-		     const gchar *string,
-		     const gchar *name)
-{
-  gchar *match;
-
-  _pcre_get_named_substring (regex->regex, 
-			     string, regex->offsets, regex->matches, 
-			     name, (const char **)&match);
-
-  return match;
-}
-
-/**
- * egg_regex_fetch_all:
- * @regex: a #EggRegex structure
- * @string: the string on which the last match was made
- *
- * Bundles up pointers to each of the matching substrings from a match 
- * and stores them in an array of gchar pointers.
- *
- * Returns: a %NULL-terminated array of gchar * pointers. It must be freed using
- * g_strfreev(). If the memory can't be allocated, returns %NULL.
- */
-gchar **
-egg_regex_fetch_all (EggRegex      *regex,
-		   const gchar *string)
-{
-  gchar **listptr = NULL; /* the list pcre_get_substring_list() will fill */
-  gchar **result;
-
-  if (regex->matches < 0)
-    return NULL;
-  
-  _pcre_get_substring_list (string, regex->offsets, 
-			    regex->matches, (const char ***)&listptr);
-
-  if (listptr)
-    {
-      /* PCRE returns a single block of memory which
-       * isn't suitable for g_strfreev().
-       */
-      result = g_strdupv (listptr);
-      g_free (listptr);
-    }
-  else 
-    result = NULL;
-
-  return result;
-}
-
-
-/**
- * egg_regex_split:
- * @regex:  a #EggRegex structure
- * @string:  the string to split with the pattern
- * @string_len: the length of @string, or -1 to use strlen()
- * @match_options:  match time option flags
- * @max_pieces:  maximum number of pieces to split the string into, 
- *    or 0 for no limit
- *
- * Breaks the string on the pattern, and returns an array of the pieces.  
- *
- * Returns: a %NULL-terminated gchar ** array. Free it using g_strfreev().
- **/
-gchar **
-egg_regex_split (EggRegex           *regex, 
-	       const gchar      *string, 
-	       gssize            string_len,
-	       EggRegexMatchFlags  match_options,
-	       gint              max_pieces)
-{
-  gchar **string_list;		/* The array of char **s worked on */
-  gint pos;
-  gint match_ret;
-  gint pieces;
-  gint start_pos;
-  gchar *piece;
-  GList *list, *last;
-
-  start_pos = 0;
-  pieces = 0;
-  list = NULL;
-  while (TRUE)
-    {
-      match_ret = egg_regex_match_next (regex, string, string_len, match_options);
-      if ((match_ret > 0) && ((max_pieces == 0) || (pieces < max_pieces)))
-	{
-	  piece = g_strndup (string + start_pos, regex->offsets[0] - start_pos);
-	  list = g_list_prepend (list, piece);
-
-	  /* if there were substrings, these need to get added to the
-	   * list as well */
-	  if (match_ret > 1)
-	    {
-	      int i;
-	      for (i = 1; i < match_ret; i++)
-		list = g_list_prepend (list, egg_regex_fetch (regex, string, i));
-	    }
-
-	  start_pos = regex->pos;	/* move start_pos to end of match */
-	  pieces++;
-	}
-      else	 /* if there was no match, copy to end of string, and break */
-	{
-	  piece = g_strndup (string + start_pos, regex->string_len - start_pos);
-	  list = g_list_prepend (list, piece);
-	  break;
-	}
-    }
-
-  string_list = (gchar **) g_malloc (sizeof (gchar *) * (g_list_length (list) + 1));
-  pos = 0;
-  for (last = g_list_last (list); last; last = last->prev)
-    string_list[pos++] = last->data;
-  string_list[pos] = 0;
-
-  g_list_free (list);
-  return string_list;
-}
-
-
-/**
- * egg_regex_split_next:
- * @regex:  a #EggRegex structure holding the pattern to split on
- * @string:  the string to split on pattern
- * @string_len: the length of @string, or -1 to use strlen()
- * @match_options:  match time options for the regex
- *
- * egg_regex_split_next() breaks the string on pattern, and returns the  
- * pieces, one per call.  If the pattern contains capturing parentheses, 
- * then the text for each of the substrings will also be returned.
- * If the pattern does not match anywhere in the string, then the whole 
- * string is returned as the first piece.
- *
- * Returns:  a gchar * to the next piece of the string
- */
-gchar *
-egg_regex_split_next (EggRegex      *regex, 
-		    const gchar *string, 
-		    gssize       string_len, 
-		    EggRegexMatchFlags match_options)
-{
-  gint start_pos = regex->pos;
-  gchar *piece = NULL;
-  gint match_ret;
-
-  /* if there are delimiter substrings stored, return those one at a
-   * time.  
-   */
-  if (regex->delims != NULL)
-    {
-      piece = regex->delims->data;
-      regex->delims = g_slist_remove (regex->delims, piece);
-      return piece;
-    }
-
-  /* otherwise...
-   * use egg_regex_match_next() to find the next occurance of the pattern
-   * in the string.  We use start_pos to keep track of where the stuff
-   * up to the current match starts.  Copy that piece of the string off
-   * and append it to the buffer using strncpy.  We have to NUL term the
-   * piece we copied off before returning it.
-   */
-  match_ret = egg_regex_match_next (regex, string, string_len, match_options);
-  if (match_ret > 0)
-    {
-      piece = g_strndup (string + start_pos, regex->offsets[0] - start_pos);
-
-      /* if there were substrings, these need to get added to the
-       * list of delims */
-      if (match_ret > 1)
-	{
-	  gint i;
-	  for (i = 1; i < match_ret; i++)
-	    regex->delims = g_slist_append (regex->delims,
-					     egg_regex_fetch (regex, string, i));
-	}
-    }
-  else		/* if there was no match, copy to end of string */
-    piece = g_strndup (string + start_pos, regex->string_len - start_pos);
-
-  return piece;
-}
-
-enum
-{
-  REPL_TYPE_STRING,
-  REPL_TYPE_CHARACTER,
-  REPL_TYPE_SYMBOLIC_REFERENCE,
-  REPL_TYPE_NUMERIC_REFERENCE
-}; 
-
-typedef struct 
-{
-  gchar *text;   
-  gint   type;   
-  gint   num;
-  gchar  c;
-} InterpolationData;
-
-static void
-free_interpolation_data (InterpolationData *data)
-{
-  g_free (data->text);
-  g_free (data);
-}
-
-static const gchar *
-expand_escape (const gchar        *replacement,
-	       const gchar        *p, 
-	       InterpolationData  *data,
-	       GError            **error)
-{
-  const gchar *q, *r;
-  gint x, d, h, i;
-  gchar *error_detail;
-  gint base = 0;
-  GError *tmp_error = NULL;
-
-  p++;
-  switch (*p)
-    {
-    case 't':
-      p++;
-      data->c = '\t';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'n':
-      p++;
-      data->c = '\n';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'v':
-      p++;
-      data->c = '\v';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'r':
-      p++;
-      data->c = '\r';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'f':
-      p++;
-      data->c = '\f';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'a':
-      p++;
-      data->c = '\a';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'b':
-      p++;
-      data->c = '\b';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case '\\':
-      p++;
-      data->c = '\\';
-      data->type = REPL_TYPE_CHARACTER;
-      break;
-    case 'x':
-      p++;
-      x = 0;
-      if (*p == '{')
-	{
-	  p++;
-	  do 
-	    {
-	      h = g_ascii_xdigit_value (*p);
-	      if (h < 0)
-		{
-		  error_detail = _("hexadecimal digit or '}' expected");
-		  goto error;
-		}
-	      x = x * 16 + h;
-	      p++;
-	    }
-	  while (*p != '}');
-	  p++;
-	}
-      else
-	{
-	  for (i = 0; i < 2; i++)
-	    {
-	      h = g_ascii_xdigit_value (*p);
-	      if (h < 0)
-		{
-		  error_detail = _("hexadecimal digit expected");
-		  goto error;
-		}
-	      x = x * 16 + h;
-	      p++;
-	    }
-	}
-      data->type = REPL_TYPE_STRING;
-      data->text = g_new0 (gchar, 8);
-      g_unichar_to_utf8 (x, data->text);
-      break;
-    case 'l':
-    case 'u':
-    case 'L':
-    case 'U':
-    case 'E':
-    case 'Q':
-    case 'G':
-      error_detail = _("escape sequence not allowed");
-      goto error;
-    case 'g':
-      p++;
-      if (*p != '<')
-	{
-	  error_detail = _("missing '<' in symbolic reference");
-	  goto error;
-	}
-      q = p + 1;
-      do 
-	{
-	  p++;
-	  if (!*p)
-	    {
-	      error_detail = _("unfinished symbolic reference");
-	      goto error;
-	    }
-	}
-      while (*p != '>');
-      if (p - q == 0)
-	{
-	  error_detail = _("zero-length symbolic reference");
-	  goto error;
-	}
-      if (g_ascii_isdigit (*q))
-	{
-	  x = 0;
-	  do 
-	    {
-	      h = g_ascii_digit_value (*q);
-	      if (h < 0)
-		{
-		  error_detail = _("digit expected");
-		  p = q;
-		  goto error;
-		}
-	      x = x * 10 + h;
-	      q++;
-	    }
-	  while (q != p);
-	  data->num = x;
-	  data->type = REPL_TYPE_NUMERIC_REFERENCE;
-	}
-      else
-	{
-	  r = q;
-	  do 
-	    {
-	      if (!g_ascii_isalnum (*r))
-		{
-		  error_detail = _("illegal symbolic reference");
-		  p = r;
-		  goto error;
-		}
-	      r++;
-	    }
-	  while (r != p);
-	  data->text = g_strndup (q, p - q);
-	  data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
-	}
-      p++;
-      break;
-    case '0':
-      base = 8;
-    case '1':
-    case '2':
-    case '3':
-    case '4':
-    case '5':
-    case '6':
-    case '7':
-    case '8':
-    case '9':
-      x = 0;
-      d = 0;
-      for (i = 0; i < 3; i++)
-	{
-	  h = g_ascii_digit_value (*p);
-	  if (h < 0) 
-	    break;
-	  if (h > 7)
-	    {
-	      if (base == 8)
-		break;
-	      else 
-		base = 10;
-	    }
-	  if (i == 2 && base == 10)
-	    break;
-	  x = x * 8 + h;
-	  d = d * 10 + h;
-	  p++;
-	}
-      if (base == 8 || i == 3)
-	{
-	  data->type = REPL_TYPE_STRING;
-	  data->text = g_new0 (gchar, 8);
-	  g_unichar_to_utf8 (x, data->text);
-	}
-      else
-	{
-	  data->type = REPL_TYPE_NUMERIC_REFERENCE;
-	  data->num = d;
-	}
-      break;
-    case 0:
-      error_detail = _("stray final '\\'");
-      goto error;
-      break;
-    default:
-      data->type = REPL_TYPE_STRING;
-      data->text = g_new0 (gchar, 8);
-      g_unichar_to_utf8 (g_utf8_get_char (p), data->text);
-      p = g_utf8_next_char (p);
-    }
-
-  return p;
-
- error:
-  tmp_error = g_error_new (EGG_REGEX_ERROR, 
-			   EGG_REGEX_ERROR_REPLACE,
-			   _("Error while parsing replacement "
-			     "text \"%s\" at char %d: %s"),
-			   replacement, 
-			   p - replacement,
-			   error_detail);
-  g_propagate_error (error, tmp_error);
-
-  return NULL;
-}
-
-static GList *
-split_replacement (const gchar  *replacement,
-		   GError      **error)
-{
-  GList *list = NULL;
-  InterpolationData *data;
-  const gchar *p, *start;
-  
-  start = p = replacement; 
-  while (*p)
-    {
-      if (*p == '\\')
-	{
-	  data = g_new0 (InterpolationData, 1);
-	  start = p = expand_escape (replacement, p, data, error);
-	  if (*error)
-	    {
-	      g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
-	      g_list_free (list);
-
-	      return NULL;
-	    }
-	  list = g_list_prepend (list, data);
-	}
-      else
-	{
-	  p++;
-	  if (*p == '\\' || *p == '\0')
-	    {
-	      if (p - start > 0)
-		{
-		  data = g_new0 (InterpolationData, 1);
-		  data->text = g_strndup (start, p - start);
-		  data->type = REPL_TYPE_STRING;
-		  list = g_list_prepend (list, data);
-		}
-	    }
-	}
-    }
-
-  return g_list_reverse (list);
-}
-
-static gboolean
-interpolate_replacement (EggRegex      *regex,
-			 const gchar *string,
-			 GString     *result,
-			 gpointer     data)
-{
-  GList *list;
-  InterpolationData *idata;
-  gchar *match;
-
-  for (list = data; list; list = list->next)
-    {
-      idata = list->data;
-      switch (idata->type)
-	{
-	case REPL_TYPE_STRING:
-	  g_string_append (result, idata->text);
-	  break;
-	case REPL_TYPE_CHARACTER:
-	  g_string_append_c (result, idata->c);
-	  break;
-	case REPL_TYPE_NUMERIC_REFERENCE:
-	  match = egg_regex_fetch (regex, string, idata->num);
-	  if (match) 
-	    {
-	      g_string_append (result, match);
-	      g_free (match);
-	    }
-	  break;
-	case REPL_TYPE_SYMBOLIC_REFERENCE:
-	  match = egg_regex_fetch_named (regex, string, idata->text);
-	  if (match) 
-	    {
-	      g_string_append (result, match);
-	      g_free (match);
-	    }
-	  break;
-	}
-    }
-
-  return FALSE;  
-}
-
-/**
- * egg_regex_replace:
- * @regex:  a #EggRegex structure
- * @string:  the string to perform matches against
- * @string_len: the length of @string, or -1 to use strlen()
- * @replacement:  text to replace each match with
- * @match_options:  options for the match
- *
- * Replaces all occurrences of the pattern in @regex with the 
- * replacement text. Backreferences of the form '\number' or '\g<number>' 
- * in the replacement text are interpolated by the number-th captured 
- * subexpression of the match, '\g<name>' refers to the captured subexpression
- * with the given name. '\0' refers to the complete match. To include a 
- * literal '\' in the replacement, write '\\'.
- *
- * Returns: a newly allocated string containing the replacements.
- */
-gchar *
-egg_regex_replace (EggRegex            *regex, 
-		 const gchar       *string, 
-		 gssize             string_len,
-		 const gchar       *replacement,
-		 EggRegexMatchFlags   match_options,
-		 GError           **error)
-{
-  gchar *result;
-  GList *list;
-
-  list = split_replacement (replacement, error);
-  result = egg_regex_replace_eval (regex, 
-				 string, string_len,
-				 interpolate_replacement,
-				 (gpointer)list,
-				 match_options);
-  g_list_foreach (list, (GFunc)free_interpolation_data, NULL);
-  g_list_free (list);
-  
-  return result;
-}
-
-
-/**
- * egg_regex_replace_eval:
- * @regex:  a #EggRegex structure
- * @string:  string to perform matches against
- * @string_len: the length of @string, or -1 to use strlen()
- * @eval: a function to call for each match
- * @match_options:  Options for the match
- *
- * Replaces occurrences of the pattern in @regex with
- * the output of @eval for that occurrence.
- *
- * Returns: a newly allocated string containing the replacements.
- */
-gchar *
-egg_regex_replace_eval (EggRegex             *regex, 
-		      const gchar        *string,
-		      gssize              string_len,
-		      EggRegexEvalCallback  eval,
-		      gpointer            user_data, 
-		      EggRegexMatchFlags match_options)
-{
-  GString *result;
-  gint str_pos = 0;
-  gboolean done = FALSE;
-
-  if (string_len < 0)
-    string_len = strlen (string);
-
-  /* clear out the regex for reuse, just in case */
-  egg_regex_clear (regex);
-
-  result = g_string_sized_new (string_len);
-
-  /* run down the string making matches. */
-  while (egg_regex_match_next (regex, string, string_len, match_options) > 0 && !done)
-    {
-      g_string_append_len (result, 
-			   string + str_pos, 
-			   regex->offsets[0] - str_pos);
-      done = (*eval) (regex, string, result, user_data);
-      str_pos = regex->offsets[1];
-    }
-  
-  g_string_append_len (result, string + str_pos, string_len - str_pos);
-
-  return g_string_free (result, FALSE);
-}
-
Index: kazehakase/src/libegg/regex/eggregex.h
diff -u kazehakase/src/libegg/regex/eggregex.h:1.1 kazehakase/src/libegg/regex/eggregex.h:removed
--- kazehakase/src/libegg/regex/eggregex.h:1.1	Fri Aug  6 20:13:18 2004
+++ kazehakase/src/libegg/regex/eggregex.h	Fri Mar 16 14:00:28 2007
@@ -1,152 +0,0 @@
-/* EggRegex -- regular expression API wrapper around PCRE.
- * Copyright (C) 1999 Scott Wimer
- * Copyright (C) 2004 Matthias Clasen
- *
- * This is basically an ease-of-use wrapper around the functionality of
- * PCRE.
- *
- * With this library, we are, hopefully, drastically reducing the code
- * complexity necessary by making use of a more complex and detailed
- * data structure to store the regex info.  I am hoping to have a regex
- * interface that is almost as easy to use as Perl's.  <fingers crossed>
- *
- * Author: Scott Wimer <scott****@cgibu*****>
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
- *
- * This library is free software, you can distribute it or modify it
- * under the following terms:
- *  1) The GNU General Public License (GPL)
- *  2) The GNU Library General Public License (LGPL)
- *  3) The Perl Artistic license (Artistic)
- *  4) The BSD license (BSD)
- *
- * In short, you can use this library in any code you desire, so long as
- * the Copyright notice above remains intact.  If you do make changes to
- * it, I would appreciate that you let me know so I can improve this 
- * library for everybody, but I'm not gonna force you to.
- * 
- * Please note that this library is just a wrapper around Philip Hazel's
- * PCRE library.  Please see the file 'LICENSE' in your PCRE distribution.
- * And, if you live in England, please send him a pint of good beer, his
- * library is great.
- *
- */
-#ifndef __EGGREGEX_H__
-#define __EGGREGEX_H__
-
-#include <glib/gtypes.h>
-#include <glib/gquark.h>
-#include <glib/gerror.h>
-#include <glib/gstring.h>
-
-G_BEGIN_DECLS
-
-typedef enum
-{
-  EGG_REGEX_ERROR_COMPILE,
-  EGG_REGEX_ERROR_OPTIMIZE,
-  EGG_REGEX_ERROR_REPLACE
-} EggRegexError;
-
-#define EGG_REGEX_ERROR egg_regex_error_quark ()
-
-GQuark egg_regex_error_quark (void);
-
-typedef enum
-{
-  EGG_REGEX_CASELESS          = 1 << 0,
-  EGG_REGEX_MULTILINE         = 1 << 1,
-  EGG_REGEX_DOTALL            = 1 << 2,
-  EGG_REGEX_EXTENDED          = 1 << 3,
-  EGG_REGEX_ANCHORED          = 1 << 4,
-  EGG_REGEX_DOLLAR_ENDONLY    = 1 << 5,
-  EGG_REGEX_UNGREEDY          = 1 << 9,
-  EGG_REGEX_NO_AUTO_CAPTURE   = 1 << 12
-} EggRegexCompileFlags;
-
-typedef enum
-{
-  EGG_REGEX_MATCH_ANCHORED    = 1 << 4,
-  EGG_REGEX_MATCH_NOTBOL      = 1 << 7,
-  EGG_REGEX_MATCH_NOTEOL      = 1 << 8,
-  EGG_REGEX_MATCH_NOTEMPTY    = 1 << 10
-} EggRegexMatchFlags;
-
-typedef struct _EggRegex  EggRegex;
-
-typedef gboolean (*EggRegexEvalCallback) (EggRegex*, const gchar*, GString*, gpointer);
-
-/* Really quick outline of features... functions are preceded by 'egg_regex_'
- *   new         - compile a pattern and put it in an egg_regex structure
- *   free        - free up the memory used by the egg_regex structure
- *   clear       - clear out the structure to match against a new string
- *   optimize    - study the pattern to make matching more efficient
- *   match       - try matching a pattern in the string
- *   match_next  - try matching pattern again in the string
- *   fetch       - fetch a particular matching sub pattern
- *   fetch_all   - get all of the matching sub patterns
- *   split       - split the string on a regex
- *   split_next  - for using split as an iterator of sorts
- *   replace     - replace occurrences of a pattern with some text
- */
-
-EggRegex  *egg_regex_new          (const gchar           *pattern,
-				   EggRegexCompileFlags   compile_options,
-				   EggRegexMatchFlags     match_options,
-				   GError               **error);
-void       egg_regex_optimize     (EggRegex              *regex,
-				   GError               **error);
-void       egg_regex_free         (EggRegex              *regex);
-void       egg_regex_clear        (EggRegex              *regex);
-gint       egg_regex_match        (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   EggRegexMatchFlags     match_options);
-gint       egg_regex_match_next   (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   EggRegexMatchFlags     match_options);
-gchar     *egg_regex_fetch        (EggRegex              *regex,
-				   const gchar           *string,
-				   gint                   match_num);
-void       egg_regex_fetch_pos    (EggRegex              *regex,
-				   const gchar           *string,
-				   gint                   match_num,
-				   gint                  *start_pos,
-				   gint                  *end_pos);
-gchar     *egg_regex_fetch_named  (EggRegex              *regex,
-				   const gchar           *string,
-				   const gchar           *name);
-gchar    **egg_regex_fetch_all    (EggRegex              *regex,
-				   const gchar           *string);
-gchar    **egg_regex_split        (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   EggRegexMatchFlags     match_options,
-				   gint                   max_pieces);
-gchar     *egg_regex_split_next   (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   EggRegexMatchFlags     match_options);
-gchar     *egg_regex_replace      (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   const gchar           *replacement,
-				   EggRegexMatchFlags     match_options,
-				   GError               **error);
-gchar     *egg_regex_replace_eval (EggRegex              *regex,
-				   const gchar           *string,
-				   gssize                 string_len,
-				   EggRegexEvalCallback   eval,
-				   gpointer               user_data,
-				   EggRegexMatchFlags     match_options);
-
-
-
-G_END_DECLS
-
-
-#endif  /*  __EGGREGEX_H__ */


More information about the Kazehakase-cvs mailing list
Back to archive index