diff options
author | Shashank | 2017-05-29 12:40:26 +0530 |
---|---|---|
committer | Shashank | 2017-05-29 12:40:26 +0530 |
commit | 0345245e860375a32c9a437c4a9d9cae807134e9 (patch) | |
tree | ad51ecbfa7bcd3cc5f09834f1bb8c08feaa526a4 /usr/share/man/man3 | |
download | scilab_for_xcos_on_cloud-0345245e860375a32c9a437c4a9d9cae807134e9.tar.gz scilab_for_xcos_on_cloud-0345245e860375a32c9a437c4a9d9cae807134e9.tar.bz2 scilab_for_xcos_on_cloud-0345245e860375a32c9a437c4a9d9cae807134e9.zip |
CMSCOPE changed
Diffstat (limited to 'usr/share/man/man3')
175 files changed, 21555 insertions, 0 deletions
diff --git a/usr/share/man/man3/Mat_Close.3 b/usr/share/man/man3/Mat_Close.3 new file mode 100755 index 000000000..b5bc581d5 --- /dev/null +++ b/usr/share/man/man3/Mat_Close.3 @@ -0,0 +1,68 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_CLOSE 3 +.Os +.Sh NAME +.Nm Mat_Close +.Nd Closes an open MATLAB .mat file +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft int +.Fo Mat_Close +.Fa "mat_t *matname" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_Close +function closes a MATLAB .mat file opened with +.Fn Mat_Open +or created with +.Fn Mat_CreateVer +. 
+.Sh EXAMPLES +.Bd -literal +#include "matio.h" + +int main(int argc,char **argv) +{ + mat_t *matfp; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_Open 3 , +.Xr Mat_CreateVer 3 , diff --git a/usr/share/man/man3/Mat_CreateVer.3 b/usr/share/man/man3/Mat_CreateVer.3 new file mode 100755 index 000000000..46598d73e --- /dev/null +++ b/usr/share/man/man3/Mat_CreateVer.3 @@ -0,0 +1,93 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd April 21, 2011 +.Dt MAT_CREATEVER 3 +.Os +.Sh NAME +.Nm Mat_CreateVer , +.Nm Mat_Create +.Nd Create a MATLAB .mat file +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft mat_t * +.Fo Mat_CreateVer +.Fa "const char *matname" +.Fa "const char *hdr_str" +.Fa "enum mat_ft mat_file_ver" +.Fc +.Ft mat_t * +.Fo Mat_Create +.Fa "const char *matname" +.Fa "const char *hdr_str" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_CreateVer +function creates a MATLAB .mat file opened for write access. The +.Em hdr_str +is a string written in the file header of MAT version 5 and 7.3 (HDF) files. +Only the first 128 bytes of the string are written. If +.Em hdr_str +is NULL, then a default string is written that contains the platform, date, +and matio library version. +.Pp +The +.Em mat_file_ver +argument specifies the MAT file version to create. The matio library can write +version 5 files (MAT_FT_MAT5), and an HDF5 file format introduced in MATLAB +version 7.3 (MAT_FT_MAT73). +.Pp +The +.Fn Mat_Create +function is equivalent to calling +.Fn Mat_CreateVer +with MAT_FT_DEFAULT as the file version. +.Sh EXAMPLES +.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +int main(int argc,char **argv) +{ + mat_t *matfp; + + matfp = Mat_CreateVer(argv[1],NULL,MAT_FT_MAT73); + if ( NULL == matfp ) { + fprintf(stderr,"Error creating MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_Create 3 , +.Xr Mat_CreateVer 3 , +.Xr Mat_Close 3 diff --git a/usr/share/man/man3/Mat_GetFilename.3 b/usr/share/man/man3/Mat_GetFilename.3 new file mode 100755 index 000000000..b5f08f569 --- /dev/null +++ b/usr/share/man/man3/Mat_GetFilename.3 @@ -0,0 +1,48 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 12, 2012 +.Dt MAT_GETFILENAME 3 +.Os +.Sh NAME +.Nm Mat_GetFilename +.Nd Returns the filename of a MATLAB .mat file given a MAT file structure +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft const char * +.Fo Mat_GetFilename +.Fa "mat_t *matfp" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_GetFilename +function returns the filename of the MATLAB .mat file given by the MAT file +structure. +.Sh SEE ALSO +.Xr Mat_Create 3 , +.Xr Mat_CreateVer 3 , +.Xr Mat_Open 3 +.Xr Mat_GetVersion 3 diff --git a/usr/share/man/man3/Mat_GetLibraryVersion.3 b/usr/share/man/man3/Mat_GetLibraryVersion.3 new file mode 100755 index 000000000..b5a15f881 --- /dev/null +++ b/usr/share/man/man3/Mat_GetLibraryVersion.3 @@ -0,0 +1,49 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 16, 2012 +.Dt MAT_GETLIBRARYVERSION 3 +.Os +.Sh NAME +.Nm Mat_GetLibraryVersion +.Nd Get the version of the matio library +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft void +.Fo Mat_GetLibraryVersion +.Fa "int *major" +.Fa "int *minor" +.Fa "int *release" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_GetLibraryVersion +function returns the major, minor, and release version numbers of the library. +These can be compared against the version specified in the header by +.Dv MATIO_MAJOR_VERSION , +.Dv MATIO_MINOR_VERSION , +and +.Dv MATIO_RELEASE_LEVEL . 
diff --git a/usr/share/man/man3/Mat_GetVersion.3 b/usr/share/man/man3/Mat_GetVersion.3 new file mode 100755 index 000000000..bd7a3bc2b --- /dev/null +++ b/usr/share/man/man3/Mat_GetVersion.3 @@ -0,0 +1,59 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd March 12, 2012 +.Dt MAT_GETVERSION 3 +.Os +.Sh NAME +.Nm Mat_GetVersion +.Nd Returns the version of a MATLAB .mat file given a MAT file structure +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft enum mat_ft +.Fo Mat_GetVersion +.Fa "mat_t *matfp" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_GetVersion +function returns the version of the MATLAB .mat file given by the MAT file +structure. The return value is one of the following: +.Bl -bullet +.It +.Cm MAT_FT_MAT73 +if the file is an HDF5 MAT file. +.It +.Cm MAT_FT_MAT5 +if the file is a level 5 MAT file. +.It +.Cm MAT_FT_MAT4 +if the file is a level 4 MAT file. +.El +.Sh SEE ALSO +.Xr Mat_Create 3 , +.Xr Mat_CreateVer 3 , +.Xr Mat_Open 3 , +.Xr Mat_GetFilename 3 diff --git a/usr/share/man/man3/Mat_Open.3 b/usr/share/man/man3/Mat_Open.3 new file mode 100755 index 000000000..93b3259d5 --- /dev/null +++ b/usr/share/man/man3/Mat_Open.3 @@ -0,0 +1,68 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. 
HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_OPEN 3 +.Os +.Sh NAME +.Nm Mat_Open +.Nd Open a MATLAB .mat file +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft mat_t * +.Fo Mat_Open +.Fa "const char *matname" +.Fa "int mode" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_Open +function opens a MATLAB .mat file for read only or read-write access. +.Sh EXAMPLES +.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +int main(int argc,char **argv) +{ + mat_t *matfp; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_Create 3 , +.Xr Mat_CreateVer 3 , +.Xr Mat_Close 3 diff --git a/usr/share/man/man3/Mat_Rewind.3 b/usr/share/man/man3/Mat_Rewind.3 new file mode 100755 index 000000000..3b45df9d9 --- /dev/null +++ b/usr/share/man/man3/Mat_Rewind.3 @@ -0,0 +1,103 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_REWIND 3 +.Os +.Sh NAME +.Nm Mat_Rewind +.Nd Rewind an open .mat file to the beginning +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft int +.Fo Mat_Rewind +.Fa "mat_t *matfp" +.Fc +.Sh DESCRIPTION +Rewinds an open .mat file so that +.Xr Mat_VarReadNext 3 +or +.Xr Mat_VarReadNextInfo 3 +reads the first variable in the file. +.Sh RETURN VALUES +The function returns 0 on success, or -1 on failure. +.Sh EXAMPLES +The following example shows a list of variables in the MAT file, and prompts the +user for the index of the variable to read. If the index is valid, the file is +reset to the beginning, and variables are read until the selected index is reached. 
+.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + int idx, num_variables; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + idx = 0; + while ( NULL != (matvar = Mat_VarReadNextInfo(matfp)) ) { + idx++; + printf("%3d. %s\n",idx,matvar->name); + Mat_VarFree(matvar); + } + num_variables = idx; + if ( num_variables > 0 ) { + printf("Which variable would you like to read? "); + if ( 0 == fscanf(stdin,"%d",&idx) ) { + printf("Invalid variable selection!\n"); + } else if ( idx < 1 || idx > num_variables ) { + fprintf(stderr,"That is an invalid variable index!\n"); + } else { + int k; + Mat_Rewind(matfp); + for ( k = 1; k < idx; k++ ) { + matvar = Mat_VarReadNextInfo(matfp); + Mat_VarFree(matvar); + } + matvar = Mat_VarReadNext(matfp); + Mat_VarPrint(matvar,1); + Mat_VarFree(matvar); + } + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} +.Ed +.Sh SEE ALSO +.Xr Mat_Open 3 , +.Xr Mat_Close 3 , +.Xr Mat_VarReadNext 3 , +.Xr Mat_VarReadNextInfo 3 diff --git a/usr/share/man/man3/Mat_VarAddStructField.3 b/usr/share/man/man3/Mat_VarAddStructField.3 new file mode 100755 index 000000000..604a1ce6f --- /dev/null +++ b/usr/share/man/man3/Mat_VarAddStructField.3 @@ -0,0 +1,49 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2012 +.Dt MAT_VARADDSTRUCTFIELD 3 +.Os +.Sh NAME +.Nm Mat_VarAddStructField +.Nd Add field to a structure array. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft int +.Fo Mat_VarAddStructField +.Fa "matvar_t *matvar" +.Fa "const char *fieldname" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarAddStructField +function adds a fieldname to the structure variable +.Fa matvar. + +.Sh RETURN VALUES +0 is returned on success. +.Sh SEE ALSO +.Xr Mat_VarCreateStruct 3 diff --git a/usr/share/man/man3/Mat_VarCreate.3 b/usr/share/man/man3/Mat_VarCreate.3 new file mode 100755 index 000000000..e9233ba90 --- /dev/null +++ b/usr/share/man/man3/Mat_VarCreate.3 @@ -0,0 +1,150 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2012 +.Dt MAT_VARCREATE 3 +.Os +.Sh NAME +.Nm Mat_VarCreate +.Nd Creates a MAT variable structure. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarCreate +.Fa "const char *name" +.Fa "enum matio_classes class_type" +.Fa "enum matio_types data_type" +.Fa "int rank" +.Fa "size_t *dims" +.Fa "void *data" +.Fa "int opt" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarCreate +function creates a MAT structure variable named +.Fa name +that can be written to a MAT file. The +.Fa class_type +argument specifies the class of the variable, and the +.Fa data_type +argument specifies the type of the data. For example, a double-precision class +would use +.Dv MAT_C_DOUBLE +for the class type and +.Dv MAT_T_DOUBLE +for the data type. In some instances, the data type may not match the class +type. For example, an array of integers can be written in the double-precision +class by using +.Dv MAT_T_INT32 +for +.Fa data_type . 
+ +The +.Fa rank +argument specifies how many dimensions the data has. The minimum rank is 2. The +number of elements in each dimension is specified in the array +.Fa dims. + +The +.Fa data +argument is a pointer to the variable data. The pointer is typically a pointer +to a numeric array (e.g. double, float, int, etc.) for real variables. For +complex variables, the pointer is a pointer to a +.Vt mat_complex_split_t +which contains pointers to the real and imaginary data as fields of the +structure. For sparse variables, the pointer should be a +.Vt mat_sparse_t *. + +.Sh RETURN VALUES +If the variable was successfully created, a pointer to the variable is returned. +Otherwise NULL is returned. The variable should be free'd when no longer needed +using +.Xr Mat_VarFree. +.Sh EXAMPLES +The example program below creates a MAT file named +.Va test.mat, +and writes two real numeric variables +.Va x +and +.Va y +and a complex variable +.Va z +to the file. +.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + size_t dims[2] = {10,1}; + double x[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9,10}, + y[10] = {11,12,13,14,15,16,17,18,19,20}; + struct mat_complex_split_t z = {x,y}; + + matfp = Mat_CreateVer("test.mat",NULL,MAT_FT_DEFAULT); + if ( NULL == matfp ) { + fprintf(stderr,"Error creating MAT file \"test.mat\"\n"); + return EXIT_FAILURE; + } + + matvar = Mat_VarCreate("x",MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,x,0); + if ( NULL == matvar ) { + fprintf(stderr,"Error creating variable for 'x'\n"); + } else { + Mat_VarWrite(matfp,matvar,COMPRESSION_NONE); + Mat_VarFree(matvar); + } + + matvar = Mat_VarCreate("y",MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,y,0); + if ( NULL == matvar ) { + fprintf(stderr,"Error creating variable for 'y'\n"); + } else { + Mat_VarWrite(matfp,matvar,COMPRESSION_NONE); + Mat_VarFree(matvar); + } + + matvar = Mat_VarCreate("z",MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,&z, + 
MAT_F_COMPLEX); + if ( NULL == matvar ) { + fprintf(stderr,"Error creating variable for 'z'\n"); + } else { + Mat_VarWrite(matfp,matvar,COMPRESSION_NONE); + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} +.Ed +.Sh SEE ALSO +.Xr Mat_VarCreateStruct 3 , +.Xr Mat_VarFree 3 , +.Xr Mat_VarWrite 3 diff --git a/usr/share/man/man3/Mat_VarCreateStruct.3 b/usr/share/man/man3/Mat_VarCreateStruct.3 new file mode 100755 index 000000000..f2c59105e --- /dev/null +++ b/usr/share/man/man3/Mat_VarCreateStruct.3 @@ -0,0 +1,104 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd March 18, 2012 +.Dt MAT_VARCREATESTRUCT 3 +.Os +.Sh NAME +.Nm Mat_VarCreateStruct +.Nd Creates a structure variable. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarCreateStruct +.Fa "const char *name" +.Fa "int rank" +.Fa "size_t *dims" +.Fa "const char **fields" +.Fa "unsigned nfields" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarCreateStruct +function creates a structure variable named +.Fa name +that can be written to a MAT file. +.Sh RETURN VALUES +If the structure variable was successfully created, a pointer to the variable +is returned. Otherwise NULL is returned. The structure variable pointer should +be free'd when no longer needed using +.Xr Mat_VarFree 3 +\. The names of the fields are copied in the function, and thus should be released +after calling the function if necessary. +.Sh EXAMPLES +This example program opens a MAT file named by the first argument to the +program, and writes a structure named +.Em a +to the file. +.Bd -literal +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + matvar_t *field; + const char *fields[2] = {"field1","field2"}; + double data1 = 1, data2 = 2; + size_t dims[2] = {1, 1}; + + matfp = Mat_Open(argv[1],MAT_ACC_RDWR); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + dims[0] = 1; dims[1] = 1; + matvar = Mat_VarCreateStruct("a",2,dims,fields,2); + if ( NULL == matvar ) { + Mat_Close(matfp); + return EXIT_FAILURE; + } + + field = Mat_VarCreate(NULL,MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,&data1, + MAT_F_DONT_COPY_DATA); + Mat_VarSetStructFieldByName(matvar, "field1", 0, field); + + field = Mat_VarCreate(NULL,MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,&data2, + MAT_F_DONT_COPY_DATA); + Mat_VarSetStructFieldByName(matvar, "field2", 0, field); + + Mat_VarWrite(matfp,matvar,MAT_COMPRESSION_NONE); + Mat_VarFree(matvar); + + Mat_Close(matfp); + return EXIT_SUCCESS; +} +.Ed +.Sh SEE ALSO +.Xr Mat_VarCreate 3 , +.Xr 
Mat_VarSetStructFieldByName 3 diff --git a/usr/share/man/man3/Mat_VarFree.3 b/usr/share/man/man3/Mat_VarFree.3 new file mode 100755 index 000000000..e53f31992 --- /dev/null +++ b/usr/share/man/man3/Mat_VarFree.3 @@ -0,0 +1,48 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2012 +.Dt MAT_VARFREE 3 +.Os +.Sh NAME +.Nm Mat_VarFree +.Nd Free a MAT variable structure +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft void +.Fo Mat_VarFree +.Fa "matvar_t *matvar" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarFree +function frees the memory used by a MAT variable structure. 
If the variable +was not created with the +.Dv MAT_F_DONT_COPY_DATA +flag, the variable data is also freed. +.Sh SEE ALSO +.Xr Mat_VarCreate 3 , +.Xr Mat_VarCreateStruct 3 diff --git a/usr/share/man/man3/Mat_VarGetNumberOfFields.3 b/usr/share/man/man3/Mat_VarGetNumberOfFields.3 new file mode 100755 index 000000000..5fb100f56 --- /dev/null +++ b/usr/share/man/man3/Mat_VarGetNumberOfFields.3 @@ -0,0 +1,48 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2012 +.Dt MAT_VARGETNUMBEROFFIELDS 3 +.Os +.Sh NAME +.Nm Mat_VarGetNumberOfFields +.Nd Get the number of fields for a structure variable. 
+.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft unsigned +.Fo Mat_VarGetNumberOfFields +.Fa "const matvar_t *matvar" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarGetNumberOfFields +function returns the number of fields for the structure variable +.Fa matvar. +.Sh RETURN VALUES +The number of fields will be returned on success, and 0 otherwise. Note also +that a structure can validly contain 0 fields. +.Sh SEE ALSO +.Xr Mat_VarGetStructFieldnames 3 diff --git a/usr/share/man/man3/Mat_VarGetStructFieldnames.3 b/usr/share/man/man3/Mat_VarGetStructFieldnames.3 new file mode 100755 index 000000000..c005a7da2 --- /dev/null +++ b/usr/share/man/man3/Mat_VarGetStructFieldnames.3 @@ -0,0 +1,49 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. 
HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd January 19, 2012 +.Dt MAT_VARGETSTRUCTFIELDNAMES 3 +.Os +.Sh NAME +.Nm Mat_VarGetStructFieldnames +.Nd Get the fieldnames of a structure variable. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft char * const * +.Fo Mat_VarGetStructFieldnames +.Fa "const matvar_t *matvar" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarGetStructFieldnames +function returns the fieldnames of the structure variable +.Fa matvar. +.Sh RETURN VALUES +An array of fieldnames is returned if the variable is a valid structure +variable. Otherwise NULL is returned. The Mat_VarGetNumberOfFields function +returns the number of fields for the structure. +.Sh SEE ALSO +.Xr Mat_VarGetNumberOfFields 3 , diff --git a/usr/share/man/man3/Mat_VarGetStructs.3 b/usr/share/man/man3/Mat_VarGetStructs.3 new file mode 100755 index 000000000..4ce0764f7 --- /dev/null +++ b/usr/share/man/man3/Mat_VarGetStructs.3 @@ -0,0 +1,68 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2.
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 16, 2012 +.Dt MAT_VARGETSTRUCTS 3 +.Os +.Sh NAME +.Nm Mat_VarGetStructs +.Nd Index a structure variable using slices of each dimension +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarGetStructs +.Fa "const matvar_t *matvar" +.Fa "int *start" +.Fa "int *stride" +.Fa "int *edge" +.Fa "int copy_fields" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarGetStructs +function selects a subarray from a structure array +.Fa matvar. +Each of the elements in the subarray contains all of the fields of the +structure. The index of each dimension starts at the +.Fa start +element in the array (0 being the first element), and reads every +.Fa stride +values (1 being every element, 2 every other element, etc.). The total number +of elements in the subarray is specified as +.Fa edge. +The +.Fa copy_fields +option specifies whether the fields of the original structure are copied for +the new subarray. 
If set to 0, the returned structure array's fields have the +same pointer as the original array, and the +.Dv data +field of the subarray structure. The subarray is marked with the conserve +memory flag indicating that the fields should not be free'd. They are free'd +when the original structure is free'd. +.Sh RETURN VALUES +A subarray of the original structure array, or NULL if there is an error. +.Sh SEE ALSO +.Xr Mat_VarGetStructsLinear 3 , diff --git a/usr/share/man/man3/Mat_VarGetStructsLinear.3 b/usr/share/man/man3/Mat_VarGetStructsLinear.3 new file mode 100755 index 000000000..c9ecc358d --- /dev/null +++ b/usr/share/man/man3/Mat_VarGetStructsLinear.3 @@ -0,0 +1,70 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. 
HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 14, 2012 +.Dt MAT_VARGETSTRUCTSLINEAR 3 +.Os +.Sh NAME +.Nm Mat_VarGetStructsLinear +.Nd Linearly index a structure variable +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarGetStructsLinear +.Fa "const matvar_t *matvar" +.Fa "int start" +.Fa "int stride" +.Fa "int edge" +.Fa "int copy_fields" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarGetStructsLinear +function selects a subarray from a structure array +.Fa matvar. +Each of the elements in the subarray contains all of the fields of the +structure. The indexing starts at the +.Fa start +element in the array (0 being the first element), and reads every +.Fa stride +values (1 being every element, 2 every other element, etc.). The total number +of elements in the subarray is specified as +.Fa edge. +The original array must have at least +.Fa start+(stride-1)*edge+1 +elements. The +.Fa copy_fields +option specifies whether the fields of the original structure are copied for +the new subarray. If set to 0, the returned structure array's fields have the +same pointer as the original array, and the +.Dv data +field of the subarray structure. The subarray is marked with the conserve +memory flag indicating that the fields should not be free'd. They are free'd +when the original structure is free'd. +.Sh RETURN VALUES +A subarray of the original structure array, or NULL if there is an error. 
+.Sh SEE ALSO +.Xr Mat_VarGetStructs 3 , diff --git a/usr/share/man/man3/Mat_VarRead.3 b/usr/share/man/man3/Mat_VarRead.3 new file mode 100755 index 000000000..d4121d2be --- /dev/null +++ b/usr/share/man/man3/Mat_VarRead.3 @@ -0,0 +1,85 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_VARREAD 3 +.Os +.Sh NAME +.Nm Mat_VarRead +.Nd Reads the information and data for a specific variable in a MATLAB MAT file. 
+.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarRead +.Fa "mat_t *mat" +.Fa "const char *name" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarRead +function reads the information and data for the variable +.Fa name +in the open MAT file. +.Sh RETURN VALUES +If the variable was found in the MAT file and successfully read, a pointer to +the MATLAB variable structure is returned. If the variable was not found, or +there was an error reading the variable, NULL is returned. +.Sh EXAMPLES +This example program opens a MAT file named by the first argument to the +program, and reads a variable named +.Em x +from the file. The MATIO function Mat_VarPrint is used to print the information +and data from the file. +.Bd -literal +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + matvar = Mat_VarRead(matfp,"x"); + if ( NULL != matvar ) { + Mat_VarPrint(matvar); + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_VarReadInfo 3 , +.Xr Mat_VarReadNext 3 , +.Xr Mat_VarPrint 3 diff --git a/usr/share/man/man3/Mat_VarReadInfo.3 b/usr/share/man/man3/Mat_VarReadInfo.3 new file mode 100755 index 000000000..75ead213c --- /dev/null +++ b/usr/share/man/man3/Mat_VarReadInfo.3 @@ -0,0 +1,80 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_VARREADINFO 3 +.Os +.Sh NAME +.Nm Mat_VarReadInfo +.Nd Read the information for a specific variable in a MATLAB MAT file. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarReadInfo +.Fa "mat_t *mat" +.Fa "const char *name" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarReadInfo +function reads the information for the variable +.Fa name +in the open MAT file. +.Sh RETURN VALUES +If the variable was found in the MAT file and the information about the variable +successfully read, a pointer to the MATLAB variable structure is returned. If +the variable was not found, or there was an error reading the variable +information, NULL is returned. 
+.Sh EXAMPLES +.Bd -literal +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + matvar = Mat_VarReadInfo(matfp,"x"); + if ( NULL != matvar ) { + Mat_VarPrint(matvar); + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_VarRead 3 , +.Xr Mat_VarReadNextInfo 3 diff --git a/usr/share/man/man3/Mat_VarReadNext.3 b/usr/share/man/man3/Mat_VarReadNext.3 new file mode 100755 index 000000000..69f67022b --- /dev/null +++ b/usr/share/man/man3/Mat_VarReadNext.3 @@ -0,0 +1,83 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. 
HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_VARREADNEXT 3 +.Os +.Sh NAME +.Nm Mat_VarReadNext +.Nd Reads the information and data for the next variable in a MATLAB MAT file. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarReadNext +.Fa "mat_t *mat" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarReadNext +function reads the information and data for the next variable stored in the +open MAT file. +.Sh RETURN VALUES +If there is another variable in the MAT file and is read successfully, a pointer +to the MATLAB variable structure is returned. If there are no more variables, or +there was an error reading the variable, NULL is returned. +.Sh EXAMPLES +This example program opens a MAT file named by the first argument to the +program, and uses +.Fn Mat_VarReadNext +to read each variable in the file. For each variable read, the +.Xr Mat_VarPrint 3 +function is used to display the information and data of the variable. 
+.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + while ( NULL != (matvar = Mat_VarReadNext(matfp)) ) { + Mat_VarPrint(matvar,1); + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} +.Ed +.Sh SEE ALSO +.Xr Mat_VarRead 3 , +.Xr Mat_VarReadNextInfo 3 , diff --git a/usr/share/man/man3/Mat_VarReadNextInfo.3 b/usr/share/man/man3/Mat_VarReadNextInfo.3 new file mode 100755 index 000000000..337242a7e --- /dev/null +++ b/usr/share/man/man3/Mat_VarReadNextInfo.3 @@ -0,0 +1,105 @@ +.\" Copyright (c) 2011 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. 
HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 21, 2011 +.Dt MAT_VARREADNEXTINFO 3 +.Os +.Sh NAME +.Nm Mat_VarReadNextInfo +.Nd Reads the information for the next variable in a MATLAB MAT file. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft matvar_t * +.Fo Mat_VarReadNextInfo +.Fa "mat_t *mat" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarReadNextInfo +function reads the information for the next variable stored in the open MAT +file. +.Sh RETURN VALUES +If there is another variable in the MAT file and is read successfully, a pointer +to the MATLAB variable structure is returned. If there are no more variables, or +there was an error reading the variable, NULL is returned. +.Sh EXAMPLES +This example program opens a MAT file named by the first argument to the +program, and uses +.Fn Mat_VarReadNextInfo +to read the information about each variable in the file. For each variable read, the name, size, and class are printed in a format similar to the MATLAB whos +command.
+.Bd -literal +#include <stdlib.h> +#include <stdio.h> +#include "matio.h" + +static char *mxclass[16] = {"cell", "struct", "object","char","sparse", + "double","single","int8", "uint8","int16","uint16", + "int32","uint32","int64","uint64","function" + }; + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + size_t nbytes; + int i; + char size[32] = {'\0',}; + + matfp = Mat_Open(argv[1],MAT_ACC_RDONLY); + if ( NULL == matfp ) { + fprintf(stderr,"Error opening MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + printf("%-20s %-10s %-10s %-18s\n\n","Name","Size","Bytes", + "Class"); + while ( NULL != (matvar = Mat_VarReadNextInfo(matfp)) ) { + printf("%-20s", matvar->name); + if ( matvar->rank > 0 ) { + int cnt = 0; + printf("%8d", matvar->dims[0]); + for ( i = 1; i < matvar->rank; i++ ) { + if ( ceil(log10(matvar->dims[i]))+1 < 32 ) + cnt += sprintf(size+cnt,"x%d", matvar->dims[i]); + } + printf("%-10s",size); + } else { + printf(" "); + } + printf(" %-18s\n",mxclass[matvar->class_type-1]); + + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} +.Ed +.Sh SEE ALSO +.Xr Mat_VarRead 3 , +.Xr Mat_VarReadNextInfo 3 , diff --git a/usr/share/man/man3/Mat_VarWrite.3 b/usr/share/man/man3/Mat_VarWrite.3 new file mode 100755 index 000000000..ddd062267 --- /dev/null +++ b/usr/share/man/man3/Mat_VarWrite.3 @@ -0,0 +1,92 @@ +.\" Copyright (c) 2012 Christopher C. Hulbert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY CHRISTOPHER C. HULBERT ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CHRISTOPHER C. HULBERT OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2012 +.Dt MAT_VARWRITE 3 +.Os +.Sh NAME +.Nm Mat_VarWrite +.Nd Writes a MATLAB variable to a MATLAB MAT file. +.Sh SYNOPSIS +.Fd #include <matio.h> +.Ft int +.Fo Mat_VarWrite +.Fa "mat_t *mat" +.Fa "matvar_t *matvar" +.Fa "enum matio_compression compress" +.Fc +.Sh DESCRIPTION +The +.Fn Mat_VarWrite +function writes the MATLAB variable +.Fa matvar +to the MAT file +.Fa mat +which must be opened for writing. If the MAT file is a level 5 MAT file, the +compress option allows the variable to be written using zlib compression if +available. If compression is not available, the variable is written +uncompressed. +.Sh RETURN VALUES +The function returns 0 if the variable was successfully written to the MAT file. +Otherwise, an error value is returned. +.Sh EXAMPLES +This example program creates a MAT file named by the first argument to the +program, and writes the variable named +.Em m_pi +to the file.
+.Bd -literal +#include <math.h> +#include "matio.h" + +int +main(int argc,char **argv) +{ + mat_t *matfp; + matvar_t *matvar; + size_t dims[2] = {1,1}; + double m_pi = M_PI; + + matfp = Mat_CreateVer(argv[1],NULL,MAT_FT_DEFAULT); + if ( NULL == matfp ) { + fprintf(stderr,"Error creating MAT file %s\n",argv[1]); + return EXIT_FAILURE; + } + + matvar = Mat_VarCreate("m_pi",MAT_C_DOUBLE,MAT_T_DOUBLE,2,dims,&m_pi,0); + if ( NULL != matvar ) { + Mat_VarWrite(matfp,matvar,MAT_COMPRESSION_ZLIB); + Mat_VarFree(matvar); + } + + Mat_Close(matfp); + return EXIT_SUCCESS; +} + +.Ed +.Sh SEE ALSO +.Xr Mat_CreateVer 3 , +.Xr Mat_Open 3 , +.Xr Mat_VarRead 3 diff --git a/usr/share/man/man3/curl_easy_cleanup.3 b/usr/share/man/man3/curl_easy_cleanup.3 new file mode 100755 index 000000000..95ba39e4f --- /dev/null +++ b/usr/share/man/man3/curl_easy_cleanup.3 @@ -0,0 +1,51 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2007, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. 
+.\" * +.\" * $Id: curl_easy_cleanup.3,v 1.7 2007-08-22 11:28:26 bagder Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_cleanup 3 "22 aug 2007" "libcurl 7.17.0" "libcurl Manual" +.SH NAME +curl_easy_cleanup - End a libcurl easy session +.SH SYNOPSIS +.B #include <curl/curl.h> + +.BI "void curl_easy_cleanup(CURL *" handle ");" + +.SH DESCRIPTION +This function must be the last function to call for an easy session. It is the +opposite of the \fIcurl_easy_init(3)\fP function and must be called with the +same \fIhandle\fP as input that the curl_easy_init call returned. + +This will effectively close all connections this handle has used and possibly +has kept open until now. Don't call this function if you intend to transfer +more files. + +Any uses of the \fBhandle\fP after this function has been called are +illegal. This kills the handle and all memory associated with it! + +With libcurl versions prior to 7.17.0: when you've called this, you can safely +remove all the strings you've previously told libcurl to use, as it won't use +them anymore now. +.SH RETURN VALUE +None +.SH "SEE ALSO" +.BR curl_easy_init "(3), " + diff --git a/usr/share/man/man3/curl_easy_duphandle.3 b/usr/share/man/man3/curl_easy_duphandle.3 new file mode 100755 index 000000000..fafa4a85c --- /dev/null +++ b/usr/share/man/man3/curl_easy_duphandle.3 @@ -0,0 +1,34 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_easy_duphandle.3,v 1.5 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_easy_duphandle 3 "18 September 2001" "libcurl 7.9" "libcurl Manual" +.SH NAME +curl_easy_duphandle - Clone a libcurl session handle +.SH SYNOPSIS +.B #include <curl/curl.h> + +.BI "CURL *curl_easy_duphandle(CURL *"handle ");" + +.SH DESCRIPTION +This function will return a new curl handle, a duplicate, using all the +options previously set in the input curl \fIhandle\fP.
Both handles can +subsequently be used independently and they must both be freed with +\fIcurl_easy_cleanup(3)\fP. + +All strings that the input handle has been told to point to (as opposed to +copy) with previous calls to \fIcurl_easy_setopt(3)\fP using char * inputs, +will be pointed to by the new handle as well. You must therefore make sure to +keep the data around until both handles have been cleaned up. + +The new handle will \fBnot\fP inherit any state information, no connections, +no SSL sessions and no cookies. + +\fBNote\fP that even in multi-threaded programs, this function must be called +in a synchronous way, the input handle may not be in use when cloned. +.SH RETURN VALUE +If this function returns NULL, something went wrong and no valid handle was +returned. +.SH "SEE ALSO" +.BR curl_easy_init "(3)," curl_easy_cleanup "(3)," curl_global_init "(3)" + diff --git a/usr/share/man/man3/curl_easy_escape.3 b/usr/share/man/man3/curl_easy_escape.3 new file mode 100755 index 000000000..c75557a15 --- /dev/null +++ b/usr/share/man/man3/curl_easy_escape.3 @@ -0,0 +1,47 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied.
+.\" * +.\" * $Id: curl_easy_escape.3,v 1.3 2009-05-19 12:48:14 yangtse Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_escape 3 "7 April 2006" "libcurl 7.15.4" "libcurl Manual" +.SH NAME +curl_easy_escape - URL encodes the given string +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_easy_escape( CURL *" curl ", char *" url ", int "length " );" +.ad +.SH DESCRIPTION +This function converts the given input string to an URL encoded string and +returns that as a new allocated string. All input characters that are not a-z, +A-Z or 0-9 are converted to their "URL escaped" version (%NN where NN is a +two-digit hexadecimal number). + +If the \fBlength\fP argument is set to 0 (zero), \fIcurl_easy_escape(3)\fP +uses strlen() on the input \fBurl\fP to find out the size. + +You must \fIcurl_free(3)\fP the returned string when you're done with it. +.SH AVAILABILITY +Added in 7.15.4 and replaces the old \fIcurl_escape(3)\fP function. +.SH RETURN VALUE +A pointer to a zero terminated string or NULL if it failed. +.SH "SEE ALSO" +.BR curl_easy_unescape "(3), " curl_free "(3), " RFC 2396 diff --git a/usr/share/man/man3/curl_easy_getinfo.3 b/usr/share/man/man3/curl_easy_getinfo.3 new file mode 100755 index 000000000..c2422c2a9 --- /dev/null +++ b/usr/share/man/man3/curl_easy_getinfo.3 @@ -0,0 +1,252 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. 
+.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: curl_easy_getinfo.3,v 1.43 2009-07-15 11:49:13 mmarek Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_getinfo 3 "11 Feb 2009" "libcurl 7.19.4" "libcurl Manual" +.SH NAME +curl_easy_getinfo - extract information from a curl handle +.SH SYNOPSIS +.B #include <curl/curl.h> + +.B "CURLcode curl_easy_getinfo(CURL *curl, CURLINFO info, ... );" + +.SH DESCRIPTION +Request internal information from the curl session with this function. The +third argument \fBMUST\fP be a pointer to a long, a pointer to a char *, a +pointer to a struct curl_slist * or a pointer to a double (as this +documentation describes further down). The data pointed-to will be filled in +accordingly and can be relied upon only if the function returns CURLE_OK. Use +this function AFTER a performed transfer if you want to get transfer-oriented +data. + +You should not free the memory returned by this function unless it is +explicitly mentioned below. +.SH AVAILABLE INFORMATION +The following information can be extracted: +.IP CURLINFO_EFFECTIVE_URL +Pass a pointer to a char pointer to receive the last used effective URL. +.IP CURLINFO_RESPONSE_CODE +Pass a pointer to a long to receive the last received HTTP or FTP code. This +option was known as CURLINFO_HTTP_CODE in libcurl 7.10.7 and earlier. This +will be zero if no server response code has been received. Note that a proxy's +CONNECT response should be read with \fICURLINFO_HTTP_CONNECTCODE\fP and not +this.
+.IP CURLINFO_HTTP_CONNECTCODE +Pass a pointer to a long to receive the last received proxy response code to a +CONNECT request. +.IP CURLINFO_FILETIME +Pass a pointer to a long to receive the remote time of the retrieved document +(in number of seconds since 1 jan 1970 in the GMT/UTC time zone). If you get +-1, it can be because of many reasons (unknown, the server hides it or the +server doesn't support the command that tells document time etc) and the time +of the document is unknown. Note that you must tell the server to collect this +information before the transfer is made, by using the CURLOPT_FILETIME option +to \fIcurl_easy_setopt(3)\fP or you will unconditionally get a -1 back. (Added +in 7.5) +.IP CURLINFO_TOTAL_TIME +Pass a pointer to a double to receive the total time in seconds for the +previous transfer, including name resolving, TCP connect etc. +.IP CURLINFO_NAMELOOKUP_TIME +Pass a pointer to a double to receive the time, in seconds, it took from the +start until the name resolving was completed. +.IP CURLINFO_CONNECT_TIME +Pass a pointer to a double to receive the time, in seconds, it took from the +start until the connect to the remote host (or proxy) was completed. +.IP CURLINFO_APPCONNECT_TIME +Pass a pointer to a double to receive the time, in seconds, it took from the +start until the SSL/SSH connect/handshake to the remote host was completed. +This time is most often very near to the PRETRANSFER time, except for cases +such as HTTP pipelining where the pretransfer time can be delayed due to +waits in line for the pipeline and more. (Added in 7.19.0) +.IP CURLINFO_PRETRANSFER_TIME +Pass a pointer to a double to receive the time, in seconds, it took from the +start until the file transfer is just about to begin. This includes all +pre-transfer commands and negotiations that are specific to the particular +protocol(s) involved. 
+.IP CURLINFO_STARTTRANSFER_TIME +Pass a pointer to a double to receive the time, in seconds, it took from the +start until the first byte is just about to be transferred. This includes +CURLINFO_PRETRANSFER_TIME and also the time the server needs to calculate +the result. +.IP CURLINFO_REDIRECT_TIME +Pass a pointer to a double to receive the total time, in seconds, it took for +all redirection steps including name lookup, connect, pretransfer and transfer +before final transaction was started. CURLINFO_REDIRECT_TIME contains the +complete execution time for multiple redirections. (Added in 7.9.7) +.IP CURLINFO_REDIRECT_COUNT +Pass a pointer to a long to receive the total number of redirections that were +actually followed. (Added in 7.9.7) +.IP CURLINFO_REDIRECT_URL +Pass a pointer to a char pointer to receive the URL a redirect \fIwould\fP +take you to if you would enable CURLOPT_FOLLOWLOCATION. This can come in very +handy if you think using the built-in libcurl redirect logic isn't good enough +for you but you would still prefer to avoid implementing all the magic of +figuring out the new URL. (Added in 7.18.2) +.IP CURLINFO_SIZE_UPLOAD +Pass a pointer to a double to receive the total amount of bytes that were +uploaded. +.IP CURLINFO_SIZE_DOWNLOAD +Pass a pointer to a double to receive the total amount of bytes that were +downloaded. The amount is only for the latest transfer and will be reset again +for each new transfer. +.IP CURLINFO_SPEED_DOWNLOAD +Pass a pointer to a double to receive the average download speed that curl +measured for the complete download. Measured in bytes/second. +.IP CURLINFO_SPEED_UPLOAD +Pass a pointer to a double to receive the average upload speed that curl +measured for the complete upload. Measured in bytes/second. +.IP CURLINFO_HEADER_SIZE +Pass a pointer to a long to receive the total size of all the headers +received. Measured in number of bytes. 
+.IP CURLINFO_REQUEST_SIZE +Pass a pointer to a long to receive the total size of the issued +requests. This is so far only for HTTP requests. Note that this may be more +than one request if FOLLOWLOCATION is true. +.IP CURLINFO_SSL_VERIFYRESULT +Pass a pointer to a long to receive the result of the certification +verification that was requested (using the CURLOPT_SSL_VERIFYPEER option to +\fIcurl_easy_setopt(3)\fP). +.IP CURLINFO_SSL_ENGINES +Pass the address of a 'struct curl_slist *' to receive a linked-list of +OpenSSL crypto-engines supported. Note that engines are normally implemented +in separate dynamic libraries. Hence not all the returned engines may be +available at run-time. \fBNOTE:\fP you must call \fIcurl_slist_free_all(3)\fP +on the list pointer once you're done with it, as libcurl will not free the +data for you. (Added in 7.12.3) +.IP CURLINFO_CONTENT_LENGTH_DOWNLOAD +Pass a pointer to a double to receive the content-length of the download. This +is the value read from the Content-Length: field. Since 7.19.4, this returns -1 +if the size isn't known. +.IP CURLINFO_CONTENT_LENGTH_UPLOAD +Pass a pointer to a double to receive the specified size of the upload. Since +7.19.4, this returns -1 if the size isn't known. +.IP CURLINFO_CONTENT_TYPE +Pass a pointer to a char pointer to receive the content-type of the downloaded +object. This is the value read from the Content-Type: field. If you get NULL, +it means that the server didn't send a valid Content-Type header or that the +protocol used doesn't support this. +.IP CURLINFO_PRIVATE +Pass a pointer to a char pointer to receive the pointer to the private data +associated with the curl handle (set with the CURLOPT_PRIVATE option to +\fIcurl_easy_setopt(3)\fP). Please note that for internal reasons, the +value is returned as a char pointer, although effectively being a 'void *'. 
+(Added in 7.10.3) +.IP CURLINFO_HTTPAUTH_AVAIL +Pass a pointer to a long to receive a bitmask indicating the authentication +method(s) available. The meaning of the bits is explained in the +CURLOPT_HTTPAUTH option for \fIcurl_easy_setopt(3)\fP. (Added in 7.10.8) +.IP CURLINFO_PROXYAUTH_AVAIL +Pass a pointer to a long to receive a bitmask indicating the authentication +method(s) available for your proxy authentication. (Added in 7.10.8) +.IP CURLINFO_OS_ERRNO +Pass a pointer to a long to receive the errno variable from a connect failure. +Note that the value is only set on failure, it is not reset upon a +successful operation. (Added in 7.12.2) +.IP CURLINFO_NUM_CONNECTS +Pass a pointer to a long to receive how many new connections libcurl had to +create to achieve the previous transfer (only the successful connects are +counted). Combined with \fICURLINFO_REDIRECT_COUNT\fP you are able to know +how many times libcurl successfully reused existing connection(s) or not. See +the Connection Options of \fIcurl_easy_setopt(3)\fP to see how libcurl tries +to make persistent connections to save time. (Added in 7.12.3) +.IP CURLINFO_PRIMARY_IP +Pass a pointer to a char pointer to receive the pointer to a zero-terminated +string holding the IP address of the most recent connection done with this +\fBcurl\fP handle. This string may be IPv6 if that's enabled. Note that you +get a pointer to a memory area that will be re-used at next request so you +need to copy the string if you want to keep the information. (Added in 7.19.0) +.IP CURLINFO_COOKIELIST +Pass a pointer to a 'struct curl_slist *' to receive a linked-list of all +cookies cURL knows (expired ones, too). Don't forget to +\fIcurl_slist_free_all(3)\fP the list after it has been used. If there are no +cookies (cookies for the handle have not been enabled or simply none have been +received) 'struct curl_slist *' will be set to point to NULL. 
(Added in +7.14.1) +.IP CURLINFO_LASTSOCKET +Pass a pointer to a long to receive the last socket used by this curl +session. If the socket is no longer valid, -1 is returned. When you finish +working with the socket, you must call curl_easy_cleanup() as usual and let +libcurl close the socket and cleanup other resources associated with the +handle. This is typically used in combination with \fICURLOPT_CONNECT_ONLY\fP. +(Added in 7.15.2) +.IP CURLINFO_FTP_ENTRY_PATH +Pass a pointer to a char pointer to receive a pointer to a string holding the +path of the entry path. That is the initial path libcurl ended up in when +logging on to the remote FTP server. This stores a NULL as pointer if +something is wrong. (Added in 7.15.4) +.IP CURLINFO_CERTINFO +Pass a pointer to a 'struct curl_certinfo *' and you'll get it set to point to +a struct that holds a number of linked lists with info about the certificate +chain, assuming you had CURLOPT_CERTINFO enabled when the previous request was +done. The struct reports how many certs it found and then you can extract info +for each of those certs by following the linked lists. The info chain is +provided in a series of data in the format "name:content" where the content is +for the specific named data. See also the certinfo.c example. NOTE: this +option is only available in libcurl built with OpenSSL support. (Added in +7.19.1) +.IP CURLINFO_CONDITION_UNMET +Pass a pointer to a long to receive the number 1 if the condition provided in +the previous request didn't match (see \fICURLOPT_TIMECONDITION\fP). Alas, if +this returns a 1 you know that the reason you didn't get data in return is +because it didn't fulfill the condition. The long this argument points to will +get a zero stored if the condition instead was met. 
(Added in 7.19.4) +.SH TIMES +.nf +An overview of the six time values available from curl_easy_getinfo() + +curl_easy_perform() + | + |--NAMELOOKUP + |--|--CONNECT + |--|--|--APPCONNECT + |--|--|--|--PRETRANSFER + |--|--|--|--|--STARTTRANSFER + |--|--|--|--|--|--TOTAL + |--|--|--|--|--|--REDIRECT +.fi +.IP NAMELOOKUP +\fICURLINFO_NAMELOOKUP_TIME\fP. The time it took from the start until the name +resolving was completed. +.IP CONNECT +\fICURLINFO_CONNECT_TIME\fP. The time it took from the start until the connect +to the remote host (or proxy) was completed. +.IP APPCONNECT +\fICURLINFO_APPCONNECT_TIME\fP. The time it took from the start until the SSL +connect/handshake with the remote host was completed. (Added in 7.19.0) +.IP PRETRANSFER +\fICURLINFO_PRETRANSFER_TIME\fP. The time it took from the start until the +file transfer is just about to begin. This includes all pre-transfer commands +and negotiations that are specific to the particular protocol(s) involved. +.IP STARTTRANSFER +\fICURLINFO_STARTTRANSFER_TIME\fP. The time it took from the start until the +first byte is just about to be transferred. +.IP TOTAL +\fICURLINFO_TOTAL_TIME\fP. Total time of the previous request. +.IP REDIRECT +\fICURLINFO_REDIRECT_TIME\fP. The time it took for all redirection steps +including name lookup, connect, pretransfer and transfer before final +transaction was started. So, this is zero if no redirection took place. +.SH RETURN VALUE +If the operation was successful, CURLE_OK is returned. Otherwise an +appropriate error code will be returned. 
+.SH "SEE ALSO" +.BR curl_easy_setopt "(3)" diff --git a/usr/share/man/man3/curl_easy_init.3 b/usr/share/man/man3/curl_easy_init.3 new file mode 100755 index 000000000..ede6d07da --- /dev/null +++ b/usr/share/man/man3/curl_easy_init.3 @@ -0,0 +1,32 @@ +.\" $Id: curl_easy_init.3,v 1.10 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_easy_init 3 "4 March 2002" "libcurl 7.8.1" "libcurl Manual" +.SH NAME +curl_easy_init - Start a libcurl easy session +.SH SYNOPSIS +.B #include <curl/curl.h> + +.BI "CURL *curl_easy_init( );" + +.SH DESCRIPTION +This function must be the first function to call, and it returns a CURL easy +handle that you must use as input to other easy-functions. curl_easy_init +initializes curl and this call \fBMUST\fP have a corresponding call to +\fIcurl_easy_cleanup(3)\fP when the operation is complete. + +If you did not already call \fIcurl_global_init(3)\fP, +\fIcurl_easy_init(3)\fP does it automatically. +This may be lethal in multi-threaded cases, since \fIcurl_global_init(3)\fP is +not thread-safe, and it may result in resource problems because there is +no corresponding cleanup. + +You are strongly advised to not allow this automatic behaviour, by +calling \fIcurl_global_init(3)\fP yourself properly. +See the description in \fBlibcurl\fP(3) of global environment +requirements for details of how to use this function. + +.SH RETURN VALUE +If this function returns NULL, something went wrong and you cannot use the +other curl functions. 
+.SH "SEE ALSO" +.BR curl_easy_cleanup "(3), " curl_global_init "(3), " curl_easy_reset "(3)" diff --git a/usr/share/man/man3/curl_easy_pause.3 b/usr/share/man/man3/curl_easy_pause.3 new file mode 100755 index 000000000..682afd194 --- /dev/null +++ b/usr/share/man/man3/curl_easy_pause.3 @@ -0,0 +1,66 @@ +.\" $Id: curl_easy_pause.3,v 1.4 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_easy_pause 3 "17 Dec 2007" "libcurl 7.18.0" "libcurl Manual" +.SH NAME +curl_easy_pause - pause and unpause a connection +.SH SYNOPSIS +.B #include <curl/curl.h> + +.BI "CURLcode curl_easy_pause(CURL *"handle ", int "bitmask " );" + +.SH DESCRIPTION +Using this function, you can explicitly mark a running connection to get +paused, and you can unpause a connection that was previously paused. + +A connection can be paused by using this function or by letting the read +or the write callbacks return the proper magic return code +(\fICURL_READFUNC_PAUSE\fP and \fICURL_WRITEFUNC_PAUSE\fP). A write callback +that returns pause signals to the library that it couldn't take care of any +data at all, and that data will then be delivered again to the callback when +the writing is later unpaused. + +NOTE: while it may feel tempting, take care and notice that you cannot call +this function from another thread. + +When this function is called to unpause reading, the chance is high that you +will get your write callback called before this function returns. + +The \fBhandle\fP argument is of course identifying the handle that operates on +the connection you want to pause or unpause. + +The \fBbitmask\fP argument is a set of bits that sets the new state of the +connection. The following bits can be used: +.IP CURLPAUSE_RECV +Pause receiving data. There will be no data received on this connection until +this function is called again without this bit set. Thus, the write callback +(\fICURLOPT_WRITEFUNCTION\fP) won't be called. +.IP CURLPAUSE_SEND +Pause sending data. 
There will be no data sent on this connection until this +function is called again without this bit set. Thus, the read callback +(\fICURLOPT_READFUNCTION\fP) won't be called. +.IP CURLPAUSE_ALL +Convenience define that pauses both directions. +.IP CURLPAUSE_CONT +Convenience define that unpauses both directions +.SH RETURN VALUE +CURLE_OK (zero) means that the option was set properly, and a non-zero return +code means something wrong occurred after the new state was set. See the +\fIlibcurl-errors(3)\fP man page for the full list with descriptions. +.SH AVAILABILITY +This function was added in libcurl 7.18.0. Before this version, there was no +explicit support for pausing transfers. +.SH "MEMORY USE" +When pausing a read by returning the magic return code from a write callback, +the read data is already in libcurl's internal buffers so it'll have to keep +it in an allocated buffer until the reading is again unpaused using this +function. + +If the downloaded data is compressed and is asked to get uncompressed +automatically on download, libcurl will continue to uncompress the entire +downloaded chunk and it will cache the data uncompressed. This has the side- +effect that if you download something that is compressed a lot, it can result +in a very large data amount needing to be allocated to save the data during +the pause. This said, you should probably consider not using paused reading if +you allow libcurl to uncompress data automatically. 
+.SH "SEE ALSO" +.BR curl_easy_cleanup "(3), " curl_easy_reset "(3)" diff --git a/usr/share/man/man3/curl_easy_perform.3 b/usr/share/man/man3/curl_easy_perform.3 new file mode 100755 index 000000000..60fc1e263 --- /dev/null +++ b/usr/share/man/man3/curl_easy_perform.3 @@ -0,0 +1,40 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_easy_perform.3,v 1.3 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_easy_perform 3 "5 Mar 2001" "libcurl 7.7" "libcurl Manual" +.SH NAME +curl_easy_perform - Perform a file transfer +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLcode curl_easy_perform(CURL *" handle ");" +.ad +.SH DESCRIPTION +This function is called after the init and all the \fIcurl_easy_setopt(3)\fP +calls are made, and will perform the transfer as described in the options. It +must be called with the same +.I handle +as input as the curl_easy_init call returned. + +You can do any amount of calls to \fIcurl_easy_perform(3)\fP while using the +same handle. If you intend to transfer more than one file, you are even +encouraged to do so. libcurl will then attempt to re-use the same connection +for the following transfers, thus making the operations faster, less CPU +intense and using less network resources. Just note that you will have to use +\fIcurl_easy_setopt(3)\fP between the invokes to set options for the following +curl_easy_perform. + +You must never call this function simultaneously from two places using the +same handle. Let the function return first before invoking it another time. If +you want parallel transfers, you must use several curl handles. +.SH RETURN VALUE +0 means everything was ok, non-zero means an error occurred as +.I <curl/curl.h> +defines. If the CURLOPT_ERRORBUFFER was set with +.I curl_easy_setopt +there will be a readable error message in the error buffer when non-zero is +returned. 
+.SH "SEE ALSO" +.BR curl_easy_init "(3), " curl_easy_setopt "(3), " + diff --git a/usr/share/man/man3/curl_easy_recv.3 b/usr/share/man/man3/curl_easy_recv.3 new file mode 100755 index 000000000..df0b67db9 --- /dev/null +++ b/usr/share/man/man3/curl_easy_recv.3 @@ -0,0 +1,70 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: curl_easy_recv.3,v 1.1 2008-05-12 21:43:28 bagder Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_recv 3 "29 April 2008" "libcurl 7.18.2" "libcurl Manual" +.SH NAME +curl_easy_recv - receives raw data on an "easy" connection +.SH SYNOPSIS +.B #include <curl/easy.h> +.sp +.BI "CURLcode curl_easy_recv( CURL *" curl ", void *" buffer "," +.BI "size_t " buflen ", size_t *" n ");" +.ad +.SH DESCRIPTION +This function receives raw data from the established connection. You may use +it together with \fIcurl_easy_send(3)\fP to implement custom protocols using +libcurl. This functionality can be particularly useful if you use proxies +and/or SSL encryption: libcurl will take care of proxy negotiation and +connection set-up. 
+ +\fBbuffer\fP is a pointer to your buffer that will get the received +data. \fBbuflen\fP is the maximum amount of data you can get in that +buffer. The variable \fBn\fP points to will receive the number of received +bytes. + +To establish the connection, set \fBCURLOPT_CONNECT_ONLY\fP option before +calling \fIcurl_easy_perform(3)\fP. Note that \fIcurl_easy_recv(3)\fP does not +work on connections that were created without this option. + +You must ensure that the socket has data to read before calling +\fIcurl_easy_recv(3)\fP, otherwise the call will return \fBCURLE_AGAIN\fP - +the socket is used in non-blocking mode internally. Use +\fIcurl_easy_getinfo(3)\fP with \fBCURLINFO_LASTSOCKET\fP to obtain the +socket; use your operating system facilities like \fIselect(2)\fP to check if +it has any data you can read. +.SH AVAILABILITY +Added in 7.18.2. +.SH RETURN VALUE +On success, returns \fBCURLE_OK\fP, stores the received data into +\fBbuffer\fP, and the number of bytes it actually read into \fB*n\fP. + +On failure, returns the appropriate error code. + +If there is no data to read, the function returns \fBCURLE_AGAIN\fP. Use +your operating system facilities to wait until the data is ready, and retry. +.SH EXAMPLE +See \fBsendrecv.c\fP in \fBdocs/examples\fP directory for usage example. 
+.SH "SEE ALSO" +.BR curl_easy_setopt "(3), " curl_easy_perform "(3), " +.BR curl_easy_getinfo "(3), " +.BR curl_easy_send "(3) " diff --git a/usr/share/man/man3/curl_easy_reset.3 b/usr/share/man/man3/curl_easy_reset.3 new file mode 100755 index 000000000..d5419117a --- /dev/null +++ b/usr/share/man/man3/curl_easy_reset.3 @@ -0,0 +1,24 @@ +.\" $Id: curl_easy_reset.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_easy_reset 3 "31 July 2004" "libcurl 7.12.1" "libcurl Manual" +.SH NAME +curl_easy_reset - reset all options of a libcurl session handle +.SH SYNOPSIS +.B #include <curl/curl.h> + +.BI "void curl_easy_reset(CURL *"handle ");" + +.SH DESCRIPTION +Re-initializes all options previously set on a specified CURL handle to the +default values. This puts back the handle to the same state as it was in when +it was just created with \fIcurl_easy_init(3)\fP. + +It does not change the following information kept in the handle: live +connections, the Session ID cache, the DNS cache, the cookies and shares. +.SH AVAILABILITY +This function was added in libcurl 7.12.1 +.SH RETURN VALUE +Nothing +.SH "SEE ALSO" +.BR curl_easy_init "(3)," curl_easy_cleanup "(3)," curl_easy_setopt "(3) + diff --git a/usr/share/man/man3/curl_easy_send.3 b/usr/share/man/man3/curl_easy_send.3 new file mode 100755 index 000000000..8301348bc --- /dev/null +++ b/usr/share/man/man3/curl_easy_send.3 @@ -0,0 +1,65 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. 
+.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: curl_easy_send.3,v 1.1 2008-05-12 21:43:28 bagder Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_send 3 "29 April 2008" "libcurl 7.18.2" "libcurl Manual" +.SH NAME +curl_easy_send - sends raw data over an "easy" connection +.SH SYNOPSIS +.B #include <curl/easy.h> +.sp +.BI "CURLcode curl_easy_send( CURL *" curl ", const void *" buffer "," +.BI " size_t " buflen ", size_t *" n ");" +.ad +.SH DESCRIPTION +This function sends arbitrary data over the established connection. You may +use it together with \fIcurl_easy_recv(3)\fP to implement custom protocols +using libcurl. This functionality can be particularly useful if you use +proxies and/or SSL encryption: libcurl will take care of proxy negotiation and +connection set-up. + +\fBbuffer\fP is a pointer to the data of length \fBbuflen\fP that you want sent. +The variable \fBn\fP points to will receive the number of sent bytes. + +To establish the connection, set \fBCURLOPT_CONNECT_ONLY\fP option before +calling \fIcurl_easy_perform(3)\fP. Note that \fIcurl_easy_send(3)\fP will not +work on connections that were created without this option. + +You must ensure that the socket is writable before calling +\fIcurl_easy_send(3)\fP, otherwise the call will return \fBCURLE_AGAIN\fP - +the socket is used in non-blocking mode internally. Use +\fIcurl_easy_getinfo(3)\fP with \fBCURLINFO_LASTSOCKET\fP to obtain the +socket; use your operating system facilities like \fIselect(2)\fP to check if +it can be written to. +.SH AVAILABILITY +Added in 7.18.2. 
+.SH RETURN VALUE +On success, returns \fBCURLE_OK\fP and stores the number of bytes actually +sent into \fB*n\fP. Note that this may very well be less than the amount you +wanted to send. + +On failure, returns the appropriate error code. +.SH EXAMPLE +See \fBsendrecv.c\fP in \fBdocs/examples\fP directory for usage example. +.SH "SEE ALSO" +.BR curl_easy_setopt "(3), " curl_easy_perform "(3), " curl_easy_getinfo "(3), " +.BR curl_easy_recv "(3) " diff --git a/usr/share/man/man3/curl_easy_setopt.3 b/usr/share/man/man3/curl_easy_setopt.3 new file mode 100755 index 000000000..aa0e78121 --- /dev/null +++ b/usr/share/man/man3/curl_easy_setopt.3 @@ -0,0 +1,1832 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. 
+.\" * +.\" * $Id: curl_easy_setopt.3,v 1.266 2009-08-04 12:02:27 bagder Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_setopt 3 "11 Dec 2008" "libcurl 7.19.3" "libcurl Manual" +.SH NAME +curl_easy_setopt \- set options for a curl easy handle +.SH SYNOPSIS +#include <curl/curl.h> + +CURLcode curl_easy_setopt(CURL *handle, CURLoption option, parameter); +.SH DESCRIPTION +curl_easy_setopt() is used to tell libcurl how to behave. By using the +appropriate options to \fIcurl_easy_setopt\fP, you can change libcurl's +behavior. All options are set with the \fIoption\fP followed by a +\fIparameter\fP. That parameter can be a \fBlong\fP, a \fBfunction pointer\fP, +an \fBobject pointer\fP or a \fBcurl_off_t\fP, depending on what the specific +option expects. Read this manual carefully as bad input values may cause +libcurl to behave badly! You can only set one option in each function call. A +typical application uses many curl_easy_setopt() calls in the setup phase. + +Options set with this function call are valid for all forthcoming transfers +performed using this \fIhandle\fP. The options are not in any way reset +between transfers, so if you want subsequent transfers with different options, +you must change them between the transfers. You can optionally reset all +options back to internal default with \fIcurl_easy_reset(3)\fP. + +Strings passed to libcurl as 'char *' arguments, are copied by the library; +thus the string storage associated to the pointer argument may be overwritten +after curl_easy_setopt() returns. Exceptions to this rule are described in +the option details below. + +NOTE: before 7.17.0 strings were not copied. Instead the user was forced to keep +them available until libcurl no longer needed them. + +The \fIhandle\fP is the return code from a \fIcurl_easy_init(3)\fP or +\fIcurl_easy_duphandle(3)\fP call. 
+.SH BEHAVIOR OPTIONS +.IP CURLOPT_VERBOSE +Set the parameter to 1 to get the library to display a lot of verbose +information about its operations. Very useful for libcurl and/or protocol +debugging and understanding. The verbose information will be sent to stderr, +or the stream set with \fICURLOPT_STDERR\fP. + +You hardly ever want this set in production use, you will almost always want +this when you debug/report problems. Another neat option for debugging is the +\fICURLOPT_DEBUGFUNCTION\fP. +.IP CURLOPT_HEADER +A parameter set to 1 tells the library to include the header in the body +output. This is only relevant for protocols that actually have headers +preceding the data (like HTTP). +.IP CURLOPT_NOPROGRESS +A parameter set to 1 tells the library to shut off the built-in progress meter +completely. + +Future versions of libcurl are likely to not have any built-in progress meter +at all. +.IP CURLOPT_NOSIGNAL +Pass a long. If it is 1, libcurl will not use any functions that +install signal handlers or any functions that cause signals to be sent to the +process. This option is mainly here to allow multi-threaded unix applications +to still set/use all timeout options etc, without risking getting signals. +(Added in 7.10) + +If this option is set and libcurl has been built with the standard name +resolver, timeouts will not occur while the name resolve takes place. +Consider building libcurl with c-ares support to enable asynchronous DNS +lookups, which enables nice timeouts for name resolves without signals. +.PP +.SH CALLBACK OPTIONS +.IP CURLOPT_WRITEFUNCTION +Function pointer that should match the following prototype: \fBsize_t +function( void *ptr, size_t size, size_t nmemb, void *stream);\fP This +function gets called by libcurl as soon as there is data received that needs +to be saved. The size of the data pointed to by \fIptr\fP is \fIsize\fP +multiplied with \fInmemb\fP, it will not be zero terminated. 
Return the number +of bytes actually taken care of. If that amount differs from the amount passed +to your function, it'll signal an error to the library and it will abort the +transfer and return \fICURLE_WRITE_ERROR\fP. + +From 7.18.0, the function can return CURL_WRITEFUNC_PAUSE which then will +cause writing to this connection to become paused. See +\fIcurl_easy_pause(3)\fP for further details. + +This function may be called with zero bytes data if the transferred file is +empty. + +Set this option to NULL to get the internal default function. The internal +default function will write the data to the FILE * given with +\fICURLOPT_WRITEDATA\fP. + +Set the \fIstream\fP argument with the \fICURLOPT_WRITEDATA\fP option. + +The callback function will be passed as much data as possible in all invokes, +but you cannot possibly make any assumptions. It may be one byte, it may be +thousands. The maximum amount of data that can be passed to the write callback +is defined in the curl.h header file: CURL_MAX_WRITE_SIZE. +.IP CURLOPT_WRITEDATA +Data pointer to pass to the file write function. If you use the +\fICURLOPT_WRITEFUNCTION\fP option, this is the pointer you'll get as +input. If you don't use a callback, you must pass a 'FILE *' as libcurl will +pass this to fwrite() when writing data. + +The internal \fICURLOPT_WRITEFUNCTION\fP will write the data to the FILE * +given with this option, or to stdout if this option hasn't been set. + +If you're using libcurl as a win32 DLL, you \fBMUST\fP use the +\fICURLOPT_WRITEFUNCTION\fP if you set this option or you will experience +crashes. + +This option is also known with the older name \fICURLOPT_FILE\fP, the name +\fICURLOPT_WRITEDATA\fP was introduced in 7.9.7. 
+.IP CURLOPT_READFUNCTION +Function pointer that should match the following prototype: \fBsize_t +function( void *ptr, size_t size, size_t nmemb, void *stream);\fP This +function gets called by libcurl as soon as it needs to read data in order to +send it to the peer. The data area pointed at by the pointer \fIptr\fP may be +filled with at most \fIsize\fP multiplied with \fInmemb\fP number of +bytes. Your function must return the actual number of bytes that you stored in +that memory area. Returning 0 will signal end-of-file to the library and cause +it to stop the current transfer. + +If you stop the current transfer by returning 0 "pre-maturely" (i.e before the +server expected it, like when you've said you will upload N bytes and you +upload less than N bytes), you may experience that the server "hangs" waiting +for the rest of the data that won't come. + +The read callback may return \fICURL_READFUNC_ABORT\fP to stop the current +operation immediately, resulting in a \fICURLE_ABORTED_BY_CALLBACK\fP error +code from the transfer (Added in 7.12.1) + +From 7.18.0, the function can return CURL_READFUNC_PAUSE which then will cause +reading from this connection to become paused. See \fIcurl_easy_pause(3)\fP +for further details. + +If you set the callback pointer to NULL, or don't set it at all, the default +internal read function will be used. It is simply doing an fread() on the FILE +* stream set with \fICURLOPT_READDATA\fP. +.IP CURLOPT_READDATA +Data pointer to pass to the file read function. If you use the +\fICURLOPT_READFUNCTION\fP option, this is the pointer you'll get as input. If +you don't specify a read callback but instead rely on the default internal +read function, this data must be a valid readable FILE *. + +If you're using libcurl as a win32 DLL, you MUST use a +\fICURLOPT_READFUNCTION\fP if you set this option. + +This option was also known by the older name \fICURLOPT_INFILE\fP, the name +\fICURLOPT_READDATA\fP was introduced in 7.9.7. 
+.IP CURLOPT_IOCTLFUNCTION +Function pointer that should match the \fIcurl_ioctl_callback\fP prototype +found in \fI<curl/curl.h>\fP. This function gets called by libcurl when +something special I/O-related needs to be done that the library can't do by +itself. For now, rewinding the read data stream is the only action it can +request. The rewinding of the read data stream may be necessary when doing a +HTTP PUT or POST with a multi-pass authentication method. (Option added in +7.12.3). + +Use \fICURLOPT_SEEKFUNCTION\fP instead to provide seeking! +.IP CURLOPT_IOCTLDATA +Pass a pointer that will be untouched by libcurl and passed as the 3rd +argument in the ioctl callback set with \fICURLOPT_IOCTLFUNCTION\fP. (Option +added in 7.12.3) +.IP CURLOPT_SEEKFUNCTION +Function pointer that should match the following prototype: \fIint +function(void *instream, curl_off_t offset, int origin);\fP This function gets +called by libcurl to seek to a certain position in the input stream and can be +used to fast forward a file in a resumed upload (instead of reading all +uploaded bytes with the normal read function/callback). It is also called to +rewind a stream when doing a HTTP PUT or POST with a multi-pass authentication +method. The function shall work like "fseek" or "lseek" and accept SEEK_SET, +SEEK_CUR and SEEK_END as argument for origin, although (in 7.18.0) libcurl +only passes SEEK_SET. The callback must return 0 (CURL_SEEKFUNC_OK) on +success, 1 (CURL_SEEKFUNC_FAIL) to cause the upload operation to fail or 2 +(CURL_SEEKFUNC_CANTSEEK) to indicate that while the seek failed, libcurl is +free to work around the problem if possible. The latter can sometimes be done +by instead reading from the input or similar. + +If you forward the input arguments directly to "fseek" or "lseek", note that +the data type for \fIoffset\fP is not the same as defined for curl_off_t on +many systems! 
(Option added in 7.18.0) +.IP CURLOPT_SEEKDATA +Data pointer to pass to the seek function. If you use the +\fICURLOPT_SEEKFUNCTION\fP option, this is the pointer you'll get as input. If +you don't specify a seek callback, NULL is passed. (Option added in 7.18.0) +.IP CURLOPT_SOCKOPTFUNCTION +Function pointer that should match the \fIcurl_sockopt_callback\fP prototype +found in \fI<curl/curl.h>\fP. This function gets called by libcurl after the +socket() call but before the connect() call. The callback's \fIpurpose\fP +argument identifies the exact purpose for this particular socket, and +currently only one value is supported: \fICURLSOCKTYPE_IPCXN\fP for the +primary connection (meaning the control connection in the FTP case). Future +versions of libcurl may support more purposes. It passes the newly created +socket descriptor so additional setsockopt() calls can be done at the user's +discretion. Return 0 (zero) from the callback on success. Return 1 from the +callback function to signal an unrecoverable error to the library and it will +close the socket and return \fICURLE_COULDNT_CONNECT\fP. (Option added in +7.15.6.) +.IP CURLOPT_SOCKOPTDATA +Pass a pointer that will be untouched by libcurl and passed as the first +argument in the sockopt callback set with \fICURLOPT_SOCKOPTFUNCTION\fP. +(Option added in 7.15.6.) +.IP CURLOPT_OPENSOCKETFUNCTION +Function pointer that should match the \fIcurl_opensocket_callback\fP +prototype found in \fI<curl/curl.h>\fP. This function gets called by libcurl +instead of the \fIsocket(2)\fP call. The callback's \fIpurpose\fP argument +identifies the exact purpose for this particular socket, and currently only +one value is supported: \fICURLSOCKTYPE_IPCXN\fP for the primary connection +(meaning the control connection in the FTP case). Future versions of libcurl +may support more purposes. It passes the resolved peer address as a +\fIaddress\fP argument so the callback can modify the address or refuse to +connect at all. 
The callback function should return the socket or +\fICURL_SOCKET_BAD\fP in case no connection should be established or any error +detected. Any additional \fIsetsockopt(2)\fP calls can be done on the socket +at the user's discretion. \fICURL_SOCKET_BAD\fP return value from the +callback function will signal an unrecoverable error to the library and it +will return \fICURLE_COULDNT_CONNECT\fP. This return code can be used for IP +address blacklisting. The default behavior is: +.nf + return socket(addr->family, addr->socktype, addr->protocol); +.fi +(Option added in 7.17.1.) +.IP CURLOPT_OPENSOCKETDATA +Pass a pointer that will be untouched by libcurl and passed as the first +argument in the opensocket callback set with \fICURLOPT_OPENSOCKETFUNCTION\fP. +(Option added in 7.17.1.) +.IP CURLOPT_PROGRESSFUNCTION +Function pointer that should match the \fIcurl_progress_callback\fP prototype +found in \fI<curl/curl.h>\fP. This function gets called by libcurl instead of +its internal equivalent with a frequent interval during operation (roughly +once per second) no matter if data is being transferred or not. Unknown/unused +argument values passed to the callback will be set to zero (like if you only +download data, the upload size will remain 0). Returning a non-zero value from +this callback will cause libcurl to abort the transfer and return +\fICURLE_ABORTED_BY_CALLBACK\fP. + +If you transfer data with the multi interface, this function will not be +called during periods of idleness unless you call the appropriate libcurl +function that performs transfers. + +\fICURLOPT_NOPROGRESS\fP must be set to 0 to make this function actually +get called. +.IP CURLOPT_PROGRESSDATA +Pass a pointer that will be untouched by libcurl and passed as the first +argument in the progress callback set with \fICURLOPT_PROGRESSFUNCTION\fP. 
+.IP CURLOPT_HEADERFUNCTION +Function pointer that should match the following prototype: \fIsize_t +function( void *ptr, size_t size, size_t nmemb, void *stream);\fP. This +function gets called by libcurl as soon as it has received header data. The +header callback will be called once for each header and only complete header +lines are passed on to the callback. Parsing headers should be easy enough +using this. The size of the data pointed to by \fIptr\fP is \fIsize\fP +multiplied with \fInmemb\fP. Do not assume that the header line is zero +terminated! The pointer named \fIstream\fP is the one you set with the +\fICURLOPT_WRITEHEADER\fP option. The callback function must return the number +of bytes actually taken care of, or return -1 to signal error to the library +(it will cause it to abort the transfer with a \fICURLE_WRITE_ERROR\fP return +code). + +If this option is not set, or if it is set to NULL, but +\fICURLOPT_HEADERDATA\fP (\fICURLOPT_WRITEHEADER\fP) is set to anything but +NULL, the function used to accept response data will be used instead. That is, +it will be the function specified with \fICURLOPT_WRITEFUNCTION\fP, or if it +is not specified or NULL - the default, stream-writing function. + +It's important to note that the callback will be invoked for the headers of +all responses received after initiating a request and not just the final +response. This includes all responses which occur during authentication +negotiation. If you need to operate on only the headers from the final +response, you will need to collect headers in the callback yourself and use +HTTP status lines, for example, to delimit response boundaries. + +Since 7.14.1: When a server sends a chunked encoded transfer, it may contain a +trailer. That trailer is identical to a HTTP header and if such a trailer is +received it is passed to the application using this callback as well. 
There +are several ways to detect it being a trailer and not an ordinary header: 1) +it comes after the response-body. 2) it comes after the final header line (CR +LF) 3) a Trailer: header among the response-headers mentions what header to +expect in the trailer. +.IP CURLOPT_WRITEHEADER +(This option is also known as \fBCURLOPT_HEADERDATA\fP) Pass a pointer to be +used to write the header part of the received data to. If you don't use your +own callback to take care of the writing, this must be a valid FILE *. See +also the \fICURLOPT_HEADERFUNCTION\fP option above on how to set a custom +get-all-headers callback. +.IP CURLOPT_DEBUGFUNCTION +Function pointer that should match the following prototype: \fIint +curl_debug_callback (CURL *, curl_infotype, char *, size_t, void *);\fP +\fICURLOPT_DEBUGFUNCTION\fP replaces the standard debug function used when +\fICURLOPT_VERBOSE\fP is in effect. This callback receives debug information, +as specified with the \fBcurl_infotype\fP argument. This function must return +0. The data pointed to by the char * passed to this function WILL NOT be zero +terminated, but will be exactly of the size as told by the size_t argument. + +Available curl_infotype values: +.RS +.IP CURLINFO_TEXT +The data is informational text. +.IP CURLINFO_HEADER_IN +The data is header (or header-like) data received from the peer. +.IP CURLINFO_HEADER_OUT +The data is header (or header-like) data sent to the peer. +.IP CURLINFO_DATA_IN +The data is protocol data received from the peer. +.IP CURLINFO_DATA_OUT +The data is protocol data sent to the peer. +.RE +.IP CURLOPT_DEBUGDATA +Pass a pointer to whatever you want passed in to your +\fICURLOPT_DEBUGFUNCTION\fP in the last void * argument. This pointer is not +used by libcurl, it is only passed to the callback. +.IP CURLOPT_SSL_CTX_FUNCTION +This option only works for libcurl powered by OpenSSL. If libcurl was +built against another SSL library, this functionality is absent. 
+ +Function pointer that should match the following prototype: \fBCURLcode +sslctxfun(CURL *curl, void *sslctx, void *parm);\fP This function gets called +by libcurl just before the initialization of an SSL connection after having +processed all other SSL related options to give a last chance to an +application to modify the behaviour of openssl's ssl initialization. The +\fIsslctx\fP parameter is actually a pointer to an openssl \fISSL_CTX\fP. If +an error is returned no attempt to establish a connection is made and the +perform operation will return the error code from this callback function. Set +the \fIparm\fP argument with the \fICURLOPT_SSL_CTX_DATA\fP option. This +option was introduced in 7.11.0. + +This function will get called on all new connections made to a server, during +the SSL negotiation. The SSL_CTX pointer will be a new one every time. + +To use this properly, a non-trivial amount of knowledge of the openssl +libraries is necessary. For example, using this function allows you to use openssl +callbacks to add additional validation code for certificates, and even to +change the actual URI of an HTTPS request (example used in the lib509 test +case). See also the example section for a replacement of the key, certificate +and trust file settings. +.IP CURLOPT_SSL_CTX_DATA +Data pointer to pass to the ssl context callback set by the option +\fICURLOPT_SSL_CTX_FUNCTION\fP, this is the pointer you'll get as third +parameter, otherwise \fBNULL\fP. (Added in 7.11.0) +.IP CURLOPT_CONV_TO_NETWORK_FUNCTION +.IP CURLOPT_CONV_FROM_NETWORK_FUNCTION +.IP CURLOPT_CONV_FROM_UTF8_FUNCTION +Function pointers that should match the following prototype: CURLcode +function(char *ptr, size_t length); + +These three options apply to non-ASCII platforms only. They are available +only if \fBCURL_DOES_CONVERSIONS\fP was defined when libcurl was built. When +this is the case, \fIcurl_version_info(3)\fP will return the CURL_VERSION_CONV +feature bit set. 
+ +The data to be converted is in a buffer pointed to by the ptr parameter. The +amount of data to convert is indicated by the length parameter. The converted +data overlays the input data in the buffer pointed to by the ptr parameter. +CURLE_OK should be returned upon successful conversion. A CURLcode return +value defined by curl.h, such as CURLE_CONV_FAILED, should be returned if an +error was encountered. + +\fBCURLOPT_CONV_TO_NETWORK_FUNCTION\fP and +\fBCURLOPT_CONV_FROM_NETWORK_FUNCTION\fP convert between the host encoding and +the network encoding. They are used when commands or ASCII data are +sent/received over the network. + +\fBCURLOPT_CONV_FROM_UTF8_FUNCTION\fP is called to convert from UTF8 into the +host encoding. It is required only for SSL processing. + +If you set a callback pointer to NULL, or don't set it at all, the built-in +libcurl iconv functions will be used. If HAVE_ICONV was not defined when +libcurl was built, and no callback has been established, conversion will +return the CURLE_CONV_REQD error code. + +If HAVE_ICONV is defined, CURL_ICONV_CODESET_OF_HOST must also be defined. +For example: + + \&#define CURL_ICONV_CODESET_OF_HOST "IBM-1047" + +The iconv code in libcurl will default the network and UTF8 codeset names as +follows: + + \&#define CURL_ICONV_CODESET_OF_NETWORK "ISO8859-1" + + \&#define CURL_ICONV_CODESET_FOR_UTF8 "UTF-8" + +You will need to override these definitions if they are different on your +system. +.SH ERROR OPTIONS +.IP CURLOPT_ERRORBUFFER +Pass a char * to a buffer that the libcurl may store human readable error +messages in. This may be more helpful than just the return code from +\fIcurl_easy_perform\fP. The buffer must be at least CURL_ERROR_SIZE big. +Although this argument is a 'char *', it does not describe an input string. +Therefore the (probably undefined) contents of the buffer is NOT copied +by the library. You should keep the associated storage available until +libcurl no longer needs it. 
Failing to do so will cause very odd behavior +or even crashes. libcurl will need it until you call \fIcurl_easy_cleanup(3)\fP +or you set the same option again to use a different pointer. + +Use \fICURLOPT_VERBOSE\fP and \fICURLOPT_DEBUGFUNCTION\fP to better +debug/trace why errors happen. + +If the library does not return an error, the buffer may not have been +touched. Do not rely on the contents in those cases. + +.IP CURLOPT_STDERR +Pass a FILE * as parameter. Tell libcurl to use this stream instead of stderr +when showing the progress meter and displaying \fICURLOPT_VERBOSE\fP data. +.IP CURLOPT_FAILONERROR +A parameter set to 1 tells the library to fail silently if the HTTP code +returned is equal to or larger than 400. The default action would be to return +the page normally, ignoring that code. + +This method is not fail-safe and there are occasions where non-successful +response codes will slip through, especially when authentication is involved +(response codes 401 and 407). + +You might get some amounts of headers transferred before this situation is +detected, like when a "100-continue" is received as a response to a +POST/PUT and a 401 or 407 is received immediately afterwards. +.SH NETWORK OPTIONS +.IP CURLOPT_URL +The actual URL to deal with. The parameter should be a char * to a zero +terminated string. + +If the given URL lacks the protocol part ("http://" or "ftp://" etc), it will +attempt to guess which protocol to use based on the given host name. If the +given protocol of the set URL is not supported, libcurl will return an error +(\fICURLE_UNSUPPORTED_PROTOCOL\fP) when you call \fIcurl_easy_perform(3)\fP or +\fIcurl_multi_perform(3)\fP. Use \fIcurl_version_info(3)\fP for detailed info +on which protocols are supported. + +The string given to CURLOPT_URL must be url-encoded and follow RFC 2396 +(http://curl.haxx.se/rfc/rfc2396.txt). + +\fICURLOPT_URL\fP is the only option that \fBmust\fP be set before +\fIcurl_easy_perform(3)\fP is called. 
+ +\fICURLOPT_PROTOCOLS\fP can be used to limit what protocols libcurl will use +for this transfer, independent of what libcurl has been compiled to +support. That may be useful if you accept the URL from an external source and +want to limit the accessibility. +.IP CURLOPT_PROTOCOLS +Pass a long that holds a bitmask of CURLPROTO_* defines. If used, this bitmask +limits what protocols libcurl may use in the transfer. This allows you to have +a libcurl built to support a wide range of protocols but still limit specific +transfers to only be allowed to use a subset of them. By default libcurl will +accept all protocols it supports. See also +\fICURLOPT_REDIR_PROTOCOLS\fP. (Added in 7.19.4) +.IP CURLOPT_REDIR_PROTOCOLS +Pass a long that holds a bitmask of CURLPROTO_* defines. If used, this bitmask +limits what protocols libcurl may use in a transfer that it follows to in a +redirect when \fICURLOPT_FOLLOWLOCATION\fP is enabled. This allows you to +limit specific transfers to only be allowed to use a subset of protocols in +redirections. By default libcurl will allow all protocols except for FILE and +SCP. This is a difference compared to pre-7.19.4 versions which +unconditionally would follow to all protocols supported. (Added in 7.19.4) +.IP CURLOPT_PROXY +Set HTTP proxy to use. The parameter should be a char * to a zero terminated +string holding the host name or dotted IP address. To specify port number in +this string, append :[port] to the end of the host name. The proxy string may +be prefixed with [protocol]:// since any such prefix will be ignored. The +proxy's port number may optionally be specified with the separate option +\fICURLOPT_PROXYPORT\fP. If not specified, libcurl will default to using port +1080 for proxies. + +When you tell the library to use an HTTP proxy, libcurl will transparently +convert operations to HTTP even if you specify an FTP URL etc. 
This may have +an impact on what other features of the library you can use, such as +\fICURLOPT_QUOTE\fP and similar FTP specifics that don't work unless you +tunnel through the HTTP proxy. Such tunneling is activated with +\fICURLOPT_HTTPPROXYTUNNEL\fP. + +libcurl respects the environment variables \fBhttp_proxy\fP, \fBftp_proxy\fP, +\fBall_proxy\fP etc, if any of those are set. The \fICURLOPT_PROXY\fP option +does however override any possibly set environment variables. + +Setting the proxy string to "" (an empty string) will explicitly disable the +use of a proxy, even if there is an environment variable set for it. + +Since 7.14.1, the proxy host string given in environment variables can be +specified the exact same way as the proxy can be set with \fICURLOPT_PROXY\fP, +including protocol prefix (http://) and embedded user + password. +.IP CURLOPT_PROXYPORT +Pass a long with this option to set the proxy port to connect to unless it is +specified in the proxy string \fICURLOPT_PROXY\fP. +.IP CURLOPT_PROXYTYPE +Pass a long with this option to set type of the proxy. Available options for +this are \fICURLPROXY_HTTP\fP, \fICURLPROXY_HTTP_1_0\fP (added in 7.19.4), +\fICURLPROXY_SOCKS4\fP (added in 7.15.2), \fICURLPROXY_SOCKS5\fP, +\fICURLPROXY_SOCKS4A\fP (added in 7.18.0) and \fICURLPROXY_SOCKS5_HOSTNAME\fP +(added in 7.18.0). The HTTP type is default. (Added in 7.10) +.IP CURLOPT_NOPROXY +Pass a pointer to a zero terminated string. This should be a comma-separated +list of hosts which do not use a proxy, if one is specified. The only +wildcard is a single * character, which matches all hosts, and effectively +disables the proxy. Each name in this list is matched as either a domain which +contains the hostname, or the hostname itself. For example, local.com would +match local.com, local.com:80, and www.local.com, but not www.notlocal.com. 
+(Added in 7.19.4) +.IP CURLOPT_HTTPPROXYTUNNEL +Set the parameter to 1 to make the library tunnel all operations through a +given HTTP proxy. There is a big difference between using a proxy and to +tunnel through it. If you don't know what this means, you probably don't want +this tunneling option. +.IP CURLOPT_SOCKS5_GSSAPI_SERVICE +Pass a char * as parameter to a string holding the name of the service. The +default service name for a SOCKS5 server is rcmd/server-fqdn. This option +allows you to change it. (Added in 7.19.4) +.IP CURLOPT_SOCKS5_GSSAPI_NEC +Pass a long set to 1 to enable or 0 to disable. As part of the gssapi +negotiation a protection mode is negotiated. The rfc1961 says in section +4.3/4.4 it should be protected, but the NEC reference implementation does not. +If enabled, this option allows the unprotected exchange of the protection mode +negotiation. (Added in 7.19.4). +.IP CURLOPT_INTERFACE +Pass a char * as parameter. This sets the interface name to use as outgoing +network interface. The name can be an interface name, an IP address, or a host +name. +.IP CURLOPT_LOCALPORT +Pass a long. This sets the local port number of the socket used for +connection. This can be used in combination with \fICURLOPT_INTERFACE\fP and +you are recommended to use \fICURLOPT_LOCALPORTRANGE\fP as well when this is +set. Note that the only valid port numbers are 1 - 65535. (Added in 7.15.2) +.IP CURLOPT_LOCALPORTRANGE +Pass a long. This is the number of attempts libcurl should make to find a +working local port number. It starts with the given \fICURLOPT_LOCALPORT\fP +and adds one to the number for each retry. Setting this to 1 or below will +make libcurl do only one try for the exact port number. Note that port numbers +by nature are scarce resources that will be busy at times so setting this +value to something too low might cause unnecessary connection setup +failures. 
(Added in 7.15.2) +.IP CURLOPT_DNS_CACHE_TIMEOUT +Pass a long, this sets the timeout in seconds. Name resolves will be kept in +memory for this number of seconds. Set to zero to completely disable +caching, or set to -1 to make the cached entries remain forever. By default, +libcurl caches this info for 60 seconds. + +NOTE: the name resolve functions of various libc implementations don't re-read +name server information unless explicitly told so (for example, by calling +\fIres_init(3)\fP). This may cause libcurl to keep using the older server even +if DHCP has updated the server info, and this may look like a DNS cache issue +to the casual libcurl-app user. +.IP CURLOPT_DNS_USE_GLOBAL_CACHE +Pass a long. If the value is 1, it tells curl to use a global DNS cache +that will survive between easy handle creations and deletions. This is not +thread-safe and this will use a global variable. + +\fBWARNING:\fP this option is considered obsolete. Stop using it. Switch over +to using the share interface instead! See \fICURLOPT_SHARE\fP and +\fIcurl_share_init(3)\fP. +.IP CURLOPT_BUFFERSIZE +Pass a long specifying your preferred size (in bytes) for the receive buffer +in libcurl. The main point of this would be that the write callback gets +called more often and with smaller chunks. This is just treated as a request, +not an order. You cannot be guaranteed to actually get the given size. (Added +in 7.10) + +This size is by default set as big as possible (CURL_MAX_WRITE_SIZE), so it +only makes sense to use this option if you want it smaller. +.IP CURLOPT_PORT +Pass a long specifying what remote port number to connect to, instead of the +one specified in the URL or the default port for the used protocol. +.IP CURLOPT_TCP_NODELAY +Pass a long specifying whether the TCP_NODELAY option should be set or +cleared (1 = set, 0 = clear). The option is cleared by default. This +will have no effect after the connection has been established. 
+ +Setting this option will disable TCP's Nagle algorithm. The purpose of +this algorithm is to try to minimize the number of small packets on +the network (where "small packets" means TCP segments less than the +Maximum Segment Size (MSS) for the network). + +Maximizing the amount of data sent per TCP segment is good because it +amortizes the overhead of the send. However, in some cases (most +notably telnet or rlogin) small segments may need to be sent +without delay. This is less efficient than sending larger amounts of +data at a time, and can contribute to congestion on the network if +overdone. +.IP CURLOPT_ADDRESS_SCOPE +Pass a long specifying the scope_id value to use when connecting to IPv6 +link-local or site-local addresses. (Added in 7.19.0) +.SH NAMES and PASSWORDS OPTIONS (Authentication) +.IP CURLOPT_NETRC +This parameter controls the preference of libcurl between using user names and +passwords from your \fI~/.netrc\fP file, relative to user names and passwords +in the URL supplied with \fICURLOPT_URL\fP. + +libcurl uses a user name (and supplied or prompted password) supplied with +\fICURLOPT_USERPWD\fP in preference to any of the options controlled by this +parameter. + +Pass a long, set to one of the values described below. +.RS +.IP CURL_NETRC_OPTIONAL +The use of your \fI~/.netrc\fP file is optional, and information in the URL is +to be preferred. The file will be scanned for the host and user name (to +find the password only) or for the host only, to find the first user name and +password after that \fImachine\fP, whichever information is not specified in +the URL. + +Undefined values of the option will have this effect. +.IP CURL_NETRC_IGNORED +The library will ignore the file and use only the information in the URL. + +This is the default. +.IP CURL_NETRC_REQUIRED +This value tells the library that use of the file is required, to ignore the +information in the URL, and to search the file for the host only. 
+.RE +Only machine name, user name and password are taken into account +(init macros and similar things aren't supported). + +libcurl does not verify that the file has the correct properties set (as the +standard Unix ftp client does). It should only be readable by user. +.IP CURLOPT_NETRC_FILE +Pass a char * as parameter, pointing to a zero terminated string containing +the full path name to the file you want libcurl to use as .netrc file. If this +option is omitted, and \fICURLOPT_NETRC\fP is set, libcurl will attempt to +find a .netrc file in the current user's home directory. (Added in 7.10.9) +.IP CURLOPT_USERPWD +Pass a char * as parameter, which should be [user name]:[password] to use for +the connection. Use \fICURLOPT_HTTPAUTH\fP to decide the authentication method. + +When using NTLM, you can set the domain by prepending it to the user name and +separating the domain and name with a forward (/) or backward slash (\\). Like +this: "domain/user:password" or "domain\\user:password". Some HTTP servers (on +Windows) support this style even for Basic authentication. + +When using HTTP and \fICURLOPT_FOLLOWLOCATION\fP, libcurl might perform +several requests to possibly different hosts. libcurl will only send this user +and password information to hosts using the initial host name (unless +\fICURLOPT_UNRESTRICTED_AUTH\fP is set), so if libcurl follows locations to +other hosts it will not send the user and password to those. This is enforced +to prevent accidental information leakage. +.IP CURLOPT_PROXYUSERPWD +Pass a char * as parameter, which should be [user name]:[password] to use for +the connection to the HTTP proxy. Use \fICURLOPT_PROXYAUTH\fP to decide +the authentication method. +.IP CURLOPT_USERNAME +Pass a char * as parameter, which should be pointing to the zero terminated +user name to use for the transfer. + +\fBCURLOPT_USERNAME\fP sets the user name to be used in protocol +authentication. 
You should not use this option together with the (older) +CURLOPT_USERPWD option. + +In order to specify the password to be used in conjunction with the user name +use the \fICURLOPT_PASSWORD\fP option. (Added in 7.19.1) +.IP CURLOPT_PASSWORD +Pass a char * as parameter, which should be pointing to the zero terminated +password to use for the transfer. + +The CURLOPT_PASSWORD option should be used in conjunction with +the \fICURLOPT_USERNAME\fP option. (Added in 7.19.1) +.IP CURLOPT_PROXYUSERNAME +Pass a char * as parameter, which should be pointing to the zero terminated +user name to use for the transfer while connecting to Proxy. + +The CURLOPT_PROXYUSERNAME option should be used in same way as the +\fICURLOPT_PROXYUSERPWD\fP is used. In comparison to \fICURLOPT_PROXYUSERPWD\fP +the CURLOPT_PROXYUSERNAME allows the username to contain a colon, +like in the following example: "sip:user@example.com". +Note the CURLOPT_PROXYUSERNAME option is an alternative way to set the user name +while connecting to Proxy. There is no meaning to use it together +with the \fICURLOPT_PROXYUSERPWD\fP option. + +In order to specify the password to be used in conjunction with the user name +use the \fICURLOPT_PROXYPASSWORD\fP option. (Added in 7.19.1) +.IP CURLOPT_PROXYPASSWORD +Pass a char * as parameter, which should be pointing to the zero terminated +password to use for the transfer while connecting to Proxy. + +The CURLOPT_PROXYPASSWORD option should be used in conjunction with +the \fICURLOPT_PROXYUSERNAME\fP option. (Added in 7.19.1) +.IP CURLOPT_HTTPAUTH +Pass a long as parameter, which is set to a bitmask, to tell libcurl which +authentication method(s) you want it to use. The available bits are listed +below. If more than one bit is set, libcurl will first query the site to see +which authentication methods it supports and then pick the best one you allow +it to use. For some methods, this will induce an extra network round-trip. 
Set +the actual name and password with the \fICURLOPT_USERPWD\fP option or +with the \fICURLOPT_USERNAME\fP and the \fICURLOPT_PASSWORD\fP options. +(Added in 7.10.6) +.RS +.IP CURLAUTH_BASIC +HTTP Basic authentication. This is the default choice, and the only method +that is in wide-spread use and supported virtually everywhere. This sends +the user name and password over the network in plain text, easily captured by +others. +.IP CURLAUTH_DIGEST +HTTP Digest authentication. Digest authentication is defined in RFC2617 and +is a more secure way to do authentication over public networks than the +regular old-fashioned Basic method. +.IP CURLAUTH_DIGEST_IE +HTTP Digest authentication with an IE flavor. Digest authentication is +defined in RFC2617 and is a more secure way to do authentication over public +networks than the regular old-fashioned Basic method. The IE flavor is simply +that libcurl will use a special "quirk" that IE is known to have used before +version 7 and that some servers require the client to use. (This define was +added in 7.19.3) +.IP CURLAUTH_GSSNEGOTIATE +HTTP GSS-Negotiate authentication. The GSS-Negotiate (also known as plain +\&"Negotiate") method was designed by Microsoft and is used in their web +applications. It is primarily meant as a support for Kerberos5 authentication +but may also be used along with other authentication methods. For more +information see IETF draft draft-brezak-spnego-http-04.txt. + +You need to build libcurl with a suitable GSS-API library for this to work. +.IP CURLAUTH_NTLM +HTTP NTLM authentication. A proprietary protocol invented and used by +Microsoft. It uses a challenge-response and hash concept similar to Digest, to +prevent the password from being eavesdropped. + +You need to build libcurl with OpenSSL support for this option to work, or +build libcurl on Windows. +.IP CURLAUTH_ANY +This is a convenience macro that sets all bits and thus makes libcurl pick any +it finds suitable. 
libcurl will automatically select the one it finds most +secure. +.IP CURLAUTH_ANYSAFE +This is a convenience macro that sets all bits except Basic and thus makes +libcurl pick any it finds suitable. libcurl will automatically select the one it +finds most secure. +.RE +.IP CURLOPT_PROXYAUTH +Pass a long as parameter, which is set to a bitmask, to tell libcurl which +authentication method(s) you want it to use for your proxy authentication. If +more than one bit is set, libcurl will first query the site to see what +authentication methods it supports and then pick the best one you allow it to +use. For some methods, this will induce an extra network round-trip. Set the +actual name and password with the \fICURLOPT_PROXYUSERPWD\fP option. The +bitmask can be constructed by or'ing together the bits listed above for the +\fICURLOPT_HTTPAUTH\fP option. As of this writing, only Basic, Digest and NTLM +work. (Added in 7.10.7) +.SH HTTP OPTIONS +.IP CURLOPT_AUTOREFERER +Pass a parameter set to 1 to enable this. When enabled, libcurl will +automatically set the Referer: field in requests where it follows a Location: +redirect. +.IP CURLOPT_ENCODING +Sets the contents of the Accept-Encoding: header sent in an HTTP request, and +enables decoding of a response when a Content-Encoding: header is received. +Three encodings are supported: \fIidentity\fP, which does nothing, +\fIdeflate\fP which requests the server to compress its response using the +zlib algorithm, and \fIgzip\fP which requests the gzip algorithm. If a +zero-length string is set, then an Accept-Encoding: header containing all +supported encodings is sent. + +This is a request, not an order; the server may or may not do it. This option +must be set (to any non-NULL value) or else any unsolicited encoding done by +the server is ignored. See the special file lib/README.encoding for details. 
+.IP CURLOPT_FOLLOWLOCATION +A parameter set to 1 tells the library to follow any Location: header that the +server sends as part of an HTTP header. + +This means that the library will re-send the same request on the new location +and follow new Location: headers all the way until no more such headers are +returned. \fICURLOPT_MAXREDIRS\fP can be used to limit the number of redirects +libcurl will follow. + +NOTE: since 7.19.4, libcurl can limit to what protocols it will automatically +follow. The accepted protocols are set with \fICURLOPT_REDIR_PROTOCOLS\fP and +it excludes the FILE protocol by default. +.IP CURLOPT_UNRESTRICTED_AUTH +A parameter set to 1 tells the library it can continue to send authentication +(user+password) when following locations, even when hostname changed. This +option is meaningful only when setting \fICURLOPT_FOLLOWLOCATION\fP. +.IP CURLOPT_MAXREDIRS +Pass a long. The set number will be the redirection limit. If that many +redirections have been followed, the next redirect will cause an error +(\fICURLE_TOO_MANY_REDIRECTS\fP). This option only makes sense if the +\fICURLOPT_FOLLOWLOCATION\fP is used at the same time. Added in 7.15.1: +Setting the limit to 0 will make libcurl refuse any redirect. Set it to -1 for +an infinite number of redirects (which is the default) +.IP CURLOPT_POSTREDIR +Pass a bitmask to control how libcurl acts on redirects after POSTs that get a +301 or 302 response back. A parameter with bit 0 set (value +\fBCURL_REDIR_POST_301\fP) tells the library to respect RFC 2616/10.3.2 and +not convert POST requests into GET requests when following a 301 +redirection. Setting bit 1 (value CURL_REDIR_POST_302) makes libcurl maintain +the request method after a 302 redirect. CURL_REDIR_POST_ALL is a convenience +define that sets both bits. + +The non-RFC behaviour is ubiquitous in web browsers, so the library does the +conversion by default to maintain consistency. 
However, a server may require a +POST to remain a POST after such a redirection. This option is meaningful only +when setting \fICURLOPT_FOLLOWLOCATION\fP. (Added in 7.17.1) (This option was +known as CURLOPT_POST301 up to 7.19.0 as it only supported the 301 way before +then) +.IP CURLOPT_PUT +A parameter set to 1 tells the library to use HTTP PUT to transfer data. The +data should be set with \fICURLOPT_READDATA\fP and \fICURLOPT_INFILESIZE\fP. + +This option is deprecated and starting with version 7.12.1 you should instead +use \fICURLOPT_UPLOAD\fP. +.IP CURLOPT_POST +A parameter set to 1 tells the library to do a regular HTTP post. This will +also make the library use a "Content-Type: +application/x-www-form-urlencoded" header. (This is by far the most commonly +used POST method). + +Use one of \fICURLOPT_POSTFIELDS\fP or \fICURLOPT_COPYPOSTFIELDS\fP options to +specify what data to post and \fICURLOPT_POSTFIELDSIZE\fP or +\fICURLOPT_POSTFIELDSIZE_LARGE\fP to set the data size. + +Optionally, you can provide data to POST using the \fICURLOPT_READFUNCTION\fP +and \fICURLOPT_READDATA\fP options but then you must make sure to not set +\fICURLOPT_POSTFIELDS\fP to anything but NULL. When providing data with a +callback, you must transmit it using chunked transfer-encoding or you must set +the size of the data with the \fICURLOPT_POSTFIELDSIZE\fP or +\fICURLOPT_POSTFIELDSIZE_LARGE\fP option. To enable chunked encoding, you +simply pass in the appropriate Transfer-Encoding header, see the +post-callback.c example. + +You can override the default POST Content-Type: header by setting your own +with \fICURLOPT_HTTPHEADER\fP. + +Using POST with HTTP 1.1 implies the use of a "Expect: 100-continue" header. +You can disable this header with \fICURLOPT_HTTPHEADER\fP as usual. + +If you use POST to a HTTP 1.1 server, you can send data without knowing the +size before starting the POST if you use chunked encoding. 
You enable this by +adding a header like "Transfer-Encoding: chunked" with +\fICURLOPT_HTTPHEADER\fP. With HTTP 1.0 or without chunked transfer, you must +specify the size in the request. + +When setting \fICURLOPT_POST\fP to 1, it will automatically set +\fICURLOPT_NOBODY\fP to 0 (since 7.14.1). + +If you issue a POST request and then want to make a HEAD or GET using the same +re-used handle, you must explicitly set the new request type using +\fICURLOPT_NOBODY\fP or \fICURLOPT_HTTPGET\fP or similar. +.IP CURLOPT_POSTFIELDS +Pass a void * as parameter, which should be the full data to post in an HTTP +POST operation. You must make sure that the data is formatted the way you want +the server to receive it. libcurl will not convert or encode it for you. Most +web servers will assume this data to be url-encoded. Take note. + +The pointed data are NOT copied by the library: as a consequence, they must +be preserved by the calling application until the transfer finishes. + +This POST is a normal application/x-www-form-urlencoded kind (and libcurl will +set that Content-Type by default when this option is used), which is the most +commonly used one by HTML forms. See also the \fICURLOPT_POST\fP. Using +\fICURLOPT_POSTFIELDS\fP implies \fICURLOPT_POST\fP. + +If you want to do a zero-byte POST, you need to set +\fICURLOPT_POSTFIELDSIZE\fP explicitly to zero, as simply setting +\fICURLOPT_POSTFIELDS\fP to NULL or "" just effectively disables the sending +of the specified string. libcurl will instead assume that you'll send the POST +data using the read callback! + +Using POST with HTTP 1.1 implies the use of a "Expect: 100-continue" header. +You can disable this header with \fICURLOPT_HTTPHEADER\fP as usual. + +To make multipart/formdata posts (aka RFC2388-posts), check out the +\fICURLOPT_HTTPPOST\fP option. +.IP CURLOPT_POSTFIELDSIZE +If you want to post data to the server without letting libcurl do a strlen() +to measure the data size, this option must be used. 
When this option is used +you can post fully binary data, which otherwise is likely to fail. If this +size is set to -1, the library will use strlen() to get the size. +.IP CURLOPT_POSTFIELDSIZE_LARGE +Pass a curl_off_t as parameter. Use this to set the size of the +\fICURLOPT_POSTFIELDS\fP data to prevent libcurl from doing strlen() on the +data to figure out the size. This is the large file version of the +\fICURLOPT_POSTFIELDSIZE\fP option. (Added in 7.11.1) +.IP CURLOPT_COPYPOSTFIELDS +Pass a char * as parameter, which should be the full data to post in an HTTP +POST operation. It behaves as the \fICURLOPT_POSTFIELDS\fP option, but the +original data are copied by the library, allowing the application to overwrite +the original data after setting this option. + +Because data are copied, care must be taken when using this option in +conjunction with \fICURLOPT_POSTFIELDSIZE\fP or +\fICURLOPT_POSTFIELDSIZE_LARGE\fP: If the size has not been set prior to +\fICURLOPT_COPYPOSTFIELDS\fP, the data are assumed to be a NUL-terminated +string; else the stored size informs the library about the data byte count to +copy. In any case, the size must not be changed after +\fICURLOPT_COPYPOSTFIELDS\fP, unless another \fICURLOPT_POSTFIELDS\fP or +\fICURLOPT_COPYPOSTFIELDS\fP option is issued. +(Added in 7.17.1) +.IP CURLOPT_HTTPPOST +Tells libcurl you want a multipart/formdata HTTP POST to be made and you +instruct what data to pass on to the server. Pass a pointer to a linked list +of curl_httppost structs as parameter. The easiest way to create such a +list, is to use \fIcurl_formadd(3)\fP as documented. The data in this list +must remain intact until you close this curl handle again with +\fIcurl_easy_cleanup(3)\fP. + +Using POST with HTTP 1.1 implies the use of a "Expect: 100-continue" header. +You can disable this header with \fICURLOPT_HTTPHEADER\fP as usual. + +When setting \fICURLOPT_HTTPPOST\fP, it will automatically set +\fICURLOPT_NOBODY\fP to 0 (since 7.14.1). 
+.IP CURLOPT_REFERER +Pass a pointer to a zero terminated string as parameter. It will be used to +set the Referer: header in the http request sent to the remote server. This +can be used to fool servers or scripts. You can also set any custom header +with \fICURLOPT_HTTPHEADER\fP. +.IP CURLOPT_USERAGENT +Pass a pointer to a zero terminated string as parameter. It will be used to +set the User-Agent: header in the http request sent to the remote server. This +can be used to fool servers or scripts. You can also set any custom header +with \fICURLOPT_HTTPHEADER\fP. +.IP CURLOPT_HTTPHEADER +Pass a pointer to a linked list of HTTP headers to pass to the server in your +HTTP request. The linked list should be a fully valid list of \fBstruct +curl_slist\fP structs properly filled in. Use \fIcurl_slist_append(3)\fP to +create the list and \fIcurl_slist_free_all(3)\fP to clean up an entire +list. If you add a header that is otherwise generated and used by libcurl +internally, your added one will be used instead. If you add a header with no +content as in 'Accept:' (no data on the right side of the colon), the +internally used header will get disabled. Thus, using this option you can add +new headers, replace internal headers and remove internal headers. To add a +header with no content, make the content be two quotes: \&"". The headers +included in the linked list must not be CRLF-terminated, because curl adds +CRLF after each header item. Failure to comply with this will result in +strange bugs because the server will most likely ignore part of the headers +you specified. + +The first line in a request (containing the method, usually a GET or POST) is +not a header and cannot be replaced using this option. Only the lines +following the request-line are headers. Adding this method line in this list +of headers will only cause your request to send an invalid header. + +Pass a NULL to this to reset back to no custom headers. 
+ +The most commonly replaced headers have "shortcuts" in the options +\fICURLOPT_COOKIE\fP, \fICURLOPT_USERAGENT\fP and \fICURLOPT_REFERER\fP. +.IP CURLOPT_HTTP200ALIASES +Pass a pointer to a linked list of aliases to be treated as valid HTTP 200 +responses. Some servers respond with a custom header response line. For +example, IceCast servers respond with "ICY 200 OK". By including this string +in your list of aliases, the response will be treated as a valid HTTP header +line such as "HTTP/1.0 200 OK". (Added in 7.10.3) + +The linked list should be a fully valid list of struct curl_slist structs, and +be properly filled in. Use \fIcurl_slist_append(3)\fP to create the list and +\fIcurl_slist_free_all(3)\fP to clean up an entire list. + +The alias itself is not parsed for any version strings. Before libcurl 7.16.3, +libcurl used the value set by option \fICURLOPT_HTTP_VERSION\fP, but starting +with 7.16.3 the protocol is assumed to match HTTP 1.0 when an alias matched. +.IP CURLOPT_COOKIE +Pass a pointer to a zero terminated string as parameter. It will be used to +set a cookie in the http request. The format of the string should be +NAME=CONTENTS, where NAME is the cookie name and CONTENTS is what the cookie +should contain. + +If you need to set multiple cookies, you need to set them all using a single +option and thus you need to concatenate them all in one single string. Set +multiple cookies in one string like this: "name1=content1; name2=content2;" +etc. + +Note that this option sets the cookie header explicitly in the outgoing +request(s). If multiple requests are done due to authentication, followed +redirections or similar, they will all get this cookie passed on. + +Using this option multiple times will only make the latest string override the +previous ones. +.IP CURLOPT_COOKIEFILE +Pass a pointer to a zero terminated string as parameter. It should contain the +name of your file holding cookie data to read.
The cookie data may be in +Netscape / Mozilla cookie data format or just regular HTTP-style headers +dumped to a file. + +Given an empty or non-existing file or by passing the empty string (""), this +option will enable cookies for this curl handle, making it understand and +parse received cookies and then use matching cookies in future requests. + +If you use this option multiple times, you just add more files to read. +Subsequent files will add more cookies. +.IP CURLOPT_COOKIEJAR +Pass a file name as char *, zero terminated. This will make libcurl write all +internally known cookies to the specified file when \fIcurl_easy_cleanup(3)\fP +is called. If no cookies are known, no file will be created. Specify "-" to +instead have the cookies written to stdout. Using this option also enables +cookies for this session, so if you for example follow a location it will make +matching cookies get sent accordingly. + +If the cookie jar file can't be created or written to (when the +\fIcurl_easy_cleanup(3)\fP is called), libcurl will not and cannot report an +error for this. Using \fICURLOPT_VERBOSE\fP or \fICURLOPT_DEBUGFUNCTION\fP +will get a warning to display, but that is the only visible feedback you get +about this possibly lethal situation. +.IP CURLOPT_COOKIESESSION +Pass a long set to 1 to mark this as a new cookie "session". It will force +libcurl to ignore all cookies it is about to load that are "session cookies" +from the previous session. By default, libcurl always stores and loads all +cookies, independent if they are session cookies or not. Session cookies are +cookies without expiry date and they are meant to be alive and existing for +this "session" only. +.IP CURLOPT_COOKIELIST +Pass a char * to a cookie string. Cookie can be either in Netscape / Mozilla +format or just regular HTTP-style header (Set-Cookie: ...) format. If cURL +cookie engine was not enabled it will enable its cookie engine. 
Passing a +magic string \&"ALL" will erase all cookies known by cURL. (Added in 7.14.1) +Passing the special string \&"SESS" will only erase all session cookies known +by cURL. (Added in 7.15.4) Passing the special string \&"FLUSH" will write +all cookies known by cURL to the file specified by \fICURLOPT_COOKIEJAR\fP. +(Added in 7.17.1) +.IP CURLOPT_HTTPGET +Pass a long. If the long is 1, this forces the HTTP request to get back +to GET. Usable if a POST, HEAD, PUT, or a custom request has been used +previously using the same curl handle. + +When setting \fICURLOPT_HTTPGET\fP to 1, it will automatically set +\fICURLOPT_NOBODY\fP to 0 (since 7.14.1). +.IP CURLOPT_HTTP_VERSION +Pass a long, set to one of the values described below. They force libcurl to +use the specific HTTP versions. This is not sensible to do unless you have a +good reason. +.RS +.IP CURL_HTTP_VERSION_NONE +We don't care about what version the library uses. libcurl will use whatever +it thinks fit. +.IP CURL_HTTP_VERSION_1_0 +Enforce HTTP 1.0 requests. +.IP CURL_HTTP_VERSION_1_1 +Enforce HTTP 1.1 requests. +.RE +.IP CURLOPT_IGNORE_CONTENT_LENGTH +Ignore the Content-Length header. This is useful for Apache 1.x (and similar +servers) which will report incorrect content length for files over 2 +gigabytes. If this option is used, curl will not be able to accurately report +progress, and will simply stop the download when the server ends the +connection. (added in 7.14.1) +.IP CURLOPT_HTTP_CONTENT_DECODING +Pass a long to tell libcurl how to act on content decoding. If set to zero, +content decoding will be disabled. If set to 1 it is enabled. Note however +that libcurl has no default content decoding but requires you to use +\fICURLOPT_ENCODING\fP for that. (added in 7.16.2) +.IP CURLOPT_HTTP_TRANSFER_DECODING +Pass a long to tell libcurl how to act on transfer decoding. If set to zero, +transfer decoding will be disabled, if set to 1 it is enabled +(default). 
libcurl does chunked transfer decoding by default unless this +option is set to zero. (added in 7.16.2) +.SH TFTP OPTIONS +.IP CURLOPT_TFTPBLKSIZE +Specify block size to use for TFTP data transmission. Valid range as per RFC +2348 is 8-65464 bytes. The default of 512 bytes will be used if this option is +not specified. The specified block size will only be used pending support by +the remote server. If the server does not return an option acknowledgement or +returns an option acknowledgement with no blksize, the default of 512 bytes +will be used. (added in 7.19.4) +.SH FTP OPTIONS +.IP CURLOPT_FTPPORT +Pass a pointer to a zero terminated string as parameter. It will be used to +get the IP address to use for the FTP PORT instruction. The PORT instruction +tells the remote server to connect to our specified IP address. The string may +be a plain IP address, a host name, a network interface name (under Unix) or +just a '-' symbol to let the library use your system's default IP +address. Default FTP operations are passive, and thus won't use PORT. + +The address can be followed by a ':' to specify a port, optionally followed by +a '-' to specify a port range. If the port specified is 0, the operating +system will pick a free port. If a range is provided and all ports in the +range are not available, libcurl will report CURLE_FTP_PORT_FAILED for the +handle. Invalid port/range settings are ignored. IPv6 addresses followed by +a port or portrange have to be in brackets. IPv6 addresses without port/range +specifier can be in brackets. (added in 7.19.5) + +Examples with specified ports: + +.nf + eth0:0 + 192.168.1.2:32000-33000 + curl.se:32123 + [::1]:1234-4567 +.fi + +You disable PORT again and go back to using the passive version by setting +this option to NULL. +.IP CURLOPT_QUOTE +Pass a pointer to a linked list of FTP or SFTP commands to pass to +the server prior to your FTP request. 
This will be done before any +other commands are issued (even before the CWD command for FTP). The +linked list should be a fully valid list of 'struct curl_slist' structs +properly filled in with text strings. Use \fIcurl_slist_append(3)\fP +to append strings (commands) to the list, and clear the entire list +afterwards with \fIcurl_slist_free_all(3)\fP. Disable this operation +again by setting a NULL to this option. +The set of valid FTP commands depends on the server (see RFC959 for a +list of mandatory commands). +The valid SFTP commands are: chgrp, chmod, chown, ln, mkdir, pwd, +rename, rm, rmdir, symlink (see +.BR curl (1)) +(SFTP support added in 7.16.3) +.IP CURLOPT_POSTQUOTE +Pass a pointer to a linked list of FTP or SFTP commands to pass to the server +after your FTP transfer request. The commands will only be run if no error +occurred. The linked list should be a fully valid list of struct curl_slist +structs properly filled in as described for \fICURLOPT_QUOTE\fP. Disable this +operation again by setting a NULL to this option. +.IP CURLOPT_PREQUOTE +Pass a pointer to a linked list of FTP commands to pass to the server after +the transfer type is set. The linked list should be a fully valid list of +struct curl_slist structs properly filled in as described for +\fICURLOPT_QUOTE\fP. Disable this operation again by setting a NULL to this +option. Before version 7.15.6, if you also set \fICURLOPT_NOBODY\fP to 1, this +option didn't work. +.IP CURLOPT_DIRLISTONLY +A parameter set to 1 tells the library to just list the names of files in a +directory, instead of doing a full directory listing that would include file +sizes, dates etc. This works for FTP and SFTP URLs. + +This causes an FTP NLST command to be sent on an FTP server. Beware +that some FTP servers list only files in their response to NLST; they +might not include subdirectories and symbolic links. 
+ +(This option was known as CURLOPT_FTPLISTONLY up to 7.16.4) +.IP CURLOPT_APPEND +A parameter set to 1 tells the library to append to the remote file instead of +overwriting it. This is only useful when uploading to an FTP site. + +(This option was known as CURLOPT_FTPAPPEND up to 7.16.4) +.IP CURLOPT_FTP_USE_EPRT +Pass a long. If the value is 1, it tells curl to use the EPRT (and +LPRT) command when doing active FTP downloads (which is enabled by +\fICURLOPT_FTPPORT\fP). Using EPRT means that it will first attempt to use +EPRT and then LPRT before using PORT, but if you pass zero to this +option, it will not try using EPRT or LPRT, only plain PORT. (Added in 7.10.5) + +If the server is an IPv6 host, this option will have no effect as of 7.12.3. +.IP CURLOPT_FTP_USE_EPSV +Pass a long. If the value is 1, it tells curl to use the EPSV command +when doing passive FTP downloads (which it always does by default). Using EPSV +means that it will first attempt to use EPSV before using PASV, but if you +pass zero to this option, it will not try using EPSV, only plain PASV. + +If the server is an IPv6 host, this option will have no effect as of 7.12.3. +.IP CURLOPT_FTP_CREATE_MISSING_DIRS +Pass a long. If the value is 1, curl will attempt to create any remote +directory that it fails to CWD into. CWD is the command that changes working +directory. (Added in 7.10.7) + +This setting also applies to SFTP-connections. curl will attempt to create +the remote directory if it can't obtain a handle to the target-location. The +creation will fail if a file of the same name as the directory to create +already exists or lack of permissions prevents creation. (Added in 7.16.3) + +Starting with 7.19.4, you can also set this value to 2, which will make +libcurl retry the CWD command again if the subsequent MKD command fails.
This +is especially useful if you're doing many simultaneous connections against the +same server and they all have this option enabled, as then CWD may first fail +but then another connection does MKD before this connection and thus MKD fails +but trying CWD works! 7.19.4 also introduced the \fICURLFTP_CREATE_DIR\fP and +\fICURLFTP_CREATE_DIR_RETRY\fP enum names for these arguments. + +Before version 7.19.4, libcurl will simply ignore arguments set to 2 and act +as if 1 was selected. +.IP CURLOPT_FTP_RESPONSE_TIMEOUT +Pass a long. Causes curl to set a timeout period (in seconds) on the amount +of time that the server is allowed to take in order to generate a response +message for a command before the session is considered hung. While curl is +waiting for a response, this value overrides \fICURLOPT_TIMEOUT\fP. It is +recommended that if used in conjunction with \fICURLOPT_TIMEOUT\fP, you set +\fICURLOPT_FTP_RESPONSE_TIMEOUT\fP to a value smaller than +\fICURLOPT_TIMEOUT\fP. (Added in 7.10.8) +.IP CURLOPT_FTP_ALTERNATIVE_TO_USER +Pass a char * as parameter, pointing to a string which will be used to +authenticate if the usual FTP "USER user" and "PASS password" negotiation +fails. This is currently only known to be required when connecting to +Tumbleweed's Secure Transport FTPS server using client certificates for +authentication. (Added in 7.15.5) +.IP CURLOPT_FTP_SKIP_PASV_IP +Pass a long. If set to 1, it instructs libcurl to not use the IP address the +server suggests in its 227-response to libcurl's PASV command when libcurl +connects the data connection. Instead libcurl will re-use the same IP address +it already uses for the control connection. But it will use the port number +from the 227-response. (Added in 7.14.2) + +This option has no effect if PORT, EPRT or EPSV is used instead of PASV. +.IP CURLOPT_USE_SSL +Pass a long using one of the values from below, to make libcurl use your +desired level of SSL for the FTP transfer.
(Added in 7.11.0) + +(This option was known as CURLOPT_FTP_SSL up to 7.16.4, and the constants +were known as CURLFTPSSL_*) +.RS +.IP CURLUSESSL_NONE +Don't attempt to use SSL. +.IP CURLUSESSL_TRY +Try using SSL, proceed as normal otherwise. +.IP CURLUSESSL_CONTROL +Require SSL for the control connection or fail with \fICURLE_USE_SSL_FAILED\fP. +.IP CURLUSESSL_ALL +Require SSL for all communication or fail with \fICURLE_USE_SSL_FAILED\fP. +.RE +.IP CURLOPT_FTPSSLAUTH +Pass a long using one of the values from below, to alter how libcurl issues +\&"AUTH TLS" or "AUTH SSL" when FTP over SSL is activated (see +\fICURLOPT_USE_SSL\fP). (Added in 7.12.2) +.RS +.IP CURLFTPAUTH_DEFAULT +Allow libcurl to decide. +.IP CURLFTPAUTH_SSL +Try "AUTH SSL" first, and only if that fails try "AUTH TLS". +.IP CURLFTPAUTH_TLS +Try "AUTH TLS" first, and only if that fails try "AUTH SSL". +.RE +.IP CURLOPT_FTP_SSL_CCC +If enabled, this option makes libcurl use CCC (Clear Command Channel). It +shuts down the SSL/TLS layer after authenticating. The rest of the +control channel communication will be unencrypted. This allows NAT routers +to follow the FTP transaction. Pass a long using one of the values below. +(Added in 7.16.1) +.RS +.IP CURLFTPSSL_CCC_NONE +Don't attempt to use CCC. +.IP CURLFTPSSL_CCC_PASSIVE +Do not initiate the shutdown, but wait for the server to do it. Do not send +a reply. +.IP CURLFTPSSL_CCC_ACTIVE +Initiate the shutdown and wait for a reply. +.RE +.IP CURLOPT_FTP_ACCOUNT +Pass a pointer to a zero-terminated string (or NULL to disable). When an FTP +server asks for "account data" after user name and password has been provided, +this data is sent off using the ACCT command. (Added in 7.13.0) +.IP CURLOPT_FTP_FILEMETHOD +Pass a long that should have one of the following values. This option controls +what method libcurl should use to reach a file on a FTP(S) server. 
The +argument should be one of the following alternatives: +.RS +.IP CURLFTPMETHOD_MULTICWD +libcurl does a single CWD operation for each path part in the given URL. For +deep hierarchies this means many commands. This is how RFC1738 says it +should be done. This is the default but the slowest behavior. +.IP CURLFTPMETHOD_NOCWD +libcurl does no CWD at all. libcurl will do SIZE, RETR, STOR etc and give a +full path to the server for all these commands. This is the fastest behavior. +.IP CURLFTPMETHOD_SINGLECWD +libcurl does one CWD with the full target directory and then operates on the +file \&"normally" (like in the multicwd case). This is somewhat more standards +compliant than 'nocwd' but without the full penalty of 'multicwd'. +.RE +(Added in 7.15.1) +.SH PROTOCOL OPTIONS +.IP CURLOPT_TRANSFERTEXT +A parameter set to 1 tells the library to use ASCII mode for FTP transfers, +instead of the default binary transfer. For win32 systems it does not set the +stdout to binary mode. This option can be usable when transferring text data +between systems with different views on certain characters, such as newlines +or similar. + +libcurl does not do a complete ASCII conversion when doing ASCII transfers +over FTP. This is a known limitation/flaw that nobody has rectified. libcurl +simply sets the mode to ASCII and performs a standard transfer. +.IP CURLOPT_PROXY_TRANSFER_MODE +Pass a long. If the value is set to 1 (one), it tells libcurl to set the +transfer mode (binary or ASCII) for FTP transfers done via an HTTP proxy, by +appending ;type=a or ;type=i to the URL. Without this setting, or it being set +to 0 (zero, the default), \fICURLOPT_TRANSFERTEXT\fP has no effect when doing +FTP via a proxy. Beware that not all proxies support this feature. (Added in +7.18.0) +.IP CURLOPT_CRLF +Convert Unix newlines to CRLF newlines on transfers. +.IP CURLOPT_RANGE +Pass a char * as parameter, which should contain the specified range you +want. 
It should be in the format "X-Y", where X or Y may be left out. HTTP +transfers also support several intervals, separated with commas as in +\fI"X-Y,N-M"\fP. Using this kind of multiple intervals will cause the HTTP +server to send the response document in pieces (using standard MIME separation +techniques). Pass a NULL to this option to disable the use of ranges. + +Ranges work on HTTP, FTP and FILE (since 7.18.0) transfers only. +.IP CURLOPT_RESUME_FROM +Pass a long as parameter. It contains the offset in number of bytes that you +want the transfer to start from. Set this option to 0 to make the transfer +start from the beginning (effectively disabling resume). For FTP, set this +option to -1 to make the transfer start from the end of the target file +(useful to continue an interrupted upload). +.IP CURLOPT_RESUME_FROM_LARGE +Pass a curl_off_t as parameter. It contains the offset in number of bytes that +you want the transfer to start from. (Added in 7.11.0) +.IP CURLOPT_CUSTOMREQUEST +Pass a pointer to a zero terminated string as parameter. It will be used +instead of GET or HEAD when doing an HTTP request, or instead of LIST or NLST +when doing a FTP directory listing. This is useful for doing DELETE or other +more or less obscure HTTP requests. Don't do this at will, make sure your +server supports the command first. + +When you change the request method by setting \fBCURLOPT_CUSTOMREQUEST\fP to +something, you don't actually change how libcurl behaves or acts in regards to +the particular request method, it will only change the actual string sent in +the request. + +For example: if you tell libcurl to do a HEAD request, but then change the +request to a "GET" with \fBCURLOPT_CUSTOMREQUEST\fP you'll still see libcurl +act as if it sent a HEAD even when it does send a GET. + +To switch to a proper HEAD, use \fICURLOPT_NOBODY\fP, to switch to a proper +POST, use \fICURLOPT_POST\fP or \fICURLOPT_POSTFIELDS\fP and so on. 
+ +Restore to the internal default by setting this to NULL. + +Many people have wrongly used this option to replace the entire request with +their own, including multiple headers and POST contents. While that might work +in many cases, it will cause libcurl to send invalid requests and it could +possibly confuse the remote server badly. Use \fICURLOPT_POST\fP and +\fICURLOPT_POSTFIELDS\fP to set POST data. Use \fICURLOPT_HTTPHEADER\fP to +replace or extend the set of headers sent by libcurl. Use +\fICURLOPT_HTTP_VERSION\fP to change HTTP version. +.IP CURLOPT_FILETIME +Pass a long. If it is 1, libcurl will attempt to get the modification date of +the remote document in this operation. This requires that the remote server +sends the time or replies to a time querying command. The +\fIcurl_easy_getinfo(3)\fP function with the \fICURLINFO_FILETIME\fP argument +can be used after a transfer to extract the received time (if any). +.IP CURLOPT_NOBODY +A parameter set to 1 tells the library to not include the body-part in the +output. This is only relevant for protocols that have separate header and body +parts. On HTTP(S) servers, this will make libcurl do a HEAD request. + +To change request to GET, you should use \fICURLOPT_HTTPGET\fP. Change request +to POST with \fICURLOPT_POST\fP etc. +.IP CURLOPT_INFILESIZE +When uploading a file to a remote site, this option should be used to tell +libcurl what the expected size of the infile is. This value should be passed +as a long. See also \fICURLOPT_INFILESIZE_LARGE\fP. + +For uploading using SCP, this option or \fICURLOPT_INFILESIZE_LARGE\fP is +mandatory. + +Note that this option does not limit how much data libcurl will actually send, +as that is controlled entirely by what the read callback returns. +.IP CURLOPT_INFILESIZE_LARGE +When uploading a file to a remote site, this option should be used to tell +libcurl what the expected size of the infile is. This value should be passed +as a curl_off_t. 
(Added in 7.11.0) + +For uploading using SCP, this option or \fICURLOPT_INFILESIZE\fP is mandatory. + +Note that this option does not limit how much data libcurl will actually send, +as that is controlled entirely by what the read callback returns. +.IP CURLOPT_UPLOAD +A parameter set to 1 tells the library to prepare for an upload. The +\fICURLOPT_READDATA\fP and \fICURLOPT_INFILESIZE\fP or +\fICURLOPT_INFILESIZE_LARGE\fP options are also interesting for uploads. If +the protocol is HTTP, uploading means using the PUT request unless you tell +libcurl otherwise. + +Using PUT with HTTP 1.1 implies the use of a "Expect: 100-continue" header. +You can disable this header with \fICURLOPT_HTTPHEADER\fP as usual. + +If you use PUT to a HTTP 1.1 server, you can upload data without knowing the +size before starting the transfer if you use chunked encoding. You enable this +by adding a header like "Transfer-Encoding: chunked" with +\fICURLOPT_HTTPHEADER\fP. With HTTP 1.0 or without chunked transfer, you must +specify the size. +.IP CURLOPT_MAXFILESIZE +Pass a long as parameter. This allows you to specify the maximum size (in +bytes) of a file to download. If the file requested is larger than this value, +the transfer will not start and CURLE_FILESIZE_EXCEEDED will be returned. + +The file size is not always known prior to download, and for such files this +option has no effect even if the file transfer ends up being larger than this +given limit. This concerns both FTP and HTTP transfers. +.IP CURLOPT_MAXFILESIZE_LARGE +Pass a curl_off_t as parameter. This allows you to specify the maximum size +(in bytes) of a file to download. If the file requested is larger than this +value, the transfer will not start and \fICURLE_FILESIZE_EXCEEDED\fP will be +returned. (Added in 7.11.0) + +The file size is not always known prior to download, and for such files this +option has no effect even if the file transfer ends up being larger than this +given limit. 
This concerns both FTP and HTTP transfers. +.IP CURLOPT_TIMECONDITION +Pass a long as parameter. This defines how the \fICURLOPT_TIMEVALUE\fP time +value is treated. You can set this parameter to \fICURL_TIMECOND_IFMODSINCE\fP +or \fICURL_TIMECOND_IFUNMODSINCE\fP. This feature applies to HTTP and FTP. + +The last modification time of a file is not always known and in such instances +this feature will have no effect even if the given time condition would not +have been met. \fIcurl_easy_getinfo(3)\fP with the +\fICURLINFO_CONDITION_UNMET\fP option can be used after a transfer to learn if +a zero-byte successful "transfer" was due to this condition not matching. +.IP CURLOPT_TIMEVALUE +Pass a long as parameter. This should be the time in seconds since 1 Jan 1970, +and the time will be used in a condition as specified with +\fICURLOPT_TIMECONDITION\fP. +.SH CONNECTION OPTIONS +.IP CURLOPT_TIMEOUT +Pass a long as parameter containing the maximum time in seconds that you allow +the libcurl transfer operation to take. Normally, name lookups can take a +considerable time and limiting operations to less than a few minutes risk +aborting perfectly normal operations. This option will cause curl to use the +SIGALRM to enable time-outing system calls. + +In unix-like systems, this might cause signals to be used unless +\fICURLOPT_NOSIGNAL\fP is set. +.IP CURLOPT_TIMEOUT_MS +Like \fICURLOPT_TIMEOUT\fP but takes number of milliseconds instead. If +libcurl is built to use the standard system name resolver, that portion +of the transfer will still use full-second resolution for timeouts with +a minimum timeout allowed of one second. +(Added in 7.16.2) +.IP CURLOPT_LOW_SPEED_LIMIT +Pass a long as parameter. It contains the transfer speed in bytes per second +that the transfer should be below during \fICURLOPT_LOW_SPEED_TIME\fP seconds +for the library to consider it too slow and abort. +.IP CURLOPT_LOW_SPEED_TIME +Pass a long as parameter. 
It contains the time in seconds that the transfer +should be below the \fICURLOPT_LOW_SPEED_LIMIT\fP for the library to consider +it too slow and abort. +.IP CURLOPT_MAX_SEND_SPEED_LARGE +Pass a curl_off_t as parameter. If an upload exceeds this speed (counted in +bytes per second) on cumulative average during the transfer, the transfer will +pause to keep the average rate less than or equal to the parameter value. +Defaults to unlimited speed. (Added in 7.15.5) +.IP CURLOPT_MAX_RECV_SPEED_LARGE +Pass a curl_off_t as parameter. If a download exceeds this speed (counted in +bytes per second) on cumulative average during the transfer, the transfer will +pause to keep the average rate less than or equal to the parameter +value. Defaults to unlimited speed. (Added in 7.15.5) +.IP CURLOPT_MAXCONNECTS +Pass a long. The set number will be the persistent connection cache size. The +set amount will be the maximum amount of simultaneously open connections that +libcurl may cache in this easy handle. Default is 5, and there isn't much +point in changing this value unless you are perfectly aware of how this works +and changes libcurl's behaviour. This concerns connections using any of the +protocols that support persistent connections. + +When reaching the maximum limit, curl closes the oldest one in the cache to +prevent increasing the number of open connections. + +If you already have performed transfers with this curl handle, setting a +smaller MAXCONNECTS than before may cause open connections to get closed +unnecessarily. + +Note that if you add this easy handle to a multi handle, this setting is not +acknowledged, and you must instead use \fIcurl_multi_setopt(3)\fP and +the \fICURLMOPT_MAXCONNECTS\fP option. +.IP CURLOPT_CLOSEPOLICY +(Obsolete) This option does nothing. +.IP CURLOPT_FRESH_CONNECT +Pass a long. Set to 1 to make the next transfer use a new (fresh) connection +by force. 
If the connection cache is full before this connection, one of the +existing connections will be closed as according to the selected or default +policy. This option should be used with caution and only if you understand +what it does. Set this to 0 to have libcurl attempt re-using an existing +connection (default behavior). +.IP CURLOPT_FORBID_REUSE +Pass a long. Set to 1 to make the next transfer explicitly close the +connection when done. Normally, libcurl keeps all connections alive when done +with one transfer in case a succeeding one follows that can re-use them. +This option should be used with caution and only if you understand what it +does. Set to 0 to have libcurl keep the connection open for possible later +re-use (default behavior). +.IP CURLOPT_CONNECTTIMEOUT +Pass a long. It should contain the maximum time in seconds that you allow the +connection to the server to take. This only limits the connection phase, once +it has connected, this option is of no more use. Set to zero to disable +connection timeout (it will then only timeout on the system's internal +timeouts). See also the \fICURLOPT_TIMEOUT\fP option. + +In unix-like systems, this might cause signals to be used unless +\fICURLOPT_NOSIGNAL\fP is set. +.IP CURLOPT_CONNECTTIMEOUT_MS +Like \fICURLOPT_CONNECTTIMEOUT\fP but takes the number of milliseconds +instead. If libcurl is built to use the standard system name resolver, +that portion of the connect will still use full-second resolution for +timeouts with a minimum timeout allowed of one second. +(Added in 7.16.2) +.IP CURLOPT_IPRESOLVE +Allows an application to select what kind of IP addresses to use when +resolving host names. This is only interesting when using host names that +resolve addresses using more than one version of IP. The allowed values are: +.RS +.IP CURL_IPRESOLVE_WHATEVER +Default, resolves addresses to all IP versions that your system allows. +.IP CURL_IPRESOLVE_V4 +Resolve to IPv4 addresses. 
+.IP CURL_IPRESOLVE_V6 +Resolve to IPv6 addresses. +.RE +.IP CURLOPT_CONNECT_ONLY +Pass a long. If the parameter equals 1, it tells the library to perform all +the required proxy authentication and connection setup, but no data transfer. +This option is useful only on HTTP URLs. + +This option is useful with the \fICURLINFO_LASTSOCKET\fP option to +\fIcurl_easy_getinfo(3)\fP. The library can set up the connection and then the +application can obtain the most recently used socket for special data +transfers. (Added in 7.15.2) +.SH SSL and SECURITY OPTIONS +.IP CURLOPT_SSLCERT +Pass a pointer to a zero terminated string as parameter. The string should be +the file name of your certificate. The default format is "PEM" and can be +changed with \fICURLOPT_SSLCERTTYPE\fP. + +With NSS this is the nickname of the certificate you wish to authenticate +with. +.IP CURLOPT_SSLCERTTYPE +Pass a pointer to a zero terminated string as parameter. The string should be +the format of your certificate. Supported formats are "PEM" and "DER". (Added +in 7.9.3) +.IP CURLOPT_SSLKEY +Pass a pointer to a zero terminated string as parameter. The string should be +the file name of your private key. The default format is "PEM" and can be +changed with \fICURLOPT_SSLKEYTYPE\fP. +.IP CURLOPT_SSLKEYTYPE +Pass a pointer to a zero terminated string as parameter. The string should be +the format of your private key. Supported formats are "PEM", "DER" and "ENG". + +The format "ENG" enables you to load the private key from a crypto engine. In +this case \fICURLOPT_SSLKEY\fP is used as an identifier passed to the +engine. You have to set the crypto engine with \fICURLOPT_SSLENGINE\fP. +\&"DER" format key file currently does not work because of a bug in OpenSSL. +.IP CURLOPT_KEYPASSWD +Pass a pointer to a zero terminated string as parameter. It will be used as +the password required to use the \fICURLOPT_SSLKEY\fP or +\fICURLOPT_SSH_PRIVATE_KEYFILE\fP private key. 
+You never needed a pass phrase to load a certificate but you need one to +load your private key. + +(This option was known as CURLOPT_SSLKEYPASSWD up to 7.16.4 and +CURLOPT_SSLCERTPASSWD up to 7.9.2) +.IP CURLOPT_SSLENGINE +Pass a pointer to a zero terminated string as parameter. It will be used as +the identifier for the crypto engine you want to use for your private +key. + +If the crypto device cannot be loaded, \fICURLE_SSL_ENGINE_NOTFOUND\fP is +returned. +.IP CURLOPT_SSLENGINE_DEFAULT +Sets the actual crypto engine as the default for (asymmetric) crypto +operations. + +If the crypto device cannot be set, \fICURLE_SSL_ENGINE_SETFAILED\fP is +returned. + +Note that even though this option doesn't need any parameter, in some +configurations \fIcurl_easy_setopt\fP might be defined as a macro taking +exactly three arguments. Therefore, it's recommended to pass 1 as parameter to +this option. +.IP CURLOPT_SSLVERSION +Pass a long as parameter to control what version of SSL/TLS to attempt to use. +The available options are: +.RS +.IP CURL_SSLVERSION_DEFAULT +The default action. This will attempt to figure out the remote SSL protocol +version, i.e. either SSLv3 or TLSv1 (but not SSLv2, which became disabled +by default with 7.18.1). +.IP CURL_SSLVERSION_TLSv1 +Force TLSv1 +.IP CURL_SSLVERSION_SSLv2 +Force SSLv2 +.IP CURL_SSLVERSION_SSLv3 +Force SSLv3 +.RE +.IP CURLOPT_SSL_VERIFYPEER +Pass a long as parameter. + +This option determines whether curl verifies the authenticity of the peer's +certificate. A value of 1 means curl verifies; zero means it doesn't. The +default is nonzero, but before 7.10, it was zero. + +When negotiating an SSL connection, the server sends a certificate indicating +its identity. Curl verifies whether the certificate is authentic, i.e. that +you can trust that the server is who the certificate says it is. This trust +is based on a chain of digital signatures, rooted in certification authority +(CA) certificates you supply. 
As of 7.10, curl installs a default bundle of +CA certificates and you can specify alternate certificates with the +\fICURLOPT_CAINFO\fP option or the \fICURLOPT_CAPATH\fP option. + +When \fICURLOPT_SSL_VERIFYPEER\fP is nonzero, and the verification fails to +prove that the certificate is authentic, the connection fails. When the +option is zero, the connection succeeds regardless. + +Authenticating the certificate is not by itself very useful. You typically +want to ensure that the server, as authentically identified by its +certificate, is the server you mean to be talking to. Use +\fICURLOPT_SSL_VERIFYHOST\fP to control that. +.IP CURLOPT_CAINFO +Pass a char * to a zero terminated string naming a file holding one or more +certificates to verify the peer with. This makes sense only when used in +combination with the \fICURLOPT_SSL_VERIFYPEER\fP option. If +\fICURLOPT_SSL_VERIFYPEER\fP is zero, \fICURLOPT_CAINFO\fP need not +even indicate an accessible file. + +Note that this option is by default set to the system path where libcurl's cacert +bundle is assumed to be stored, as established at build time. + +When built against NSS, this is the directory that the NSS certificate +database resides in. +.IP CURLOPT_ISSUERCERT +Pass a char * to a zero terminated string naming a file holding a CA +certificate in PEM format. If the option is set, an additional check against +the peer certificate is performed to verify the issuer is indeed the one +associated with the certificate provided by the option. This additional check +is useful in multi-level PKI where one needs to enforce that the peer certificate is +from a specific branch of the tree. + +This option makes sense only when used in combination with the +\fICURLOPT_SSL_VERIFYPEER\fP option. Otherwise, the result of the check is not +considered as failure. 
+ +A specific error code (CURLE_SSL_ISSUER_ERROR) is defined with the option, +which is returned if the setup of the SSL/TLS session has failed due to a +mismatch with the issuer of peer certificate (\fICURLOPT_SSL_VERIFYPEER\fP has +to be set too for the check to fail). (Added in 7.19.0) +.IP CURLOPT_CAPATH +Pass a char * to a zero terminated string naming a directory holding multiple +CA certificates to verify the peer with. The certificate directory must be +prepared using the openssl c_rehash utility. This makes sense only when used +in combination with the \fICURLOPT_SSL_VERIFYPEER\fP option. If +\fICURLOPT_SSL_VERIFYPEER\fP is zero, \fICURLOPT_CAPATH\fP need not even +indicate an accessible path. The \fICURLOPT_CAPATH\fP function apparently +does not work in Windows due to some limitation in openssl. This option is +OpenSSL-specific and does nothing if libcurl is built to use GnuTLS. +.IP CURLOPT_CRLFILE +Pass a char * to a zero terminated string naming a file with the concatenation +of CRL (in PEM format) to use in the certificate validation that occurs during +the SSL exchange. + +When curl is built to use NSS or GnuTLS, there is no way to influence the use +of CRL passed to help in the verification process. When libcurl is built with +OpenSSL support, X509_V_FLAG_CRL_CHECK and X509_V_FLAG_CRL_CHECK_ALL are both +set, requiring CRL check against all the elements of the certificate chain if +a CRL file is passed. + +This option makes sense only when used in combination with the +\fICURLOPT_SSL_VERIFYPEER\fP option. + +A specific error code (CURLE_SSL_CRL_BADFILE) is defined with the option. It +is returned when the SSL exchange fails because the CRL file cannot be loaded. +Note that a failure in certificate verification due to a revocation information +found in the CRL does not trigger this specific error. (Added in 7.19.0) +.IP CURLOPT_CERTINFO +Pass a long set to 1 to enable libcurl's certificate chain info gatherer. 
With +this enabled, libcurl (if built with OpenSSL) will extract lots of information +and data about the certificates in the certificate chain used in the SSL +connection. This data is then possible to extract after a transfer using +\fIcurl_easy_getinfo(3)\fP and its option \fICURLINFO_CERTINFO\fP. (Added in +7.19.1) +.IP CURLOPT_RANDOM_FILE +Pass a char * to a zero terminated file name. The file will be used to read +from to seed the random engine for SSL. The more random the specified file is, +the more secure the SSL connection will become. +.IP CURLOPT_EGDSOCKET +Pass a char * to the zero terminated path name to the Entropy Gathering Daemon +socket. It will be used to seed the random engine for SSL. +.IP CURLOPT_SSL_VERIFYHOST +Pass a long as parameter. + +This option determines whether libcurl verifies that the server cert is for +the server it is known as. + +When negotiating a SSL connection, the server sends a certificate indicating +its identity. + +When \fICURLOPT_SSL_VERIFYHOST\fP is 2, that certificate must indicate that +the server is the server to which you meant to connect, or the connection +fails. + +Curl considers the server the intended one when the Common Name field or a +Subject Alternate Name field in the certificate matches the host name in the +URL to which you told Curl to connect. + +When the value is 1, the certificate must contain a Common Name field, but it +doesn't matter what name it says. (This is not ordinarily a useful setting). + +When the value is 0, the connection succeeds regardless of the names in the +certificate. + +The default, since 7.10, is 2. + +This option controls checking the server's claimed identity. The server could +be lying. To control lying, see \fICURLOPT_SSL_VERIFYPEER\fP. +.IP CURLOPT_SSL_CIPHER_LIST +Pass a char *, pointing to a zero terminated string holding the list of +ciphers to use for the SSL connection. 
The list must be syntactically correct, +it consists of one or more cipher strings separated by colons. Commas or +spaces are also acceptable separators but colons are normally used, \&!, \&- +and \&+ can be used as operators. + +For OpenSSL and GnuTLS valid examples of cipher lists include 'RC4-SHA', +\'SHA1+DES\', 'TLSv1' and 'DEFAULT'. The default list is normally set when you +compile OpenSSL. + +You'll find more details about cipher lists on this URL: +\fIhttp://www.openssl.org/docs/apps/ciphers.html\fP + +For NSS, valid examples of cipher lists include 'rsa_rc4_128_md5', +\'rsa_aes_128_sha\', etc. With NSS you don't add/remove ciphers. If one uses +this option then all known ciphers are disabled and only those passed in +are enabled. + +You'll find more details about the NSS cipher lists on this URL: +\fIhttp://directory.fedora.redhat.com/docs/mod_nss.html#Directives\fP + +.IP CURLOPT_SSL_SESSIONID_CACHE +Pass a long set to 0 to disable libcurl's use of SSL session-ID caching. Set +this to 1 to enable it. By default all transfers are done using the +cache. Note that while nothing ever should get hurt by attempting to reuse SSL +session-IDs, there seem to be broken SSL implementations in the wild that may +require you to disable this in order for you to succeed. (Added in 7.16.0) +.IP CURLOPT_KRBLEVEL +Pass a char * as parameter. Set the kerberos security level for FTP; this also +enables kerberos awareness. This is a string, \&'clear', \&'safe', +\&'confidential' or \&'private'. If the string is set but doesn't match one +of these, 'private' will be used. Set the string to NULL to disable kerberos +support for FTP. + +(This option was known as CURLOPT_KRB4LEVEL up to 7.16.3) +.SH SSH OPTIONS +.IP CURLOPT_SSH_AUTH_TYPES +Pass a long set to a bitmask consisting of one or more of +CURLSSH_AUTH_PUBLICKEY, CURLSSH_AUTH_PASSWORD, CURLSSH_AUTH_HOST, +CURLSSH_AUTH_KEYBOARD. Set CURLSSH_AUTH_ANY to let libcurl pick one. 
+(Added in 7.16.1) +.IP CURLOPT_SSH_HOST_PUBLIC_KEY_MD5 +Pass a char * pointing to a string containing 32 hexadecimal digits. The +string should be the 128 bit MD5 checksum of the remote host's public key, and +libcurl will reject the connection to the host unless the md5sums match. This +option is only for SCP and SFTP transfers. (Added in 7.17.1) +.IP CURLOPT_SSH_PUBLIC_KEYFILE +Pass a char * pointing to a file name for your public key. If not used, +libcurl defaults to using \fB~/.ssh/id_dsa.pub\fP. +(Added in 7.16.1) +.IP CURLOPT_SSH_PRIVATE_KEYFILE +Pass a char * pointing to a file name for your private key. If not used, +libcurl defaults to using \fB~/.ssh/id_dsa\fP. +If the file is password-protected, set the password with \fICURLOPT_KEYPASSWD\fP. +(Added in 7.16.1) +.IP CURLOPT_SSH_KNOWNHOSTS +Pass a pointer to a zero terminated string holding the file name of the +known_hosts file to use. The known_hosts file should use the OpenSSH file +format as supported by libssh2. If this file is specified, libcurl will only +accept connections with hosts that are known and present in that file, with a +matching public key. Use \fICURLOPT_SSH_KEYFUNCTION\fP to alter the default +behavior on host and key (mis)matching. (Added in 7.19.6) +.IP CURLOPT_SSH_KEYFUNCTION +Pass a pointer to a curl_sshkeycallback function. It gets called when the +known_hosts matching has been done, to allow the application to act and decide +for libcurl how to proceed. It gets passed the CURL handle, the key from the +known_hosts file, the key from the remote site, info from libcurl on the +matching status and a custom pointer (set with \fICURLOPT_SSH_KEYDATA\fP). It +MUST return one of the following return codes to tell libcurl how to act: +.RS +.IP CURLKHSTAT_FINE_ADD_TO_FILE +The host+key is accepted and libcurl will append it to the known_hosts file +before continuing with the connection. 
This will also add the host+key combo +to the known_host pool kept in memory if it wasn't already present there. Note +that the adding of data to the file is done by completely replacing the file +with a new copy, so the permissions of the file must allow this. +.IP CURLKHSTAT_FINE +The host+key is accepted and libcurl will continue with the connection. This will +also add the host+key combo to the known_host pool kept in memory if it wasn't +already present there. +.IP CURLKHSTAT_REJECT +The host+key is rejected. libcurl will deny the connection to continue and it +will be closed. +.IP CURLKHSTAT_DEFER +The host+key is rejected, but the SSH connection is asked to be kept alive. +This feature could be used when the app wants to somehow return back and act +on the host+key situation and then retry without needing the overhead of +setting it up from scratch again. +.RE + (Added in 7.19.6) +.IP CURLOPT_SSH_KEYDATA +Pass a void * as parameter. This pointer will be passed along verbatim to the +callback set with \fICURLOPT_SSH_KEYFUNCTION\fP. (Added in 7.19.6) +.SH OTHER OPTIONS +.IP CURLOPT_PRIVATE +Pass a void * as parameter, pointing to data that should be associated with +this curl handle. The pointer can subsequently be retrieved using +\fIcurl_easy_getinfo(3)\fP with the CURLINFO_PRIVATE option. libcurl itself +does nothing with this data. (Added in 7.10.3) +.IP CURLOPT_SHARE +Pass a share handle as a parameter. The share handle must have been created by +a previous call to \fIcurl_share_init(3)\fP. Setting this option will make +this curl handle use the data from the shared handle instead of keeping the +data to itself. This enables several curl handles to share data. If the curl +handles are used simultaneously in multiple threads, you \fBMUST\fP use the +locking methods in the share handle. See \fIcurl_share_setopt(3)\fP for +details. + +If you add a share that is set to share cookies, your easy handle will use +that cookie cache and get the cookie engine enabled. 
If you unshare an object +that was using cookies (or change to another object that doesn't share +cookies), the easy handle will get its cookie engine disabled. + +Data that the share object is not set to share will be dealt with the usual +way, as if no share was used. +.IP CURLOPT_NEW_FILE_PERMS +Pass a long as a parameter, containing the value of the permissions that will +be assigned to newly created files on the remote server. The default value is +\fI0644\fP, but any valid value can be used. The only protocols that can use +this are \fIsftp://\fP, \fIscp://\fP, and \fIfile://\fP. (Added in 7.16.4) +.IP CURLOPT_NEW_DIRECTORY_PERMS +Pass a long as a parameter, containing the value of the permissions that will +be assigned to newly created directories on the remote server. The default +value is \fI0755\fP, but any valid value can be used. The only protocols that +can use this are \fIsftp://\fP, \fIscp://\fP, and \fIfile://\fP. +(Added in 7.16.4) +.SH TELNET OPTIONS +.IP CURLOPT_TELNETOPTIONS +Provide a pointer to a curl_slist with variables to pass to the telnet +negotiations. The variables should be in the format <option=value>. libcurl +supports the options 'TTYPE', 'XDISPLOC' and 'NEW_ENV'. See the TELNET +standard for details. +.SH RETURN VALUE +CURLE_OK (zero) means that the option was set properly, non-zero means an +error occurred as \fI<curl/curl.h>\fP defines. See the \fIlibcurl-errors(3)\fP +man page for the full list with descriptions. + +If you try to set an option that libcurl doesn't know about, perhaps because +the library is too old to support it or the option was removed in a recent +version, this function will return \fICURLE_FAILED_INIT\fP. 
+.SH "SEE ALSO" +.BR curl_easy_init "(3), " curl_easy_cleanup "(3), " curl_easy_reset "(3)" diff --git a/usr/share/man/man3/curl_easy_strerror.3 b/usr/share/man/man3/curl_easy_strerror.3 new file mode 100755 index 000000000..4dd34c513 --- /dev/null +++ b/usr/share/man/man3/curl_easy_strerror.3 @@ -0,0 +1,20 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_easy_strerror.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_easy_strerror 3 "26 Apr 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_easy_strerror - return string describing error code +.SH SYNOPSIS +.nf +.B #include <curl/curl.h> +.BI "const char *curl_easy_strerror(CURLcode " errornum ");" +.SH DESCRIPTION +The curl_easy_strerror() function returns a string describing the CURLcode +error code passed in the argument \fIerrornum\fP. +.SH AVAILABILITY +This function was added in libcurl 7.12.0 +.SH RETURN VALUE +A pointer to a zero terminated string. +.SH "SEE ALSO" +.BR libcurl-errors "(3), " curl_multi_strerror "(3), " curl_share_strerror "(3)" diff --git a/usr/share/man/man3/curl_easy_unescape.3 b/usr/share/man/man3/curl_easy_unescape.3 new file mode 100755 index 000000000..0fe1bb433 --- /dev/null +++ b/usr/share/man/man3/curl_easy_unescape.3 @@ -0,0 +1,52 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. 
+.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: curl_easy_unescape.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" ************************************************************************** +.\" +.TH curl_easy_unescape 3 "7 April 2006" "libcurl 7.15.4" "libcurl Manual" +.SH NAME +curl_easy_unescape - URL decodes the given string +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_easy_unescape( CURL *" curl ", char *" url ", int "inlength +.BI ", int *" outlength " );" +.ad +.SH DESCRIPTION +This function converts the given URL encoded input string to a "plain string" +and returns that in an allocated memory area. All input characters that are +URL encoded (%XX where XX is a two-digit hexadecimal number) are converted to +their binary versions. + +If the \fBinlength\fP argument is set to 0 (zero), \fIcurl_easy_unescape(3)\fP +will use strlen() on the input \fIurl\fP string to find out the size. + +If \fBoutlength\fP is non-NULL, the function will write the length of the +returned string in the integer it points to. This allows an escaped string +containing %00 to still get used properly after unescaping. + +You must \fIcurl_free(3)\fP the returned string when you're done with it. +.SH AVAILABILITY +Added in 7.15.4 and replaces the old \fIcurl_unescape(3)\fP function. +.SH RETURN VALUE +A pointer to a zero terminated string or NULL if it failed. 
+.SH "SEE ALSO" +.I curl_easy_escape(3), curl_free(3), RFC 2396 diff --git a/usr/share/man/man3/curl_escape.3 b/usr/share/man/man3/curl_escape.3 new file mode 100755 index 000000000..7f5e51348 --- /dev/null +++ b/usr/share/man/man3/curl_escape.3 @@ -0,0 +1,31 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_escape.3,v 1.8 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_escape 3 "6 March 2002" "libcurl 7.9" "libcurl Manual" +.SH NAME +curl_escape - URL encodes the given string +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_escape( char *" url ", int "length " );" +.ad +.SH DESCRIPTION +Obsolete function. Use \fIcurl_easy_escape(3)\fP instead! + +This function will convert the given input string to an URL encoded string and +return that as a new allocated string. All input characters that are not a-z, +A-Z or 0-9 will be converted to their "URL escaped" version (%NN where NN is a +two-digit hexadecimal number). + +If the 'length' argument is set to 0, curl_escape() will use strlen() on the +input 'url' string to find out the size. + +You must curl_free() the returned string when you're done with it. +.SH AVAILABILITY +Since 7.15.4, \fIcurl_easy_escape(3)\fP should be used. This function will +be removed in a future release. +.SH RETURN VALUE +A pointer to a zero terminated string or NULL if it failed. 
+.SH "SEE ALSO" +.BR curl_unescape "(3), " curl_free "(3), " RFC 2396 diff --git a/usr/share/man/man3/curl_formadd.3 b/usr/share/man/man3/curl_formadd.3 new file mode 100755 index 000000000..6b272e055 --- /dev/null +++ b/usr/share/man/man3/curl_formadd.3 @@ -0,0 +1,217 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_formadd.3,v 1.20 2009-08-04 12:02:27 bagder Exp $ +.\" +.TH curl_formadd 3 "24 June 2002" "libcurl 7.9.8" "libcurl Manual" +.SH NAME +curl_formadd - add a section to a multipart/formdata HTTP POST +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLFORMcode curl_formadd(struct curl_httppost ** " firstitem, +.BI "struct curl_httppost ** " lastitem, " ...);" +.ad +.SH DESCRIPTION +curl_formadd() is used to append sections when building a multipart/formdata +HTTP POST (sometimes referred to as RFC2388-style posts). Append one section at +a time until you've added all the sections you want included and then you pass +the \fIfirstitem\fP pointer as parameter to \fBCURLOPT_HTTPPOST\fP. +\fIlastitem\fP is set after each call and on repeated invokes it should be +left as set to allow repeated invokes to find the end of the list faster. + +After the \fIlastitem\fP pointer follow the real arguments. + +The pointers \fI*firstitem\fP and \fI*lastitem\fP should both be pointing to +NULL in the first call to this function. All list-data will be allocated by +the function itself. You must call \fIcurl_formfree(3)\fP after the form post +has been done to free the resources. + +Using POST with HTTP 1.1 implies the use of a "Expect: 100-continue" header. +You can disable this header with \fICURLOPT_HTTPHEADER\fP as usual. + +First, there are some basics you need to understand about multipart/formdata +posts. Each part consists of at least a NAME and a CONTENTS part. If the part +is made for file upload, there are also a stored CONTENT-TYPE and a FILENAME. 
+Below, we'll discuss what options you use to set these properties in the +parts you want to add to your post. + +The options listed first are for making normal parts. The options from +\fICURLFORM_FILE\fP through \fICURLFORM_BUFFERLENGTH\fP are for file upload +parts. +.SH OPTIONS +.IP CURLFORM_COPYNAME +followed by a string which provides the \fIname\fP of this part. libcurl +copies the string so your application doesn't need to keep it around after +this function call. If the name isn't NUL-terminated, or if you'd +like it to contain zero bytes, you must set its length with +\fBCURLFORM_NAMELENGTH\fP. The copied data will be freed by +\fIcurl_formfree(3)\fP. +.IP CURLFORM_PTRNAME +followed by a string which provides the \fIname\fP of this part. libcurl +will use the pointer and refer to the data in your application, so you +must make sure it remains until curl no longer needs it. If the name +isn't NUL-terminated, or if you'd like it to contain zero +bytes, you must set its length with \fBCURLFORM_NAMELENGTH\fP. +.IP CURLFORM_COPYCONTENTS +followed by a pointer to the contents of this part, the actual data +to send away. libcurl copies the provided data, so your application doesn't +need to keep it around after this function call. If the data isn't null +terminated, or if you'd like it to contain zero bytes, you must +set the length of the data with \fBCURLFORM_CONTENTSLENGTH\fP. The copied +data will be freed by \fIcurl_formfree(3)\fP. +.IP CURLFORM_PTRCONTENTS +followed by a pointer to the contents of this part, the actual data +to send away. libcurl will use the pointer and refer to the data in your +application, so you must make sure it remains until curl no longer needs it. +If the data isn't NUL-terminated, or if you'd like it to contain zero bytes, +you must set its length with \fBCURLFORM_CONTENTSLENGTH\fP. +.IP CURLFORM_CONTENTSLENGTH +followed by a long giving the length of the contents. 
Note that for +\fICURLFORM_STREAM\fP contents, this option is mandatory. +.IP CURLFORM_FILECONTENT +followed by a filename, causes that file to be read and its contents used +as data in this part. This part does \fInot\fP automatically become a file +upload part simply because its data was read from a file. +.IP CURLFORM_FILE +followed by a filename, makes this part a file upload part. It sets the +\fIfilename\fP field to the basename of the provided filename, it reads the +contents of the file and passes them as data and sets the content-type if the +given file matches one of the internally known file extensions. For +\fBCURLFORM_FILE\fP the user may send one or more files in one part by +providing multiple \fBCURLFORM_FILE\fP arguments each followed by the filename +(and each \fICURLFORM_FILE\fP is allowed to have a +\fICURLFORM_CONTENTTYPE\fP). +.IP CURLFORM_CONTENTTYPE +is used in combination with \fICURLFORM_FILE\fP. Followed by a pointer to a +string which provides the content-type for this part, possibly instead of an +internally chosen one. +.IP CURLFORM_FILENAME +is used in combination with \fICURLFORM_FILE\fP. Followed by a pointer to a +string, it tells libcurl to use the given string as the \fIfilename\fP in the +file upload part instead of the actual file name. +.IP CURLFORM_BUFFER +is used for custom file upload parts without use of \fICURLFORM_FILE\fP. It +tells libcurl that the file contents are already present in a buffer. The +parameter is a string which provides the \fIfilename\fP field in the content +header. +.IP CURLFORM_BUFFERPTR +is used in combination with \fICURLFORM_BUFFER\fP. The parameter is a pointer +to the buffer to be uploaded. This buffer must not be freed until after +\fIcurl_easy_cleanup(3)\fP is called. You must also use +\fICURLFORM_BUFFERLENGTH\fP to set the number of bytes in the buffer. +.IP CURLFORM_BUFFERLENGTH +is used in combination with \fICURLFORM_BUFFER\fP. The parameter is a +long which gives the length of the buffer. 
+.IP CURLFORM_STREAM +Tells libcurl to use the \fICURLOPT_READFUNCTION\fP callback to get data. The +parameter you pass to \fICURLFORM_STREAM\fP is the pointer passed on to the +read callback's fourth argument. If you want the part to look like a file +upload one, set the \fICURLFORM_FILENAME\fP parameter as well. Note that when +using \fICURLFORM_STREAM\fP, \fICURLFORM_CONTENTSLENGTH\fP must also be set +with the total expected length of the part. (Option added in libcurl 7.18.2) +.IP CURLFORM_ARRAY +Another possibility to send options to curl_formadd() is the +\fBCURLFORM_ARRAY\fP option, that passes a struct curl_forms array pointer as +its value. Each curl_forms structure element has a CURLformoption and a char +pointer. The final element in the array must be a CURLFORM_END. All available +options can be used in an array, except the CURLFORM_ARRAY option itself! The +last argument in such an array must always be \fBCURLFORM_END\fP. +.IP CURLFORM_CONTENTHEADER +specifies extra headers for the form POST section. This takes a curl_slist +prepared in the usual way using \fBcurl_slist_append\fP and appends the list +of headers to those libcurl automatically generates. The list must exist while +the POST occurs, if you free it before the post completes you may experience +problems. + +When you've passed the HttpPost pointer to \fIcurl_easy_setopt(3)\fP (using +the \fICURLOPT_HTTPPOST\fP option), you must not free the list until after +you've called \fIcurl_easy_cleanup(3)\fP for the curl handle. + +See example below. 
+.SH RETURN VALUE +0 means everything was ok, non-zero means an error occurred corresponding +to a CURL_FORMADD_* constant defined in +.I <curl/curl.h> +.SH EXAMPLE +.nf + + struct curl_httppost* post = NULL; + struct curl_httppost* last = NULL; + char namebuffer[] = "name buffer"; + long namelength = strlen(namebuffer); + char buffer[] = "test buffer"; + char htmlbuffer[] = "<HTML>test buffer</HTML>"; + long htmlbufferlength = strlen(htmlbuffer); + struct curl_forms forms[3]; + char file1[] = "my-face.jpg"; + char file2[] = "your-face.jpg"; + /* add null character into htmlbuffer, to demonstrate that + transfers of buffers containing null characters actually work + */ + htmlbuffer[8] = '\\0'; + + /* Add simple name/content section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "name", + CURLFORM_COPYCONTENTS, "content", CURLFORM_END); + + /* Add simple name/content/contenttype section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "htmlcode", + CURLFORM_COPYCONTENTS, "<HTML></HTML>", + CURLFORM_CONTENTTYPE, "text/html", CURLFORM_END); + + /* Add name/ptrcontent section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "name_for_ptrcontent", + CURLFORM_PTRCONTENTS, buffer, CURLFORM_END); + + /* Add ptrname/ptrcontent section */ + curl_formadd(&post, &last, CURLFORM_PTRNAME, namebuffer, + CURLFORM_PTRCONTENTS, buffer, CURLFORM_NAMELENGTH, + namelength, CURLFORM_END); + + /* Add name/ptrcontent/contenttype section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "html_code_with_hole", + CURLFORM_PTRCONTENTS, htmlbuffer, + CURLFORM_CONTENTSLENGTH, htmlbufferlength, + CURLFORM_CONTENTTYPE, "text/html", CURLFORM_END); + + /* Add simple file section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "picture", + CURLFORM_FILE, "my-face.jpg", CURLFORM_END); + + /* Add file/contenttype section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "picture", + CURLFORM_FILE, "my-face.jpg", + CURLFORM_CONTENTTYPE, "image/jpeg", CURLFORM_END); + + /* Add two file 
section */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "pictures", + CURLFORM_FILE, "my-face.jpg", + CURLFORM_FILE, "your-face.jpg", CURLFORM_END); + + /* Add two file section using CURLFORM_ARRAY */ + forms[0].option = CURLFORM_FILE; + forms[0].value = file1; + forms[1].option = CURLFORM_FILE; + forms[1].value = file2; + forms[2].option = CURLFORM_END; + + /* Add a buffer to upload */ + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "name", + CURLFORM_BUFFER, "data", + CURLFORM_BUFFERPTR, record, + CURLFORM_BUFFERLENGTH, record_length, + CURLFORM_END); + + /* no option needed for the end marker */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "pictures", + CURLFORM_ARRAY, forms, CURLFORM_END); + /* Add the content of a file as a normal post text value */ + curl_formadd(&post, &last, CURLFORM_COPYNAME, "filecontent", + CURLFORM_FILECONTENT, ".bashrc", CURLFORM_END); + /* Set the form info */ + curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); + +.SH "SEE ALSO" +.BR curl_easy_setopt "(3), " +.BR curl_formfree "(3)" diff --git a/usr/share/man/man3/curl_formfree.3 b/usr/share/man/man3/curl_formfree.3 new file mode 100755 index 000000000..626ff07a4 --- /dev/null +++ b/usr/share/man/man3/curl_formfree.3 @@ -0,0 +1,20 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_formfree.3,v 1.4 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_formfree 3 "6 April 2001" "libcurl 7.7.1" "libcurl Manual" +.SH NAME +curl_formfree - free a previously built multipart/formdata HTTP POST chain +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "void curl_formfree(struct curl_httppost *" form); +.ad +.SH DESCRIPTION +curl_formfree() is used to clean up data previously built/appended with +\fIcurl_formadd(3)\fP. This must be called when the data has been used, which +typically means after \fIcurl_easy_perform(3)\fP has been called. 
+.SH RETURN VALUE +None +.SH "SEE ALSO" +.BR curl_formadd "(3) " diff --git a/usr/share/man/man3/curl_formget.3 b/usr/share/man/man3/curl_formget.3 new file mode 100755 index 000000000..f56675eda --- /dev/null +++ b/usr/share/man/man3/curl_formget.3 @@ -0,0 +1,49 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_formget.3,v 1.3 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_formget 3 "20 June 2006" "libcurl 7.15.5" "libcurl Manual" +.SH NAME +curl_formget - serialize a previously built multipart/formdata HTTP POST chain +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "int curl_formget(struct curl_httppost *" form, " void *" arg, +.BI " curl_formget_callback " append ");" +.ad +.SH DESCRIPTION +curl_formget() is used to serialize data previously built/appended with +\fIcurl_formadd(3)\fP. Accepts a void pointer as second argument which will be +passed to the curl_formget_callback function. + +.BI "typedef size_t (*curl_formget_callback)(void *" arg, " const char *" buf, +.BI " size_t " len ");" +.nf + +The curl_formget_callback will be executed for each part of the HTTP POST +chain. The void *arg pointer will be the one passed as second argument to +curl_formget(). The character buffer passed to it must not be freed. The +callback should return the buffer length passed to it on success. 
+.SH RETURN VALUE +0 means everything was ok, non-zero means an error occurred +.SH EXAMPLE +.nf + + size_t print_httppost_callback(void *arg, const char *buf, size_t len) + { + fwrite(buf, len, 1, stdout); + (*(size_t *) arg) += len; + return len; + } + size_t print_httppost(struct curl_httppost *post) + { + size_t total_size = 0; + if(curl_formget(post, &total_size, print_httppost_callback)) { + return (size_t) -1; + } + return total_size; + } +.SH AVAILABILITY +This function was added in libcurl 7.15.5 +.SH "SEE ALSO" +.BR curl_formadd "(3) " diff --git a/usr/share/man/man3/curl_free.3 b/usr/share/man/man3/curl_free.3 new file mode 100755 index 000000000..faa1066d8 --- /dev/null +++ b/usr/share/man/man3/curl_free.3 @@ -0,0 +1,18 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: +.\" +.TH curl_free 3 "12 Aug 2003" "libcurl 7.10" "libcurl Manual" +.SH NAME +curl_free - reclaim memory that has been obtained through a libcurl call +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "void curl_free( char *" ptr " );" +.ad +.SH DESCRIPTION +curl_free reclaims memory that has been obtained through a libcurl call. Use +curl_free() instead of free() to avoid anomalies that can result from +differences in memory management between your application and libcurl. 
+.SH "SEE ALSO" +.I curl_unescape(3) diff --git a/usr/share/man/man3/curl_getdate.3 b/usr/share/man/man3/curl_getdate.3 new file mode 100755 index 000000000..286e6382d --- /dev/null +++ b/usr/share/man/man3/curl_getdate.3 @@ -0,0 +1,100 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_getdate.3,v 1.10 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_getdate 3 "12 Aug 2005" "libcurl 7.0" "libcurl Manual" +.SH NAME +curl_getdate - Convert a date string to number of seconds since January 1, +1970 +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "time_t curl_getdate(char *" datestring ", time_t *"now " );" +.ad +.SH DESCRIPTION +This function returns the number of seconds since January 1st 1970 in the UTC +time zone, for the date and time that the \fIdatestring\fP parameter +specifies. The \fInow\fP parameter is not used, pass a NULL there. + +\fBNOTE:\fP This function was rewritten for the 7.12.2 release and this +documentation covers the functionality of the new one. The new one is not +feature-complete with the old one, but most of the formats supported by the +new one were supported by the old too. +.SH PARSING DATES AND TIMES +A "date" is a string containing several items separated by whitespace. The +order of the items is immaterial. A date string may contain many flavors of +items: +.TP 0.8i +.B calendar date items +Can be specified several ways. Month names can only be three-letter English +abbreviations, numbers can be zero-prefixed and the year may use 2 or 4 digits. +Examples: 06 Nov 1994, 06-Nov-94 and Nov-94 6. +.TP +.B time of the day items +This string specifies the time on a given day. You must specify it with 6 +digits with two colons: HH:MM:SS. To not include the time in a date string, +will make the function assume 00:00:00. Example: 18:19:21. +.TP +.B time zone items +Specifies international time zone. There are a few acronyms supported, but in +general you should instead use the specific relative time compared to +UTC. 
Supported formats include: -1200, MST, +0100. +.TP +.B day of the week items +Specifies a day of the week. Days of the week may be spelled out in full +(using english): `Sunday', `Monday', etc or they may be abbreviated to their +first three letters. This is usually not info that adds anything. +.TP +.B pure numbers +If a decimal number of the form YYYYMMDD appears, then YYYY is read as the +year, MM as the month number and DD as the day of the month, for the specified +calendar date. +.PP +.SH EXAMPLES +.nf +Sun, 06 Nov 1994 08:49:37 GMT +Sunday, 06-Nov-94 08:49:37 GMT +Sun Nov 6 08:49:37 1994 +06 Nov 1994 08:49:37 GMT +06-Nov-94 08:49:37 GMT +Nov 6 08:49:37 1994 +06 Nov 1994 08:49:37 +06-Nov-94 08:49:37 +1994 Nov 6 08:49:37 +GMT 08:49:37 06-Nov-94 Sunday +94 6 Nov 08:49:37 +1994 Nov 6 +06-Nov-94 +Sun Nov 6 94 +1994.Nov.6 +Sun/Nov/6/94/GMT +Sun, 06 Nov 1994 08:49:37 CET +06 Nov 1994 08:49:37 EST +Sun, 12 Sep 2004 15:05:58 -0700 +Sat, 11 Sep 2004 21:32:11 +0200 +20040912 15:05:58 -0700 +20040911 +0200 +.fi +.SH STANDARDS +This parser was written to handle date formats specified in RFC 822 (including +the update in RFC 1123) using time zone name or time zone delta and RFC 850 +(obsoleted by RFC 1036) and ANSI C's asctime() format. These formats are the +only ones RFC2616 says HTTP applications may use. +.SH RETURN VALUE +This function returns -1 when it fails to parse the date string. Otherwise it +returns the number of seconds as described. + +If the year is larger than 2037 on systems with 32 bit time_t, this function +will return 0x7fffffff (since that is the largest possible signed 32 bit +number). + +Having a 64 bit time_t is not a guarantee that dates beyond 03:14:07 UTC, +January 19, 2038 will work fine. On systems with a 64 bit time_t but with a +crippled mktime(), \fIcurl_getdate\fP will return -1 in this case. 
+.SH REWRITE +The former version of this function was built with yacc and was not only very +large, it was also never quite understood and it wasn't possible to build with +non-GNU tools since only GNU Bison could make it thread-safe! + +The rewrite was done for 7.12.2. The new one is much smaller and uses simpler +code. diff --git a/usr/share/man/man3/curl_getenv.3 b/usr/share/man/man3/curl_getenv.3 new file mode 100755 index 000000000..a0bfb9639 --- /dev/null +++ b/usr/share/man/man3/curl_getenv.3 @@ -0,0 +1,30 @@ +.\" $Id: curl_getenv.3,v 1.5 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_getenv 3 "30 April 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_getenv - return value for environment name +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_getenv(const char *" name ");" +.ad +.SH DESCRIPTION +curl_getenv() is a portable wrapper for the getenv() function, meant to +emulate its behaviour and provide an identical interface for all operating +systems libcurl builds on (including win32). +.SH AVAILABILITY +This function will be removed from the public libcurl API in the near future. It +will instead be made "available" by source code access only, and then as +curlx_getenv(). +.SH RETURN VALUE +If successful, curl_getenv() returns a pointer to the value of the specified +environment. The memory it refers to is malloc()ed so the application must +free() this when the data is no longer needed. When \fIcurl_getenv(3)\fP fails +to find the specified name, it returns a null pointer. +.SH NOTE +Under unix operating systems, there isn't any point in returning allocated +memory, although other systems won't work properly if this isn't done. The +unix implementation thus has to suffer slightly from the drawbacks of other +systems. 
+.SH "SEE ALSO" +.BR getenv "(3C), " diff --git a/usr/share/man/man3/curl_global_cleanup.3 b/usr/share/man/man3/curl_global_cleanup.3 new file mode 100755 index 000000000..8f75f2c67 --- /dev/null +++ b/usr/share/man/man3/curl_global_cleanup.3 @@ -0,0 +1,32 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_global_cleanup.3,v 1.4 2006-02-17 13:31:49 bagder Exp $ +.\" +.TH curl_global_cleanup 3 "17 Feb 2006" "libcurl 7.8" "libcurl Manual" +.SH NAME +curl_global_cleanup - global libcurl cleanup +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "void curl_global_cleanup(void);" +.ad +.SH DESCRIPTION +This function releases resources acquired by \fBcurl_global_init(3)\fP. + +You should call \fIcurl_global_cleanup(3)\fP once for each call you make to +\fIcurl_global_init(3)\fP, after you are done using libcurl. + +\fBThis function is not thread safe.\fP You must not call it when any other +thread in the program (i.e. a thread sharing the same memory) is running. +This doesn't just mean no other thread that is using libcurl. Because +\fBcurl_global_cleanup(3)\fP calls functions of other libraries that are +similarly thread unsafe, it could conflict with any other thread that uses +these other libraries. + +See the description in \fBlibcurl(3)\fP of global environment requirements for +details of how to use this function. 
+ +.SH "SEE ALSO" +.BR curl_global_init "(3), " +.BR libcurl "(3), " + diff --git a/usr/share/man/man3/curl_global_init.3 b/usr/share/man/man3/curl_global_init.3 new file mode 100755 index 000000000..e81c51694 --- /dev/null +++ b/usr/share/man/man3/curl_global_init.3 @@ -0,0 +1,59 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_global_init.3,v 1.7 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_global_init 3 "11 May 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_global_init - Global libcurl initialisation +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLcode curl_global_init(long " flags ");" +.ad +.SH DESCRIPTION +This function sets up the program environment that libcurl needs. Think of it +as an extension of the library loader. + +This function must be called at least once within a program (a program is all +the code that shares a memory space) before the program calls any other +function in libcurl. The environment it sets up is constant for the life of +the program and is the same for every program, so multiple calls have the same +effect as one call. + +The flags option is a bit pattern that tells libcurl exactly what features to +init, as described below. Set the desired bits by ORing the values together. +In normal operation, you must specify CURL_GLOBAL_ALL. Don't use any other +value unless you are familiar with it and mean to control internal operations of +libcurl. + +\fBThis function is not thread safe.\fP You must not call it when any other +thread in the program (i.e. a thread sharing the same memory) is running. +This doesn't just mean no other thread that is using libcurl. Because +\fIcurl_global_init()\fP calls functions of other libraries that are similarly +thread unsafe, it could conflict with any other thread that uses these other +libraries. + +See the description in \fBlibcurl\fP(3) of global environment requirements for +details of how to use this function. 
+ +.SH FLAGS +.TP 5 +.B CURL_GLOBAL_ALL +Initialize everything possible. This sets all known bits. +.TP +.B CURL_GLOBAL_SSL +Initialize SSL +.TP +.B CURL_GLOBAL_WIN32 +Initialize the Win32 socket libraries. +.TP +.B CURL_GLOBAL_NOTHING +Initialise nothing extra. This sets no bit. +.SH RETURN VALUE +If this function returns non-zero, something went wrong and you cannot use the +other curl functions. +.SH "SEE ALSO" +.BR curl_global_init_mem "(3), " +.BR curl_global_cleanup "(3), " +.BR curl_easy_init "(3) " +.BR libcurl "(3) " diff --git a/usr/share/man/man3/curl_global_init_mem.3 b/usr/share/man/man3/curl_global_init_mem.3 new file mode 100755 index 000000000..57ae6aeea --- /dev/null +++ b/usr/share/man/man3/curl_global_init_mem.3 @@ -0,0 +1,42 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" +.TH curl_global_init_mem 3 "10 May 2004" "libcurl 7.12.0" "libcurl Manual" +.SH NAME +curl_global_init_mem - Global libcurl initialisation with memory callbacks +.SH SYNOPSIS +.B #include <curl/curl.h> +.nf +.B "CURLcode curl_global_init_mem(long " flags, +.B " curl_malloc_callback "m, +.B " curl_free_callback "f, +.B " curl_realloc_callback "r, +.B " curl_strdup_callback "s, +.B " curl_calloc_callback "c ");" +.SH DESCRIPTION +This function works exactly as \fIcurl_global_init(3)\fP with one addition: it +allows the application to set callbacks to replace the otherwise used internal +memory functions. + +This man page only adds documentation for the callbacks, see the +\fIcurl_global_init(3)\fP man page for all the rest. When you use this +function, all callback arguments must be set to valid function pointers. 
+ +The prototypes for the given callbacks should match these: +.IP "void *malloc_callback(size_t size);" +To replace malloc() +.IP "void free_callback(void *ptr);" +To replace free() +.IP "void *realloc_callback(void *ptr, size_t size);" +To replace realloc() +.IP "char *strdup_callback(const char *str);" +To replace strdup() +.IP "void *calloc_callback(size_t nmemb, size_t size);" +To replace calloc() +.SH "CAUTION" +Manipulating these gives considerable powers to the application to severely +screw things up for libcurl. Take care! +.SH "SEE ALSO" +.BR curl_global_init "(3), " +.BR curl_global_cleanup "(3), " + diff --git a/usr/share/man/man3/curl_mprintf.3 b/usr/share/man/man3/curl_mprintf.3 new file mode 100755 index 000000000..b893911dc --- /dev/null +++ b/usr/share/man/man3/curl_mprintf.3 @@ -0,0 +1,90 @@ +.\" $Id: curl_mprintf.3,v 1.4 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_printf 3 "30 April 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_maprintf, curl_mfprintf, curl_mprintf, curl_msnprintf, curl_msprintf, +curl_mvaprintf, curl_mvfprintf, curl_mvprintf, curl_mvsnprintf, +curl_mvsprintf - formatted output conversion +.SH SYNOPSIS +.B #include <curl/mprintf.h> +.sp +.BI "int curl_mprintf(const char *" format ", ...);" +.br +.BI "int curl_mfprintf(FILE *" fd ", const char *" format ", ...);" +.br +.BI "int curl_msprintf(char *" buffer ", const char *" format ", ...);" +.br +.BI "int curl_msnprintf(char *" buffer ", size_t " maxlength ", const char *" format ", ...);" +.br +.BI "int curl_mvprintf(const char *" format ", va_list " args ");" +.br +.BI "int curl_mvfprintf(FILE *" fd ", const char *" format ", va_list " args ");" +.br +.BI "int curl_mvsprintf(char *" buffer ", const char *" format ", va_list " args ");" +.br +.BI "int curl_mvsnprintf(char *" buffer ", size_t " maxlength ", const char *" format ", va_list " args ");" +.br +.BI "char *curl_maprintf(const char *" format ", ...);" +.br +.BI "char *curl_mvaprintf(const char *" format ", 
va_list " args ");" +.SH DESCRIPTION +These are all functions that produce output according to a format string and +given arguments. These are mostly clones of the well-known C-style functions +and there will be no detailed explanation of all available formatting rules +and usage here. + +See this table for notable exceptions. +.RS +.TP +.B curl_mprintf() +Normal printf() clone. +.TP +.B curl_mfprintf() +Normal fprintf() clone. +.TP +.B curl_msprintf() +Normal sprintf() clone. +.TP +.B curl_msnprintf() +snprintf() clone. Many systems don't have this. It is just like \fBsprintf\fP +but with an extra argument after the buffer that specifies the length of the +target buffer. +.TP +.B curl_mvprintf() +Normal vprintf() clone. +.TP +.B curl_mvfprintf() +Normal vfprintf() clone. +.TP +.B curl_mvsprintf() +Normal vsprintf() clone. +.TP +.B curl_mvsnprintf() +vsnprintf() clone. Many systems don't have this. It is just like +\fBvsprintf\fP but with an extra argument after the buffer that specifies the +length of the target buffer. +.TP +.B curl_maprintf() +Like printf() but returns the output string as a malloc()ed string. The +returned string must be free()ed by the receiver. +.TP +.B curl_mvaprintf() +Like curl_maprintf() but takes a va_list pointer argument instead of a +variable number of arguments. +.RE + +To easily use all these cloned functions instead of the normal ones, #define +_MPRINTF_REPLACE before you include the <curl/mprintf.h> file. Then all the +normal names like printf, fprintf, sprintf etc will use the curl-functions +instead. +.SH AVAILABILITY +These functions will be removed from the public libcurl API in the near +future. They will instead be made "available" by source code access only, and +then as curlx_-prefixed functions. See lib/README.curlx for further details. +.SH RETURN VALUE +The \fBcurl_maprintf\fP and \fBcurl_mvaprintf\fP functions return a pointer to +a newly allocated string, or NULL if it failed. 
+ +All other functions return the number of characters they actually outputted. +.SH "SEE ALSO" +.BR printf "(3), " sprintf "(3), " fprintf "(3), " vprintf "(3) " diff --git a/usr/share/man/man3/curl_multi_add_handle.3 b/usr/share/man/man3/curl_multi_add_handle.3 new file mode 100755 index 000000000..758ecf820 --- /dev/null +++ b/usr/share/man/man3/curl_multi_add_handle.3 @@ -0,0 +1,38 @@ +.\" $Id: curl_multi_add_handle.3,v 1.6 2008-05-24 19:19:49 bagder Exp $ +.\" +.TH curl_multi_add_handle 3 "4 March 2002" "libcurl 7.9.5" "libcurl Manual" +.SH NAME +curl_multi_add_handle - add an easy handle to a multi session +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_add_handle(CURLM *multi_handle, CURL *easy_handle); +.ad +.SH DESCRIPTION +Adds a standard easy handle to the multi stack. This function call will make +this \fImulti_handle\fP control the specified \fIeasy_handle\fP. +Furthermore, libcurl now initiates the connection associated with the +specified \fIeasy_handle\fP. + +When an easy handle has been added to a multi stack, you can not and you must +not use \fIcurl_easy_perform(3)\fP on that handle! + +If the easy handle is not set to use a shared (CURLOPT_SHARE) or global DNS +cache (CURLOPT_DNS_USE_GLOBAL_CACHE), it will be made to use the DNS cache +that is shared between all easy handles within the multi handle when +\fIcurl_multi_add_handle(3)\fP is called. + +The easy handle will remain added until you remove it again with +\fIcurl_multi_remove_handle(3)\fP. You should remove the easy handle from the +multi stack before you terminate first the easy handle and then the multi +handle: + +1 - \fIcurl_multi_remove_handle(3)\fP + +2 - \fIcurl_easy_cleanup(3)\fP + +3 - \fIcurl_multi_cleanup(3)\fP +.SH RETURN VALUE +CURLMcode type, general libcurl multi interface error code. 
+.SH "SEE ALSO" +.BR curl_multi_cleanup "(3)," curl_multi_init "(3)" diff --git a/usr/share/man/man3/curl_multi_assign.3 b/usr/share/man/man3/curl_multi_assign.3 new file mode 100755 index 000000000..877b6ddfc --- /dev/null +++ b/usr/share/man/man3/curl_multi_assign.3 @@ -0,0 +1,44 @@ +.\" $Id: curl_multi_assign.3,v 1.2 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_multi_assign 3 "9 Jul 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_assign \- set data to associate with an internal socket +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_assign(CURLM *multi_handle, curl_socket_t sockfd, + void *sockptr); +.SH DESCRIPTION +This function assigns an association in the multi handle between the given +socket and a private pointer of the application. This is (only) useful for +\fIcurl_multi_socket(3)\fP uses. + +When set, the \fIsockptr\fP pointer will be passed to all future socket +callbacks for the specific \fIsockfd\fP socket. + +If the given \fIsockfd\fP isn't already in use by libcurl, this function will +return an error. + +libcurl only keeps one single pointer associated with a socket, so calling +this function several times for the same socket will make the last set pointer +get used. + +The idea here is that this association (socket to private pointer) is +something that just about every application that uses this API will need and +then libcurl can just as well do it since it already has an internal hash +table lookup for this. +.SH "RETURN VALUE" +The standard CURLMcode for multi interface error codes. +.SH "TYPICAL USAGE" +In a typical application you allocate a struct or at least use some kind of +semi-dynamic data for each socket that we must wait for action on when using +the \fIcurl_multi_socket(3)\fP approach. 
+ +When our socket-callback gets called by libcurl and we get to know about yet +another socket to wait for, we can use \fIcurl_multi_assign(3)\fP to point out +the particular data so that when we get updates about this same socket again, +we don't have to find the struct associated with this socket by ourselves. +.SH AVAILABILITY +This function was added in libcurl 7.15.5, although not deemed stable yet. +.SH "SEE ALSO" +.BR curl_multi_setopt "(3), " curl_multi_socket "(3) " diff --git a/usr/share/man/man3/curl_multi_cleanup.3 b/usr/share/man/man3/curl_multi_cleanup.3 new file mode 100755 index 000000000..fe0d0414d --- /dev/null +++ b/usr/share/man/man3/curl_multi_cleanup.3 @@ -0,0 +1,27 @@ +.\" $Id: curl_multi_cleanup.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_multi_cleanup 3 "1 March 2002" "libcurl 7.9.5" "libcurl Manual" +.SH NAME +curl_multi_cleanup - close down a multi session +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLMcode curl_multi_cleanup( CURLM *multi_handle );" +.ad +.SH DESCRIPTION +Cleans up and removes a whole multi stack. It does not free or touch any +individual easy handles in any way - they still need to be closed +individually, using the usual \fIcurl_easy_cleanup(3)\fP way. The order of +cleaning up should be: + +1 - \fIcurl_multi_remove_handle(3)\fP before any easy handles are cleaned up + +2 - \fIcurl_easy_cleanup(3)\fP can now be called independently since the easy +handle is no longer connected to the multi handle + +3 - \fIcurl_multi_cleanup(3)\fP should be called when all easy handles are +removed +.SH RETURN VALUE +CURLMcode type, general libcurl multi interface error code. 
+.SH "SEE ALSO" +.BR curl_multi_init "(3)," curl_easy_cleanup "(3)," curl_easy_init "(3)" diff --git a/usr/share/man/man3/curl_multi_fdset.3 b/usr/share/man/man3/curl_multi_fdset.3 new file mode 100755 index 000000000..6b0a9907c --- /dev/null +++ b/usr/share/man/man3/curl_multi_fdset.3 @@ -0,0 +1,41 @@ +.\" $Id: curl_multi_fdset.3,v 1.13 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_multi_fdset 3 "2 Jan 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_fdset - extracts file descriptor information from a multi handle +.SH SYNOPSIS +.nf +#include <curl/curl.h> + +CURLMcode curl_multi_fdset(CURLM *multi_handle, + fd_set *read_fd_set, + fd_set *write_fd_set, + fd_set *exc_fd_set, + int *max_fd); +.ad +.SH DESCRIPTION +This function extracts file descriptor information from a given multi_handle. +libcurl returns its fd_set sets. The application can use these to select() on, +but be sure to FD_ZERO them before calling this function as +\fIcurl_multi_fdset(3)\fP only adds its own descriptors, it doesn't zero or +otherwise remove any others. The \fIcurl_multi_perform(3)\fP function should be +called as soon as one of them is ready to be read from or written to. + +If no file descriptors are set by libcurl, \fImax_fd\fP will contain -1 when +this function returns. Otherwise it will contain the highest descriptor number +libcurl set. + +You should also be aware that when doing select(), you should consider using a +rather small (single-digit number of seconds) timeout and call +\fIcurl_multi_perform\fP regularly - even if no activity has been seen on the +fd_sets - as otherwise libcurl-internal retries and timeouts may not work as +you'd think and want. + +Starting with libcurl 7.16.0, you should use \fBcurl_multi_timeout\fP to +figure out how long to wait for action. +.SH RETURN VALUE +CURLMcode type, general libcurl multi interface error code. 
See +\fIlibcurl-errors(3)\fP +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3)," curl_multi_init "(3), " +.BR curl_multi_timeout "(3) " diff --git a/usr/share/man/man3/curl_multi_info_read.3 b/usr/share/man/man3/curl_multi_info_read.3 new file mode 100755 index 000000000..4cfd5cac0 --- /dev/null +++ b/usr/share/man/man3/curl_multi_info_read.3 @@ -0,0 +1,57 @@ +.\" $Id: curl_multi_info_read.3,v 1.11 2009-05-07 09:31:24 bagder Exp $ +.\" +.TH curl_multi_info_read 3 "18 Dec 2004" "libcurl 7.10.3" "libcurl Manual" +.SH NAME +curl_multi_info_read - read multi stack informationals +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMsg *curl_multi_info_read( CURLM *multi_handle, + int *msgs_in_queue); +.ad +.SH DESCRIPTION +Ask the multi handle if there are any messages/informationals from the +individual transfers. Messages may include informationals such as an error +code from the transfer or just the fact that a transfer is completed. More +details on these should be written down as well. + +Repeated calls to this function will return a new struct each time, until a +NULL is returned as a signal that there is no more to get at this point. The +integer pointed to with \fImsgs_in_queue\fP will contain the number of +remaining messages after this function was called. + +When you fetch a message using this function, it is removed from the internal +queue so calling this function again will not return the same message +again. It will instead return new messages at each new invoke until the queue +is emptied. + +\fBWARNING:\fP The data the returned pointer points to will not survive +calling \fIcurl_multi_cleanup(3)\fP, \fIcurl_multi_remove_handle(3)\fP or +\fIcurl_easy_cleanup(3)\fP. + +The 'CURLMsg' struct is very simple and only contains very basic information. 
+If more involved information is wanted, the particular "easy handle" is +present in that struct and can thus be used in subsequent regular +\fIcurl_easy_getinfo(3)\fP calls (or similar): + +.nf + struct CURLMsg { + CURLMSG msg; /* what this message means */ + CURL *easy_handle; /* the handle it concerns */ + union { + void *whatever; /* message-specific data */ + CURLcode result; /* return code for transfer */ + } data; + }; +.fi +When \fBmsg\fP is \fICURLMSG_DONE\fP, the message identifies a transfer that +is done, and then \fBresult\fP contains the return code for the easy handle +that just completed. + +At this point, there are no other \fBmsg\fP types defined. +.SH "RETURN VALUE" +A pointer to a filled-in struct, or NULL if it failed or ran out of +structs. It also writes the number of messages left in the queue (after this +read) in the integer the second argument points to. +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " curl_multi_perform "(3)" diff --git a/usr/share/man/man3/curl_multi_init.3 b/usr/share/man/man3/curl_multi_init.3 new file mode 100755 index 000000000..e80fffbdd --- /dev/null +++ b/usr/share/man/man3/curl_multi_init.3 @@ -0,0 +1,21 @@ +.\" $Id: curl_multi_init.3,v 1.5 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_multi_init 3 "1 March 2002" "libcurl 7.9.5" "libcurl Manual" +.SH NAME +curl_multi_init - create a multi handle +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLM *curl_multi_init( );" +.ad +.SH DESCRIPTION +This function returns a CURLM handle to be used as input to all the other +multi-functions, sometimes referred to as a multi handle in some places in the +documentation. This init call MUST have a corresponding call to +\fIcurl_multi_cleanup(3)\fP when the operation is complete. +.SH RETURN VALUE +If this function returns NULL, something went wrong and you cannot use the +other curl functions.
+.SH "SEE ALSO" +.BR curl_multi_cleanup "(3)," curl_global_init "(3)," curl_easy_init "(3)" + diff --git a/usr/share/man/man3/curl_multi_perform.3 b/usr/share/man/man3/curl_multi_perform.3 new file mode 100755 index 000000000..d5cb68f04 --- /dev/null +++ b/usr/share/man/man3/curl_multi_perform.3 @@ -0,0 +1,53 @@ +.\" $Id: curl_multi_perform.3,v 1.10 2009-09-02 14:57:05 bagder Exp $ +.\" +.TH curl_multi_perform 3 "1 March 2002" "libcurl 7.9.5" "libcurl Manual" +.SH NAME +curl_multi_perform - reads/writes available data from each easy handle +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_perform(CURLM *multi_handle, int *running_handles); +.ad +.SH DESCRIPTION +When the app thinks there's data available for the multi_handle, it should +call this function to read/write whatever there is to read or write right +now. curl_multi_perform() returns as soon as the reads/writes are done. This +function does not require that there actually is any data available for +reading or that data can be written, it can be called just in case. It will +write the number of handles that still transfer data in the second argument's +integer-pointer. + +When you call curl_multi_perform() and the amount of \fIrunning_handles\fP is +changed from the previous call (or is less than the amount of easy handles +you've added to the multi handle), you know that there is one or more +transfers less "running". You can then call \fIcurl_multi_info_read(3)\fP to +get information about each individual completed transfer, and that returned +info includes CURLcode and more. + +When \fIrunning_handles\fP is set to zero (0) on the return of this function, +there are no longer any transfers in progress. +.SH "RETURN VALUE" +CURLMcode type, general libcurl multi interface error code. + +If you receive \fICURLM_CALL_MULTI_PERFORM\fP, this basically means that you +should call \fIcurl_multi_perform\fP again, before you select() on more +actions.
You don't have to do it immediately, but the return code means that +libcurl may have more data available to return or that there may be more data +to send off before it is "satisfied". Do note that \fIcurl_multi_perform(3)\fP +will return \fICURLM_CALL_MULTI_PERFORM\fP only when it wants to be called +again \fBimmediately\fP. When things are fine and there is nothing immediate +it wants done, it'll return \fICURLM_OK\fP and you need to wait for \&"action" +and then call this function again. + +NOTE that this only returns errors etc regarding the whole multi stack. Problems +still might have occurred on individual transfers even when this +function returns \fICURLM_OK\fP. +.SH "TYPICAL USAGE" +Most applications will use \fIcurl_multi_fdset(3)\fP to get the multi_handle's +file descriptors, then it'll wait for action on them using \fBselect(3)\fP and +as soon as one or more of them are ready, \fIcurl_multi_perform(3)\fP gets +called. +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " +.BR curl_multi_fdset "(3), " curl_multi_info_read "(3), " +.BR libcurl-errors "(3)" diff --git a/usr/share/man/man3/curl_multi_remove_handle.3 b/usr/share/man/man3/curl_multi_remove_handle.3 new file mode 100755 index 000000000..7fb4761c8 --- /dev/null +++ b/usr/share/man/man3/curl_multi_remove_handle.3 @@ -0,0 +1,24 @@ +.\" $Id: curl_multi_remove_handle.3,v 1.5 2009-04-07 20:51:01 bagder Exp $ +.\" +.TH curl_multi_remove_handle 3 "6 March 2002" "libcurl 7.9.5" "libcurl Manual" +.SH NAME +curl_multi_remove_handle - remove an easy handle from a multi session +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_remove_handle(CURLM *multi_handle, CURL *easy_handle); +.ad +.SH DESCRIPTION +Removes a given easy_handle from the multi_handle. This will make the +specified easy handle be removed from this multi handle's control. 
+ +When the easy handle has been removed from a multi stack, it is again +perfectly legal to invoke \fIcurl_easy_perform()\fP on this easy handle. + +Removing an easy handle while being used, will effectively halt the transfer +in progress involving that easy handle. All other easy handles and transfers +will remain unaffected. +.SH RETURN VALUE +CURLMcode type, general libcurl multi interface error code. +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3)," curl_multi_init "(3)" diff --git a/usr/share/man/man3/curl_multi_setopt.3 b/usr/share/man/man3/curl_multi_setopt.3 new file mode 100755 index 000000000..74f03a3e4 --- /dev/null +++ b/usr/share/man/man3/curl_multi_setopt.3 @@ -0,0 +1,84 @@ +.\" $Id: curl_multi_setopt.3,v 1.10 2009-09-11 20:19:21 bagder Exp $ +.\" +.TH curl_multi_setopt 3 "10 Oct 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_setopt \- set options for a curl multi handle +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_setopt(CURLM * multi_handle, CURLMoption option, param); +.SH DESCRIPTION +curl_multi_setopt() is used to tell a libcurl multi handle how to behave. By +using the appropriate options to \fIcurl_multi_setopt(3)\fP, you can change +libcurl's behaviour when using that multi handle. All options are set with +the \fIoption\fP followed by the parameter \fIparam\fP. That parameter can be +a \fBlong\fP, a \fBfunction pointer\fP, an \fBobject pointer\fP or a +\fBcurl_off_t\fP type, depending on what the specific option expects. Read +this manual carefully as bad input values may cause libcurl to behave badly! +You can only set one option in each function call. + +.SH OPTIONS +.IP CURLMOPT_SOCKETFUNCTION +Pass a pointer to a function matching the \fBcurl_socket_callback\fP +prototype. The \fIcurl_multi_socket_action(3)\fP function informs the +application about updates in the socket (file descriptor) status by doing +none, one, or multiple calls to the curl_socket_callback given in the +\fBparam\fP argument. 
They update the status with changes since the previous +time a \fIcurl_multi_socket(3)\fP function was called. If the given callback +pointer is NULL, no callback will be called. Set the callback's \fBuserp\fP +argument with \fICURLMOPT_SOCKETDATA\fP. See \fIcurl_multi_socket(3)\fP for +more callback details. +.IP CURLMOPT_SOCKETDATA +Pass a pointer to whatever you want passed to the \fBcurl_socket_callback\fP's +fourth argument, the userp pointer. This is not used by libcurl but only +passed-thru as-is. Set the callback pointer with +\fICURLMOPT_SOCKETFUNCTION\fP. +.IP CURLMOPT_PIPELINING +Pass a long set to 1 to enable or 0 to disable. Enabling pipelining on a multi +handle will make it attempt to perform HTTP Pipelining as far as possible for +transfers using this handle. This means that if you add a second request that +can use an already existing connection, the second request will be \&"piped" +on the same connection rather than being executed in parallel. (Added in +7.16.0) +.IP CURLMOPT_TIMERFUNCTION +Pass a pointer to a function matching the \fBcurl_multi_timer_callback\fP +prototype. This function will then be called when the timeout value +changes. The timeout value is at what latest time the application should call +one of the \&"performing" functions of the multi interface +(\fIcurl_multi_socket_action(3)\fP and \fIcurl_multi_perform(3)\fP) - to allow +libcurl to keep timeouts and retries etc to work. A timeout value of -1 means +that there is no timeout at all, and 0 means that the timeout is already +reached. Libcurl attempts to limit calling this only when the fixed future +timeout time actually changes. See also \fICURLMOPT_TIMERDATA\fP. This +callback can be used instead of, or in addition to, +\fIcurl_multi_timeout(3)\fP. (Added in 7.16.0) +.IP CURLMOPT_TIMERDATA +Pass a pointer to whatever you want passed to the +\fBcurl_multi_timer_callback\fP's third argument, the userp pointer. This is +not used by libcurl but only passed-thru as-is.
Set the callback pointer with +\fICURLMOPT_TIMERFUNCTION\fP. (Added in 7.16.0) +.IP CURLMOPT_MAXCONNECTS +Pass a long. The set number will be used as the maximum amount of +simultaneously open connections that libcurl may cache. Default is 10, and +libcurl will enlarge the size for each added easy handle to make it fit 4 +times the number of added easy handles. + +By setting this option, you can prevent the cache size from growing beyond the +limit set by you. + +When the cache is full, curl closes the oldest one in the cache to prevent the +number of open connections from increasing. + +This option is for the multi handle's use only, when using the easy interface +you should instead use the \fICURLOPT_MAXCONNECTS\fP option. + +(Added in 7.16.3) +.SH RETURNS +The standard CURLMcode for multi interface error codes. Note that it returns a +CURLM_UNKNOWN_OPTION if you try setting an option that this version of libcurl +doesn't know of. +.SH AVAILABILITY +This function was added in libcurl 7.15.4. +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " +.BR curl_multi_socket "(3), " curl_multi_info_read "(3)" diff --git a/usr/share/man/man3/curl_multi_socket.3 b/usr/share/man/man3/curl_multi_socket.3 new file mode 100755 index 000000000..4f96c7caa --- /dev/null +++ b/usr/share/man/man3/curl_multi_socket.3 @@ -0,0 +1,139 @@ +.\" $Id: curl_multi_socket.3,v 1.16 2009-05-11 20:32:51 bagder Exp $ +.\" +.TH curl_multi_socket 3 "9 Jul 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_socket \- reads/writes available data +.SH SYNOPSIS +.nf +#include <curl/curl.h> +CURLMcode curl_multi_socket(CURLM * multi_handle, curl_socket_t sockfd, + int *running_handles); + +CURLMcode curl_multi_socket_all(CURLM *multi_handle, + int *running_handles); +.fi +.SH DESCRIPTION +These functions are deprecated. Do not use! See +\fIcurl_multi_socket_action(3)\fP instead! 
+ +At return, the integer \fBrunning_handles\fP points to will contain the number +of still running easy handles within the multi handle. When this number +reaches zero, all transfers are complete/done. Note that when you call +\fIcurl_multi_socket_action(3)\fP on a specific socket and the counter +decreases by one, it DOES NOT necessarily mean that this exact socket/transfer +is the one that completed. Use \fIcurl_multi_info_read(3)\fP to figure out +which easy handle that completed. + +The \fBcurl_multi_socket_action(3)\fP functions inform the application about +updates in the socket (file descriptor) status by doing none, one, or multiple +calls to the socket callback function set with the CURLMOPT_SOCKETFUNCTION +option to \fIcurl_multi_setopt(3)\fP. They update the status with changes +since the previous time the callback was called. + +Get the timeout time by setting the \fICURLMOPT_TIMERFUNCTION\fP option with +\fIcurl_multi_setopt(3)\fP. Your application will then get called with +information on how long to wait for socket actions at most before doing the +timeout action: call the \fBcurl_multi_socket_action(3)\fP function with the +\fBsockfd\fP argument set to CURL_SOCKET_TIMEOUT. You can also use the +\fIcurl_multi_timeout(3)\fP function to poll the value at any given time, but +for an event-based system using the callback is far better than relying on +polling the timeout value. + +Usage of \fIcurl_multi_socket(3)\fP is deprecated, whereas the function is +equivalent to \fIcurl_multi_socket_action(3)\fP with \fBev_bitmask\fP set to +0. + +Force libcurl to (re-)check all its internal sockets and transfers instead of +just a single one by calling \fBcurl_multi_socket_all(3)\fP. Note that there +should not be any reason to use this function! 
+.SH "CALLBACK DETAILS" + +The socket \fBcallback\fP function uses a prototype like this +.nf + + int curl_socket_callback(CURL *easy, /* easy handle */ + curl_socket_t s, /* socket */ + int action, /* see values below */ + void *userp, /* private callback pointer */ + void *socketp); /* private socket pointer */ + +.fi +The callback MUST return 0. + +The \fIeasy\fP argument is a pointer to the easy handle that deals with this +particular socket. Note that a single handle may work with several sockets +simultaneously. + +The \fIs\fP argument is the actual socket value as you use it within your +system. + +The \fIaction\fP argument to the callback has one of five values: +.RS +.IP "CURL_POLL_NONE (0)" +register, not interested in readiness (yet) +.IP "CURL_POLL_IN (1)" +register, interested in read readiness +.IP "CURL_POLL_OUT (2)" +register, interested in write readiness +.IP "CURL_POLL_INOUT (3)" +register, interested in both read and write readiness +.IP "CURL_POLL_REMOVE (4)" +unregister +.RE + +The \fIsocketp\fP argument is a private pointer you have previously set with +\fIcurl_multi_assign(3)\fP to be associated with the \fIs\fP socket. If no +pointer has been set, socketp will be NULL. This argument is of course a +service to applications that want to keep certain data or structs that are +strictly associated to the given socket. + +The \fIuserp\fP argument is a private pointer you have previously set with +\fIcurl_multi_setopt(3)\fP and the CURLMOPT_SOCKETDATA option. +.SH "RETURN VALUE" +CURLMcode type, general libcurl multi interface error code. + +Legacy: If you receive \fICURLM_CALL_MULTI_PERFORM\fP, this basically means +that you should call \fIcurl_multi_socket(3)\fP again, before you wait for +more actions on libcurl's sockets. You don't have to do it immediately, but +the return code means that libcurl may have more data available to return or +that there may be more data to send off before it is "satisfied". 
+In modern libcurls, \fICURLM_CALL_MULTI_PERFORM\fP or +\fICURLM_CALL_MULTI_SOCKET\fP should not be returned and no application needs +to care about them. + +NOTE that the return code is for the whole multi stack. Problems still might have +occurred on individual transfers even when one of these functions +returns OK. +.SH "TYPICAL USAGE" +1. Create a multi handle + +2. Set the socket callback with CURLMOPT_SOCKETFUNCTION + +3. Set the timeout callback with CURLMOPT_TIMERFUNCTION, to get to know what +timeout value to use when waiting for socket activities. + +4. Add easy handles with curl_multi_add_handle() + +5. Provide some means to manage the sockets libcurl is using, so you can check +them for activity. This can be done through your application code, or by way +of an external library such as libevent or glib. + +6. Wait for activity on any of libcurl's sockets, use the timeout value your +callback has been told + +7. When activity is detected, call curl_multi_socket_action() for the +socket(s) that got action. If no activity is detected and the timeout expires, +call \fIcurl_multi_socket_action(3)\fP with \fICURL_SOCKET_TIMEOUT\fP + +8. Go back to step 6. +.SH AVAILABILITY +This function was added in libcurl 7.15.4, and is deemed stable since +7.16.0. + +\fIcurl_multi_socket(3)\fP is deprecated, use +\fIcurl_multi_socket_action(3)\fP instead!
+.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " +.BR curl_multi_fdset "(3), " curl_multi_info_read "(3), " +.BR "the hiperfifo.c example" diff --git a/usr/share/man/man3/curl_multi_socket_action.3 b/usr/share/man/man3/curl_multi_socket_action.3 new file mode 100755 index 000000000..4f945bca7 --- /dev/null +++ b/usr/share/man/man3/curl_multi_socket_action.3 @@ -0,0 +1,132 @@ +.\" $Id: curl_multi_socket_action.3,v 1.1 2009-05-11 20:32:51 bagder Exp $ +.\" +.TH curl_multi_socket_action 3 "9 Jul 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_socket_action \- reads/writes available data given an action +.SH SYNOPSIS +.nf +#include <curl/curl.h> + +CURLMcode curl_multi_socket_action(CURLM * multi_handle, + curl_socket_t sockfd, int ev_bitmask, + int *running_handles); +.fi +.SH DESCRIPTION +When the application has detected action on a socket handled by libcurl, it +should call \fIcurl_multi_socket_action(3)\fP with the \fBsockfd\fP argument +set to the socket with the action. When the events on a socket are known, they +can be passed as an events bitmask \fBev_bitmask\fP by first setting +\fBev_bitmask\fP to 0, and then adding using bitwise OR (|) any combination of +events to be chosen from CURL_CSELECT_IN, CURL_CSELECT_OUT or +CURL_CSELECT_ERR. When the events on a socket are unknown, pass 0 instead, and +libcurl will test the descriptor internally. + +At return, the integer \fBrunning_handles\fP points to will contain the number +of still running easy handles within the multi handle. When this number +reaches zero, all transfers are complete/done. Note that when you call +\fIcurl_multi_socket_action(3)\fP on a specific socket and the counter +decreases by one, it DOES NOT necessarily mean that this exact socket/transfer +is the one that completed. Use \fIcurl_multi_info_read(3)\fP to figure out +which easy handle that completed. 
+ +The \fBcurl_multi_socket_action(3)\fP functions inform the application about +updates in the socket (file descriptor) status by doing none, one, or multiple +calls to the socket callback function set with the CURLMOPT_SOCKETFUNCTION +option to \fIcurl_multi_setopt(3)\fP. They update the status with changes +since the previous time the callback was called. + +Get the timeout time by setting the \fICURLMOPT_TIMERFUNCTION\fP option with +\fIcurl_multi_setopt(3)\fP. Your application will then get called with +information on how long to wait for socket actions at most before doing the +timeout action: call the \fBcurl_multi_socket_action(3)\fP function with the +\fBsockfd\fP argument set to CURL_SOCKET_TIMEOUT. You can also use the +\fIcurl_multi_timeout(3)\fP function to poll the value at any given time, but +for an event-based system using the callback is far better than relying on +polling the timeout value. +.SH "CALLBACK DETAILS" + +The socket \fBcallback\fP function uses a prototype like this +.nf + + int curl_socket_callback(CURL *easy, /* easy handle */ + curl_socket_t s, /* socket */ + int action, /* see values below */ + void *userp, /* private callback pointer */ + void *socketp); /* private socket pointer */ + +.fi +The callback MUST return 0. + +The \fIeasy\fP argument is a pointer to the easy handle that deals with this +particular socket. Note that a single handle may work with several sockets +simultaneously. + +The \fIs\fP argument is the actual socket value as you use it within your +system. 
+The \fIaction\fP argument to the callback has one of five values: +.RS +.IP "CURL_POLL_NONE (0)" +register, not interested in readiness (yet) +.IP "CURL_POLL_IN (1)" +register, interested in read readiness +.IP "CURL_POLL_OUT (2)" +register, interested in write readiness +.IP "CURL_POLL_INOUT (3)" +register, interested in both read and write readiness +.IP "CURL_POLL_REMOVE (4)" +unregister +.RE + +The \fIsocketp\fP argument is a private pointer you have previously set with +\fIcurl_multi_assign(3)\fP to be associated with the \fIs\fP socket. If no +pointer has been set, socketp will be NULL. This argument is of course a +service to applications that want to keep certain data or structs that are +strictly associated to the given socket. + +The \fIuserp\fP argument is a private pointer you have previously set with +\fIcurl_multi_setopt(3)\fP and the CURLMOPT_SOCKETDATA option. +.SH "RETURN VALUE" +CURLMcode type, general libcurl multi interface error code. + +Legacy: If you receive \fICURLM_CALL_MULTI_PERFORM\fP, this basically means +that you should call \fIcurl_multi_socket_action(3)\fP again, before you wait +for more actions on libcurl's sockets. You don't have to do it immediately, +but the return code means that libcurl may have more data available to return +or that there may be more data to send off before it is "satisfied". + +In modern libcurls, \fICURLM_CALL_MULTI_PERFORM\fP or +\fICURLM_CALL_MULTI_SOCKET\fP should not be returned and no application needs +to care about them. + +NOTE that the return code is for the whole multi stack. Problems still might have +occurred on individual transfers even when one of these functions +returns OK. +.SH "TYPICAL USAGE" +1. Create a multi handle + +2. Set the socket callback with CURLMOPT_SOCKETFUNCTION + +3. Set the timeout callback with CURLMOPT_TIMERFUNCTION, to get to know what +timeout value to use when waiting for socket activities. + +4. Add easy handles with curl_multi_add_handle() + +5.
Provide some means to manage the sockets libcurl is using, so you can check +them for activity. This can be done through your application code, or by way +of an external library such as libevent or glib. + +6. Wait for activity on any of libcurl's sockets, use the timeout value your +callback has been told + +7. When activity is detected, call curl_multi_socket_action() for the +socket(s) that got action. If no activity is detected and the timeout expires, +call \fIcurl_multi_socket_action(3)\fP with \fICURL_SOCKET_TIMEOUT\fP + +8. Go back to step 6. +.SH AVAILABILITY +This function was added in libcurl 7.15.4, and is deemed stable since 7.16.0. +.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " +.BR curl_multi_fdset "(3), " curl_multi_info_read "(3), " +.BR "the hiperfifo.c example" diff --git a/usr/share/man/man3/curl_multi_strerror.3 b/usr/share/man/man3/curl_multi_strerror.3 new file mode 100755 index 000000000..29a5e3895 --- /dev/null +++ b/usr/share/man/man3/curl_multi_strerror.3 @@ -0,0 +1,20 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_multi_strerror.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_multi_strerror 3 "26 Apr 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_multi_strerror - return string describing error code +.SH SYNOPSIS +.nf +.B #include <curl/curl.h> +.BI "const char *curl_multi_strerror(CURLMcode " errornum ");" +.SH DESCRIPTION +The curl_multi_strerror() function returns a string describing the CURLMcode +error code passed in the argument \fIerrornum\fP. +.SH AVAILABILITY +This function was added in libcurl 7.12.0 +.SH RETURN VALUE +A pointer to a zero terminated string.
+.SH "SEE ALSO" +.BR libcurl-errors "(3), " curl_easy_strerror "(3), " curl_share_strerror "(3)" diff --git a/usr/share/man/man3/curl_multi_timeout.3 b/usr/share/man/man3/curl_multi_timeout.3 new file mode 100755 index 000000000..50224e65b --- /dev/null +++ b/usr/share/man/man3/curl_multi_timeout.3 @@ -0,0 +1,41 @@ +.\" $Id: curl_multi_timeout.3,v 1.6 2008-08-06 21:22:07 bagder Exp $ +.\" +.TH curl_multi_timeout 3 "2 Jan 2006" "libcurl 7.16.0" "libcurl Manual" +.SH NAME +curl_multi_timeout \- how long to wait for action before proceeding +.SH SYNOPSIS +#include <curl/curl.h> + +CURLMcode curl_multi_timeout(CURLM *multi_handle, long *timeout); +.SH DESCRIPTION + +An application using the libcurl multi interface should call +\fBcurl_multi_timeout(3)\fP to figure out how long it should wait for socket +actions \- at most \- before proceeding. + +Proceeding means either doing the socket-style timeout action: call the +\fBcurl_multi_socket_action(3)\fP function with the \fBsockfd\fP argument set +to CURL_SOCKET_TIMEOUT, or call \fBcurl_multi_perform(3)\fP if you're using +the simpler and older multi interface approach. + +The timeout value returned in the long \fBtimeout\fP points to, is in number +of milliseconds at this very moment. If 0, it means you should proceed +immediately without waiting for anything. If it returns -1, there's no timeout +at all set. + +Note: if libcurl returns a -1 timeout here, it just means that libcurl +currently has no stored timeout value. You must not wait too long (more than a +few seconds perhaps) before you call curl_multi_perform() again. +.SH "RETURN VALUE" +The standard CURLMcode for multi interface error codes. +.SH "TYPICAL USAGE" +Call \fBcurl_multi_timeout(3)\fP, then wait for action on the sockets. You +figure out which sockets to wait for by calling \fBcurl_multi_fdset(3)\fP or +by a previous call to \fBcurl_multi_socket(3)\fP. +.SH AVAILABILITY +This function was added in libcurl 7.15.4. 
+.SH "SEE ALSO" +.BR curl_multi_cleanup "(3), " curl_multi_init "(3), " +.BR curl_multi_fdset "(3), " curl_multi_info_read "(3), " +.BR curl_multi_socket "(3) " + diff --git a/usr/share/man/man3/curl_share_cleanup.3 b/usr/share/man/man3/curl_share_cleanup.3 new file mode 100755 index 000000000..d7cba0bf5 --- /dev/null +++ b/usr/share/man/man3/curl_share_cleanup.3 @@ -0,0 +1,21 @@ +.\" $Id: curl_share_cleanup.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_share_cleanup 3 "8 Aug 2003" "libcurl 7.10.7" "libcurl Manual" +.SH NAME +curl_share_cleanup - Clean up a shared object +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLSHcode curl_share_cleanup(CURLSH *" share_handle ");" +.ad +.SH DESCRIPTION +This function deletes a shared object. The share handle cannot be used anymore +when this function has been called. + +.SH RETURN VALUE +CURLSHE_OK (zero) means that the option was set properly, non-zero means an +error occurred as \fI<curl/curl.h>\fP defines. See the \fIlibcurl-errors.3\fP +man page for the full list with descriptions. If an error occurs, then the +share object will not be deleted. +.SH "SEE ALSO" +.BR curl_share_init "(3), " curl_share_setopt "(3)" diff --git a/usr/share/man/man3/curl_share_init.3 b/usr/share/man/man3/curl_share_init.3 new file mode 100755 index 000000000..07f37dbf1 --- /dev/null +++ b/usr/share/man/man3/curl_share_init.3 @@ -0,0 +1,25 @@ +.\" $Id: curl_share_init.3,v 1.5 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_share_init 3 "8 Aug 2003" "libcurl 7.10.7" "libcurl Manual" +.SH NAME +curl_share_init - Create a shared object +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "CURLSH *curl_share_init( );" +.ad +.SH DESCRIPTION +This function returns a CURLSH handle to be used as input to all the other +share-functions, sometimes referred to as a share handle in some places in the +documentation. This init call MUST have a corresponding call to +\fIcurl_share_cleanup\fP when all operations using the share are complete. 
+ +This \fIshare handle\fP is what you pass to curl using the \fICURLOPT_SHARE\fP +option with \fIcurl_easy_setopt(3)\fP, to make that specific curl handle use +the data in this share. +.SH RETURN VALUE +If this function returns NULL, something went wrong (out of memory, etc.) +and therefore the share object was not created. +.SH "SEE ALSO" +.BR curl_share_cleanup "(3), " curl_share_setopt "(3)" + diff --git a/usr/share/man/man3/curl_share_setopt.3 b/usr/share/man/man3/curl_share_setopt.3 new file mode 100755 index 000000000..277044f49 --- /dev/null +++ b/usr/share/man/man3/curl_share_setopt.3 @@ -0,0 +1,61 @@ +.\" $Id: curl_share_setopt.3,v 1.5 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_share_setopt 3 "8 Aug 2003" "libcurl 7.10.7" "libcurl Manual" +.SH NAME +curl_share_setopt - Set options for a shared object +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +CURLSHcode curl_share_setopt(CURLSH *share, CURLSHoption option, parameter); +.ad +.SH DESCRIPTION +Set the \fIoption\fP to \fIparameter\fP for the given \fIshare\fP. +.SH OPTIONS +.IP CURLSHOPT_LOCKFUNC +The \fIparameter\fP must be a pointer to a function matching the following +prototype: + +void lock_function(CURL *handle, curl_lock_data data, curl_lock_access access, +void *userptr); + +\fIdata\fP defines what data libcurl wants to lock, and you must make sure that +only one lock is given at any time for each kind of data. + +\fIaccess\fP defines what access type libcurl wants, shared or single. + +\fIuserptr\fP is the pointer you set with \fICURLSHOPT_USERDATA\fP. +.IP CURLSHOPT_UNLOCKFUNC +The \fIparameter\fP must be a pointer to a function matching the following +prototype: + +void unlock_function(CURL *handle, curl_lock_data data, void *userptr); + +\fIdata\fP defines what data libcurl wants to unlock, and you must make sure +that only one lock is given at any time for each kind of data. + +\fIuserptr\fP is the pointer you set with \fICURLSHOPT_USERDATA\fP. 
+.IP CURLSHOPT_SHARE +The \fIparameter\fP specifies a type of data that should be shared. This may +be set to one of the values described below. +.RS +.IP CURL_LOCK_DATA_COOKIE +Cookie data will be shared across the easy handles using this shared object. +.IP CURL_LOCK_DATA_DNS +Cached DNS hosts will be shared across the easy handles using this shared +object. Note that when you use the multi interface, all easy handles added to +the same multi handle will share DNS cache by default without this having to +be used! +.RE +.IP CURLSHOPT_UNSHARE +This option does the opposite of \fICURLSHOPT_SHARE\fP. It specifies that +the specified \fIparameter\fP will no longer be shared. Valid values are +the same as those for \fICURLSHOPT_SHARE\fP. +.IP CURLSHOPT_USERDATA +The \fIparameter\fP allows you to specify a pointer to data that will be passed +to the lock_function and unlock_function each time it is called. +.SH RETURN VALUE +CURLSHE_OK (zero) means that the option was set properly, non-zero means an +error occurred as \fI<curl/curl.h>\fP defines. See the \fIlibcurl-errors.3\fP +man page for the full list with descriptions. +.SH "SEE ALSO" +.BR curl_share_cleanup "(3), " curl_share_init "(3)" diff --git a/usr/share/man/man3/curl_share_strerror.3 b/usr/share/man/man3/curl_share_strerror.3 new file mode 100755 index 000000000..8607bedef --- /dev/null +++ b/usr/share/man/man3/curl_share_strerror.3 @@ -0,0 +1,20 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_share_strerror.3,v 1.4 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_share_strerror 3 "26 Apr 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_share_strerror - return string describing error code +.SH SYNOPSIS +.nf +.B #include <curl/curl.h> +.BI "const char *curl_share_strerror(CURLSHcode " errornum ");" +.SH DESCRIPTION +The curl_share_strerror() function returns a string describing the CURLSHcode +error code passed in the argument \fIerrornum\fP. 
+.SH AVAILABILITY +This function was added in libcurl 7.12.0 +.SH RETURN VALUE +A pointer to a zero terminated string. +.SH "SEE ALSO" +.BR libcurl-errors "(3), " curl_multi_strerror "(3), " curl_easy_strerror "(3)" diff --git a/usr/share/man/man3/curl_slist_append.3 b/usr/share/man/man3/curl_slist_append.3 new file mode 100755 index 000000000..885fbcb30 --- /dev/null +++ b/usr/share/man/man3/curl_slist_append.3 @@ -0,0 +1,39 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_slist_append.3,v 1.5 2007-01-09 18:58:16 bagder Exp $ +.\" +.TH curl_slist_append 3 "19 Jun 2003" "libcurl 7.10.4" "libcurl Manual" +.SH NAME +curl_slist_append - add a string to an slist +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "struct curl_slist *curl_slist_append(struct curl_slist *" list, +.BI "const char * "string ");" +.ad +.SH DESCRIPTION +curl_slist_append() appends a specified string to a linked list of +strings. The existing \fIlist\fP should be passed as the first argument while +the new list is returned from this function. The specified \fIstring\fP has +been appended when this function returns. curl_slist_append() copies the +string. + +The list should be freed again (after usage) with +\fBcurl_slist_free_all(3)\fP. +.SH RETURN VALUE +A null pointer is returned if anything went wrong, otherwise the new list +pointer is returned. 
+.SH EXAMPLE +.nf + CURL *handle; + struct curl_slist *slist=NULL; + + slist = curl_slist_append(slist, "pragma:"); + curl_easy_setopt(handle, CURLOPT_HTTPHEADER, slist); + + curl_easy_perform(handle); + + curl_slist_free_all(slist); /* free the list again */ +.fi +.SH "SEE ALSO" +.BR curl_slist_free_all "(3), " diff --git a/usr/share/man/man3/curl_slist_free_all.3 b/usr/share/man/man3/curl_slist_free_all.3 new file mode 100755 index 000000000..951141925 --- /dev/null +++ b/usr/share/man/man3/curl_slist_free_all.3 @@ -0,0 +1,20 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_slist_free_all.3,v 1.3 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_slist_free_all 3 "5 March 2001" "libcurl 7.0" "libcurl Manual" +.SH NAME +curl_slist_free_all - free an entire curl_slist list +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "void curl_slist_free_all(struct curl_slist *" list); +.ad +.SH DESCRIPTION +curl_slist_free_all() removes all traces of a previously built curl_slist +linked list. +.SH RETURN VALUE +Nothing. +.SH "SEE ALSO" +.BR curl_slist_append "(3), " + diff --git a/usr/share/man/man3/curl_strequal.3 b/usr/share/man/man3/curl_strequal.3 new file mode 100755 index 000000000..e816ab5ea --- /dev/null +++ b/usr/share/man/man3/curl_strequal.3 @@ -0,0 +1,32 @@ +.\" $Id: curl_strequal.3,v 1.3 2008-12-28 21:56:56 bagder Exp $ +.\" +.TH curl_strequal 3 "30 April 2004" "libcurl 7.12" "libcurl Manual" +.SH NAME +curl_strequal, curl_strnequal - case insensitive string comparisons +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "int curl_strequal(char *" str1 ", char *" str2 ");" +.sp +.BI "int curl_strnequal(char *" str1 ", char *" str2 ", size_t " len ");" +.SH DESCRIPTION +The +.B curl_strequal() +function compares the two strings \fIstr1\fP and \fIstr2\fP, ignoring the case +of the characters. It returns a non-zero (TRUE) integer if the strings are +identical.
+.sp +The \fBcurl_strnequal()\fP function is similar, except it only compares the +first \fIlen\fP characters of \fIstr1\fP. +.sp +These functions are provided by libcurl to enable applications to compare +strings in a truly portable manner. There are no standard portable case +insensitive string comparison functions. These two work on all platforms. +.SH AVAILABILITY +These functions will be removed from the public libcurl API in the near +future. They will instead be made "available" by source code access only, and +then as curlx_strequal() and curlx_strnequal(). +.SH RETURN VALUE +Non-zero if the strings are identical. Zero if they're not. +.SH "SEE ALSO" +.BR strcmp "(3), " strcasecmp "(3)" diff --git a/usr/share/man/man3/curl_unescape.3 b/usr/share/man/man3/curl_unescape.3 new file mode 100755 index 000000000..b4e7283ca --- /dev/null +++ b/usr/share/man/man3/curl_unescape.3 @@ -0,0 +1,31 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_unescape.3,v 1.7 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_unescape 3 "22 March 2001" "libcurl 7.7" "libcurl Manual" +.SH NAME +curl_unescape - URL decodes the given string +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_unescape( char *" url ", int "length " );" +.ad +.SH DESCRIPTION +Obsolete function. Use \fIcurl_easy_unescape(3)\fP instead! + +This function will convert the given URL encoded input string to a "plain +string" and return that as a newly allocated string. All input characters that +are URL encoded (%XX where XX is a two-digit hexadecimal number) will be +converted to their plain text versions. + +If the 'length' argument is set to 0, curl_unescape() will use strlen() on the +input 'url' string to find out the size. + +You must curl_free() the returned string when you're done with it. +.SH AVAILABILITY +Since 7.15.4, \fIcurl_easy_unescape(3)\fP should be used. This function will +be removed in a future release.
+.SH RETURN VALUE +A pointer to a zero terminated string or NULL if it failed. +.SH "SEE ALSO" +.I curl_easy_escape(3), curl_easy_unescape(3), curl_free(3), RFC 2396 diff --git a/usr/share/man/man3/curl_version.3 b/usr/share/man/man3/curl_version.3 new file mode 100755 index 000000000..576e38b5d --- /dev/null +++ b/usr/share/man/man3/curl_version.3 @@ -0,0 +1,19 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: curl_version.3,v 1.3 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH curl_version 3 "5 March 2001" "libcurl 7.0" "libcurl Manual" +.SH NAME +curl_version - returns the libcurl version string +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "char *curl_version( );" +.ad +.SH DESCRIPTION +Returns a human readable string with the version number of libcurl and some of +its important components (like OpenSSL version). +.SH RETURN VALUE +A pointer to a zero terminated string. +.SH "SEE ALSO" +.BR curl_version_info "(3)" diff --git a/usr/share/man/man3/curl_version_info.3 b/usr/share/man/man3/curl_version_info.3 new file mode 100755 index 000000000..5de5a5e6b --- /dev/null +++ b/usr/share/man/man3/curl_version_info.3 @@ -0,0 +1,150 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. 
+.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: curl_version_info.3,v 1.13 2009-06-10 02:49:43 yangtse Exp $ +.\" ************************************************************************** +.\" +.TH curl_version_info 3 "10 June 2009" "libcurl 7.19.6" "libcurl Manual" +.SH NAME +curl_version_info - returns run-time libcurl version info +.SH SYNOPSIS +.B #include <curl/curl.h> +.sp +.BI "curl_version_info_data *curl_version_info( CURLversion "type ");" +.ad +.SH DESCRIPTION +Returns a pointer to a filled in struct with information about various +run-time features in libcurl. \fItype\fP should be set to the version of this +functionality by the time you write your program. This way, libcurl will +always return a proper struct that your program understands, while programs in +the future might get a different struct. CURLVERSION_NOW will be the most +recent one for the library you have installed: + + data = curl_version_info(CURLVERSION_NOW); + +Applications should use this information to judge if things are possible to do +or not, instead of using compile-time checks, as dynamic/DLL libraries can be +changed independent of applications. 
+ +The curl_version_info_data struct looks like this + +.nf +typedef struct { + CURLversion age; /* see description below */ + + /* when 'age' is 0 or higher, the members below also exist: */ + const char *version; /* human readable string */ + unsigned int version_num; /* numeric representation */ + const char *host; /* human readable string */ + int features; /* bitmask, see below */ + char *ssl_version; /* human readable string */ + long ssl_version_num; /* not used, always zero */ + const char *libz_version; /* human readable string */ + const char **protocols; /* list of protocols */ + + /* when 'age' is 1 or higher, the members below also exist: */ + const char *ares; /* human readable string */ + int ares_num; /* number */ + + /* when 'age' is 2 or higher, the member below also exists: */ + const char *libidn; /* human readable string */ + + /* when 'age' is 3 or higher, the members below also exist: */ + int iconv_ver_num; /* '_libiconv_version' if iconv support enabled */ + + const char *libssh_version; /* human readable string */ + +} curl_version_info_data; +.fi + +\fIage\fP describes what the age of this struct is. The number depends on how +new the libcurl you're using is. You are however guaranteed to get a struct that you +have a matching struct for in the header, as you tell libcurl your "age" with +the input argument. + +\fIversion\fP is just an ascii string for the libcurl version. + +\fIversion_num\fP is a 24 bit number created like this: <8 bits major number> +| <8 bits minor number> | <8 bits patch number>. Version 7.9.8 is therefore +returned as 0x070908. + +\fIhost\fP is an ascii string showing what host information that this libcurl +was built for. As discovered by a configure script or set by the build +environment. 
+ +\fIfeatures\fP can have none, one or more bits set, and the currently defined +bits are: +.RS +.IP CURL_VERSION_IPV6 +supports IPv6 +.IP CURL_VERSION_KERBEROS4 +supports kerberos4 (when using FTP) +.IP CURL_VERSION_SSL +supports SSL (HTTPS/FTPS) (Added in 7.10) +.IP CURL_VERSION_LIBZ +supports HTTP deflate using libz (Added in 7.10) +.IP CURL_VERSION_NTLM +supports HTTP NTLM (added in 7.10.6) +.IP CURL_VERSION_GSSNEGOTIATE +supports HTTP GSS-Negotiate (added in 7.10.6) +.IP CURL_VERSION_DEBUG +libcurl was built with debug capabilities (added in 7.10.6) +.IP CURL_VERSION_CURLDEBUG +libcurl was built with memory tracking debug capabilities. This is mainly of +interest for libcurl hackers. (added in 7.19.6) +.IP CURL_VERSION_ASYNCHDNS +libcurl was built with support for asynchronous name lookups, which allows +more exact timeouts (even on Windows) and less blocking when using the multi +interface. (added in 7.10.7) +.IP CURL_VERSION_SPNEGO +libcurl was built with support for SPNEGO authentication (Simple and Protected +GSS-API Negotiation Mechanism, defined in RFC 2478.) (added in 7.10.8) +.IP CURL_VERSION_LARGEFILE +libcurl was built with support for large files. (Added in 7.11.1) +.IP CURL_VERSION_IDN +libcurl was built with support for IDNA, domain names with international +letters. (Added in 7.12.0) +.IP CURL_VERSION_SSPI +libcurl was built with support for SSPI. This is only available on Windows and +makes libcurl use Windows-provided functions for NTLM authentication. It also +allows libcurl to use the current user and the current user's password without +the app having to pass them on. (Added in 7.13.2) +.IP CURL_VERSION_CONV +libcurl was built with support for character conversions, as provided by the +CURLOPT_CONV_* callbacks. (Added in 7.15.4) +.RE +\fIssl_version\fP is an ASCII string for the OpenSSL version used. If libcurl +has no SSL support, this is NULL. 
+ +\fIssl_version_num\fP is the numerical OpenSSL version value as defined by the +OpenSSL project. If libcurl has no SSL support, this is 0. + +\fIlibz_version\fP is an ASCII string (there is no numerical version). If +libcurl has no libz support, this is NULL. + +\fIprotocols\fP is a pointer to an array of char * pointers, containing the +names of the protocols that libcurl supports (using lowercase letters). The protocol +names are the same as would be used in URLs. The array is terminated by a NULL +entry. +.SH RETURN VALUE +A pointer to a curl_version_info_data struct. +.SH "SEE ALSO" +\fIcurl_version(3)\fP + diff --git a/usr/share/man/man3/libcurl-easy.3 b/usr/share/man/man3/libcurl-easy.3 new file mode 100755 index 000000000..58e75f162 --- /dev/null +++ b/usr/share/man/man3/libcurl-easy.3 @@ -0,0 +1,28 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: libcurl-easy.3,v 1.5 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH libcurl 3 "12 Aug 2003" "libcurl 7.10.7" "libcurl easy interface" +.SH NAME +libcurl-easy \- easy interface overview +.SH DESCRIPTION +When using libcurl's "easy" interface you init your session and get a handle +(often referred to as an "easy handle"), which you use as input to the easy +interface functions you use. Use \fIcurl_easy_init(3)\fP to get the handle. + +You continue by setting all the options you want in the upcoming transfer, the +most important among them is the URL itself (you can't transfer anything +without a specified URL as you may have figured out yourself). You might want +to set some callbacks as well that will be called from the library when data +is available etc. \fIcurl_easy_setopt(3)\fP is used for all this. + +When all is set up, you tell libcurl to perform the transfer using +\fIcurl_easy_perform(3)\fP. It will then do the entire operation and won't +return until it is done (successfully or not).
+ +After the transfer has been made, you can set new options and make another +transfer, or if you're done, cleanup the session by calling +\fIcurl_easy_cleanup(3)\fP. If you want persistent connections, you don't +cleanup immediately, but instead run ahead and perform other transfers using +the same easy handle. + diff --git a/usr/share/man/man3/libcurl-errors.3 b/usr/share/man/man3/libcurl-errors.3 new file mode 100755 index 000000000..636bc2793 --- /dev/null +++ b/usr/share/man/man3/libcurl-errors.3 @@ -0,0 +1,259 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: libcurl-errors.3,v 1.35 2008-12-28 21:56:56 bagder Exp $ +.\" ************************************************************************** +.\" +.TH libcurl-errors 3 "8 Jun 2008" "libcurl 7.19.0" "libcurl errors" +.SH NAME +libcurl-errors \- error codes in libcurl +.SH DESCRIPTION +This man page includes most, if not all, available error codes in libcurl. +Why they occur and possibly what you can do to fix the problem are also included. +.SH "CURLcode" +Almost all "easy" interface functions return a CURLcode error code. 
No matter +what, using the \fIcurl_easy_setopt(3)\fP option \fICURLOPT_ERRORBUFFER\fP is +a good idea as it will give you a human readable error string that may offer +more details about the cause of the error than just the error code. +\fIcurl_easy_strerror(3)\fP can be called to get an error string from a +given CURLcode number. + +CURLcode is one of the following: +.IP "CURLE_OK (0)" +All fine. Proceed as usual. +.IP "CURLE_UNSUPPORTED_PROTOCOL (1)" +The URL you passed to libcurl used a protocol that this libcurl does not +support. The support might be a compile-time option that you didn't use, it +can be a misspelled protocol string or just a protocol libcurl has no code +for. +.IP "CURLE_FAILED_INIT (2)" +Very early initialization code failed. This is likely to be an internal error +or problem. +.IP "CURLE_URL_MALFORMAT (3)" +The URL was not properly formatted. +.IP "CURLE_COULDNT_RESOLVE_PROXY (5)" +Couldn't resolve proxy. The given proxy host could not be resolved. +.IP "CURLE_COULDNT_RESOLVE_HOST (6)" +Couldn't resolve host. The given remote host was not resolved. +.IP "CURLE_COULDNT_CONNECT (7)" +Failed to connect() to host or proxy. +.IP "CURLE_FTP_WEIRD_SERVER_REPLY (8)" +After connecting to a FTP server, libcurl expects to get a certain reply +back. This error code implies that it got a strange or bad reply. The given +remote server is probably not an OK FTP server. +.IP "CURLE_REMOTE_ACCESS_DENIED (9)" +We were denied access to the resource given in the URL. For FTP, this occurs +while trying to change to the remote directory. +.IP "CURLE_FTP_WEIRD_PASS_REPLY (11)" +After having sent the FTP password to the server, libcurl expects a proper +reply. This error code indicates that an unexpected code was returned. +.IP "CURLE_FTP_WEIRD_PASV_REPLY (13)" +libcurl failed to get a sensible result back from the server as a response to +either a PASV or a EPSV command. The server is flawed. 
+.IP "CURLE_FTP_WEIRD_227_FORMAT (14)" +FTP servers return a 227-line as a response to a PASV command. If libcurl +fails to parse that line, this return code is passed back. +.IP "CURLE_FTP_CANT_GET_HOST (15)" +An internal failure to lookup the host used for the new connection. +.IP "CURLE_FTP_COULDNT_SET_TYPE (17)" +Received an error when trying to set the transfer mode to binary or ASCII. +.IP "CURLE_PARTIAL_FILE (18)" +A file transfer was shorter or larger than expected. This happens when the +server first reports an expected transfer size, and then delivers data that +doesn't match the previously given size. +.IP "CURLE_FTP_COULDNT_RETR_FILE (19)" +This was either a weird reply to a 'RETR' command or a zero byte transfer +complete. +.IP "CURLE_QUOTE_ERROR (21)" +When sending custom "QUOTE" commands to the remote server, one of the commands +returned an error code that was 400 or higher (for FTP) or otherwise +indicated unsuccessful completion of the command. +.IP "CURLE_HTTP_RETURNED_ERROR (22)" +This is returned if CURLOPT_FAILONERROR is set TRUE and the HTTP server +returns an error code that is >= 400. (This error code was formerly known as +CURLE_HTTP_NOT_FOUND.) +.IP "CURLE_WRITE_ERROR (23)" +An error occurred when writing received data to a local file, or an error was +returned to libcurl from a write callback. +.IP "CURLE_UPLOAD_FAILED (25)" +Failed starting the upload. For FTP, the server typically denied the STOR +command. The error buffer usually contains the server's explanation for this. +(This error code was formerly known as CURLE_FTP_COULDNT_STOR_FILE.) +.IP "CURLE_READ_ERROR (26)" +There was a problem reading a local file or an error returned by the read +callback. +.IP "CURLE_OUT_OF_MEMORY (27)" +A memory allocation request failed. This is serious badness and +things are severely screwed up if this ever occurs. +.IP "CURLE_OPERATION_TIMEDOUT (28)" +Operation timeout. The specified time-out period was reached according to the +conditions. 
+.IP "CURLE_FTP_PORT_FAILED (30)" +The FTP PORT command returned error. This mostly happens when you haven't +specified a good enough address for libcurl to use. See \fICURLOPT_FTPPORT\fP. +.IP "CURLE_FTP_COULDNT_USE_REST (31)" +The FTP REST command returned error. This should never happen if the server is +sane. +.IP "CURLE_RANGE_ERROR (33)" +The server does not support or accept range requests. +.IP "CURLE_HTTP_POST_ERROR (34)" +This is an odd error that mainly occurs due to internal confusion. +.IP "CURLE_SSL_CONNECT_ERROR (35)" +A problem occurred somewhere in the SSL/TLS handshake. You really want the +error buffer and read the message there as it pinpoints the problem slightly +more. Could be certificates (file formats, paths, permissions), passwords, and +others. +.IP "CURLE_FTP_BAD_DOWNLOAD_RESUME (36)" +Attempting FTP resume beyond file size. +.IP "CURLE_FILE_COULDNT_READ_FILE (37)" +A file given with FILE:// couldn't be opened. Most likely because the file +path doesn't identify an existing file. Did you check file permissions? +.IP "CURLE_LDAP_CANNOT_BIND (38)" +LDAP cannot bind. LDAP bind operation failed. +.IP "CURLE_LDAP_SEARCH_FAILED (39)" +LDAP search failed. +.IP "CURLE_FUNCTION_NOT_FOUND (41)" +Function not found. A required zlib function was not found. +.IP "CURLE_ABORTED_BY_CALLBACK (42)" +Aborted by callback. A callback returned "abort" to libcurl. +.IP "CURLE_BAD_FUNCTION_ARGUMENT (43)" +Internal error. A function was called with a bad parameter. +.IP "CURLE_INTERFACE_FAILED (45)" +Interface error. A specified outgoing interface could not be used. Set which +interface to use for outgoing connections' source IP address with +CURLOPT_INTERFACE. (This error code was formerly known as +CURLE_HTTP_PORT_FAILED.) +.IP "CURLE_TOO_MANY_REDIRECTS (47)" +Too many redirects. When following redirects, libcurl hit the maximum amount. +Set your limit with CURLOPT_MAXREDIRS. 
+.IP "CURLE_UNKNOWN_TELNET_OPTION (48)" +An option set with CURLOPT_TELNETOPTIONS was not recognized/known. Refer to +the appropriate documentation. +.IP "CURLE_TELNET_OPTION_SYNTAX (49)" +A telnet option string was illegally formatted. +.IP "CURLE_PEER_FAILED_VERIFICATION (51)" +The remote server's SSL certificate or SSH md5 fingerprint was deemed not OK. +.IP "CURLE_GOT_NOTHING (52)" +Nothing was returned from the server, and under the circumstances, getting +nothing is considered an error. +.IP "CURLE_SSL_ENGINE_NOTFOUND (53)" +The specified crypto engine wasn't found. +.IP "CURLE_SSL_ENGINE_SETFAILED (54)" +Failed setting the selected SSL crypto engine as default! +.IP "CURLE_SEND_ERROR (55)" +Failed sending network data. +.IP "CURLE_RECV_ERROR (56)" +Failure with receiving network data. +.IP "CURLE_SSL_CERTPROBLEM (58)" +Problem with the local client certificate. +.IP "CURLE_SSL_CIPHER (59)" +Couldn't use specified cipher. +.IP "CURLE_SSL_CACERT (60)" +Peer certificate cannot be authenticated with known CA certificates. +.IP "CURLE_BAD_CONTENT_ENCODING (61)" +Unrecognized transfer encoding. +.IP "CURLE_LDAP_INVALID_URL (62)" +Invalid LDAP URL. +.IP "CURLE_FILESIZE_EXCEEDED (63)" +Maximum file size exceeded. +.IP "CURLE_USE_SSL_FAILED (64)" +Requested FTP SSL level failed. +.IP "CURLE_SEND_FAIL_REWIND (65)" +When doing a send operation curl had to rewind the data to retransmit, but the +rewinding operation failed. +.IP "CURLE_SSL_ENGINE_INITFAILED (66)" +Initiating the SSL Engine failed. +.IP "CURLE_LOGIN_DENIED (67)" +The remote server denied curl to login (Added in 7.13.1) +.IP "CURLE_TFTP_NOTFOUND (68)" +File not found on TFTP server. +.IP "CURLE_TFTP_PERM (69)" +Permission problem on TFTP server. +.IP "CURLE_REMOTE_DISK_FULL (70)" +Out of disk space on the server. +.IP "CURLE_TFTP_ILLEGAL (71)" +Illegal TFTP operation. +.IP "CURLE_TFTP_UNKNOWNID (72)" +Unknown TFTP transfer ID.
+.IP "CURLE_REMOTE_FILE_EXISTS (73)" +File already exists and will not be overwritten. +.IP "CURLE_TFTP_NOSUCHUSER (74)" +This error should never be returned by a properly functioning TFTP server. +.IP "CURLE_CONV_FAILED (75)" +Character conversion failed. +.IP "CURLE_CONV_REQD (76)" +Caller must register conversion callbacks. +.IP "CURLE_SSL_CACERT_BADFILE (77)" +Problem with reading the SSL CA cert (path? access rights?) +.IP "CURLE_REMOTE_FILE_NOT_FOUND (78)" +The resource referenced in the URL does not exist. +.IP "CURLE_SSH (79)" +An unspecified error occurred during the SSH session. +.IP "CURLE_SSL_SHUTDOWN_FAILED (80)" +Failed to shut down the SSL connection. +.IP "CURLE_AGAIN (81)" +Socket is not ready for send/recv; wait till it's ready and try again. This +return code is only returned from \fIcurl_easy_recv(3)\fP and +\fIcurl_easy_send(3)\fP (Added in 7.18.2) +.IP "CURLE_SSL_CRL_BADFILE (82)" +Failed to load CRL file (Added in 7.19.0) +.IP "CURLE_SSL_ISSUER_ERROR (83)" +Issuer check failed (Added in 7.19.0) +.IP "CURLE_OBSOLETE*" +These error codes will never be returned. They were used in an old libcurl +version and are currently unused. +.SH "CURLMcode" +This is the generic return code used by functions in the libcurl multi +interface. Also consider \fIcurl_multi_strerror(3)\fP. +.IP "CURLM_CALL_MULTI_PERFORM (-1)" +This is not really an error. It means you should call +\fIcurl_multi_perform(3)\fP again without doing select() or similar in between. +.IP "CURLM_OK (0)" +Things are fine. +.IP "CURLM_BAD_HANDLE (1)" +The passed-in handle is not a valid CURLM handle. +.IP "CURLM_BAD_EASY_HANDLE (2)" +An easy handle was not good/valid. It could mean that it isn't an easy handle +at all, or possibly that the handle is already in use by this or another +multi handle. +.IP "CURLM_OUT_OF_MEMORY (3)" +You are doomed. +.IP "CURLM_INTERNAL_ERROR (4)" +This can only be returned if libcurl bugs. Please report it to us!
+.IP "CURLM_BAD_SOCKET (5)" +The passed-in socket is not a valid one that libcurl already knows about. +(Added in 7.15.4) +.IP "CURLM_UNKNOWN_OPTION (6)" +curl_multi_setopt() with unsupported option +(Added in 7.15.4) +.SH "CURLSHcode" +The "share" interface will return a CURLSHcode to indicate when an error has +occurred. Also consider \fIcurl_share_strerror(3)\fP. +.IP "CURLSHE_OK (0)" +All fine. Proceed as usual. +.IP "CURLSHE_BAD_OPTION (1)" +An invalid option was passed to the function. +.IP "CURLSHE_IN_USE (2)" +The share object is currently in use. +.IP "CURLSHE_INVALID (3)" +An invalid share object was passed to the function. +.IP "CURLSHE_NOMEM (4)" +Not enough memory was available. +(Added in 7.12.0) diff --git a/usr/share/man/man3/libcurl-multi.3 b/usr/share/man/man3/libcurl-multi.3 new file mode 100755 index 000000000..e68d2db43 --- /dev/null +++ b/usr/share/man/man3/libcurl-multi.3 @@ -0,0 +1,143 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. +.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. 
+.\" * +.\" * $Id: libcurl-multi.3,v 1.23 2009-06-15 20:49:23 bagder Exp $ +.\" ************************************************************************** +.\" +.TH libcurl-multi 3 "3 Feb 2007" "libcurl 7.16.0" "libcurl multi interface" +.SH NAME +libcurl-multi \- how to use the multi interface +.SH DESCRIPTION +This is an overview on how to use the libcurl multi interface in your C +programs. There are specific man pages for each function mentioned in +here. There's also the \fIlibcurl-tutorial(3)\fP man page for a complete +tutorial to programming with libcurl and the \fIlibcurl-easy(3)\fP man page +for an overview of the libcurl easy interface. + +All functions in the multi interface are prefixed with curl_multi. +.SH "OBJECTIVES" +The multi interface offers several abilities that the easy interface doesn't. +They are mainly: + +1. Enable a "pull" interface. The application that uses libcurl decides where +and when to ask libcurl to get/send data. + +2. Enable multiple simultaneous transfers in the same thread without making it +complicated for the application. + +3. Enable the application to easily wait for action on its own file descriptors and +curl's file descriptors simultaneously. +.SH "ONE MULTI HANDLE MANY EASY HANDLES" +To use the multi interface, you must first create a 'multi handle' with +\fIcurl_multi_init(3)\fP. This handle is then used as input to all further +curl_multi_* functions. + +Each single transfer is built up with an easy handle. You must create them, +and setup the appropriate options for each easy handle, as outlined in the +\fIlibcurl(3)\fP man page, using \fIcurl_easy_setopt(3)\fP. + +When the easy handle is setup for a transfer, then instead of using +\fIcurl_easy_perform(3)\fP (as when using the easy interface for transfers), +you should instead add the easy handle to the multi handle using +\fIcurl_multi_add_handle(3)\fP.
The multi handle is sometimes referred to as a +\'multi stack\' because of the fact that it may hold a large amount of easy +handles. + +Should you change your mind, the easy handle is again removed from the multi +stack using \fIcurl_multi_remove_handle(3)\fP. Once removed from the multi +handle, you can again use other easy interface functions like +\fIcurl_easy_perform(3)\fP on the handle or whatever you think is necessary. + +Adding the easy handle to the multi handle does not start the transfer. +Remember that one of the main ideas with this interface is to let your +application drive. You drive the transfers by invoking +\fIcurl_multi_perform(3)\fP. libcurl will then transfer data if there is +anything available to transfer. It'll use the callbacks and everything else +you have setup in the individual easy handles. It'll transfer data on all +current transfers in the multi stack that are ready to transfer anything. It +may be all, it may be none. + +Your application can acquire knowledge from libcurl when it would like to get +invoked to transfer data, so that you don't have to busy-loop and call that +\fIcurl_multi_perform(3)\fP like crazy. \fIcurl_multi_fdset(3)\fP offers an +interface using which you can extract fd_sets from libcurl to use in select() +or poll() calls in order to get to know when the transfers in the multi stack +might need attention. This also makes it very easy for your program to wait +for input on your own private file descriptors at the same time or perhaps +timeout every now and then, should you want that. + +A little note here about the return codes from the multi functions, and +especially the \fIcurl_multi_perform(3)\fP: if you receive +\fICURLM_CALL_MULTI_PERFORM\fP, this basically means that you should call +\fIcurl_multi_perform(3)\fP again, before you select() on more actions. 
You +don't have to do it immediately, but the return code means that libcurl may +have more data available to return or that there may be more data to send off +before it is "satisfied". + +\fIcurl_multi_perform(3)\fP stores the number of still running transfers in +one of its input arguments, and by reading that you can figure out when all +the transfers in the multi handles are done. 'done' does not mean +successful. One or more of the transfers may have failed. Tracking when this +number changes, you know when one or more transfers are done. + +To get information about completed transfers, to figure out success or not and +similar, \fIcurl_multi_info_read(3)\fP should be called. It can return a +message about a current or previous transfer. Repeated invocations of the function +get more messages until the message queue is empty. The information you +receive there includes an easy handle pointer which you may use to identify +which easy handle the information regards. + +When a single transfer is completed, the easy handle is still left added to +the multi stack. You need to first remove the easy handle with +\fIcurl_multi_remove_handle(3)\fP and then close it with +\fIcurl_easy_cleanup(3)\fP, or possibly set new options to it and add it again +with \fIcurl_multi_add_handle(3)\fP to start another transfer. + +When all transfers in the multi stack are done, cleanup the multi handle with +\fIcurl_multi_cleanup(3)\fP. Be careful and please note that you \fBMUST\fP +invoke separate \fIcurl_easy_cleanup(3)\fP calls on every single easy handle +to clean them up properly. + +If you want to re-use an easy handle that was added to the multi handle for +transfer, you must first remove it from the multi stack and then re-add it +again (possibly after having altered some options at your own choice).
+.SH "MULTI_SOCKET" +Since 7.16.0, the \fIcurl_multi_socket_action(3)\fP function offers a way for +applications to not only avoid being forced to use select(), but it also +offers a much more high-performance API that will make a significant +difference for applications using large numbers of simultaneous connections. + +\fIcurl_multi_socket_action(3)\fP is then used +instead of \fIcurl_multi_perform(3)\fP. +.SH "BLOCKING" +A few areas in the code are still using blocking code, even when used from the +multi interface. While we certainly want and intend for these to get fixed in +the future, you should be aware of the following current restrictions: + +.nf + - Name resolves on non-windows unless c-ares is used + - GnuTLS SSL connections + - Active FTP connections + - HTTP proxy CONNECT operations + - SOCKS proxy handshakes + - TFTP transfers + - file:// transfers + - TELNET transfers +.fi diff --git a/usr/share/man/man3/libcurl-share.3 b/usr/share/man/man3/libcurl-share.3 new file mode 100755 index 000000000..3f61ba99f --- /dev/null +++ b/usr/share/man/man3/libcurl-share.3 @@ -0,0 +1,46 @@ +.\" You can view this file with: +.\" nroff -man [file] +.\" $Id: libcurl-share.3,v 1.3 2009-05-19 12:48:14 yangtse Exp $ +.\" +.TH libcurl-share 3 "8 Aug 2003" "libcurl 7.10.7" "libcurl share interface" +.SH NAME +libcurl-share \- how to use the share interface +.SH DESCRIPTION +This is an overview on how to use the libcurl share interface in your C +programs. There are specific man pages for each function mentioned in +here. + +All functions in the share interface are prefixed with curl_share. + +.SH "OBJECTIVES" +The share interface was added to enable sharing of data between curl +\&"handles". +.SH "ONE SET OF DATA - MANY TRANSFERS" +You can have multiple easy handles share data between them. Have them update +and use the \fBsame\fP cookie database or DNS cache! This way, each single +transfer will take advantage from data updates made by the other transfer(s). 
+.SH "SHARE OBJECT" +You create a shared object with \fIcurl_share_init(3)\fP. It returns a handle +for a newly created one. + +You tell the shared object what data you want it to share by using +\fIcurl_share_setopt(3)\fP. Currently you can only share DNS and/or COOKIE +data. + +Since you can use this share from multiple threads, and libcurl has no +internal thread synchronization, you must provide mutex callbacks if you're +using this multi-threaded. You set lock and unlock functions with +\fIcurl_share_setopt(3)\fP too. + +Then, you make an easy handle to use this share, you set the +\fICURLOPT_SHARE\fP option with \fIcurl_easy_setopt(3)\fP, and pass in share +handle. You can make any number of easy handles share the same share handle. + +To make an easy handle stop using that particular share, you set +\fICURLOPT_SHARE\fP to NULL for that easy handle. To make a handle stop +sharing a particular data, you can \fICURLSHOPT_UNSHARE\fP it. + +When you're done using the share, make sure that no easy handle is still using +it, and call \fIcurl_share_cleanup(3)\fP on the handle. +.SH "SEE ALSO" +.BR curl_share_init "(3), " curl_share_setopt "(3), " curl_share_cleanup "(3)" diff --git a/usr/share/man/man3/libcurl-tutorial.3 b/usr/share/man/man3/libcurl-tutorial.3 new file mode 100755 index 000000000..aefeaaaf1 --- /dev/null +++ b/usr/share/man/man3/libcurl-tutorial.3 @@ -0,0 +1,1345 @@ +.\" ************************************************************************** +.\" * _ _ ____ _ +.\" * Project ___| | | | _ \| | +.\" * / __| | | | |_) | | +.\" * | (__| |_| | _ <| |___ +.\" * \___|\___/|_| \_\_____| +.\" * +.\" * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al. +.\" * +.\" * This software is licensed as described in the file COPYING, which +.\" * you should have received as part of this distribution. The terms +.\" * are also available at http://curl.haxx.se/docs/copyright.html. 
+.\" * +.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell +.\" * copies of the Software, and permit persons to whom the Software is +.\" * furnished to do so, under the terms of the COPYING file. +.\" * +.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +.\" * KIND, either express or implied. +.\" * +.\" * $Id: libcurl-tutorial.3,v 1.28 2009-08-04 12:02:27 bagder Exp $ +.\" ************************************************************************** +.\" +.TH libcurl-tutorial 3 "4 Mar 2009" "libcurl" "libcurl programming" +.SH NAME +libcurl-tutorial \- libcurl programming tutorial +.SH "Objective" +This document attempts to describe the general principles and some basic +approaches to consider when programming with libcurl. The text will focus +mainly on the C interface but might apply fairly well on other interfaces as +well as they usually follow the C one pretty closely. + +This document will refer to 'the user' as the person writing the source code +that uses libcurl. That would probably be you or someone in your position. +What will be generally referred to as 'the program' will be the collected +source code that you write that is using libcurl for transfers. The program +is outside libcurl and libcurl is outside of the program. + +To get more details on all options and functions described herein, please +refer to their respective man pages. + +.SH "Building" +There are many different ways to build C programs. This chapter will assume a +UNIX-style build process. If you use a different build system, you can still +read this to get general information that may apply to your environment as +well. +.IP "Compiling the Program" +Your compiler needs to know where the libcurl headers are located. Therefore +you must set your compiler's include path to point to the directory where you +installed them. 
The 'curl-config'[3] tool can be used to get this information: + +$ curl-config --cflags + +.IP "Linking the Program with libcurl" +When having compiled the program, you need to link your object files to create +a single executable. For that to succeed, you need to link with libcurl and +possibly also with other libraries that libcurl itself depends on. Like the +OpenSSL libraries, but even some standard OS libraries may be needed on the +command line. To figure out which flags to use, once again the 'curl-config' +tool comes to the rescue: + +$ curl-config --libs + +.IP "SSL or Not" +libcurl can be built and customized in many ways. One of the things that +varies from different libraries and builds is the support for SSL-based +transfers, like HTTPS and FTPS. If a supported SSL library was detected +properly at build-time, libcurl will be built with SSL support. To figure out +if an installed libcurl has been built with SSL support enabled, use +\&'curl-config' like this: + +$ curl-config --feature + +And if SSL is supported, the keyword 'SSL' will be written to stdout, +possibly together with a few other features that could be either on or off +for different libcurls. + +See also the "Features libcurl Provides" further down. +.IP "autoconf macro" +When you write your configure script to detect libcurl and set up variables +accordingly, we offer a prewritten macro that probably does everything you +need in this area. See docs/libcurl/libcurl.m4 file - it includes docs on how +to use it. + +.SH "Portable Code in a Portable World" +The people behind libcurl have put a considerable effort to make libcurl work +on a large number of different operating systems and environments. + +You program libcurl the same way on all platforms that libcurl runs on. There +are only very few minor considerations that differ. If you just make sure to +write your code portable enough, you may very well create yourself a very +portable program. libcurl shouldn't stop you from that.
+ +.SH "Global Preparation" +The program must initialize some of the libcurl functionality globally. That +means it should be done exactly once, no matter how many times you intend to +use the library. Once for your program's entire life time. This is done using + + curl_global_init() + +and it takes one parameter which is a bit pattern that tells libcurl what to +initialize. Using \fICURL_GLOBAL_ALL\fP will make it initialize all known +internal sub modules, and might be a good default option. The current two bits +that are specified are: +.RS +.IP "CURL_GLOBAL_WIN32" +which only does anything on Windows machines. When used on +a Windows machine, it'll make libcurl initialize the win32 socket +stuff. Without having that initialized properly, your program cannot use +sockets properly. You should only do this once for each application, so if +your program already does this or if another library in use does it, you +should not tell libcurl to do this as well. +.IP CURL_GLOBAL_SSL +which only does anything on libcurls compiled and built SSL-enabled. On these +systems, this will make libcurl initialize the SSL library properly for this +application. This only needs to be done once for each application so if your +program or another library already does this, this bit should not be needed. +.RE + +libcurl has a default protection mechanism that detects if +\fIcurl_global_init(3)\fP hasn't been called by the time +\fIcurl_easy_perform(3)\fP is called and if that is the case, libcurl runs the +function itself with a guessed bit pattern. Please note that depending solely +on this is not considered nice nor very good. + +When the program no longer uses libcurl, it should call +\fIcurl_global_cleanup(3)\fP, which is the opposite of the init call. It will +then do the reversed operations to clean up the resources the +\fIcurl_global_init(3)\fP call initialized. + +Repeated calls to \fIcurl_global_init(3)\fP and \fIcurl_global_cleanup(3)\fP +should be avoided.
They should only be called once each. + +.SH "Features libcurl Provides" +It is considered best-practice to determine libcurl features at run-time +rather than at build-time (if possible of course). By calling +\fIcurl_version_info(3)\fP and checking out the details of the returned +struct, your program can figure out exactly what the currently running libcurl +supports. + +.SH "Handle the Easy libcurl" +libcurl first introduced the so-called easy interface. All operations in the +easy interface are prefixed with 'curl_easy'. + +Recent libcurl versions also offer the multi interface. More about that +interface, what it is targeted for and how to use it is detailed in a separate +chapter further down. You still need to understand the easy interface first, +so please continue reading for better understanding. + +To use the easy interface, you must first create yourself an easy handle. You +need one handle for each easy session you want to perform. Basically, you +should use one handle for every thread you plan to use for transferring. You +must never share the same handle in multiple threads. + +Get an easy handle with + + easyhandle = curl_easy_init(); + +It returns an easy handle. Using that you proceed to the next step: setting +up your preferred actions. A handle is just a logical entity for the upcoming +transfer or series of transfers. + +You set properties and options for this handle using +\fIcurl_easy_setopt(3)\fP. They control how the subsequent transfer or +transfers will be made. Options remain set in the handle until set again to +something different. Hence, multiple requests using the same handle will use +the same options. + +Many of the options you set in libcurl are "strings", pointers to data +terminated with a zero byte. When you set strings with +\fIcurl_easy_setopt(3)\fP, libcurl makes its own copy so that they don't +need to be kept around in your application after being set[4]. + +One of the most basic properties to set in the handle is the URL.
You set +your preferred URL to transfer with CURLOPT_URL in a manner similar to: + +.nf + curl_easy_setopt(handle, CURLOPT_URL, "http://domain.com/"); +.fi + +Let's assume for a while that you want to receive data as the URL identifies a +remote resource you want to get here. Since you write a sort of application +that needs this transfer, I assume that you would like to get the data passed +to you directly instead of simply getting it passed to stdout. So, you write +your own function that matches this prototype: + + size_t write_data(void *buffer, size_t size, size_t nmemb, void *userp); + +You tell libcurl to pass all data to this function by issuing a function +similar to this: + + curl_easy_setopt(easyhandle, CURLOPT_WRITEFUNCTION, write_data); + +You can control what data your callback function gets in the fourth argument +by setting another property: + + curl_easy_setopt(easyhandle, CURLOPT_WRITEDATA, &internal_struct); + +Using that property, you can easily pass local data between your application +and the function that gets invoked by libcurl. libcurl itself won't touch the +data you pass with \fICURLOPT_WRITEDATA\fP. + +libcurl offers its own default internal callback that will take care of the data +if you don't set the callback with \fICURLOPT_WRITEFUNCTION\fP. It will then +simply output the received data to stdout. You can have the default callback +write the data to a different file handle by passing a 'FILE *' to a file +opened for writing with the \fICURLOPT_WRITEDATA\fP option. + +Now, we need to take a step back and have a deep breath. Here's one of those +rare platform-dependent nitpicks. Did you spot it? On some platforms[2], +libcurl won't be able to operate on files opened by the program. Thus, if you +use the default callback and pass in an open file with +\fICURLOPT_WRITEDATA\fP, it will crash. You should therefore avoid this to +make your program run fine virtually everywhere. 
+ +(\fICURLOPT_WRITEDATA\fP was formerly known as \fICURLOPT_FILE\fP. Both names +still work and do the same thing). + +If you're using libcurl as a win32 DLL, you MUST use the +\fICURLOPT_WRITEFUNCTION\fP if you set \fICURLOPT_WRITEDATA\fP - or you will +experience crashes. + +There are of course many more options you can set, and we'll get back to a few +of them later. Let's instead continue to the actual transfer: + + success = curl_easy_perform(easyhandle); + +\fIcurl_easy_perform(3)\fP will connect to the remote site, do the necessary +commands and receive the transfer. Whenever it receives data, it calls the +callback function we previously set. The function may get one byte at a time, +or it may get many kilobytes at once. libcurl delivers as much as possible as +often as possible. Your callback function should return the number of bytes it +\&"took care of". If that is not the exact same amount of bytes that was +passed to it, libcurl will abort the operation and return with an error code. + +When the transfer is complete, the function returns a return code that informs +you if it succeeded in its mission or not. If a return code isn't enough for +you, you can use the CURLOPT_ERRORBUFFER to point libcurl to a buffer of yours +where it'll store a human readable error message as well. + +If you then want to transfer another file, the handle is ready to be used +again. Mind you, it is even preferred that you re-use an existing handle if +you intend to make another transfer. libcurl will then attempt to re-use the +previous connection. + +For some protocols, downloading a file can involve a complicated process of +logging in, setting the transfer mode, changing the current directory and +finally transferring the file data. libcurl takes care of all that +complication for you. Given simply the URL to a file, libcurl will take care +of all the details needed to get the file moved from one machine to another. 
+ +.SH "Multi-threading Issues" +The first basic rule is that you must \fBnever\fP share a libcurl handle (be +it easy or multi or whatever) between multiple threads. Only use one handle in +one thread at a time. + +libcurl is completely thread safe, except for two issues: signals and SSL/TLS +handlers. Signals are used for timing out name resolves (during DNS lookup) - +when built without c-ares support and not on Windows. + +If you are accessing HTTPS or FTPS URLs in a multi-threaded manner, you are +then of course using the underlying SSL library multi-threaded and those libs +might have their own requirements on this issue. Basically, you need to +provide one or two functions to allow it to function properly. For all +details, see this: + +OpenSSL + + http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION + +GnuTLS + + http://www.gnu.org/software/gnutls/manual/html_node/Multi_002dthreaded-applications.html + +NSS + + is claimed to be thread-safe already without anything required. + +yassl + + Required actions unknown. + +When using multiple threads you should set the CURLOPT_NOSIGNAL option to 1 +for all handles. Everything will or might work fine except that timeouts are +not honored during the DNS lookup - which you can work around by building +libcurl with c-ares support. c-ares is a library that provides asynchronous +name resolves. On some platforms, libcurl simply will not function properly +multi-threaded unless this option is set. + +Also, note that CURLOPT_DNS_USE_GLOBAL_CACHE is not thread-safe. + +.SH "When It Doesn't Work" +There will always be times when the transfer fails for some reason. You might +have set the wrong libcurl option or misunderstood what the libcurl option +actually does, or the remote server might return non-standard replies that +confuse the library which then confuses your program. + +There's one golden rule when these things occur: set the CURLOPT_VERBOSE +option to 1. 
It'll cause the library to spew out the entire protocol +details it sends, some internal info and some received protocol data as well +(especially when using FTP). If you're using HTTP, adding the headers in the +received output to study is also a clever way to get a better understanding +of why the server behaves the way it does. Include headers in the normal body +output with CURLOPT_HEADER set to 1. + +Of course, there are bugs left. We need to know about them to be able +to fix them, so we're quite dependent on your bug reports! When you do report +suspected bugs in libcurl, please include as many details as you possibly can: a +protocol dump that CURLOPT_VERBOSE produces, library version, as much as +possible of your code that uses libcurl, operating system name and version, +compiler name and version etc. + +If CURLOPT_VERBOSE is not enough, you can increase the level of debug data your +application receives by using the CURLOPT_DEBUGFUNCTION. + +Getting some in-depth knowledge about the protocols involved is never wrong, +and if you're trying to do funny things, you might very well understand +libcurl and how to use it better if you study the appropriate RFC documents +at least briefly. + +.SH "Upload Data to a Remote Site" +libcurl tries to keep a protocol independent approach to most transfers, thus +uploading to a remote FTP site is very similar to uploading data to a HTTP +server with a PUT request. + +Of course, first you either create an easy handle or you re-use one existing +one. Then you set the URL to operate on just like before. This is the remote +URL that we will now upload to. + +Since we write an application, we most likely want libcurl to get the upload +data by asking us for it. To make it do that, we set the read callback and +the custom pointer libcurl will pass to our read callback.
The read callback +should have a prototype similar to: + + size_t function(char *bufptr, size_t size, size_t nitems, void *userp); + +Where bufptr is the pointer to a buffer we fill in with data to upload and +size*nitems is the size of the buffer and therefore also the maximum amount +of data we can return to libcurl in this call. The 'userp' pointer is the +custom pointer we set to point to a struct of ours to pass private data +between the application and the callback. + + curl_easy_setopt(easyhandle, CURLOPT_READFUNCTION, read_function); + + curl_easy_setopt(easyhandle, CURLOPT_READDATA, &filedata); + +Tell libcurl that we want to upload: + + curl_easy_setopt(easyhandle, CURLOPT_UPLOAD, 1L); + +A few protocols won't behave properly when uploads are done without any prior +knowledge of the expected file size. So, set the upload file size using the +CURLOPT_INFILESIZE_LARGE for all known file sizes like this[1]: + +.nf + /* in this example, file_size must be a curl_off_t variable */ + curl_easy_setopt(easyhandle, CURLOPT_INFILESIZE_LARGE, file_size); +.fi + +When you call \fIcurl_easy_perform(3)\fP this time, it'll perform all the +necessary operations and when it has invoked the upload it'll call your +supplied callback to get the data to upload. The program should return as much +data as possible in every invocation, as that is likely to make the upload perform +as fast as possible. The callback should return the number of bytes it wrote +in the buffer. Returning 0 will signal the end of the upload. + +.SH "Passwords" +Many protocols use or even require that user name and password are provided +to be able to download or upload the data of your choice. libcurl offers +several ways to specify them. + +Most protocols support that you specify the name and password in the URL +itself. libcurl will detect this and use them accordingly.
This is written +like this: + + protocol://user:password@example.com/path/ + +If you need any odd letters in your user name or password, you should enter +them URL encoded, as %XX where XX is a two-digit hexadecimal number. + +libcurl also provides options to set various passwords. The user name and +password as shown embedded in the URL can instead get set with the +CURLOPT_USERPWD option. The argument passed to libcurl should be a char * to +a string in the format "user:password". In a manner like this: + + curl_easy_setopt(easyhandle, CURLOPT_USERPWD, "myname:thesecret"); + +Another case where name and password might be needed at times, is for those +users who need to authenticate themselves to a proxy they use. libcurl offers +another option for this, the CURLOPT_PROXYUSERPWD. It is used quite similar +to the CURLOPT_USERPWD option like this: + + curl_easy_setopt(easyhandle, CURLOPT_PROXYUSERPWD, "myname:thesecret"); + +There's a long time UNIX "standard" way of storing ftp user names and +passwords, namely in the $HOME/.netrc file. The file should be made private +so that only the user may read it (see also the "Security Considerations" +chapter), as it might contain the password in plain text. libcurl has the +ability to use this file to figure out what set of user name and password to +use for a particular host. As an extension to the normal functionality, +libcurl also supports this file for non-FTP protocols such as HTTP. To make +curl use this file, use the CURLOPT_NETRC option: + + curl_easy_setopt(easyhandle, CURLOPT_NETRC, 1L); + +And a very basic example of how such a .netrc file may look like: + +.nf + machine myhost.mydomain.com + login userlogin + password secretword +.fi + +All these examples have been cases where the password has been optional, or +at least you could leave it out and have libcurl attempt to do its job +without it. There are times when the password isn't optional, like when +you're using an SSL private key for secure transfers. 
+ +To pass the known private key password to libcurl: + + curl_easy_setopt(easyhandle, CURLOPT_KEYPASSWD, "keypassword"); + +.SH "HTTP Authentication" +The previous chapter showed how to set user name and password for getting +URLs that require authentication. When using the HTTP protocol, there are +many different ways a client can provide those credentials to the server and +you can control which way libcurl will (attempt to) use them. The default HTTP +authentication method is called 'Basic', which is sending the name and +password in clear-text in the HTTP request, base64-encoded. This is insecure. + +At the time of this writing, libcurl can be built to use: Basic, Digest, NTLM, +Negotiate, GSS-Negotiate and SPNEGO. You can tell libcurl which one to use +with CURLOPT_HTTPAUTH as in: + + curl_easy_setopt(easyhandle, CURLOPT_HTTPAUTH, CURLAUTH_DIGEST); + +And when you send authentication to a proxy, you can also set authentication +type the same way but instead with CURLOPT_PROXYAUTH: + + curl_easy_setopt(easyhandle, CURLOPT_PROXYAUTH, CURLAUTH_NTLM); + +Both these options allow you to set multiple types (by ORing them together), +to make libcurl pick the most secure one out of the types the server/proxy +claims to support. This method does however add a round-trip since libcurl +must first ask the server what it supports: + + curl_easy_setopt(easyhandle, CURLOPT_HTTPAUTH, + CURLAUTH_DIGEST|CURLAUTH_BASIC); + +For convenience, you can use the 'CURLAUTH_ANY' define (instead of a list +with specific types) which allows libcurl to use whatever method it wants. + +When asking for multiple types, libcurl will pick the available one it +considers "best" in its own internal order of preference. + +.SH "HTTP POSTing" +We get many questions regarding how to issue HTTP POSTs with libcurl the +proper way. This chapter will thus include examples using both different +versions of HTTP POST that libcurl supports. 
+ +The first version is the simple POST, the most common version, that most HTML +pages using the <form> tag use. We provide a pointer to the data and tell +libcurl to post it all to the remote site: + +.nf + char *data="name=daniel&project=curl"; + curl_easy_setopt(easyhandle, CURLOPT_POSTFIELDS, data); + curl_easy_setopt(easyhandle, CURLOPT_URL, "http://posthere.com/"); + + curl_easy_perform(easyhandle); /* post away! */ +.fi + +Simple enough, huh? Since you set the POST options with the +CURLOPT_POSTFIELDS, this automatically switches the handle to use POST in the +upcoming request. + +Ok, so what if you want to post binary data that also requires you to set the +Content-Type: header of the post? Well, binary posts prevent libcurl from +being able to do strlen() on the data to figure out the size, so we +must tell libcurl the size of the post data. Setting headers in libcurl +requests is done in a generic way, by building a list of our own headers and +then passing that list to libcurl. + +.nf + struct curl_slist *headers=NULL; + headers = curl_slist_append(headers, "Content-Type: text/xml"); + + /* post binary data */ + curl_easy_setopt(easyhandle, CURLOPT_POSTFIELDS, binaryptr); + + /* set the size of the postfields data */ + curl_easy_setopt(easyhandle, CURLOPT_POSTFIELDSIZE, 23L); + + /* pass our list of custom made headers */ + curl_easy_setopt(easyhandle, CURLOPT_HTTPHEADER, headers); + + curl_easy_perform(easyhandle); /* post away! */ + + curl_slist_free_all(headers); /* free the header list */ +.fi + +While the simple examples above cover the majority of all cases where HTTP +POST operations are required, they don't do multi-part formposts. Multi-part +formposts were introduced as a better way to post (possibly large) binary data +and were first documented in the RFC1867 (updated in RFC2388). They're called +multi-part because they're built by a chain of parts, each part being a single +unit of data. Each part has its own name and contents.
You can in fact create +and post a multi-part formpost with the regular libcurl POST support described +above, but that would require that you build a formpost yourself and provide +to libcurl. To make that easier, libcurl provides \fIcurl_formadd(3)\fP. Using +this function, you add parts to the form. When you're done adding parts, you +post the whole form. + +The following example sets two simple text parts with plain textual contents, +and then a file with binary contents and uploads the whole thing. + +.nf + struct curl_httppost *post=NULL; + struct curl_httppost *last=NULL; + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "name", + CURLFORM_COPYCONTENTS, "daniel", CURLFORM_END); + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "project", + CURLFORM_COPYCONTENTS, "curl", CURLFORM_END); + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "logotype-image", + CURLFORM_FILECONTENT, "curl.png", CURLFORM_END); + + /* Set the form info */ + curl_easy_setopt(easyhandle, CURLOPT_HTTPPOST, post); + + curl_easy_perform(easyhandle); /* post away! */ + + /* free the post data again */ + curl_formfree(post); +.fi + +Multipart formposts are chains of parts using MIME-style separators and +headers. It means that each one of these separate parts get a few headers set +that describe the individual content-type, size etc. To enable your +application to handicraft this formpost even more, libcurl allows you to +supply your own set of custom headers to such an individual form part. You can +of course supply headers to as many parts as you like, but this little example +will show how you set headers to one specific part when you add that to the +post handle: + +.nf + struct curl_slist *headers=NULL; + headers = curl_slist_append(headers, "Content-Type: text/xml"); + + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "logotype-image", + CURLFORM_FILECONTENT, "curl.xml", + CURLFORM_CONTENTHEADER, headers, + CURLFORM_END); + + curl_easy_perform(easyhandle); /* post away! 
*/ + + curl_formfree(post); /* free post */ + curl_slist_free_all(headers); /* free custom header list */ +.fi + +Since all options on an easyhandle are "sticky", they remain the same until +changed even if you do call \fIcurl_easy_perform(3)\fP, you may need to tell +curl to go back to a plain GET request if you intend to do one as your +next request. You force an easyhandle to go back to GET by using the +CURLOPT_HTTPGET option: + + curl_easy_setopt(easyhandle, CURLOPT_HTTPGET, 1L); + +Just setting CURLOPT_POSTFIELDS to "" or NULL will *not* stop libcurl from +doing a POST. It will just make it POST without any data to send! + +.SH "Showing Progress" + +For historical and traditional reasons, libcurl has a built-in progress meter +that can be switched on and then makes it present a progress meter in your +terminal. + +Switch on the progress meter by, oddly enough, setting CURLOPT_NOPROGRESS to +zero. This option is set to 1 by default. + +For most applications however, the built-in progress meter is useless and +what instead is interesting is the ability to specify a progress +callback. The function pointer you pass to libcurl will then be called on +irregular intervals with information about the current transfer. + +Set the progress callback by using CURLOPT_PROGRESSFUNCTION. And pass a +pointer to a function that matches this prototype: + +.nf + int progress_callback(void *clientp, + double dltotal, + double dlnow, + double ultotal, + double ulnow); +.fi + +If any of the input arguments is unknown, a 0 will be passed. The first +argument, the 'clientp' is the pointer you pass to libcurl with +CURLOPT_PROGRESSDATA. libcurl won't touch it. 
+ +.SH "libcurl with C++" + +There's basically only one thing to keep in mind when using C++ instead of C +when interfacing with libcurl: + +The callbacks CANNOT be non-static class member functions + +Example C++ code: + +.nf +class AClass { + static size_t write_data(void *ptr, size_t size, size_t nmemb, + void *ourpointer) + { + /* do what you want with the data */ + } + }; +.fi + +.SH "Proxies" + +What "proxy" means according to Merriam-Webster: "a person authorized to act +for another" but also "the agency, function, or office of a deputy who acts as +a substitute for another". + +Proxies are exceedingly common these days. Companies often only offer Internet +access to employees through their proxies. Network clients or user-agents ask +the proxy for documents, the proxy does the actual request and then it returns +them. + +libcurl supports SOCKS and HTTP proxies. When a given URL is wanted, libcurl +will ask the proxy for it instead of trying to connect to the actual host +identified in the URL. + +If you're using a SOCKS proxy, you may find that libcurl doesn't quite support +all operations through it. + +For HTTP proxies: the fact that the proxy is a HTTP proxy puts certain +restrictions on what can actually happen. A requested URL that might not be a +HTTP URL will still be passed to the HTTP proxy to deliver back to +libcurl. This happens transparently, and an application may not need to +know. I say "may", because at times it is very important to understand that +all operations over a HTTP proxy use the HTTP protocol. For example, you +can't invoke your own custom FTP commands or even proper FTP directory +listings.
+ +.IP "Proxy Options" + +To tell libcurl to use a proxy at a given port number: + + curl_easy_setopt(easyhandle, CURLOPT_PROXY, "proxy-host.com:8080"); + +Some proxies require user authentication before allowing a request, and you +pass that information similar to this: + + curl_easy_setopt(easyhandle, CURLOPT_PROXYUSERPWD, "user:password"); + +If you want to, you can specify the host name only in the CURLOPT_PROXY +option, and set the port number separately with CURLOPT_PROXYPORT. + +Tell libcurl what kind of proxy it is with CURLOPT_PROXYTYPE (if not, it will +default to assume a HTTP proxy): + + curl_easy_setopt(easyhandle, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4); + +.IP "Environment Variables" + +libcurl automatically checks and uses a set of environment variables to know +what proxies to use for certain protocols. The names of the variables are +following an ancient de facto standard and are built up as "[protocol]_proxy" +(note the lower casing). Which makes the variable \&'http_proxy' checked for a +name of a proxy to use when the input URL is HTTP. Following the same rule, +the variable named 'ftp_proxy' is checked for FTP URLs. Again, the proxies are +always HTTP proxies, the different names of the variables simply allows +different HTTP proxies to be used. + +The proxy environment variable contents should be in the format +\&"[protocol://][user:password@]machine[:port]". Where the protocol:// part is +simply ignored if present (so http://proxy and bluerk://proxy will do the +same) and the optional port number specifies on which port the proxy operates +on the host. If not specified, the internal default port number will be used +and that is most likely *not* the one you would like it to be. + +There are two special environment variables. 'all_proxy' is what sets proxy +for any URL in case the protocol specific variable wasn't set, and +\&'no_proxy' defines a list of hosts that should not use a proxy even though a +variable may say so. 
If 'no_proxy' is a plain asterisk ("*") it matches all +hosts. + +To explicitly disable libcurl's checking for and using the proxy environment +variables, set the proxy name to "" - an empty string - with CURLOPT_PROXY. +.IP "SSL and Proxies" + +SSL is for secure point-to-point connections. This involves strong encryption +and similar things, which effectively makes it impossible for a proxy to +operate as a "man in between" which the proxy's task is, as previously +discussed. Instead, the only way to have SSL work over a HTTP proxy is to ask +the proxy to tunnel through everything without being able to check or fiddle +with the traffic. + +Opening an SSL connection over a HTTP proxy is therefore a matter of asking the +proxy for a straight connection to the target host on a specified port. This +is made with the HTTP request CONNECT. ("please mr proxy, connect me to that +remote host"). + +Because of the nature of this operation, where the proxy has no idea what kind +of data is passed in and out through this tunnel, this breaks some of the +very few advantages that come from using a proxy, such as caching. Many +organizations prevent this kind of tunneling to other destination port numbers +than 443 (which is the default HTTPS port number). + +.IP "Tunneling Through Proxy" +As explained above, tunneling is required for SSL to work and often even +restricted to the operation intended for SSL; HTTPS. + +This is however not the only time proxy-tunneling might offer benefits to +you or your application. + +As tunneling opens a direct connection from your application to the remote +machine, it suddenly also re-introduces the ability to do non-HTTP +operations over a HTTP proxy. You can in fact use things such as FTP +upload or FTP custom commands this way. + +Again, this is often prevented by the administrators of proxies and is +rarely allowed. 
+ +Tell libcurl to use proxy tunneling like this: + + curl_easy_setopt(easyhandle, CURLOPT_HTTPPROXYTUNNEL, 1L); + +In fact, there might even be times when you want to do plain HTTP +operations using a tunnel like this, as it then enables you to operate on +the remote server instead of asking the proxy to do so. libcurl will not +stand in the way for such innovative actions either! + +.IP "Proxy Auto-Config" + +Netscape first came up with this. It is basically a web page (usually using a +\&.pac extension) with a Javascript that when executed by the browser with the +requested URL as input, returns information to the browser on how to connect +to the URL. The returned information might be "DIRECT" (which means no proxy +should be used), "PROXY host:port" (to tell the browser where the proxy for +this particular URL is) or "SOCKS host:port" (to direct the browser to a SOCKS +proxy). + +libcurl has no means to interpret or evaluate Javascript and thus it doesn't +support this. If you get yourself in a position where you face this nasty +invention, the following advice has been mentioned and used in the past: + +- Depending on the Javascript complexity, write up a script that translates it +to another language and execute that. + +- Read the Javascript code and rewrite the same logic in another language. + +- Implement a Javascript interpreter; people have successfully used the +Mozilla Javascript engine in the past. + +- Ask your admins to stop this, for a static proxy setup or similar. + +.SH "Persistence Is The Way to Happiness" + +Re-cycling the same easy handle several times when doing multiple requests is +the way to go. + +After each single \fIcurl_easy_perform(3)\fP operation, libcurl will keep the +connection alive and open. A subsequent request using the same easy handle to +the same host might just be able to use the already open connection! This +reduces network impact a lot. 
+ +Even if the connection is dropped, all connections involving SSL to the same +host again, will benefit from libcurl's session ID cache that drastically +reduces re-connection time. + +FTP connections that are kept alive save a lot of time, as the command- +response round-trips are skipped, and also you don't risk getting blocked +without permission to login again like on many FTP servers only allowing N +persons to be logged in at the same time. + +libcurl caches DNS name resolving results, to make lookups of a previously +looked up name a lot faster. + +Other interesting details that improve performance for subsequent requests +may also be added in the future. + +Each easy handle will attempt to keep the last few connections alive for a +while in case they are to be used again. You can set the size of this "cache" +with the CURLOPT_MAXCONNECTS option. Default is 5. There is very seldom any +point in changing this value, and if you think of changing this it is often +just a matter of thinking again. + +To force your upcoming request to not use an already existing connection (it +will even close one first if there happens to be one alive to the same host +you're about to operate on), you can do that by setting CURLOPT_FRESH_CONNECT +to 1. In a similar spirit, you can also forbid the upcoming request to be +"lying" around and possibly get re-used after the request by setting +CURLOPT_FORBID_REUSE to 1. + +.SH "HTTP Headers Used by libcurl" +When you use libcurl to do HTTP requests, it'll pass along a series of headers +automatically. It might be good for you to know and understand these. You +can replace or remove them by using the CURLOPT_HTTPHEADER option. + +.IP "Host" +This header is required by HTTP 1.1 and even many 1.0 servers and should be +the name of the server we want to talk to. This includes the port number if +anything but default. + +.IP "Pragma" +\&"no-cache". Tells a possible proxy to not grab a copy from the cache but to +fetch a fresh one. 
+ +.IP "Accept" +\&"*/*". + +.IP "Expect" +When doing POST requests, libcurl sets this header to \&"100-continue" to ask +the server for an "OK" message before it proceeds with sending the data part +of the post. If the POSTed data amount is deemed "small", libcurl will not use +this header. + +.SH "Customizing Operations" +There is an ongoing development today where more and more protocols are built +upon HTTP for transport. This has obvious benefits as HTTP is a tested and +reliable protocol that is widely deployed and has excellent proxy-support. + +When you use one of these protocols, and even when doing other kinds of +programming you may need to change the traditional HTTP (or FTP or...) +manners. You may need to change words, headers or various data. + +libcurl is your friend here too. + +.IP CUSTOMREQUEST +If just changing the actual HTTP request keyword is what you want, like when +GET, HEAD or POST is not good enough for you, CURLOPT_CUSTOMREQUEST is there +for you. It is very simple to use: + + curl_easy_setopt(easyhandle, CURLOPT_CUSTOMREQUEST, "MYOWNREQUEST"); + +When using the custom request, you change the request keyword of the actual +request you are performing. Thus, by default you make a GET request but you can +also make a POST operation (as described before) and then replace the POST +keyword if you want to. You're the boss. + +.IP "Modify Headers" +HTTP-like protocols pass a series of headers to the server when doing the +request, and you're free to pass any amount of extra headers that you +think fit. 
Adding headers is this easy: + +.nf + struct curl_slist *headers=NULL; /* init to NULL is important */ + + headers = curl_slist_append(headers, "Hey-server-hey: how are you?"); + headers = curl_slist_append(headers, "X-silly-content: yes"); + + /* pass our list of custom made headers */ + curl_easy_setopt(easyhandle, CURLOPT_HTTPHEADER, headers); + + curl_easy_perform(easyhandle); /* transfer http */ + + curl_slist_free_all(headers); /* free the header list */ +.fi + +\&... and if you think some of the internally generated headers, such as +Accept: or Host: don't contain the data you want them to contain, you can +replace them by simply setting them too: + +.nf + headers = curl_slist_append(headers, "Accept: Agent-007"); + headers = curl_slist_append(headers, "Host: munged.host.line"); +.fi + +.IP "Delete Headers" +If you replace an existing header with one with no contents, you will prevent +the header from being sent. For instance, if you want to completely prevent the +\&"Accept:" header from being sent, you can disable it with code similar to this: + + headers = curl_slist_append(headers, "Accept:"); + +Both replacing and canceling internal headers should be done with careful +consideration and you should be aware that you may violate the HTTP protocol +when doing so. + +.IP "Enforcing chunked transfer-encoding" + +By making sure a request uses the custom header "Transfer-Encoding: chunked" +when doing a non-GET HTTP operation, libcurl will switch over to "chunked" +upload, even though the size of the data to upload might be known. By default, +libcurl usually switches over to chunked upload automatically if the upload +data size is unknown. + +.IP "HTTP Version" + +All HTTP requests include the version number to tell the server which version +we support. libcurl speaks HTTP 1.1 by default. 
Some very old servers don't +like getting 1.1-requests and when dealing with stubborn old things like that, +you can tell libcurl to use 1.0 instead by doing something like this: + + curl_easy_setopt(easyhandle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0); + +.IP "FTP Custom Commands" + +Not all protocols are HTTP-like, and thus the above may not help you when +you want to make, for example, your FTP transfers to behave differently. + +Sending custom commands to a FTP server means that you need to send the +commands exactly as the FTP server expects them (RFC959 is a good guide +here), and you can only use commands that work on the control-connection +alone. All kinds of commands that require data interchange and thus need +a data-connection must be left to libcurl's own judgement. Also be aware +that libcurl will do its very best to change directory to the target +directory before doing any transfer, so if you change directory (with CWD +or similar) you might confuse libcurl and then it might not attempt to +transfer the file in the correct remote directory. + +A little example that deletes a given file before an operation: + +.nf + headers = curl_slist_append(headers, "DELE file-to-remove"); + + /* pass the list of custom commands to the handle */ + curl_easy_setopt(easyhandle, CURLOPT_QUOTE, headers); + + curl_easy_perform(easyhandle); /* transfer ftp data! */ + + curl_slist_free_all(headers); /* free the header list */ +.fi + +If you would instead want this operation (or chain of operations) to happen +_after_ the data transfer took place the option to \fIcurl_easy_setopt(3)\fP +would instead be called CURLOPT_POSTQUOTE and used the exact same way. + +The custom FTP command will be issued to the server in the same order they are +added to the list, and if a command gets an error code returned back from the +server, no more commands will be issued and libcurl will bail out with an +error code (CURLE_QUOTE_ERROR). 
Note that if you use CURLOPT_QUOTE to send +commands before a transfer, no transfer will actually take place when a quote +command has failed. + +If you set the CURLOPT_HEADER to 1, you will tell libcurl to get +information about the target file and output "headers" about it. The headers +will be in "HTTP-style", looking like they do in HTTP. + +The option to enable headers or to run custom FTP commands may be useful to +combine with CURLOPT_NOBODY. If this option is set, no actual file content +transfer will be performed. + +.IP "FTP Custom CUSTOMREQUEST" +If you do want to list the contents of a FTP directory using your own defined FTP +command, CURLOPT_CUSTOMREQUEST will do just that. "NLST" is the default one +for listing directories but you're free to pass in your idea of a good +alternative. + +.SH "Cookies Without Chocolate Chips" +In the HTTP sense, a cookie is a name with an associated value. A server sends +the name and value to the client, and expects it to get sent back on every +subsequent request to the server that matches the particular conditions +set. The conditions include that the domain name and path match and that the +cookie hasn't become too old. + +In real-world cases, servers send new cookies to replace existing ones to +update them. Servers use cookies to "track" users and to keep "sessions". + +Cookies are sent from server to clients with the header Set-Cookie: and +they're sent from clients to servers with the Cookie: header. + +To just send whatever cookie you want to a server, you can use CURLOPT_COOKIE +to set a cookie string like this: + + curl_easy_setopt(easyhandle, CURLOPT_COOKIE, "name1=var1; name2=var2;"); + +In many cases, that is not enough. You might want to dynamically save +whatever cookies the remote server passes to you, and make sure those cookies +are then used accordingly on later requests. 
+ +One way to do this, is to save all headers you receive in a plain file and +when you make a request, you tell libcurl to read the previous headers to +figure out which cookies to use. Set the header file to read cookies from with +CURLOPT_COOKIEFILE. + +The CURLOPT_COOKIEFILE option also automatically enables the cookie parser in +libcurl. Until the cookie parser is enabled, libcurl will not parse or +understand incoming cookies and they will just be ignored. However, when the +parser is enabled the cookies will be understood and the cookies will be kept +in memory and used properly in subsequent requests when the same handle is +used. Many times this is enough, and you may not have to save the cookies to +disk at all. Note that the file you specify to CURLOPT_COOKIEFILE doesn't have +to exist to enable the parser, so a common way to just enable the parser and +not read any cookies is to use the name of a file you know doesn't exist. + +If you would rather use existing cookies that you've previously received with +your Netscape or Mozilla browsers, you can make libcurl use that cookie file +as input. The CURLOPT_COOKIEFILE is used for that too, as libcurl will +automatically find out what kind of file it is and act accordingly. + +Perhaps the most advanced cookie operation libcurl offers, is saving the +entire internal cookie state back into a Netscape/Mozilla formatted cookie +file. We call that the cookie-jar. When you set a file name with +CURLOPT_COOKIEJAR, that file name will be created and all received cookies +will be stored in it when \fIcurl_easy_cleanup(3)\fP is called. This enables +cookies to get passed on properly between multiple handles without any +information getting lost. + +.SH "FTP Peculiarities We Need" + +FTP transfers use a second TCP/IP connection for the data transfer. This is +usually a fact you can forget and ignore but at times this fact will come +back to haunt you. 
libcurl offers several different ways to customize how the +second connection is being made. + +libcurl can either connect to the server a second time or tell the server to +connect back to it. The first option is the default and it is also what works +best for all the people behind firewalls, NATs or IP-masquerading setups. +libcurl then tells the server to open up a new port and wait for a second +connection. This is by default attempted with EPSV first, and if that doesn't +work it tries PASV instead. (EPSV is an extension to the original FTP spec +and does not exist nor work on all FTP servers.) + +You can prevent libcurl from first trying the EPSV command by setting +CURLOPT_FTP_USE_EPSV to zero. + +In some cases, you will prefer to have the server connect back to you for the +second connection. This might be when the server is perhaps behind a firewall +or something and only allows connections on a single port. libcurl then +informs the remote server which IP address and port number to connect to. +This is made with the CURLOPT_FTPPORT option. If you set it to "-", libcurl +will use your system's "default IP address". If you want to use a particular +IP, you can set the full IP address, a host name to resolve to an IP address +or even a local network interface name that libcurl will get the IP address +from. + +When doing the "PORT" approach, libcurl will attempt to use the EPRT and the +LPRT before trying PORT, as they work with more protocols. You can disable +this behavior by setting CURLOPT_FTP_USE_EPRT to zero. + +.SH "Headers Equal Fun" + +Some protocols provide "headers", meta-data separated from the normal +data. These headers are by default not included in the normal data stream, +but you can make them appear in the data stream by setting CURLOPT_HEADER to +1. + +What might be even more useful, is libcurl's ability to separate the headers +from the data and thus make the callbacks differ. 
You can for example set a +different pointer to pass to the ordinary write callback by setting +CURLOPT_WRITEHEADER. + +Or, you can set an entirely separate function to receive the headers, by +using CURLOPT_HEADERFUNCTION. + +The headers are passed to the callback function one by one, and you can +depend on that fact. It makes it easier for you to add custom header parsers +etc. + +\&"Headers" for FTP transfers equal all the FTP server responses. They aren't +actually true headers, but in this case we pretend they are! ;-) + +.SH "Post Transfer Information" + + [ curl_easy_getinfo ] + +.SH "Security Considerations" + +The libcurl project takes security seriously. The library is written with +caution and precautions are taken to mitigate many kinds of risks encountered +while operating with potentially malicious servers on the Internet. It is a +powerful library, however, which allows application writers to make trade offs +between ease of writing and exposure to potential risky operations. If +used the right way, you can use libcurl to transfer data pretty safely. + +Many applications are used in closed networks where users and servers +can be trusted, but many others are used on arbitrary servers and are fed +input from potentially untrusted users. Following is a discussion about +some risks in the ways in which applications commonly use libcurl and +potential mitigations of those risks. It is by no means comprehensive, but +shows classes of attacks that robust applications should consider. The +Common Weakness Enumeration project at http://cwe.mitre.org/ is a good +reference for many of these and similar types of weaknesses of which +application writers should be aware. + +.IP "Command Lines" +If you use a command line tool (such as curl) that uses libcurl, and you give +options to the tool on the command line those options can very likely get read +by other users of your system when they use 'ps' or other tools to list +currently running processes. 
+ +To avoid this problem, never feed sensitive things to programs using command +line options. Write them to a protected file and use the \-K option to +avoid this. + +.IP ".netrc" +\&.netrc is a pretty handy file/feature that allows you to login quickly and +automatically to frequently visited sites. The file contains passwords in +clear text and is a real security risk. In some cases, your .netrc is also +stored in a home directory that is NFS mounted or used on another network +based file system, so the clear text password will fly through your network +every time anyone reads that file! + +To avoid this problem, don't use .netrc files and never store passwords in +plain text anywhere. + +.IP "Clear Text Passwords" +Many of the protocols libcurl supports send name and password unencrypted as +clear text (HTTP Basic authentication, FTP, TELNET etc). It is very easy for +anyone on your network or a network nearby yours to just fire up a network +analyzer tool and eavesdrop on your passwords. Don't let the fact that HTTP +Basic uses base64 encoded passwords fool you. They may not look readable at a +first glance, but they are very easily "deciphered" by anyone within seconds. + +To avoid this problem, use HTTP authentication methods or other protocols that +don't let snoopers see your password: HTTP with Digest, NTLM or GSS +authentication, HTTPS, FTPS, SCP, SFTP and FTP-Kerberos are a few examples. + +.IP "Redirects" +The CURLOPT_FOLLOWLOCATION option automatically follows HTTP redirects sent +by a remote server. These redirects can refer to any kind of URL, not just +HTTP. A redirect to a file: URL would cause libcurl to read (or write) +arbitrary files from the local filesystem. If the application returns +the data back to the user (as would happen in some kinds of CGI scripts), +an attacker could leverage this to read otherwise forbidden data (e.g. +file://localhost/etc/passwd). 
+ +If authentication credentials are stored in the ~/.netrc file, or Kerberos +is in use, any other URL type (not just file:) that requires +authentication is also at risk. A redirect such as +ftp://some-internal-server/private-file would then return data even when +the server is password protected. + +In the same way, if an unencrypted SSH private key has been configured for +the user running the libcurl application, SCP: or SFTP: URLs could access +password or private-key protected resources, +e.g. sftp://user@some-internal-server/etc/passwd + +The CURLOPT_REDIR_PROTOCOLS and CURLOPT_NETRC options can be used to +mitigate against this kind of attack. + +A redirect can also specify a location available only on the machine running +libcurl, including servers hidden behind a firewall from the attacker. +e.g. http://127.0.0.1/ or http://intranet/delete-stuff.cgi?delete=all or +tftp://bootp-server/pc-config-data + +Apps can mitigate against this by disabling CURLOPT_FOLLOWLOCATION and +handling redirects itself, sanitizing URLs as necessary. Alternately, an +app could leave CURLOPT_FOLLOWLOCATION enabled but set CURLOPT_REDIR_PROTOCOLS +and install a CURLOPT_OPENSOCKETFUNCTION callback function in which addresses +are sanitized before use. + +.IP "Private Resources" +A user who can control the DNS server of a domain being passed in within +a URL can change the address of the host to a local, private address +which the libcurl application will then use. e.g. The innocuous URL +http://fuzzybunnies.example.com/ could actually resolve to the IP address +of a server behind a firewall, such as 127.0.0.1 or 10.1.2.3 +Apps can mitigate against this by setting a CURLOPT_OPENSOCKETFUNCTION +and checking the address before a connection. + +All the malicious scenarios regarding redirected URLs apply just as well +to non-redirected URLs, if the user is allowed to specify an arbitrary URL +that could point to a private resource. 
For example, a web app providing +a translation service might happily translate file://localhost/etc/passwd +and display the result. Apps can mitigate against this with the +CURLOPT_PROTOCOLS option as well as by similar mitigation techniques for +redirections. + +A malicious FTP server could in response to the PASV command return an +IP address and port number for a server local to the app running libcurl +but behind a firewall. Apps can mitigate against this by using the +CURLOPT_FTP_SKIP_PASV_IP option or CURLOPT_FTPPORT. + +.IP Uploads +When uploading, a redirect can cause a local (or remote) file to be +overwritten. Apps must not allow any unsanitized URL to be passed in +for uploads. Also, CURLOPT_FOLLOWLOCATION should not be used on uploads. +Instead, the app should handle redirects itself, sanitizing each URL first. + +.IP Authentication +Use of CURLOPT_UNRESTRICTED_AUTH could cause authentication information to +be sent to an unknown second server. Apps can mitigate against this +by disabling CURLOPT_FOLLOWLOCATION and handling redirects itself, +sanitizing where necessary. + +Use of the CURLAUTH_ANY option to CURLOPT_HTTPAUTH could result in user +name and password being sent in clear text to an HTTP server. Instead, +use CURLAUTH_ANYSAFE which ensures that the password is encrypted over +the network, or else fail the request. + +Use of the CURLUSESSL_TRY option to CURLOPT_USE_SSL could result in user +name and password being sent in clear text to an FTP server. Instead, +use CURLUSESSL_CONTROL to ensure that an encrypted connection is used or +else fail the request. + +.IP Cookies +If cookies are enabled and cached, then a user could craft a URL which +performs some malicious action to a site whose authentication is already +stored in a cookie. e.g. http://mail.example.com/delete-stuff.cgi?delete=all +Apps can mitigate against this by disabling cookies or clearing them +between requests. 
+ +.IP "Dangerous URLs" +SCP URLs can contain raw commands within the scp: URL, which is a side effect +of how the SCP protocol is designed. e.g. +scp://user:pass@host/a;date >/tmp/test; +Apps must not allow unsanitized SCP: URLs to be passed in for downloads. + +.IP "Denial of Service" +A malicious server could cause libcurl to effectively hang by sending +a trickle of data through, or even no data at all but just keeping the TCP +connection open. This could result in a denial-of-service attack. The +CURLOPT_TIMEOUT and/or CURLOPT_LOW_SPEED_LIMIT options can be used to +mitigate against this. + +A malicious server could cause libcurl to effectively hang by starting to +send data, then severing the connection without cleanly closing the +TCP connection. The app could install a CURLOPT_SOCKOPTFUNCTION callback +function and set the TCP SO_KEEPALIVE option to mitigate against this. +Setting one of the timeout options would also work against this attack. + +A malicious server could cause libcurl to download an infinite amount of +data, potentially causing all of memory or disk to be filled. Setting +the CURLOPT_MAXFILESIZE_LARGE option is not sufficient to guard against this. +Instead, the app should monitor the amount of data received within the +write or progress callback and abort once the limit is reached. + +A malicious HTTP server could cause an infinite redirection loop, causing a +denial-of-service. This can be mitigated by using the CURLOPT_MAXREDIRS +option. + +.IP "Arbitrary Headers" +User-supplied data must be sanitized when used in options like +CURLOPT_USERAGENT, CURLOPT_HTTPHEADER, CURLOPT_POSTFIELDS and others that +are used to generate structured data. Characters like embedded carriage +returns or ampersands could allow the user to create additional headers or +fields that could cause malicious transactions. + +.IP "Server Certificates" +A secure application should never use the CURLOPT_SSL_VERIFYPEER option to +disable certificate validation. 
There are numerous attacks that are enabled +by apps that fail to properly validate server TLS/SSL certificates, +thus enabling a malicious server to spoof a legitimate one. HTTPS without +validated certificates is potentially as insecure as a plain HTTP connection. + +.IP "Showing What You Do" +On a related issue, be aware that even in situations like when you have +problems with libcurl and ask someone for help, everything you reveal in order +to get best possible help might also impose certain security related +risks. Host names, user names, paths, operating system specifics, etc (not to +mention passwords of course) may in fact be used by intruders to gain +additional information of a potential target. + +To avoid this problem, you must of course use your common sense. Often, you +can just edit out the sensitive data or just search/replace your true +information with faked data. + +.SH "Multiple Transfers Using the multi Interface" + +The easy interface as described in detail in this document is a synchronous +interface that transfers one file at a time and doesn't return until it is +done. + +The multi interface, on the other hand, allows your program to transfer +multiple files in both directions at the same time, without forcing you +to use multiple threads. The name might make it seem that the multi +interface is for multi-threaded programs, but the truth is almost the +reverse. The multi interface can allow a single-threaded application +to perform the same kinds of multiple, simultaneous transfers that +multi-threaded programs can perform. It allows many of the benefits +of multi-threaded transfers without the complexity of managing and +synchronizing many threads. + +To use this interface, you are better off if you first understand the basics +of how to use the easy interface. The multi interface is simply a way to make +multiple transfers at the same time by adding up multiple easy handles into +a "multi stack". 
+ +You create the easy handles you want and you set all the options just like you +have been told above, and then you create a multi handle with +\fIcurl_multi_init(3)\fP and add all those easy handles to that multi handle +with \fIcurl_multi_add_handle(3)\fP. + +When you've added the handles you have for the moment (you can still add new +ones at any time), you start the transfers by calling +\fIcurl_multi_perform(3)\fP. + +\fIcurl_multi_perform(3)\fP is asynchronous. It will only execute as little as +possible and then return back control to your program. It is designed to never +block. If it returns CURLM_CALL_MULTI_PERFORM you better call it again soon, +as that is a signal that it still has local data to send or remote data to +receive. + +The best usage of this interface is when you do a select() on all possible +file descriptors or sockets to know when to call libcurl again. This also +makes it easy for you to wait and respond to actions on your own application's +sockets/handles. You figure out what to select() for by using +\fIcurl_multi_fdset(3)\fP, that fills in a set of fd_set variables for you +with the particular file descriptors libcurl uses for the moment. + +When you then call select(), it'll return when one of the file handles signal +action and you then call \fIcurl_multi_perform(3)\fP to allow libcurl to do +what it wants to do. Take note that libcurl does also feature some time-out +code so we advise you to never use very long timeouts on select() before you +call \fIcurl_multi_perform(3)\fP, which thus should be called unconditionally +every now and then even if none of its file descriptors have signaled +ready. Another precaution you should use: always call +\fIcurl_multi_fdset(3)\fP immediately before the select() call since the +current set of file descriptors may change when calling a curl function. 
+ +If you want to stop the transfer of one of the easy handles in the stack, you +can use \fIcurl_multi_remove_handle(3)\fP to remove individual easy +handles. Remember that easy handles should be \fIcurl_easy_cleanup(3)\fPed. + +When a transfer within the multi stack has finished, the counter of running +transfers (as filled in by \fIcurl_multi_perform(3)\fP) will decrease. When +the number reaches zero, all transfers are done. + +\fIcurl_multi_info_read(3)\fP can be used to get information about completed +transfers. It then returns the CURLcode for each easy transfer, to allow you +to figure out success on each individual transfer. + +.SH "SSL, Certificates and Other Tricks" + + [ seeding, passwords, keys, certificates, ENGINE, ca certs ] + +.SH "Sharing Data Between Easy Handles" + + [ fill in ] + +.SH "Footnotes" + +.IP "[1]" +libcurl 7.10.3 and later have the ability to switch over to chunked +Transfer-Encoding in cases where HTTP uploads are done with data of an unknown +size. +.IP "[2]" +This happens on Windows machines when libcurl is built and used as a +DLL. However, you can still do this on Windows if you link with a static +library. +.IP "[3]" +The curl-config tool is generated at build-time (on UNIX-like systems) and +should be installed with the 'make install' or similar instruction that +installs the library, header files, man pages etc. +.IP "[4]" +This behavior was different in versions before 7.17.0, where strings had to +remain valid past the end of the \fIcurl_easy_setopt(3)\fP call. diff --git a/usr/share/man/man3/libcurl.3 b/usr/share/man/man3/libcurl.3 new file mode 100755 index 000000000..90465f71e --- /dev/null +++ b/usr/share/man/man3/libcurl.3 @@ -0,0 +1,203 @@ +.\" $Id: libcurl.3,v 1.14 2008-12-22 13:07:14 bagder Exp $ +.\" +.TH libcurl 3 "19 March 2002" "libcurl 7.9.6" "libcurl overview" +.SH NAME +libcurl \- client-side URL transfers +.SH DESCRIPTION +This is a short overview on how to use libcurl in your C programs. 
There are +specific man pages for each function mentioned in here. There are also the +\fIlibcurl-easy(3)\fP man page, the \fIlibcurl-multi(3)\fP man page, the +\fIlibcurl-share(3)\fP man page and the \fIlibcurl-tutorial(3)\fP man page for +in-depth understanding on how to program with libcurl. + +There are more than thirty custom bindings available that bring libcurl access +to your favourite language. Look elsewhere for documentation on those. + +libcurl has a global constant environment that you must set up and +maintain while using libcurl. This essentially means you call +\fIcurl_global_init(3)\fP at the start of your program and +\fIcurl_global_cleanup(3)\fP at the end. See GLOBAL CONSTANTS below +for details. + +To transfer files, you always set up an "easy handle" using +\fIcurl_easy_init(3)\fP, but when you want the file(s) transferred you have +the option of using the "easy" interface, or the "multi" interface. + +The easy interface is a synchronous interface with which you call +\fIcurl_easy_perform(3)\fP and let it perform the transfer. When it is +completed, the function returns and you can continue. More details are found in +the \fIlibcurl-easy(3)\fP man page. + +The multi interface on the other hand is an asynchronous interface, that you +call and that performs only a little piece of the transfer on each invoke. It +is perfect if you want to do things while the transfer is in progress, or +similar. The multi interface allows you to select() on libcurl action, and +even to easily download multiple files simultaneously using a single thread. See further details in the \fIlibcurl-multi(3)\fP man page. + +You can have multiple easy handles share certain data, even if they are used +in different threads. This magic is setup using the share interface, as +described in the \fIlibcurl-share(3)\fP man page. 
+ +There is also a series of other helpful functions to use, including these: +.RS +.IP curl_version_info() +gets detailed libcurl (and other used libraries) version info +.IP curl_getdate() +converts a date string to time_t +.IP curl_easy_getinfo() +gets information about a performed transfer +.IP curl_formadd() +helps building an HTTP form POST +.IP curl_formfree() +frees a list built with \fIcurl_formadd(3)\fP +.IP curl_slist_append() +builds a linked list +.IP curl_slist_free_all() +frees a whole curl_slist +.RE + +.SH "LINKING WITH LIBCURL" +On unix-like machines, there's a tool named curl-config that gets installed +with the rest of the curl stuff when 'make install' is performed. + +curl-config is added to make it easier for applications to link with libcurl +and developers to learn about libcurl and how to use it. + +Run 'curl-config --libs' to get the (additional) linker options you need to +link with the particular version of libcurl you've installed. See the +\fIcurl-config(1)\fP man page for further details. + +Unix-like operating systems that ship libcurl as part of their distributions +often don't provide the curl-config tool, but simply install the library and +headers in the common path for this purpose. + +.SH "LIBCURL SYMBOL NAMES" +All public functions in the libcurl interface are prefixed with 'curl_' (with +a lowercase c). You can find other functions in the library source code, but +other prefixes indicate that the functions are private and may change without +further notice in the next release. + +Only use documented functions and functionality! +.SH "PORTABILITY" +libcurl works +.B exactly +the same, on any of the platforms it compiles and builds on. +.SH "THREADS" +Never ever call curl-functions simultaneously using the same handle from +several threads. libcurl is thread-safe and can be used in any number of +threads, but you must use separate curl handles if you want to use libcurl in +more than one thread simultaneously. 
+ +The global environment functions are not thread-safe. See GLOBAL CONSTANTS +below for details. + +.SH "PERSISTENT CONNECTIONS" +Persistent connections means that libcurl can re-use the same connection for +several transfers, if the conditions are right. + +libcurl will \fBalways\fP attempt to use persistent connections. Whenever you +use \fIcurl_easy_perform(3)\fP or \fIcurl_multi_perform(3)\fP, libcurl will +attempt to use an existing connection to do the transfer, and if none exists +it'll open a new one that will be subject for re-use on a possible following +call to \fIcurl_easy_perform(3)\fP or \fIcurl_multi_perform(3)\fP. + +To allow libcurl to take full advantage of persistent connections, you should +do as many of your file transfers as possible using the same curl handle. When +you call \fIcurl_easy_cleanup(3)\fP, all the possibly open connections held by +libcurl will be closed and forgotten. + +Note that the options set with \fIcurl_easy_setopt(3)\fP will be used on +every repeated \fIcurl_easy_perform(3)\fP call. + +.SH "GLOBAL CONSTANTS" +There are a variety of constants that libcurl uses, mainly through its +internal use of other libraries, which are too complicated for the +library loader to set up. Therefore, a program must call a library +function after the program is loaded and running to finish setting up +the library code. For example, when libcurl is built for SSL +capability via the GNU TLS library, there is an elaborate tree inside +that library that describes the SSL protocol. + +\fIcurl_global_init()\fP is the function that you must call. This may +allocate resources (e.g. the memory for the GNU TLS tree mentioned +above), so the companion function \fIcurl_global_cleanup()\fP releases +them. 
+ +The basic rule for constructing a program that uses libcurl is this: +Call \fIcurl_global_init()\fP, with a \fICURL_GLOBAL_ALL\fP argument, +immediately after the program starts, while it is still only one +thread and before it uses libcurl at all. Call +\fIcurl_global_cleanup()\fP immediately before the program exits, when +the program is again only one thread and after its last use of +libcurl. + +You can call both of these multiple times, as long as all calls meet +these requirements and the number of calls to each is the same. + +It isn't actually required that the functions be called at the beginning +and end of the program -- that's just usually the easiest way to do it. +It \fIis\fP required that the functions be called when no other thread +in the program is running. + +These global constant functions are \fInot thread safe\fP, so you must +not call them when any other thread in the program is running. It +isn't good enough that no other thread is using libcurl at the time, +because these functions internally call similar functions of other +libraries, and those functions are similarly thread-unsafe. You can't +generally know what these libraries are, or whether other threads are +using them. + +The global constant situation merits special consideration when the +code you are writing to use libcurl is not the main program, but rather +a modular piece of a program, e.g. another library. As a module, +your code doesn't know about other parts of the program -- it doesn't +know whether they use libcurl or not. And its code doesn't necessarily +run at the start and end of the whole program. + +A module like this must have global constant functions of its own, +just like \fIcurl_global_init()\fP and \fIcurl_global_cleanup()\fP. +The module thus has control at the beginning and end of the program +and has a place to call the libcurl functions. 
Note that if multiple +modules in the program use libcurl, they all will separately call the +libcurl functions, and that's OK because only the first +\fIcurl_global_init()\fP and the last \fIcurl_global_cleanup()\fP in a +program change anything. (libcurl uses a reference count in static +memory). + +In a C++ module, it is common to deal with the global constant +situation by defining a special class that represents the global +constant environment of the module. A program always has exactly one +object of the class, in static storage. That way, the program +automatically calls the constructor of the object as the program +starts up and the destructor as it terminates. As the author of this +libcurl-using module, you can make the constructor call +\fIcurl_global_init()\fP and the destructor call +\fIcurl_global_cleanup()\fP and satisfy libcurl's requirements without +your user having to think about it. + +\fIcurl_global_init()\fP has an argument that tells what particular +parts of the global constant environment to set up. In order to +successfully use any value except \fICURL_GLOBAL_ALL\fP (which says to +set up the whole thing), you must have specific knowledge of internal +workings of libcurl and all other parts of the program of which it is +part. + +A special part of the global constant environment is the identity of +the memory allocator. \fIcurl_global_init()\fP selects the system +default memory allocator, but you can use \fIcurl_global_init_mem()\fP +to supply one of your own. However, there is no way to use +\fIcurl_global_init_mem()\fP in a modular program -- all modules in +the program that might use libcurl would have to agree on one +allocator. + +There is a failsafe in libcurl that makes it usable in simple +situations without you having to worry about the global constant +environment at all: \fIcurl_easy_init()\fP sets up the environment +itself if it hasn't been done yet. 
The resources it acquires to do so +get released by the operating system automatically when the program +exits. + +This failsafe feature exists mainly for backward compatibility because +there was a time when the global functions didn't exist. Because it +is sufficient only in the simplest of programs, it is not recommended +for any program to rely on it. diff --git a/usr/share/man/man3/libxml.3 b/usr/share/man/man3/libxml.3 new file mode 100755 index 000000000..b3e9fef06 --- /dev/null +++ b/usr/share/man/man3/libxml.3 @@ -0,0 +1,71 @@ +.TH libxml 3 "12 April 2000" +.SH NAME +libxml \- library used to parse XML files +.SH DESCRIPTION +The +.I libxml +library is used to parse XML files. +Its internal document representation is as close as possible to the +.I DOM +(Document Object Model) interface, +an API for accessing XML or HTML structured documents. +.LP +The +.I libxml +library also has a +.IR SAX -like +interface, +which is designed to be compatible with +.IR expat (1). +NOTE: +.IR SAX , +the Simple API for XML, +is a standard interface for event-based XML parsing, +developed collaboratively by the members of the XML-DEV mailing list, +currently hosted by OASIS. +The +.I expat +library is a XML 1.0 parser written in C, +which aims to be fully conforming. +It is currently not a validating XML processor. +.LP +The +.I libxml +library now includes a nearly complete +.I XPath +implementation. +The +.I XPath +(XML Path Language) is a language for addressing parts of an +XML document, +designed to be used by both +.I XSLT +and +.IR XPointer . +.LP +The +.I libxml +library exports Push and Pull type parser interfaces for both XML and +.IR html . +.SH FILES +.TP 2.2i +.B /depot/lib/libxml_2.0.0/libxml.a +static library +.TP +.B /depot/lib/libxml_2.0.0/libxml.so +shared library +.TP +.B /depot/package/libxml_2.0.0/bin/xmllint +binary application for parsing XML files +.SH AUTHORS +Daniel Veillard (daniel@veillard.com). +Red Hat Inc. 
+Manual page by Ziying Sherwin (sherwin@nlm.nih.gov), +Lister Hill National Center for Biomedical Communications, +U.S. National Library of Medicine. +.SH SEE ALSO +.IR xmllint (1), +.IR libxslt (3), +.IR libexslt (3), +.IR xsltproc (1) +.\" end of manual page diff --git a/usr/share/man/man3/pcre.3 b/usr/share/man/man3/pcre.3 new file mode 100755 index 000000000..4eda404cc --- /dev/null +++ b/usr/share/man/man3/pcre.3 @@ -0,0 +1,218 @@ +.TH PCRE 3 "08 January 2014" "PCRE 8.35" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH INTRODUCTION +.rs +.sp +The PCRE library is a set of functions that implement regular expression +pattern matching using the same syntax and semantics as Perl, with just a few +differences. Some features that appeared in Python and PCRE before they +appeared in Perl are also available using the Python syntax, there is some +support for one or two .NET and Oniguruma syntax items, and there is an option +for requesting some minor changes that give better JavaScript compatibility. +.P +Starting with release 8.30, it is possible to compile two separate PCRE +libraries: the original, which supports 8-bit character strings (including +UTF-8 strings), and a second library that supports 16-bit character strings +(including UTF-16 strings). The build process allows either one or both to be +built. The majority of the work to make this possible was done by Zoltan +Herczeg. +.P +Starting with release 8.32 it is possible to compile a third separate PCRE +library that supports 32-bit character strings (including UTF-32 strings). The +build process allows any combination of the 8-, 16- and 32-bit libraries. The +work to make this possible was done by Christian Persch. +.P +The three libraries contain identical sets of functions, except that the names +in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the +names in the 32-bit library start with \fBpcre32_\fP instead of \fBpcre_\fP. 
To +avoid over-complication and reduce the documentation maintenance load, most of +the documentation describes the 8-bit library, with the differences for the +16-bit and 32-bit libraries described separately in the +.\" HREF +\fBpcre16\fP +and +.\" HREF +\fBpcre32\fP +.\" +pages. References to functions or structures of the form \fIpcre[16|32]_xxx\fP +should be read as meaning "\fIpcre_xxx\fP when using the 8-bit library, +\fIpcre16_xxx\fP when using the 16-bit library, or \fIpcre32_xxx\fP when using +the 32-bit library". +.P +The current implementation of PCRE corresponds approximately with Perl 5.12, +including support for UTF-8/16/32 encoded strings and Unicode general category +properties. However, UTF-8/16/32 and Unicode support has to be explicitly +enabled; it is not the default. The Unicode tables correspond to Unicode +release 6.3.0. +.P +In addition to the Perl-compatible matching function, PCRE contains an +alternative function that matches the same compiled patterns in a different +way. In certain circumstances, the alternative function has some advantages. +For a discussion of the two matching algorithms, see the +.\" HREF +\fBpcrematching\fP +.\" +page. +.P +PCRE is written in C and released as a C library. A number of people have +written wrappers and interfaces of various kinds. In particular, Google Inc. +have provided a comprehensive C++ wrapper for the 8-bit library. This is now +included as part of the PCRE distribution. The +.\" HREF +\fBpcrecpp\fP +.\" +page has details of this interface. Other people's contributions can be found +in the \fIContrib\fP directory at the primary FTP site, which is: +.sp +.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre"> +.\" </a> +ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre +.\" +.P +Details of exactly which Perl regular expression features are and are not +supported by PCRE are given in separate documents. 
See the +.\" HREF +\fBpcrepattern\fP +.\" +and +.\" HREF +\fBpcrecompat\fP +.\" +pages. There is a syntax summary in the +.\" HREF +\fBpcresyntax\fP +.\" +page. +.P +Some features of PCRE can be included, excluded, or changed when the library is +built. The +.\" HREF +\fBpcre_config()\fP +.\" +function makes it possible for a client to discover which features are +available. The features themselves are described in the +.\" HREF +\fBpcrebuild\fP +.\" +page. Documentation about building PCRE for various operating systems can be +found in the +.\" HTML <a href="README.txt"> +.\" </a> +\fBREADME\fP +.\" +and +.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt"> +.\" </a> +\fBNON-AUTOTOOLS-BUILD\fP +.\" +files in the source distribution. +.P +The libraries contain a number of undocumented internal functions and data +tables that are used by more than one of the exported external functions, but +which are not intended for use by external callers. Their names all begin with +"_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name +clashes. In some environments, it is possible to control which external symbols +are exported when a shared library is built, and in these cases the +undocumented symbols are not exported. +. +. +.SH "SECURITY CONSIDERATIONS" +.rs +.sp +If you are using PCRE in a non-UTF application that permits users to supply +arbitrary patterns for compilation, you should be aware of a feature that +allows users to turn on UTF support from within a pattern, provided that PCRE +was built with UTF support. For example, an 8-bit pattern that begins with +"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and +subjects as strings of UTF-8 characters instead of individual 8-bit characters. +This causes both the pattern and any data against which it is matched to be +checked for UTF-8 validity. If the data string is very long, such a check might +use sufficiently many resources as to cause your application to lose +performance. 
+.P +One way of guarding against this possibility is to use the +\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF. +Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at +compile time. This causes a compile-time error if a pattern contains a +UTF-setting sequence. +.P +If your application is one that supports UTF, be aware that validity checking +can take time. If the same data string is to be matched many times, you can use +the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to +save redundant checks. +.P +Another way that performance can be hit is by running a pattern that has a very +large search tree against a string that will never match. Nested unlimited +repeats in a pattern are a common example. PCRE provides some protection +against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the +.\" HREF +\fBpcreapi\fP +.\" +page. +. +. +.SH "USER DOCUMENTATION" +.rs +.sp +The user documentation for PCRE comprises a number of different sections. In +the "man" format, each of these is a separate "man page". In the HTML format, +each is a separate page, linked from the index page. In the plain text format, +the descriptions of the \fBpcregrep\fP and \fBpcretest\fP programs are in files +called \fBpcregrep.txt\fP and \fBpcretest.txt\fP, respectively. The remaining +sections, except for the \fBpcredemo\fP section (which is a program listing), +are concatenated in \fBpcre.txt\fP, for ease of searching. 
The sections are as +follows: +.sp + pcre this document + pcre-config show PCRE installation configuration information + pcre16 details of the 16-bit library + pcre32 details of the 32-bit library + pcreapi details of PCRE's native C API + pcrebuild building PCRE + pcrecallout details of the callout feature + pcrecompat discussion of Perl compatibility + pcrecpp details of the C++ wrapper for the 8-bit library + pcredemo a demonstration C program that uses PCRE + pcregrep description of the \fBpcregrep\fP command (8-bit only) + pcrejit discussion of the just-in-time optimization support + pcrelimits details of size and other limits + pcrematching discussion of the two matching algorithms + pcrepartial details of the partial matching facility +.\" JOIN + pcrepattern syntax and semantics of supported + regular expressions + pcreperform discussion of performance issues + pcreposix the POSIX-compatible C API for the 8-bit library + pcreprecompile details of saving and re-using precompiled patterns + pcresample discussion of the pcredemo program + pcrestack discussion of stack usage + pcresyntax quick syntax reference + pcretest description of the \fBpcretest\fP testing command + pcreunicode discussion of Unicode and UTF-8/16/32 support +.sp +In the "man" and HTML formats, there is also a short page for each C library +function, listing its arguments and results. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +.P +Putting an actual email address here seems to have been a spam magnet, so I've +taken it away. If you want to email me, use my two initials, followed by the +two digits 10, at the domain cam.ac.uk. +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 08 January 2014 +Copyright (c) 1997-2014 University of Cambridge. 
+.fi diff --git a/usr/share/man/man3/pcre16.3 b/usr/share/man/man3/pcre16.3 new file mode 100755 index 000000000..85126a679 --- /dev/null +++ b/usr/share/man/man3/pcre16.3 @@ -0,0 +1,371 @@ +.TH PCRE 3 "12 May 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.sp +.B #include <pcre.h> +. +. +.SH "PCRE 16-BIT API BASIC FUNCTIONS" +.rs +.sp +.nf +.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.sp +.B void pcre16_free_study(pcre16_extra *\fIextra\fP); +.sp +.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.sp +.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.fi +. +. 
+.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS" +.rs +.sp +.nf +.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP," +.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP," +.B " PCRE_SPTR16 *\fIstringptr\fP);" +.sp +.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIname\fP); +.sp +.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);" +.sp +.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " PCRE_SPTR16 *\fIstringptr\fP);" +.sp +.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);" +.sp +.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP); +.sp +.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP); +.fi +. +. 
+.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS" +.rs +.sp +.nf +.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP); +.sp +.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP); +.sp +.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP, +.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.sp +.B const unsigned char *pcre16_maketables(void); +.sp +.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.sp +.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP); +.sp +.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP); +.sp +.B const char *pcre16_version(void); +.sp +.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP, +.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.fi +. +. +.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS" +.rs +.sp +.nf +.B void *(*pcre16_malloc)(size_t); +.sp +.B void (*pcre16_free)(void *); +.sp +.B void *(*pcre16_stack_malloc)(size_t); +.sp +.B void (*pcre16_stack_free)(void *); +.sp +.B int (*pcre16_callout)(pcre16_callout_block *); +.fi +. +. +.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION" +.rs +.sp +.nf +.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP, +.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP," +.B " int \fIkeep_boms\fP);" +.fi +. +. +.SH "THE PCRE 16-BIT LIBRARY" +.rs +.sp +Starting with release 8.30, it is possible to compile a PCRE library that +supports 16-bit character strings, including UTF-16 strings, as well as or +instead of the original 8-bit library. The majority of the work to make this +possible was done by Zoltan Herczeg. The two libraries contain identical sets +of functions, used in exactly the same way. Only the names of the functions and +the data types of their arguments and results are different. 
To avoid +over-complication and reduce the documentation maintenance load, most of the +PCRE documentation describes the 8-bit library, with only occasional references +to the 16-bit library. This page describes what is different when you use the +16-bit library. +.P +WARNING: A single application can be linked with both libraries, but you must +take care when processing any particular pattern to use functions from just one +library. For example, if you want to study a pattern that was compiled with +\fBpcre16_compile()\fP, you must do so with \fBpcre16_study()\fP, not +\fBpcre_study()\fP, and you must free the study data with +\fBpcre16_free_study()\fP. +. +. +.SH "THE HEADER FILE" +.rs +.sp +There is only one header file, \fBpcre.h\fP. It contains prototypes for all the +functions in all libraries, as well as definitions of flags, structures, error +codes, etc. +. +. +.SH "THE LIBRARY NAME" +.rs +.sp +In Unix-like systems, the 16-bit library is called \fBlibpcre16\fP, and can +normally be accessed by adding \fB-lpcre16\fP to the command for linking an +application that uses PCRE. +. +. +.SH "STRING TYPES" +.rs +.sp +In the 8-bit library, strings are passed to PCRE library functions as vectors +of bytes with the C type "char *". In the 16-bit library, strings are passed as +vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an +appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In +very many environments, "short int" is a 16-bit data type. When PCRE is built, +it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a +16-bit data type. If it is not, the build fails with an error message telling +the maintainer to modify the definition appropriately. +. +. +.SH "STRUCTURE TYPES" +.rs +.sp +The types of the opaque structures that are used for compiled 16-bit patterns +and JIT stacks are \fBpcre16\fP and \fBpcre16_jit_stack\fP respectively. 
The +type of the user-accessible structure that is returned by \fBpcre16_study()\fP +is \fBpcre16_extra\fP, and the type of the structure that is used for passing +data to a callout function is \fBpcre16_callout_block\fP. These structures +contain the same fields, with the same names, as their 8-bit counterparts. The +only difference is that pointers to character strings are 16-bit instead of +8-bit types. +. +. +.SH "16-BIT FUNCTIONS" +.rs +.sp +For every function in the 8-bit library there is a corresponding function in +the 16-bit library with a name that starts with \fBpcre16_\fP instead of +\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra +function, \fBpcre16_utf16_to_host_byte_order()\fP. This is a utility function +that converts a UTF-16 character string to host byte order if necessary. The +other 16-bit functions expect the strings they are passed to be in host byte +order. +.P +The \fIinput\fP and \fIoutput\fP arguments of +\fBpcre16_utf16_to_host_byte_order()\fP may point to the same address, that is, +conversion in place is supported. The output buffer must be at least as long as +the input. +.P +The \fIlength\fP argument specifies the number of 16-bit data units in the +input string; a negative value specifies a zero-terminated string. +.P +If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host +byte order. This may be changed by byte-order marks (BOMs) anywhere in the +string (commonly as the first character). +.P +If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it +points means that the input starts off in host byte order, otherwise the +opposite order is assumed. Again, BOMs in the string can change this. The final +byte order is passed back at the end of processing. +.P +If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied +into the output string. Otherwise they are discarded. 
+.P +The result of the function is the number of 16-bit units placed into the output +buffer, including the zero terminator if the string was zero-terminated. +. +. +.SH "SUBJECT STRING OFFSETS" +.rs +.sp +The lengths and starting offsets of subject strings must be specified in 16-bit +data units, and the offsets within subject strings that are returned by the +matching functions are also in 16-bit units rather than bytes. +. +. +.SH "NAMED SUBPATTERNS" +.rs +.sp +The name-to-number translation table that is maintained for named subpatterns +uses 16-bit characters. The \fBpcre16_get_stringtable_entries()\fP function +returns the length of each entry in the table as the number of 16-bit data +units. +. +. +.SH "OPTION NAMES" +.rs +.sp +There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK, +which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In +fact, these new options define the same bits in the options word. There is a +discussion about the +.\" HTML <a href="pcreunicode.html#utf16strings"> +.\" </a> +validity of UTF-16 strings +.\" +in the +.\" HREF +\fBpcreunicode\fP +.\" +page. +.P +For the \fBpcre16_config()\fP function there is an option PCRE_CONFIG_UTF16 +that returns 1 if UTF-16 support is configured, otherwise 0. If this option is +given to \fBpcre_config()\fP or \fBpcre32_config()\fP, or if the +PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to \fBpcre16_config()\fP, +the result is the PCRE_ERROR_BADOPTION error. +. +. +.SH "CHARACTER CODES" +.rs +.sp +In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the +same way as in 8-bit, non UTF-8 mode, except, of course, that they can range +from 0 to 0xffff instead of 0 to 0xff. Character types for characters less than +0xff can therefore be influenced by the locale in the same way as before. +Characters greater than 0xff have only one case, and no "type" (such as letter +or digit). 
+.P +In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with +the exception of values in the range 0xd800 to 0xdfff because those are +"surrogate" values that are used in pairs to encode values greater than 0xffff. +.P +A UTF-16 string can indicate its endianness by a special code known as a +byte-order mark (BOM). The PCRE functions do not handle this, expecting strings +to be in host byte order. A utility function called +\fBpcre16_utf16_to_host_byte_order()\fP is provided to help with this (see +above). +. +. +.SH "ERROR NAMES" +.rs +.sp +The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to +their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled +pattern is passed to a function that processes patterns in the other +mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to +\fBpcre16_exec()\fP. +.P +There are new error codes whose names begin with PCRE_UTF16_ERR for invalid +UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that +are described in the section entitled +.\" HTML <a href="pcreapi.html#badutf8reasons"> +.\" </a> +"Reason codes for invalid UTF-8 strings" +.\" +in the main +.\" HREF +\fBpcreapi\fP +.\" +page. The UTF-16 errors are: +.sp + PCRE_UTF16_ERR1 Missing low surrogate at end of string + PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE_UTF16_ERR3 Isolated low surrogate + PCRE_UTF16_ERR4 Non-character +. +. +.SH "ERROR TEXTS" +.rs +.sp +If there is an error while compiling a pattern, the error text that is passed +back by \fBpcre16_compile()\fP or \fBpcre16_compile2()\fP is still an 8-bit +character string, zero-terminated. +. +. +.SH "CALLOUTS" +.rs +.sp +The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to +a callout function point to 16-bit vectors. +. +. 
+.SH "TESTING" +.rs +.sp +The \fBpcretest\fP program continues to operate with 8-bit input and output +files, but it can be used for testing the 16-bit library. If it is run with the +command line option \fB-16\fP, patterns and subject strings are converted from +8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions +are used instead of the 8-bit ones. Returned 16-bit strings are converted to +8-bit for output. If both the 8-bit and the 32-bit libraries were not compiled, +\fBpcretest\fP defaults to 16-bit and the \fB-16\fP option is ignored. +.P +When PCRE is being built, the \fBRunTest\fP script that is called by "make +check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit, +16-bit and 32-bit libraries has been built, and runs the tests appropriately. +. +. +.SH "NOT SUPPORTED IN 16-BIT MODE" +.rs +.sp +Not all the features of the 8-bit library are available with the 16-bit +library. The C++ and POSIX wrapper functions support only the 8-bit library, +and the \fBpcregrep\fP program is at present 8-bit only. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 May 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcre16_assign_jit_stack.3 b/usr/share/man/man3/pcre16_assign_jit_stack.3 new file mode 120000 index 000000000..40c8775a7 --- /dev/null +++ b/usr/share/man/man3/pcre16_assign_jit_stack.3 @@ -0,0 +1 @@ +pcre_assign_jit_stack.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_compile.3 b/usr/share/man/man3/pcre16_compile.3 new file mode 120000 index 000000000..9364e7518 --- /dev/null +++ b/usr/share/man/man3/pcre16_compile.3 @@ -0,0 +1 @@ +pcre_compile.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_compile2.3 b/usr/share/man/man3/pcre16_compile2.3 new file mode 120000 index 000000000..2d6aff35c --- /dev/null +++ b/usr/share/man/man3/pcre16_compile2.3 @@ -0,0 +1 @@ +pcre_compile2.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_config.3 b/usr/share/man/man3/pcre16_config.3 new file mode 120000 index 000000000..db964059e --- /dev/null +++ b/usr/share/man/man3/pcre16_config.3 @@ -0,0 +1 @@ +pcre_config.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_copy_named_substring.3 b/usr/share/man/man3/pcre16_copy_named_substring.3 new file mode 120000 index 000000000..c15b97746 --- /dev/null +++ b/usr/share/man/man3/pcre16_copy_named_substring.3 @@ -0,0 +1 @@ +pcre_copy_named_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_copy_substring.3 b/usr/share/man/man3/pcre16_copy_substring.3 new file mode 120000 index 000000000..98d1f84db --- /dev/null +++ b/usr/share/man/man3/pcre16_copy_substring.3 @@ -0,0 +1 @@ +pcre_copy_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_dfa_exec.3 b/usr/share/man/man3/pcre16_dfa_exec.3 new file mode 120000 index 000000000..ed408df0b --- /dev/null +++ b/usr/share/man/man3/pcre16_dfa_exec.3 @@ -0,0 +1 @@ +pcre_dfa_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_exec.3 b/usr/share/man/man3/pcre16_exec.3 new file mode 120000 index 000000000..ebaa25275 --- /dev/null +++ b/usr/share/man/man3/pcre16_exec.3 @@ -0,0 +1 @@ +pcre_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_free_study.3 b/usr/share/man/man3/pcre16_free_study.3 new file mode 120000 index 000000000..4c53ea9f0 --- /dev/null +++ b/usr/share/man/man3/pcre16_free_study.3 @@ -0,0 +1 @@ +pcre_free_study.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_free_substring.3 b/usr/share/man/man3/pcre16_free_substring.3 new file mode 120000 index 000000000..48567bfb7 --- /dev/null +++ b/usr/share/man/man3/pcre16_free_substring.3 @@ -0,0 +1 @@ +pcre_free_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_free_substring_list.3 b/usr/share/man/man3/pcre16_free_substring_list.3 new file mode 120000 index 000000000..84b7b7e5c --- /dev/null +++ b/usr/share/man/man3/pcre16_free_substring_list.3 @@ -0,0 +1 @@ +pcre_free_substring_list.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_fullinfo.3 b/usr/share/man/man3/pcre16_fullinfo.3 new file mode 120000 index 000000000..b7386a985 --- /dev/null +++ b/usr/share/man/man3/pcre16_fullinfo.3 @@ -0,0 +1 @@ +pcre_fullinfo.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_get_named_substring.3 b/usr/share/man/man3/pcre16_get_named_substring.3 new file mode 120000 index 000000000..c095ca50a --- /dev/null +++ b/usr/share/man/man3/pcre16_get_named_substring.3 @@ -0,0 +1 @@ +pcre_get_named_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_get_stringnumber.3 b/usr/share/man/man3/pcre16_get_stringnumber.3 new file mode 120000 index 000000000..f8e81d32e --- /dev/null +++ b/usr/share/man/man3/pcre16_get_stringnumber.3 @@ -0,0 +1 @@ +pcre_get_stringnumber.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_get_stringtable_entries.3 b/usr/share/man/man3/pcre16_get_stringtable_entries.3 new file mode 120000 index 000000000..9f8cc4a78 --- /dev/null +++ b/usr/share/man/man3/pcre16_get_stringtable_entries.3 @@ -0,0 +1 @@ +pcre_get_stringtable_entries.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_get_substring.3 b/usr/share/man/man3/pcre16_get_substring.3 new file mode 120000 index 000000000..26383b5d3 --- /dev/null +++ b/usr/share/man/man3/pcre16_get_substring.3 @@ -0,0 +1 @@ +pcre_get_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_get_substring_list.3 b/usr/share/man/man3/pcre16_get_substring_list.3 new file mode 120000 index 000000000..2faa865f2 --- /dev/null +++ b/usr/share/man/man3/pcre16_get_substring_list.3 @@ -0,0 +1 @@ +pcre_get_substring_list.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_jit_exec.3 b/usr/share/man/man3/pcre16_jit_exec.3 new file mode 120000 index 000000000..59089adac --- /dev/null +++ b/usr/share/man/man3/pcre16_jit_exec.3 @@ -0,0 +1 @@ +pcre_jit_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_jit_stack_alloc.3 b/usr/share/man/man3/pcre16_jit_stack_alloc.3 new file mode 120000 index 000000000..dab43b82c --- /dev/null +++ b/usr/share/man/man3/pcre16_jit_stack_alloc.3 @@ -0,0 +1 @@ +pcre_jit_stack_alloc.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_jit_stack_free.3 b/usr/share/man/man3/pcre16_jit_stack_free.3 new file mode 120000 index 000000000..e92aa3100 --- /dev/null +++ b/usr/share/man/man3/pcre16_jit_stack_free.3 @@ -0,0 +1 @@ +pcre_jit_stack_free.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_maketables.3 b/usr/share/man/man3/pcre16_maketables.3 new file mode 120000 index 000000000..3b6308e2d --- /dev/null +++ b/usr/share/man/man3/pcre16_maketables.3 @@ -0,0 +1 @@ +pcre_maketables.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_pattern_to_host_byte_order.3 b/usr/share/man/man3/pcre16_pattern_to_host_byte_order.3 new file mode 120000 index 000000000..6540917e1 --- /dev/null +++ b/usr/share/man/man3/pcre16_pattern_to_host_byte_order.3 @@ -0,0 +1 @@ +pcre_pattern_to_host_byte_order.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_refcount.3 b/usr/share/man/man3/pcre16_refcount.3 new file mode 120000 index 000000000..63cece3fc --- /dev/null +++ b/usr/share/man/man3/pcre16_refcount.3 @@ -0,0 +1 @@ +pcre_refcount.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_study.3 b/usr/share/man/man3/pcre16_study.3 new file mode 120000 index 000000000..9871217f8 --- /dev/null +++ b/usr/share/man/man3/pcre16_study.3 @@ -0,0 +1 @@ +pcre_study.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_utf16_to_host_byte_order.3 b/usr/share/man/man3/pcre16_utf16_to_host_byte_order.3 new file mode 120000 index 000000000..d9233991f --- /dev/null +++ b/usr/share/man/man3/pcre16_utf16_to_host_byte_order.3 @@ -0,0 +1 @@ +pcre_utf16_to_host_byte_order.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre16_version.3 b/usr/share/man/man3/pcre16_version.3 new file mode 120000 index 000000000..c31893c6b --- /dev/null +++ b/usr/share/man/man3/pcre16_version.3 @@ -0,0 +1 @@ +pcre_version.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32.3 b/usr/share/man/man3/pcre32.3 new file mode 100755 index 000000000..7cde8c087 --- /dev/null +++ b/usr/share/man/man3/pcre32.3 @@ -0,0 +1,369 @@ +.TH PCRE 3 "12 May 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.sp +.B #include <pcre.h> +. +. +.SH "PCRE 32-BIT API BASIC FUNCTIONS" +.rs +.sp +.nf +.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.sp +.B void pcre32_free_study(pcre32_extra *\fIextra\fP); +.sp +.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.sp +.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.fi +. +. 
+.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS" +.rs +.sp +.nf +.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP," +.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP," +.B " PCRE_SPTR32 *\fIstringptr\fP);" +.sp +.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIname\fP);" +.sp +.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);" +.sp +.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " PCRE_SPTR32 *\fIstringptr\fP);" +.sp +.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);" +.sp +.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP); +.sp +.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP); +.fi +. +. 
+.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS" +.rs +.sp +.nf +.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP); +.sp +.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP); +.sp +.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP, +.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.sp +.B const unsigned char *pcre32_maketables(void); +.sp +.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.sp +.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP); +.sp +.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP); +.sp +.B const char *pcre32_version(void); +.sp +.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP, +.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.fi +. +. +.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS" +.rs +.sp +.nf +.B void *(*pcre32_malloc)(size_t); +.sp +.B void (*pcre32_free)(void *); +.sp +.B void *(*pcre32_stack_malloc)(size_t); +.sp +.B void (*pcre32_stack_free)(void *); +.sp +.B int (*pcre32_callout)(pcre32_callout_block *); +.fi +. +. +.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION" +.rs +.sp +.nf +.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP, +.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP," +.B " int \fIkeep_boms\fP);" +.fi +. +. +.SH "THE PCRE 32-BIT LIBRARY" +.rs +.sp +Starting with release 8.32, it is possible to compile a PCRE library that +supports 32-bit character strings, including UTF-32 strings, as well as or +instead of the original 8-bit library. This work was done by Christian Persch, +based on the work done by Zoltan Herczeg for the 16-bit library. All three +libraries contain identical sets of functions, used in exactly the same way. +Only the names of the functions and the data types of their arguments and +results are different. 
To avoid over-complication and reduce the documentation +maintenance load, most of the PCRE documentation describes the 8-bit library, +with only occasional references to the 16-bit and 32-bit libraries. This page +describes what is different when you use the 32-bit library. +.P +WARNING: A single application can be linked with all or any of the three +libraries, but you must take care when processing any particular pattern +to use functions from just one library. For example, if you want to study +a pattern that was compiled with \fBpcre32_compile()\fP, you must do so +with \fBpcre32_study()\fP, not \fBpcre_study()\fP, and you must free the +study data with \fBpcre32_free_study()\fP. +. +. +.SH "THE HEADER FILE" +.rs +.sp +There is only one header file, \fBpcre.h\fP. It contains prototypes for all the +functions in all libraries, as well as definitions of flags, structures, error +codes, etc. +. +. +.SH "THE LIBRARY NAME" +.rs +.sp +In Unix-like systems, the 32-bit library is called \fBlibpcre32\fP, and can +normally be accessed by adding \fB-lpcre32\fP to the command for linking an +application that uses PCRE. +. +. +.SH "STRING TYPES" +.rs +.sp +In the 8-bit library, strings are passed to PCRE library functions as vectors +of bytes with the C type "char *". In the 32-bit library, strings are passed as +vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an +appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In +very many environments, "unsigned int" is a 32-bit data type. When PCRE is +built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is +a 32-bit data type. If it is not, the build fails with an error message telling +the maintainer to modify the definition appropriately. +. +. +.SH "STRUCTURE TYPES" +.rs +.sp +The types of the opaque structures that are used for compiled 32-bit patterns +and JIT stacks are \fBpcre32\fP and \fBpcre32_jit_stack\fP respectively.
The +type of the user-accessible structure that is returned by \fBpcre32_study()\fP +is \fBpcre32_extra\fP, and the type of the structure that is used for passing +data to a callout function is \fBpcre32_callout_block\fP. These structures +contain the same fields, with the same names, as their 8-bit counterparts. The +only difference is that pointers to character strings are 32-bit instead of +8-bit types. +. +. +.SH "32-BIT FUNCTIONS" +.rs +.sp +For every function in the 8-bit library there is a corresponding function in +the 32-bit library with a name that starts with \fBpcre32_\fP instead of +\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra +function, \fBpcre32_utf32_to_host_byte_order()\fP. This is a utility function +that converts a UTF-32 character string to host byte order if necessary. The +other 32-bit functions expect the strings they are passed to be in host byte +order. +.P +The \fIinput\fP and \fIoutput\fP arguments of +\fBpcre32_utf32_to_host_byte_order()\fP may point to the same address, that is, +conversion in place is supported. The output buffer must be at least as long as +the input. +.P +The \fIlength\fP argument specifies the number of 32-bit data units in the +input string; a negative value specifies a zero-terminated string. +.P +If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host +byte order. This may be changed by byte-order marks (BOMs) anywhere in the +string (commonly as the first character). +.P +If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it +points means that the input starts off in host byte order, otherwise the +opposite order is assumed. Again, BOMs in the string can change this. The final +byte order is passed back at the end of processing. +.P +If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied +into the output string. Otherwise they are discarded. 
+.P +The result of the function is the number of 32-bit units placed into the output +buffer, including the zero terminator if the string was zero-terminated. +. +. +.SH "SUBJECT STRING OFFSETS" +.rs +.sp +The lengths and starting offsets of subject strings must be specified in 32-bit +data units, and the offsets within subject strings that are returned by the +matching functions are also in 32-bit units rather than bytes. +. +. +.SH "NAMED SUBPATTERNS" +.rs +.sp +The name-to-number translation table that is maintained for named subpatterns +uses 32-bit characters. The \fBpcre32_get_stringtable_entries()\fP function +returns the length of each entry in the table as the number of 32-bit data +units. +. +. +.SH "OPTION NAMES" +.rs +.sp +There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK, +which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In +fact, these new options define the same bits in the options word. There is a +discussion about the +.\" HTML <a href="pcreunicode.html#utf32strings"> +.\" </a> +validity of UTF-32 strings +.\" +in the +.\" HREF +\fBpcreunicode\fP +.\" +page. +.P +For the \fBpcre32_config()\fP function there is an option PCRE_CONFIG_UTF32 +that returns 1 if UTF-32 support is configured, otherwise 0. If this option is +given to \fBpcre_config()\fP or \fBpcre16_config()\fP, or if the +PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to \fBpcre32_config()\fP, +the result is the PCRE_ERROR_BADOPTION error. +. +. +.SH "CHARACTER CODES" +.rs +.sp +In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the +same way as in 8-bit, non UTF-8 mode, except, of course, that they can range +from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less +than 0xff can therefore be influenced by the locale in the same way as before. +Characters greater than 0xff have only one case, and no "type" (such as letter +or digit).
+.P +In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with +the exception of values in the range 0xd800 to 0xdfff because those are +"surrogate" values that are ill-formed in UTF-32. +.P +A UTF-32 string can indicate its endianness by a special code known as a +byte-order mark (BOM). The PCRE functions do not handle this, expecting strings +to be in host byte order. A utility function called +\fBpcre32_utf32_to_host_byte_order()\fP is provided to help with this (see +above). +. +. +.SH "ERROR NAMES" +.rs +.sp +The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart. +The error PCRE_ERROR_BADMODE is given when a compiled +pattern is passed to a function that processes patterns in the other +mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to +\fBpcre32_exec()\fP. +.P +There are new error codes whose names begin with PCRE_UTF32_ERR for invalid +UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that +are described in the section entitled +.\" HTML <a href="pcreapi.html#badutf8reasons"> +.\" </a> +"Reason codes for invalid UTF-8 strings" +.\" +in the main +.\" HREF +\fBpcreapi\fP +.\" +page. The UTF-32 errors are: +.sp + PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff) + PCRE_UTF32_ERR2 Non-character + PCRE_UTF32_ERR3 Character > 0x10ffff +. +. +.SH "ERROR TEXTS" +.rs +.sp +If there is an error while compiling a pattern, the error text that is passed +back by \fBpcre32_compile()\fP or \fBpcre32_compile2()\fP is still an 8-bit +character string, zero-terminated. +. +. +.SH "CALLOUTS" +.rs +.sp +The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to +a callout function point to 32-bit vectors. +. +. +.SH "TESTING" +.rs +.sp +The \fBpcretest\fP program continues to operate with 8-bit input and output +files, but it can be used for testing the 32-bit library.
If it is run with the +command line option \fB-32\fP, patterns and subject strings are converted from +8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions +are used instead of the 8-bit ones. Returned 32-bit strings are converted to +8-bit for output. If both the 8-bit and the 16-bit libraries were not compiled, +\fBpcretest\fP defaults to 32-bit and the \fB-32\fP option is ignored. +.P +When PCRE is being built, the \fBRunTest\fP script that is called by "make +check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit, +16-bit and 32-bit libraries has been built, and runs the tests appropriately. +. +. +.SH "NOT SUPPORTED IN 32-BIT MODE" +.rs +.sp +Not all the features of the 8-bit library are available with the 32-bit +library. The C++ and POSIX wrapper functions support only the 8-bit library, +and the \fBpcregrep\fP program is at present 8-bit only. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 May 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcre32_assign_jit_stack.3 b/usr/share/man/man3/pcre32_assign_jit_stack.3 new file mode 120000 index 000000000..40c8775a7 --- /dev/null +++ b/usr/share/man/man3/pcre32_assign_jit_stack.3 @@ -0,0 +1 @@ +pcre_assign_jit_stack.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_compile.3 b/usr/share/man/man3/pcre32_compile.3 new file mode 120000 index 000000000..9364e7518 --- /dev/null +++ b/usr/share/man/man3/pcre32_compile.3 @@ -0,0 +1 @@ +pcre_compile.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_compile2.3 b/usr/share/man/man3/pcre32_compile2.3 new file mode 120000 index 000000000..2d6aff35c --- /dev/null +++ b/usr/share/man/man3/pcre32_compile2.3 @@ -0,0 +1 @@ +pcre_compile2.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_config.3 b/usr/share/man/man3/pcre32_config.3 new file mode 120000 index 000000000..db964059e --- /dev/null +++ b/usr/share/man/man3/pcre32_config.3 @@ -0,0 +1 @@ +pcre_config.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_copy_named_substring.3 b/usr/share/man/man3/pcre32_copy_named_substring.3 new file mode 120000 index 000000000..c15b97746 --- /dev/null +++ b/usr/share/man/man3/pcre32_copy_named_substring.3 @@ -0,0 +1 @@ +pcre_copy_named_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_copy_substring.3 b/usr/share/man/man3/pcre32_copy_substring.3 new file mode 120000 index 000000000..98d1f84db --- /dev/null +++ b/usr/share/man/man3/pcre32_copy_substring.3 @@ -0,0 +1 @@ +pcre_copy_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_dfa_exec.3 b/usr/share/man/man3/pcre32_dfa_exec.3 new file mode 120000 index 000000000..ed408df0b --- /dev/null +++ b/usr/share/man/man3/pcre32_dfa_exec.3 @@ -0,0 +1 @@ +pcre_dfa_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_exec.3 b/usr/share/man/man3/pcre32_exec.3 new file mode 120000 index 000000000..ebaa25275 --- /dev/null +++ b/usr/share/man/man3/pcre32_exec.3 @@ -0,0 +1 @@ +pcre_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_free_study.3 b/usr/share/man/man3/pcre32_free_study.3 new file mode 120000 index 000000000..4c53ea9f0 --- /dev/null +++ b/usr/share/man/man3/pcre32_free_study.3 @@ -0,0 +1 @@ +pcre_free_study.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_free_substring.3 b/usr/share/man/man3/pcre32_free_substring.3 new file mode 120000 index 000000000..48567bfb7 --- /dev/null +++ b/usr/share/man/man3/pcre32_free_substring.3 @@ -0,0 +1 @@ +pcre_free_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_free_substring_list.3 b/usr/share/man/man3/pcre32_free_substring_list.3 new file mode 120000 index 000000000..84b7b7e5c --- /dev/null +++ b/usr/share/man/man3/pcre32_free_substring_list.3 @@ -0,0 +1 @@ +pcre_free_substring_list.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_fullinfo.3 b/usr/share/man/man3/pcre32_fullinfo.3 new file mode 120000 index 000000000..b7386a985 --- /dev/null +++ b/usr/share/man/man3/pcre32_fullinfo.3 @@ -0,0 +1 @@ +pcre_fullinfo.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_get_named_substring.3 b/usr/share/man/man3/pcre32_get_named_substring.3 new file mode 120000 index 000000000..c095ca50a --- /dev/null +++ b/usr/share/man/man3/pcre32_get_named_substring.3 @@ -0,0 +1 @@ +pcre_get_named_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_get_stringnumber.3 b/usr/share/man/man3/pcre32_get_stringnumber.3 new file mode 120000 index 000000000..f8e81d32e --- /dev/null +++ b/usr/share/man/man3/pcre32_get_stringnumber.3 @@ -0,0 +1 @@ +pcre_get_stringnumber.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_get_stringtable_entries.3 b/usr/share/man/man3/pcre32_get_stringtable_entries.3 new file mode 120000 index 000000000..9f8cc4a78 --- /dev/null +++ b/usr/share/man/man3/pcre32_get_stringtable_entries.3 @@ -0,0 +1 @@ +pcre_get_stringtable_entries.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_get_substring.3 b/usr/share/man/man3/pcre32_get_substring.3 new file mode 120000 index 000000000..26383b5d3 --- /dev/null +++ b/usr/share/man/man3/pcre32_get_substring.3 @@ -0,0 +1 @@ +pcre_get_substring.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_get_substring_list.3 b/usr/share/man/man3/pcre32_get_substring_list.3 new file mode 120000 index 000000000..2faa865f2 --- /dev/null +++ b/usr/share/man/man3/pcre32_get_substring_list.3 @@ -0,0 +1 @@ +pcre_get_substring_list.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_jit_exec.3 b/usr/share/man/man3/pcre32_jit_exec.3 new file mode 120000 index 000000000..59089adac --- /dev/null +++ b/usr/share/man/man3/pcre32_jit_exec.3 @@ -0,0 +1 @@ +pcre_jit_exec.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_jit_stack_alloc.3 b/usr/share/man/man3/pcre32_jit_stack_alloc.3 new file mode 120000 index 000000000..dab43b82c --- /dev/null +++ b/usr/share/man/man3/pcre32_jit_stack_alloc.3 @@ -0,0 +1 @@ +pcre_jit_stack_alloc.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_jit_stack_free.3 b/usr/share/man/man3/pcre32_jit_stack_free.3 new file mode 120000 index 000000000..e92aa3100 --- /dev/null +++ b/usr/share/man/man3/pcre32_jit_stack_free.3 @@ -0,0 +1 @@ +pcre_jit_stack_free.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_maketables.3 b/usr/share/man/man3/pcre32_maketables.3 new file mode 120000 index 000000000..3b6308e2d --- /dev/null +++ b/usr/share/man/man3/pcre32_maketables.3 @@ -0,0 +1 @@ +pcre_maketables.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_pattern_to_host_byte_order.3 b/usr/share/man/man3/pcre32_pattern_to_host_byte_order.3 new file mode 120000 index 000000000..6540917e1 --- /dev/null +++ b/usr/share/man/man3/pcre32_pattern_to_host_byte_order.3 @@ -0,0 +1 @@ +pcre_pattern_to_host_byte_order.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_refcount.3 b/usr/share/man/man3/pcre32_refcount.3 new file mode 120000 index 000000000..63cece3fc --- /dev/null +++ b/usr/share/man/man3/pcre32_refcount.3 @@ -0,0 +1 @@ +pcre_refcount.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_study.3 b/usr/share/man/man3/pcre32_study.3 new file mode 120000 index 000000000..9871217f8 --- /dev/null +++ b/usr/share/man/man3/pcre32_study.3 @@ -0,0 +1 @@ +pcre_study.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_utf32_to_host_byte_order.3 b/usr/share/man/man3/pcre32_utf32_to_host_byte_order.3 new file mode 120000 index 000000000..e9699f5f5 --- /dev/null +++ b/usr/share/man/man3/pcre32_utf32_to_host_byte_order.3 @@ -0,0 +1 @@ +pcre_utf32_to_host_byte_order.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre32_version.3 b/usr/share/man/man3/pcre32_version.3 new file mode 120000 index 000000000..c31893c6b --- /dev/null +++ b/usr/share/man/man3/pcre32_version.3 @@ -0,0 +1 @@ +pcre_version.3
\ No newline at end of file diff --git a/usr/share/man/man3/pcre_assign_jit_stack.3 b/usr/share/man/man3/pcre_assign_jit_stack.3 new file mode 100755 index 000000000..0ecf6f2c6 --- /dev/null +++ b/usr/share/man/man3/pcre_assign_jit_stack.3 @@ -0,0 +1,59 @@ +.TH PCRE_ASSIGN_JIT_STACK 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP, +.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.sp +.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP, +.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.sp +.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP, +.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function provides control over the memory used as a stack at run-time by a +call to \fBpcre[16|32]_exec()\fP with a pattern that has been successfully +compiled with JIT optimization. The arguments are: +.sp + extra the data pointer returned by \fBpcre[16|32]_study()\fP + callback a callback function + data a JIT stack or a value to be passed to the callback + function +.P +If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on +the machine stack is used. +.P +If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must +be a valid JIT stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP. +.P +If \fIcallback\fP is not NULL, it is called with \fIdata\fP as an argument at +the start of matching, in order to set up a JIT stack. If the result is NULL, +the internal 32K stack is used; otherwise the return value must be a valid JIT +stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP. +.P +You may safely assign the same JIT stack to multiple patterns, as long as they +are all matched in the same thread. In a multithread application, each thread +must use its own JIT stack.
For more details, see the +.\" HREF +\fBpcrejit\fP +.\" +page. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_compile.3 b/usr/share/man/man3/pcre_compile.3 new file mode 100755 index 000000000..5c16ebe26 --- /dev/null +++ b/usr/share/man/man3/pcre_compile.3 @@ -0,0 +1,96 @@ +.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function compiles a regular expression into an internal form. It is the +same as \fBpcre[16|32]_compile2()\fP, except for the absence of the +\fIerrorcodeptr\fP argument. 
Its arguments are: +.sp + \fIpattern\fP A zero-terminated string containing the + regular expression to be compiled + \fIoptions\fP Zero or more option bits + \fIerrptr\fP Where to put an error message + \fIerroffset\fP Offset in pattern where error was found + \fItableptr\fP Pointer to character tables, or NULL to + use the built-in default +.sp +The option bits are: +.sp + PCRE_ANCHORED Force pattern anchoring + PCRE_AUTO_CALLOUT Compile automatic callouts + PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF + PCRE_BSR_UNICODE \eR matches all Unicode line endings + PCRE_CASELESS Do caseless matching + PCRE_DOLLAR_ENDONLY $ not to match newline at end + PCRE_DOTALL . matches anything including NL + PCRE_DUPNAMES Allow duplicate names for subpatterns + PCRE_EXTENDED Ignore white space and # comments + PCRE_EXTRA PCRE extra features + (not much use currently) + PCRE_FIRSTLINE Force matching to be before newline + PCRE_JAVASCRIPT_COMPAT JavaScript compatibility + PCRE_MULTILINE ^ and $ match newlines within data + PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF) + PCRE_NEWLINE_ANY Recognize any Unicode newline sequence + PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline + sequences + PCRE_NEWLINE_CR Set CR as the newline sequence + PCRE_NEWLINE_CRLF Set CRLF as the newline sequence + PCRE_NEWLINE_LF Set LF as the newline sequence + PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- + theses (named ones available) + PCRE_NO_AUTO_POSSESS Disable auto-possessification + PCRE_NO_START_OPTIMIZE Disable match-time start optimizations + PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16 + validity (only relevant if + PCRE_UTF16 is set) + PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32 + validity (only relevant if + PCRE_UTF32 is set) + PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 + validity (only relevant if + PCRE_UTF8 is set) + PCRE_UCP Use Unicode properties for \ed, \ew, etc. 
+ PCRE_UNGREEDY Invert greediness of quantifiers + PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode + PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode + PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode +.sp +PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and +PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used. +.P +The yield of the function is a pointer to a private data structure that +contains the compiled pattern, or NULL if an error was detected. Note that +compiling regular expressions with one version of PCRE for use with a different +version is not guaranteed to work and may cause crashes. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_compile2.3 b/usr/share/man/man3/pcre_compile2.3 new file mode 100755 index 000000000..377420180 --- /dev/null +++ b/usr/share/man/man3/pcre_compile2.3 @@ -0,0 +1,101 @@ +.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function compiles a regular expression into an internal form.
It is the +same as \fBpcre[16|32]_compile()\fP, except for the addition of the +\fIerrorcodeptr\fP argument. The arguments are: +. +.sp + \fIpattern\fP A zero-terminated string containing the + regular expression to be compiled + \fIoptions\fP Zero or more option bits + \fIerrorcodeptr\fP Where to put an error code + \fIerrptr\fP Where to put an error message + \fIerroffset\fP Offset in pattern where error was found + \fItableptr\fP Pointer to character tables, or NULL to + use the built-in default +.sp +The option bits are: +.sp + PCRE_ANCHORED Force pattern anchoring + PCRE_AUTO_CALLOUT Compile automatic callouts + PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF + PCRE_BSR_UNICODE \eR matches all Unicode line endings + PCRE_CASELESS Do caseless matching + PCRE_DOLLAR_ENDONLY $ not to match newline at end + PCRE_DOTALL . matches anything including NL + PCRE_DUPNAMES Allow duplicate names for subpatterns + PCRE_EXTENDED Ignore white space and # comments + PCRE_EXTRA PCRE extra features + (not much use currently) + PCRE_FIRSTLINE Force matching to be before newline + PCRE_JAVASCRIPT_COMPAT JavaScript compatibility + PCRE_MULTILINE ^ and $ match newlines within data + PCRE_NEVER_UTF Lock out UTF, e.g. 
via (*UTF) + PCRE_NEWLINE_ANY Recognize any Unicode newline sequence + PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline + sequences + PCRE_NEWLINE_CR Set CR as the newline sequence + PCRE_NEWLINE_CRLF Set CRLF as the newline sequence + PCRE_NEWLINE_LF Set LF as the newline sequence + PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- + theses (named ones available) + PCRE_NO_AUTO_POSSESS Disable auto-possessification + PCRE_NO_START_OPTIMIZE Disable match-time start optimizations + PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16 + validity (only relevant if + PCRE_UTF16 is set) + PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32 + validity (only relevant if + PCRE_UTF32 is set) + PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 + validity (only relevant if + PCRE_UTF8 is set) + PCRE_UCP Use Unicode properties for \ed, \ew, etc. + PCRE_UNGREEDY Invert greediness of quantifiers + PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode + PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode + PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode +.sp +PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and +PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used. +.P +The yield of the function is a pointer to a private data structure that +contains the compiled pattern, or NULL if an error was detected. Note that +compiling regular expressions with one version of PCRE for use with a different +version is not guaranteed to work and may cause crashes. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_config.3 b/usr/share/man/man3/pcre_config.3 new file mode 100755 index 000000000..d3de14bb7 --- /dev/null +++ b/usr/share/man/man3/pcre_config.3 @@ -0,0 +1,77 @@ +.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP); +.PP +.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP); +.PP +.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP); +. +.SH DESCRIPTION +.rs +.sp +This function makes it possible for a client program to find out which optional +features are available in the version of the PCRE library it is using. The +arguments are as follows: +.sp + \fIwhat\fP A code specifying what information is required + \fIwhere\fP Points to where to put the data +.sp +The \fIwhere\fP argument must point to an integer variable, except for +PCRE_CONFIG_MATCH_LIMIT, PCRE_CONFIG_MATCH_LIMIT_RECURSION, and PCRE_CONFIG_PARENS_LIMIT, when it must +point to an unsigned long integer, and for PCRE_CONFIG_JITTARGET, when it must point to a const char *.
The available codes are: +.sp + PCRE_CONFIG_JIT Availability of just-in-time compiler + support (1=yes 0=no) + PCRE_CONFIG_JITTARGET String containing information about the + target architecture for the JIT compiler, + or NULL if there is no JIT support + PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4 + PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit + PCRE_CONFIG_MATCH_LIMIT Internal resource limit + PCRE_CONFIG_MATCH_LIMIT_RECURSION + Internal recursion depth limit + PCRE_CONFIG_NEWLINE Value of the default newline sequence: + 13 (0x000d) for CR + 10 (0x000a) for LF + 3338 (0x0d0a) for CRLF + -2 for ANYCRLF + -1 for ANY + PCRE_CONFIG_BSR Indicates what \eR matches by default: + 0 all Unicode line endings + 1 CR, LF, or CRLF only + PCRE_CONFIG_POSIX_MALLOC_THRESHOLD + Threshold of return slots, above which + \fBmalloc()\fP is used by the POSIX API + PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap) + PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes + 0=no); option for \fBpcre16_config()\fP + PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes + 0=no); option for \fBpcre32_config()\fP + PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no); + option for \fBpcre_config()\fP + PCRE_CONFIG_UNICODE_PROPERTIES + Availability of Unicode property support + (1=yes 0=no) +.sp +The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error +is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to +\fBpcre_config()\fP, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to +\fBpcre16_config()\fP, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to +\fBpcre32_config()\fP. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_copy_named_substring.3 b/usr/share/man/man3/pcre_copy_named_substring.3 new file mode 100755 index 000000000..52582aecb --- /dev/null +++ b/usr/share/man/man3/pcre_copy_named_substring.3 @@ -0,0 +1,51 @@ +.TH PCRE_COPY_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_copy_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " char *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP," +.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP," +.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring, identified +by name, into a given buffer. The arguments are: +.sp + \fIcode\fP Pattern that was successfully matched + \fIsubject\fP Subject that has been successfully matched + \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used + \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP + \fIstringname\fP Name of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbuffersize\fP Size of buffer +.sp +The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was +too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_copy_substring.3 b/usr/share/man/man3/pcre_copy_substring.3 new file mode 100755 index 000000000..83af6e800 --- /dev/null +++ b/usr/share/man/man3/pcre_copy_substring.3 @@ -0,0 +1,47 @@ +.TH PCRE_COPY_SUBSTRING 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring into a given +buffer. The arguments are: +.sp + \fIsubject\fP Subject that has been successfully matched + \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used + \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP + \fIstringnumber\fP Number of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbuffersize\fP Size of buffer +.sp +The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was +too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_dfa_exec.3 b/usr/share/man/man3/pcre_dfa_exec.3 new file mode 100755 index 000000000..39c2e836d --- /dev/null +++ b/usr/share/man/man3/pcre_dfa_exec.3 @@ -0,0 +1,118 @@ +.TH PCRE_DFA_EXEC 3 "12 May 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.sp +.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.sp +.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using an alternative matching algorithm that scans the subject string +just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible, +matching function is \fBpcre[16|32]_exec()\fP. 
The arguments for this function +are: +.sp + \fIcode\fP Points to the compiled pattern + \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure, + or is NULL + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fIovector\fP Points to a vector of ints for result offsets + \fIovecsize\fP Number of elements in the vector + \fIworkspace\fP Points to a vector of ints used as working space + \fIwscount\fP Number of elements in the vector +.sp +The units for \fIlength\fP and \fIstartoffset\fP are bytes for +\fBpcre_dfa_exec()\fP, 16-bit data items for \fBpcre16_dfa_exec()\fP, and 32-bit items +for \fBpcre32_dfa_exec()\fP. The options are: +.sp + PCRE_ANCHORED Match only at the first position + PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF + PCRE_BSR_UNICODE \eR matches all Unicode line endings + PCRE_NEWLINE_ANY Recognize any Unicode newline sequence + PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences + PCRE_NEWLINE_CR Recognize CR as the only newline sequence + PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence + PCRE_NEWLINE_LF Recognize LF as the only newline sequence + PCRE_NOTBOL Subject is not the beginning of a line + PCRE_NOTEOL Subject is not the end of a line + PCRE_NOTEMPTY An empty string is not a valid match + PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match + PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations + PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16 + validity (only relevant if PCRE_UTF16 + was set at compile time) + PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32 + validity (only relevant if PCRE_UTF32 + was set at compile time) + PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 + validity (only relevant if PCRE_UTF8 + was set at compile time) + PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial +
PCRE_PARTIAL_SOFT ) match if no full matches are found + PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match + even if there is a full match as well + PCRE_DFA_SHORTEST Return only the shortest match + PCRE_DFA_RESTART Restart after a partial match +.sp +There are restrictions on what may appear in a pattern when using this matching +function. Details are given in the +.\" HREF +\fBpcrematching\fP +.\" +documentation. For details of partial matching, see the +.\" HREF +\fBpcrepartial\fP +.\" +page. +.P +A \fBpcre[16|32]_extra\fP structure contains the following fields: +.sp + \fIflags\fP Bits indicating which fields are set + \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP + \fImatch_limit\fP Limit on internal resource use + \fImatch_limit_recursion\fP Limit on internal recursion depth + \fIcallout_data\fP Opaque data passed back to callouts + \fItables\fP Points to character tables or is NULL + \fImark\fP For passing back a *MARK pointer + \fIexecutable_jit\fP Opaque data from JIT compilation +.sp +The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, +PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, +PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this +matching function, the \fImatch_limit\fP and \fImatch_limit_recursion\fP fields +are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and +the corresponding variable are ignored. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_exec.3 b/usr/share/man/man3/pcre_exec.3 new file mode 100755 index 000000000..4686bd6de --- /dev/null +++ b/usr/share/man/man3/pcre_exec.3 @@ -0,0 +1,99 @@ +.TH PCRE_EXEC 3 "12 May 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.sp +.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.sp +.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It returns +offsets to captured substrings. Its arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure, + or is NULL + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fIovector\fP Points to a vector of ints for result offsets + \fIovecsize\fP Number of elements in the vector (a multiple of 3) +.sp +The units for \fIlength\fP and \fIstartoffset\fP are bytes for +\fBpcre_exec()\fP, 16-bit data items for \fBpcre16_exec()\fP, and 32-bit items +for \fBpcre32_exec()\fP. 
The options are: +.sp + PCRE_ANCHORED Match only at the first position + PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF + PCRE_BSR_UNICODE \eR matches all Unicode line endings + PCRE_NEWLINE_ANY Recognize any Unicode newline sequence + PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences + PCRE_NEWLINE_CR Recognize CR as the only newline sequence + PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence + PCRE_NEWLINE_LF Recognize LF as the only newline sequence + PCRE_NOTBOL Subject string is not the beginning of a line + PCRE_NOTEOL Subject string is not the end of a line + PCRE_NOTEMPTY An empty string is not a valid match + PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match + PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations + PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16 + validity (only relevant if PCRE_UTF16 + was set at compile time) + PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32 + validity (only relevant if PCRE_UTF32 + was set at compile time) + PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 + validity (only relevant if PCRE_UTF8 + was set at compile time) + PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial + PCRE_PARTIAL_SOFT ) match if no full matches are found + PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match + if that is found before a full match +.sp +For details of partial matching, see the +.\" HREF +\fBpcrepartial\fP +.\" +page. 
A \fBpcre_extra\fP structure contains the following fields: +.sp + \fIflags\fP Bits indicating which fields are set + \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP + \fImatch_limit\fP Limit on internal resource use + \fImatch_limit_recursion\fP Limit on internal recursion depth + \fIcallout_data\fP Opaque data passed back to callouts + \fItables\fP Points to character tables or is NULL + \fImark\fP For passing back a *MARK pointer + \fIexecutable_jit\fP Opaque data from JIT compilation +.sp +The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, +PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, +PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_free_study.3 b/usr/share/man/man3/pcre_free_study.3 new file mode 100755 index 000000000..8826b7359 --- /dev/null +++ b/usr/share/man/man3/pcre_free_study.3 @@ -0,0 +1,31 @@ +.TH PCRE_FREE_STUDY 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B void pcre_free_study(pcre_extra *\fIextra\fP); +.PP +.B void pcre16_free_study(pcre16_extra *\fIextra\fP); +.PP +.B void pcre32_free_study(pcre32_extra *\fIextra\fP); +. +.SH DESCRIPTION +.rs +.sp +This function is used to free the memory used for the data generated by a call +to \fBpcre[16|32]_study()\fP when it is no longer needed. The argument must be the +result of such a call. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcre_free_substring.3 b/usr/share/man/man3/pcre_free_substring.3 new file mode 100755 index 000000000..88c04019f --- /dev/null +++ b/usr/share/man/man3/pcre_free_substring.3 @@ -0,0 +1,31 @@ +.TH PCRE_FREE_SUBSTRING 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B void pcre_free_substring(const char *\fIstringptr\fP); +.PP +.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP); +.PP +.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP); +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the store obtained by a previous +call to \fBpcre[16|32]_get_substring()\fP or \fBpcre[16|32]_get_named_substring()\fP. +Its only argument is a pointer to the string. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_free_substring_list.3 b/usr/share/man/man3/pcre_free_substring_list.3 new file mode 100755 index 000000000..248b4bd01 --- /dev/null +++ b/usr/share/man/man3/pcre_free_substring_list.3 @@ -0,0 +1,31 @@ +.TH PCRE_FREE_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B void pcre_free_substring_list(const char **\fIstringptr\fP); +.PP +.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP); +.PP +.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP); +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the store obtained by a previous +call to \fBpcre[16|32]_get_substring_list()\fP. Its only argument is a pointer to +the list of string pointers. 
+.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_fullinfo.3 b/usr/share/man/man3/pcre_fullinfo.3 new file mode 100755 index 000000000..01e2e9287 --- /dev/null +++ b/usr/share/man/man3/pcre_fullinfo.3 @@ -0,0 +1,93 @@ +.TH PCRE_FULLINFO 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.sp +.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.sp +.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns information about a compiled pattern. 
Its arguments are: +.sp + \fIcode\fP Compiled regular expression + \fIextra\fP Result of \fBpcre[16|32]_study()\fP or NULL + \fIwhat\fP What information is required + \fIwhere\fP Where to put the information +.sp +The following information is available: +.sp + PCRE_INFO_BACKREFMAX Number of highest back reference + PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns + PCRE_INFO_DEFAULT_TABLES Pointer to default tables + PCRE_INFO_FIRSTBYTE Fixed first data unit for a match, or + -1 for start of string + or after newline, or + -2 otherwise + PCRE_INFO_FIRSTTABLE Table of first data units (after studying) + PCRE_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist + PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used + PCRE_INFO_JIT Return 1 after successful JIT compilation + PCRE_INFO_JITSIZE Size of JIT compiled code + PCRE_INFO_LASTLITERAL Literal last data unit required + PCRE_INFO_MINLENGTH Lower bound length of matching strings + PCRE_INFO_NAMECOUNT Number of named subpatterns + PCRE_INFO_NAMEENTRYSIZE Size of name table entry + PCRE_INFO_NAMETABLE Pointer to name table + PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried + (always returns 1 after release 8.00) + PCRE_INFO_OPTIONS Option bits used for compilation + PCRE_INFO_SIZE Size of compiled pattern + PCRE_INFO_STUDYSIZE Size of study data + PCRE_INFO_FIRSTCHARACTER Fixed first data unit for a match + PCRE_INFO_FIRSTCHARACTERFLAGS Returns + 1 if there is a first data character set, which can + then be retrieved using PCRE_INFO_FIRSTCHARACTER, + 2 if the first character is at the start of the data + string or after a newline, and + 0 otherwise + PCRE_INFO_REQUIREDCHAR Literal last data unit required + PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then + be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise +.sp +The \fIwhere\fP argument must point to an integer variable, except for the +following \fIwhat\fP values: +.sp + PCRE_INFO_DEFAULT_TABLES 
const unsigned char * + PCRE_INFO_FIRSTTABLE const unsigned char * + PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library) + PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library) + PCRE_INFO_NAMETABLE const unsigned char * (8-bit library) + PCRE_INFO_OPTIONS unsigned long int + PCRE_INFO_SIZE size_t + PCRE_INFO_FIRSTCHARACTER uint32_t + PCRE_INFO_REQUIREDCHAR uint32_t +.sp +The yield of the function is zero on success or: +.sp + PCRE_ERROR_NULL the argument \fIcode\fP was NULL + the argument \fIwhere\fP was NULL + PCRE_ERROR_BADMAGIC the "magic number" was not found + PCRE_ERROR_BADOPTION the value of \fIwhat\fP was invalid +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_get_named_substring.3 b/usr/share/man/man3/pcre_get_named_substring.3 new file mode 100755 index 000000000..84d4ee7db --- /dev/null +++ b/usr/share/man/man3/pcre_get_named_substring.3 @@ -0,0 +1,54 @@ +.TH PCRE_GET_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_get_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " const char **\fIstringptr\fP);" +.sp +.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP," +.B " PCRE_SPTR16 *\fIstringptr\fP);" +.sp +.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP," +.B " PCRE_SPTR32 *\fIstringptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring by name. 
The +arguments are: +.sp + \fIcode\fP Compiled pattern + \fIsubject\fP Subject that has been successfully matched + \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used + \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP + \fIstringname\fP Name of the required substring + \fIstringptr\fP Where to put the string pointer +.sp +The memory in which the substring is placed is obtained by calling +\fBpcre[16|32]_malloc()\fP. The convenience function +\fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer +needed. The yield of the function is the length of the extracted substring, +PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or +PCRE_ERROR_NOSUBSTRING if the string name is invalid. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_get_stringnumber.3 b/usr/share/man/man3/pcre_get_stringnumber.3 new file mode 100755 index 000000000..9fc5291dc --- /dev/null +++ b/usr/share/man/man3/pcre_get_stringnumber.3 @@ -0,0 +1,43 @@ +.TH PCRE_GET_STRINGNUMBER 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_get_stringnumber(const pcre *\fIcode\fP, +.B " const char *\fIname\fP);" +.sp +.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIname\fP);" +.sp +.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIname\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds the number of a named substring capturing +parenthesis in a compiled pattern. Its arguments are: +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose number is required +.sp +The yield of the function is the number of the parenthesis if the name is +found, or PCRE_ERROR_NOSUBSTRING otherwise. 
When duplicate names are allowed +(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by +\fBpcre[16|32]_get_stringnumber()\fP. You can obtain the complete list by calling +\fBpcre[16|32]_get_stringtable_entries()\fP. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_get_stringtable_entries.3 b/usr/share/man/man3/pcre_get_stringtable_entries.3 new file mode 100755 index 000000000..5c58c90c0 --- /dev/null +++ b/usr/share/man/man3/pcre_get_stringtable_entries.3 @@ -0,0 +1,46 @@ +.TH PCRE_GET_STRINGTABLE_ENTRIES 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP, +.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);" +.sp +.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP, +.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);" +.sp +.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP, +.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds, for a compiled pattern, the first and last +entries for a given name in the table that translates capturing parenthesis +names into numbers. When names are required to be unique (PCRE_DUPNAMES is +\fInot\fP set), it is usually easier to use \fBpcre[16|32]_get_stringnumber()\fP +instead. +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose entries are required + \fIfirst\fP Where to return a pointer to the first entry + \fIlast\fP Where to return a pointer to the last entry +.sp +The yield of the function is the length of each entry, or +PCRE_ERROR_NOSUBSTRING if none are found. 
+.P +There is a complete description of the PCRE native API, including the format of +the table entries, in the +.\" HREF +\fBpcreapi\fP +.\" +page, and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_get_substring.3 b/usr/share/man/man3/pcre_get_substring.3 new file mode 100755 index 000000000..1e62b2c0c --- /dev/null +++ b/usr/share/man/man3/pcre_get_substring.3 @@ -0,0 +1,50 @@ +.TH PCRE_GET_SUBSTRING 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " const char **\fIstringptr\fP);" +.sp +.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " PCRE_SPTR16 *\fIstringptr\fP);" +.sp +.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " PCRE_SPTR32 *\fIstringptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring. The +arguments are: +.sp + \fIsubject\fP Subject that has been successfully matched + \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used + \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP + \fIstringnumber\fP Number of the required substring + \fIstringptr\fP Where to put the string pointer +.sp +The memory in which the substring is placed is obtained by calling +\fBpcre[16|32]_malloc()\fP. The convenience function +\fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer +needed. The yield of the function is the length of the substring, +PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or +PCRE_ERROR_NOSUBSTRING if the string number is invalid. 
+.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_get_substring_list.3 b/usr/share/man/man3/pcre_get_substring_list.3 new file mode 100755 index 000000000..511a4a39d --- /dev/null +++ b/usr/share/man/man3/pcre_get_substring_list.3 @@ -0,0 +1,47 @@ +.TH PCRE_GET_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_get_substring_list(const char *\fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);" +.sp +.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);" +.sp +.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a list of all the captured +substrings. The arguments are: +.sp + \fIsubject\fP Subject that has been successfully matched + \fIovector\fP Offset vector that \fBpcre[16|32]_exec\fP used + \fIstringcount\fP Value returned by \fBpcre[16|32]_exec\fP + \fIlistptr\fP Where to put a pointer to the list +.sp +The memory in which the substrings and the list are placed is obtained by +calling \fBpcre[16|32]_malloc()\fP. The convenience function +\fBpcre[16|32]_free_substring_list()\fP can be used to free it when it is no +longer needed. A pointer to a list of pointers is put in the variable whose +address is in \fIlistptr\fP. The list is terminated by a NULL pointer. The +yield of the function is zero on success or PCRE_ERROR_NOMEMORY if sufficient +memory could not be obtained. 
+.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_jit_exec.3 b/usr/share/man/man3/pcre_jit_exec.3 new file mode 100755 index 000000000..ba8516817 --- /dev/null +++ b/usr/share/man/man3/pcre_jit_exec.3 @@ -0,0 +1,96 @@ +.TH PCRE_JIT_EXEC 3 "31 October 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " pcre_jit_stack *\fIjstack\fP);" +.sp +.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP," +.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " pcre16_jit_stack *\fIjstack\fP);" +.sp +.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP," +.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " pcre32_jit_stack *\fIjstack\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression that has been successfully +studied with one of the JIT options against a given subject string, using a +matching algorithm that is similar to Perl's. It is a "fast path" interface to +JIT, and it bypasses some of the sanity checks that \fBpcre_exec()\fP applies. +It returns offsets to captured substrings. 
Its arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure, + or is NULL + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string, in bytes + \fIstartoffset\fP Offset in bytes in the subject at which to + start matching + \fIoptions\fP Option bits + \fIovector\fP Points to a vector of ints for result offsets + \fIovecsize\fP Number of elements in the vector (a multiple of 3) + \fIjstack\fP Pointer to a JIT stack +.sp +The allowed options are: +.sp + PCRE_NOTBOL Subject string is not the beginning of a line + PCRE_NOTEOL Subject string is not the end of a line + PCRE_NOTEMPTY An empty string is not a valid match + PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match + PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16 + validity (only relevant if PCRE_UTF16 + was set at compile time) + PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32 + validity (only relevant if PCRE_UTF32 + was set at compile time) + PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 + validity (only relevant if PCRE_UTF8 + was set at compile time) + PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial + PCRE_PARTIAL_SOFT ) match if no full matches are found + PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match + if that is found before a full match +.sp +However, the PCRE_NO_UTF[8|16|32]_CHECK options have no effect, as this check +is never applied. For details of partial matching, see the +.\" HREF +\fBpcrepartial\fP +.\" +page. 
A \fBpcre_extra\fP structure contains the following fields: +.sp + \fIflags\fP Bits indicating which fields are set + \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP + \fImatch_limit\fP Limit on internal resource use + \fImatch_limit_recursion\fP Limit on internal recursion depth + \fIcallout_data\fP Opaque data passed back to callouts + \fItables\fP Points to character tables or is NULL + \fImark\fP For passing back a *MARK pointer + \fIexecutable_jit\fP Opaque data from JIT compilation +.sp +The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, +PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, +PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the JIT API in the +.\" HREF +\fBpcrejit\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_jit_stack_alloc.3 b/usr/share/man/man3/pcre_jit_stack_alloc.3 new file mode 100755 index 000000000..11c97a0fc --- /dev/null +++ b/usr/share/man/man3/pcre_jit_stack_alloc.3 @@ -0,0 +1,43 @@ +.TH PCRE_JIT_STACK_ALLOC 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, +.B " int \fImaxsize\fP);" +.sp +.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, +.B " int \fImaxsize\fP);" +.sp +.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, +.B " int \fImaxsize\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is used to create a stack for use by the code compiled by the JIT +optimization of \fBpcre[16|32]_study()\fP. The arguments are a starting size for +the stack, and a maximum size to which it is allowed to grow. The result can be +passed to the JIT run-time code by \fBpcre[16|32]_assign_jit_stack()\fP, or that +function can set up a callback for obtaining a stack. 
A maximum stack size of +512K to 1M should be more than enough for any pattern. For more details, see +the +.\" HREF +\fBpcrejit\fP +.\" +page. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_jit_stack_free.3 b/usr/share/man/man3/pcre_jit_stack_free.3 new file mode 100755 index 000000000..494724e84 --- /dev/null +++ b/usr/share/man/man3/pcre_jit_stack_free.3 @@ -0,0 +1,35 @@ +.TH PCRE_JIT_STACK_FREE 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP); +.PP +.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP); +.PP +.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP); +. +.SH DESCRIPTION +.rs +.sp +This function is used to free a JIT stack that was created by +\fBpcre[16|32]_jit_stack_alloc()\fP when it is no longer needed. For more details, +see the +.\" HREF +\fBpcrejit\fP +.\" +page. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_maketables.3 b/usr/share/man/man3/pcre_maketables.3 new file mode 100755 index 000000000..b2c3d23aa --- /dev/null +++ b/usr/share/man/man3/pcre_maketables.3 @@ -0,0 +1,33 @@ +.TH PCRE_MAKETABLES 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B const unsigned char *pcre_maketables(void); +.PP +.B const unsigned char *pcre16_maketables(void); +.PP +.B const unsigned char *pcre32_maketables(void); +. +.SH DESCRIPTION +.rs +.sp +This function builds a set of character tables for character values less than +256. 
These can be passed to \fBpcre[16|32]_compile()\fP to override PCRE's +internal, built-in tables (which were made by \fBpcre[16|32]_maketables()\fP when +PCRE was compiled). You might want to do this if you are using a non-standard +locale. The function yields a pointer to the tables. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_pattern_to_host_byte_order.3 b/usr/share/man/man3/pcre_pattern_to_host_byte_order.3 new file mode 100755 index 000000000..b0c41c38e --- /dev/null +++ b/usr/share/man/man3/pcre_pattern_to_host_byte_order.3 @@ -0,0 +1,44 @@ +.TH PCRE_PATTERN_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP, +.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.sp +.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP, +.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.sp +.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP, +.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function ensures that the bytes in 2-byte and 4-byte values in a compiled +pattern are in the correct order for the current host. It is useful when a +pattern that has been compiled on one host is transferred to another that might +have different endianness. The arguments are: +.sp + \fIcode\fP A compiled regular expression + \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure, + or is NULL + \fItables\fP Pointer to character tables, or NULL to + set the built-in default +.sp +The result is 0 for success, a negative PCRE_ERROR_xxx value otherwise. 
+.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_refcount.3 b/usr/share/man/man3/pcre_refcount.3 new file mode 100755 index 000000000..45a41fef6 --- /dev/null +++ b/usr/share/man/man3/pcre_refcount.3 @@ -0,0 +1,36 @@ +.TH PCRE_REFCOUNT 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP); +.PP +.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP); +.PP +.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP); +. +.SH DESCRIPTION +.rs +.sp +This function is used to maintain a reference count inside a data block that +contains a compiled pattern. Its arguments are: +.sp + \fIcode\fP Compiled regular expression + \fIadjust\fP Adjustment to reference value +.sp +The yield of the function is the adjusted reference value, which is constrained +to lie between 0 and 65535. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_study.3 b/usr/share/man/man3/pcre_study.3 new file mode 100755 index 000000000..1200e0a66 --- /dev/null +++ b/usr/share/man/man3/pcre_study.3 @@ -0,0 +1,54 @@ +.TH PCRE_STUDY 3 " 24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.sp +.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.sp +.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function studies a compiled pattern, to see if additional information can +be extracted that might speed up matching. Its arguments are: +.sp + \fIcode\fP A compiled regular expression + \fIoptions\fP Options for \fBpcre[16|32]_study()\fP + \fIerrptr\fP Where to put an error message +.sp +If the function succeeds, it returns a value that can be passed to +\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP via their \fIextra\fP +arguments. +.P +If the function returns NULL, either it could not find any additional +information, or there was an error. You can tell the difference by looking at +the error value. It is NULL in the first case. +.P +The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation +if possible. If PCRE has been compiled without JIT support, this option is +ignored. See the +.\" HREF +\fBpcrejit\fP +.\" +page for further details. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_utf16_to_host_byte_order.3 b/usr/share/man/man3/pcre_utf16_to_host_byte_order.3 new file mode 100755 index 000000000..1851b619d --- /dev/null +++ b/usr/share/man/man3/pcre_utf16_to_host_byte_order.3 @@ -0,0 +1,45 @@ +.TH PCRE_UTF16_TO_HOST_BYTE_ORDER 3 "21 January 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP, +.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP," +.B " int \fIkeep_boms\fP);" +.fi +. +. +.SH DESCRIPTION +.rs +.sp +This function, which exists only in the 16-bit library, converts a UTF-16 +string to the correct order for the current host, taking account of any byte +order marks (BOMs) within the string. 
Its arguments are: +.sp + \fIoutput\fP pointer to output buffer, may be the same as \fIinput\fP + \fIinput\fP pointer to input buffer + \fIlength\fP number of 16-bit units in the input, or negative for + a zero-terminated string + \fIhost_byte_order\fP a NULL value or a non-zero value pointed to means + start in host byte order + \fIkeep_boms\fP if non-zero, BOMs are copied to the output string +.sp +The result of the function is the number of 16-bit units placed into the output +buffer, including the zero terminator if the string was zero-terminated. +.P +If \fIhost_byte_order\fP is not NULL, it is set to indicate the byte order that +is current at the end of the string. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_utf32_to_host_byte_order.3 b/usr/share/man/man3/pcre_utf32_to_host_byte_order.3 new file mode 100755 index 000000000..a415dcf5f --- /dev/null +++ b/usr/share/man/man3/pcre_utf32_to_host_byte_order.3 @@ -0,0 +1,45 @@ +.TH PCRE_UTF32_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.nf +.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP, +.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP," +.B " int \fIkeep_boms\fP);" +.fi +. +. +.SH DESCRIPTION +.rs +.sp +This function, which exists only in the 32-bit library, converts a UTF-32 +string to the correct order for the current host, taking account of any byte +order marks (BOMs) within the string. 
Its arguments are: +.sp + \fIoutput\fP pointer to output buffer, may be the same as \fIinput\fP + \fIinput\fP pointer to input buffer + \fIlength\fP number of 32-bit units in the input, or negative for + a zero-terminated string + \fIhost_byte_order\fP a NULL value or a non-zero value pointed to means + start in host byte order + \fIkeep_boms\fP if non-zero, BOMs are copied to the output string +.sp +The result of the function is the number of 32-bit units placed into the output +buffer, including the zero terminator if the string was zero-terminated. +.P +If \fIhost_byte_order\fP is not NULL, it is set to indicate the byte order that +is current at the end of the string. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. diff --git a/usr/share/man/man3/pcre_version.3 b/usr/share/man/man3/pcre_version.3 new file mode 100755 index 000000000..0f4973f9c --- /dev/null +++ b/usr/share/man/man3/pcre_version.3 @@ -0,0 +1,31 @@ +.TH PCRE_VERSION 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B const char *pcre_version(void); +.PP +.B const char *pcre16_version(void); +.PP +.B const char *pcre32_version(void); +. +.SH DESCRIPTION +.rs +.sp +This function (even in the 16-bit and 32-bit libraries) returns a +zero-terminated, 8-bit character string that gives the version number of the +PCRE library and the date of its release. +.P +There is a complete description of the PCRE native API in the +.\" HREF +\fBpcreapi\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcreposix\fP +.\" +page. 
diff --git a/usr/share/man/man3/pcreapi.3 b/usr/share/man/man3/pcreapi.3 new file mode 100755 index 000000000..ab3eaa0b5 --- /dev/null +++ b/usr/share/man/man3/pcreapi.3 @@ -0,0 +1,2919 @@ +.TH PCREAPI 3 "09 February 2014" "PCRE 8.35" +.SH NAME +PCRE - Perl-compatible regular expressions +.sp +.B #include <pcre.h> +. +. +.SH "PCRE NATIVE API BASIC FUNCTIONS" +.rs +.sp +.nf +.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.sp +.B void pcre_free_study(pcre_extra *\fIextra\fP); +.sp +.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.sp +.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.fi +. +. 
+.SH "PCRE NATIVE API STRING EXTRACTION FUNCTIONS" +.rs +.sp +.nf +.B int pcre_copy_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " char *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre_get_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " const char **\fIstringptr\fP);" +.sp +.B int pcre_get_stringnumber(const pcre *\fIcode\fP, +.B " const char *\fIname\fP);" +.sp +.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP, +.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);" +.sp +.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " const char **\fIstringptr\fP);" +.sp +.B int pcre_get_substring_list(const char *\fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);" +.sp +.B void pcre_free_substring(const char *\fIstringptr\fP); +.sp +.B void pcre_free_substring_list(const char **\fIstringptr\fP); +.fi +. +. 
+.SH "PCRE NATIVE API AUXILIARY FUNCTIONS" +.rs +.sp +.nf +.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " pcre_jit_stack *\fIjstack\fP);" +.sp +.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP); +.sp +.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP); +.sp +.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP, +.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);" +.sp +.B const unsigned char *pcre_maketables(void); +.sp +.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.sp +.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP); +.sp +.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP); +.sp +.B const char *pcre_version(void); +.sp +.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP, +.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);" +.fi +. +. +.SH "PCRE NATIVE API INDIRECTED FUNCTIONS" +.rs +.sp +.nf +.B void *(*pcre_malloc)(size_t); +.sp +.B void (*pcre_free)(void *); +.sp +.B void *(*pcre_stack_malloc)(size_t); +.sp +.B void (*pcre_stack_free)(void *); +.sp +.B int (*pcre_callout)(pcre_callout_block *); +.sp +.B int (*pcre_stack_guard)(void); +.fi +. +. +.SH "PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES" +.rs +.sp +As well as support for 8-bit character strings, PCRE also supports 16-bit +strings (from release 8.30) and 32-bit strings (from release 8.32), by means of +two additional libraries. They can be built as well as, or instead of, the +8-bit library. To avoid too much complication, this document describes the +8-bit versions of the functions, with only occasional references to the 16-bit +and 32-bit libraries. 
+.P +The 16-bit and 32-bit functions operate in the same way as their 8-bit +counterparts; they just use different data types for their arguments and +results, and their names start with \fBpcre16_\fP or \fBpcre32_\fP instead of +\fBpcre_\fP. For every option that has UTF8 in its name (for example, +PCRE_UTF8), there are corresponding 16-bit and 32-bit names with UTF8 replaced +by UTF16 or UTF32, respectively. This facility is in fact just cosmetic; the +16-bit and 32-bit option names define the same bit values. +.P +References to bytes and UTF-8 in this document should be read as references to +16-bit data units and UTF-16 when using the 16-bit library, or 32-bit data +units and UTF-32 when using the 32-bit library, unless specified otherwise. +More details of the specific differences for the 16-bit and 32-bit libraries +are given in the +.\" HREF +\fBpcre16\fP +.\" +and +.\" HREF +\fBpcre32\fP +.\" +pages. +. +. +.SH "PCRE API OVERVIEW" +.rs +.sp +PCRE has its own native API, which is described in this document. There are +also some wrapper functions (for the 8-bit library only) that correspond to the +POSIX regular expression API, but they do not give access to all the +functionality. They are described in the +.\" HREF +\fBpcreposix\fP +.\" +documentation. Both of these APIs define a set of C function calls. A C++ +wrapper (again for the 8-bit library only) is also distributed with PCRE. It is +documented in the +.\" HREF +\fBpcrecpp\fP +.\" +page. +.P +The native API C function prototypes are defined in the header file +\fBpcre.h\fP, and on Unix-like systems the (8-bit) library itself is called +\fBlibpcre\fP. It can normally be accessed by adding \fB-lpcre\fP to the +command for linking an application that uses PCRE. The header file defines the +macros PCRE_MAJOR and PCRE_MINOR to contain the major and minor release numbers +for the library. Applications can use these to include support for different +releases of PCRE. 
+.P +In a Windows environment, if you want to statically link an application program +against a non-dll \fBpcre.a\fP file, you must define PCRE_STATIC before +including \fBpcre.h\fP or \fBpcrecpp.h\fP, because otherwise the +\fBpcre_malloc()\fP and \fBpcre_free()\fP exported functions will be declared +\fB__declspec(dllimport)\fP, with unwanted results. +.P +The functions \fBpcre_compile()\fP, \fBpcre_compile2()\fP, \fBpcre_study()\fP, +and \fBpcre_exec()\fP are used for compiling and matching regular expressions +in a Perl-compatible manner. A sample program that demonstrates the simplest +way of using them is provided in the file called \fIpcredemo.c\fP in the PCRE +source distribution. A listing of this program is given in the +.\" HREF +\fBpcredemo\fP +.\" +documentation, and the +.\" HREF +\fBpcresample\fP +.\" +documentation describes how to compile and run it. +.P +Just-in-time compiler support is an optional feature of PCRE that can be built +in appropriate hardware environments. It greatly speeds up the matching +performance of many patterns. Simple programs can easily request that it be +used if available, by setting an option that is ignored when it is not +relevant. More complicated programs might need to make use of the functions +\fBpcre_jit_stack_alloc()\fP, \fBpcre_jit_stack_free()\fP, and +\fBpcre_assign_jit_stack()\fP in order to control the JIT code's memory usage. +.P +From release 8.32 there is also a direct interface for JIT execution, which +gives improved performance. The JIT-specific functions are discussed in the +.\" HREF +\fBpcrejit\fP +.\" +documentation. +.P +A second matching function, \fBpcre_dfa_exec()\fP, which is not +Perl-compatible, is also provided. This uses a different algorithm for the +matching. The alternative algorithm finds all possible matches (at a given +point in the subject), and scans the subject just once (unless there are +lookbehind assertions). However, this algorithm does not return captured +substrings. 
A description of the two matching algorithms and their advantages +and disadvantages is given in the +.\" HREF +\fBpcrematching\fP +.\" +documentation. +.P +In addition to the main compiling and matching functions, there are convenience +functions for extracting captured substrings from a subject string that is +matched by \fBpcre_exec()\fP. They are: +.sp + \fBpcre_copy_substring()\fP + \fBpcre_copy_named_substring()\fP + \fBpcre_get_substring()\fP + \fBpcre_get_named_substring()\fP + \fBpcre_get_substring_list()\fP + \fBpcre_get_stringnumber()\fP + \fBpcre_get_stringtable_entries()\fP +.sp +\fBpcre_free_substring()\fP and \fBpcre_free_substring_list()\fP are also +provided, to free the memory used for extracted strings. +.P +The function \fBpcre_maketables()\fP is used to build a set of character tables +in the current locale for passing to \fBpcre_compile()\fP, \fBpcre_exec()\fP, +or \fBpcre_dfa_exec()\fP. This is an optional facility that is provided for +specialist use. Most commonly, no special tables are passed, in which case +internal tables that are generated when PCRE is built are used. +.P +The function \fBpcre_fullinfo()\fP is used to find out information about a +compiled pattern. The function \fBpcre_version()\fP returns a pointer to a +string containing the version of PCRE and its date of release. +.P +The function \fBpcre_refcount()\fP maintains a reference count in a data block +containing a compiled pattern. This is provided for the benefit of +object-oriented applications. +.P +The global variables \fBpcre_malloc\fP and \fBpcre_free\fP initially contain +the entry points of the standard \fBmalloc()\fP and \fBfree()\fP functions, +respectively. PCRE calls the memory management functions via these variables, +so a calling program can replace them if it wishes to intercept the calls. This +should be done before calling any PCRE functions. 
+.P +The global variables \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP are also +indirections to memory management functions. These special functions are used +only when PCRE is compiled to use the heap for remembering data, instead of +recursive function calls, when running the \fBpcre_exec()\fP function. See the +.\" HREF +\fBpcrebuild\fP +.\" +documentation for details of how to do this. It is a non-standard way of +building PCRE, for use in environments that have limited stacks. Because of the +greater use of memory management, it runs more slowly. Separate functions are +provided so that special-purpose external code can be used for this case. When +used, these functions are always called in a stack-like manner (last obtained, +first freed), and always for memory blocks of the same size. There is a +discussion about PCRE's stack usage in the +.\" HREF +\fBpcrestack\fP +.\" +documentation. +.P +The global variable \fBpcre_callout\fP initially contains NULL. It can be set +by the caller to a "callout" function, which PCRE will then call at specified +points during a matching operation. Details are given in the +.\" HREF +\fBpcrecallout\fP +.\" +documentation. +.P +The global variable \fBpcre_stack_guard\fP initially contains NULL. It can be +set by the caller to a function that is called by PCRE whenever it starts +to compile a parenthesized part of a pattern. When parentheses are nested, PCRE +uses recursive function calls, which use up the system stack. This function is +provided so that applications with restricted stacks can force a compilation +error if the stack runs out. The function should return zero if all is well, or +non-zero to force an error. +. +. 
+.\" HTML <a name="newlines"></a> +.SH NEWLINES +.rs +.sp +PCRE supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The Unicode newline sequences are the three just +mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +.P +Each of the first three conventions is used by at least one operating system as +its standard newline sequence. When PCRE is built, a default can be specified. +The default default is LF, which is the Unix standard. When PCRE is run, the +default can be overridden, either when a pattern is compiled, or when it is +matched. +.P +At compile time, the newline convention can be specified by the \fIoptions\fP +argument of \fBpcre_compile()\fP, or it can be specified by special text at the +start of the pattern itself; this overrides any other settings. See the +.\" HREF +\fBpcrepattern\fP +.\" +page for details of the special character sequences. +.P +In the PCRE documentation the word "newline" is used to mean "the character or +pair of characters that indicate a line break". The choice of newline +convention affects the handling of the dot, circumflex, and dollar +metacharacters, the handling of #-comments in /x mode, and, when CRLF is a +recognized line ending sequence, the match position advancement for a +non-anchored pattern. There is more detail about this in the +.\" HTML <a href="#execoptions"> +.\" </a> +section on \fBpcre_exec()\fP options +.\" +below. +.P +The choice of newline convention does not affect the interpretation of +the \en or \er escape sequences, nor does it affect what \eR matches, which is +controlled in a similar way, but by separate options. +. +. 
+.SH MULTITHREADING +.rs +.sp +The PCRE functions can be used in multi-threading applications, with the +proviso that the memory management functions pointed to by \fBpcre_malloc\fP, +\fBpcre_free\fP, \fBpcre_stack_malloc\fP, and \fBpcre_stack_free\fP, and the +callout and stack-checking functions pointed to by \fBpcre_callout\fP and +\fBpcre_stack_guard\fP, are shared by all threads. +.P +The compiled form of a regular expression is not altered during matching, so +the same compiled pattern can safely be used by several threads at once. +.P +If the just-in-time optimization feature is being used, it needs separate +memory stack areas for each thread. See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for more details. +. +. +.SH "SAVING PRECOMPILED PATTERNS FOR LATER USE" +.rs +.sp +The compiled form of a regular expression can be saved and re-used at a later +time, possibly by a different program, and even on a host other than the one on +which it was compiled. Details are given in the +.\" HREF +\fBpcreprecompile\fP +.\" +documentation, which includes a description of the +\fBpcre_pattern_to_host_byte_order()\fP function. However, compiling a regular +expression with one version of PCRE for use with a different version is not +guaranteed to work and may cause crashes. +. +. +.SH "CHECKING BUILD-TIME OPTIONS" +.rs +.sp +.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP); +.PP +The function \fBpcre_config()\fP makes it possible for a PCRE client to +discover which optional features have been compiled into the PCRE library. The +.\" HREF +\fBpcrebuild\fP +.\" +documentation has more details about these optional features. +.P +The first argument for \fBpcre_config()\fP is an integer, specifying which +information is required; the second argument is a pointer to a variable into +which the information is placed. The returned value is zero on success, or the +negative error code PCRE_ERROR_BADOPTION if the value in the first argument is +not recognized. 
The following information is available: +.sp + PCRE_CONFIG_UTF8 +.sp +The output is an integer that is set to one if UTF-8 support is available; +otherwise it is set to zero. This value should normally be given to the 8-bit +version of this function, \fBpcre_config()\fP. If it is given to the 16-bit +or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION. +.sp + PCRE_CONFIG_UTF16 +.sp +The output is an integer that is set to one if UTF-16 support is available; +otherwise it is set to zero. This value should normally be given to the 16-bit +version of this function, \fBpcre16_config()\fP. If it is given to the 8-bit +or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION. +.sp + PCRE_CONFIG_UTF32 +.sp +The output is an integer that is set to one if UTF-32 support is available; +otherwise it is set to zero. This value should normally be given to the 32-bit +version of this function, \fBpcre32_config()\fP. If it is given to the 8-bit +or 16-bit version of this function, the result is PCRE_ERROR_BADOPTION. +.sp + PCRE_CONFIG_UNICODE_PROPERTIES +.sp +The output is an integer that is set to one if support for Unicode character +properties is available; otherwise it is set to zero. +.sp + PCRE_CONFIG_JIT +.sp +The output is an integer that is set to one if support for just-in-time +compiling is available; otherwise it is set to zero. +.sp + PCRE_CONFIG_JITTARGET +.sp +The output is a pointer to a zero-terminated "const char *" string. If JIT +support is available, the string contains the name of the architecture for +which the JIT compiler is configured, for example "x86 32bit (little endian + +unaligned)". If JIT support is not available, the result is NULL. +.sp + PCRE_CONFIG_NEWLINE +.sp +The output is an integer whose value specifies the default character sequence +that is recognized as meaning "newline". 
The values that are supported in +ASCII/Unicode environments are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for +ANYCRLF, and -1 for ANY. In EBCDIC environments, CR, ANYCRLF, and ANY yield the +same values. However, the value for LF is normally 21, though some EBCDIC +environments use 37. The corresponding values for CRLF are 3349 and 3365. The +default should normally correspond to the standard sequence for your operating +system. +.sp + PCRE_CONFIG_BSR +.sp +The output is an integer whose value indicates what character sequences the \eR +escape sequence matches by default. A value of 0 means that \eR matches any +Unicode line ending sequence; a value of 1 means that \eR matches only CR, LF, +or CRLF. The default can be overridden when a pattern is compiled or matched. +.sp + PCRE_CONFIG_LINK_SIZE +.sp +The output is an integer that contains the number of bytes used for internal +linkage in compiled regular expressions. For the 8-bit library, the value can +be 2, 3, or 4. For the 16-bit library, the value is either 2 or 4 and is still +a number of bytes. For the 32-bit library, the value is either 2 or 4 and is +still a number of bytes. The default value of 2 is sufficient for all but the +most massive patterns, since it allows the compiled pattern to be up to 64K in +size. Larger values allow larger regular expressions to be compiled, at the +expense of slower matching. +.sp + PCRE_CONFIG_POSIX_MALLOC_THRESHOLD +.sp +The output is an integer that contains the threshold above which the POSIX +interface uses \fBmalloc()\fP for output vectors. Further details are given in +the +.\" HREF +\fBpcreposix\fP +.\" +documentation. +.sp + PCRE_CONFIG_PARENS_LIMIT +.sp +The output is a long integer that gives the maximum depth of nesting of +parentheses (of any kind) in a pattern. This limit is imposed to cap the amount +of system stack used when a pattern is compiled. It is specified when PCRE is +built; the default is 250. 
This limit does not take into account the stack that +may already be used by the calling application. For finer control over +compilation stack usage, you can set a pointer to an external checking function +in \fBpcre_stack_guard\fP. +.sp + PCRE_CONFIG_MATCH_LIMIT +.sp +The output is a long integer that gives the default limit for the number of +internal matching function calls in a \fBpcre_exec()\fP execution. Further +details are given with \fBpcre_exec()\fP below. +.sp + PCRE_CONFIG_MATCH_LIMIT_RECURSION +.sp +The output is a long integer that gives the default limit for the depth of +recursion when calling the internal matching function in a \fBpcre_exec()\fP +execution. Further details are given with \fBpcre_exec()\fP below. +.sp + PCRE_CONFIG_STACKRECURSE +.sp +The output is an integer that is set to one if internal recursion when running +\fBpcre_exec()\fP is implemented by recursive function calls that use the stack +to remember their state. This is the usual way that PCRE is compiled. The +output is zero if PCRE was compiled to use blocks of data on the heap instead +of recursive function calls. In this case, \fBpcre_stack_malloc\fP and +\fBpcre_stack_free\fP are called to manage memory blocks on the heap, thus +avoiding the use of the stack. +. +. +.SH "COMPILING A PATTERN" +.rs +.sp +.nf +.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.sp +.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP, +.B " int *\fIerrorcodeptr\fP," +.B " const char **\fIerrptr\fP, int *\fIerroffset\fP," +.B " const unsigned char *\fItableptr\fP);" +.fi +.P +Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be +called to compile a pattern into an internal form. 
The only difference between +the two interfaces is that \fBpcre_compile2()\fP has an additional argument, +\fIerrorcodeptr\fP, via which a numerical error code can be returned. To avoid +too much repetition, we refer just to \fBpcre_compile()\fP below, but the +information applies equally to \fBpcre_compile2()\fP. +.P +The pattern is a C string terminated by a binary zero, and is passed in the +\fIpattern\fP argument. A pointer to a single block of memory that is obtained +via \fBpcre_malloc\fP is returned. This contains the compiled code and related +data. The \fBpcre\fP type is defined for the returned block; this is a typedef +for a structure whose contents are not externally defined. It is up to the +caller to free the memory (via \fBpcre_free\fP) when it is no longer required. +.P +Although the compiled code of a PCRE regex is relocatable, that is, it does not +depend on memory location, the complete \fBpcre\fP data block is not +fully relocatable, because it may contain a copy of the \fItableptr\fP +argument, which is an address (see below). +.P +The \fIoptions\fP argument contains various bit settings that affect the +compilation. It should be zero if no options are required. The available +options are described below. Some of them (in particular, those that are +compatible with Perl, but some others as well) can also be set and unset from +within the pattern (see the detailed description in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation). For those options that can be different in different parts of +the pattern, the contents of the \fIoptions\fP argument specify their +settings at the start of compilation and execution. The PCRE_ANCHORED, +PCRE_BSR_\fIxxx\fP, PCRE_NEWLINE_\fIxxx\fP, PCRE_NO_UTF8_CHECK, and +PCRE_NO_START_OPTIMIZE options can be set at the time of matching as well as at +compile time. +.P +If \fIerrptr\fP is NULL, \fBpcre_compile()\fP returns NULL immediately. 
+Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fP returns +NULL, and sets the variable pointed to by \fIerrptr\fP to point to a textual +error message. This is a static string that is part of the library. You must +not try to free it. Normally, the offset from the start of the pattern to the +data unit that was being processed when the error was discovered is placed in +the variable pointed to by \fIerroffset\fP, which must not be NULL (if it is, +an immediate error is given). However, for an invalid UTF-8 or UTF-16 string, +the offset is that of the first data unit of the failing character. +.P +Some errors are not detected until the whole pattern has been scanned; in these +cases, the offset passed back is the length of the pattern. Note that the +offset is in data units, not characters, even in a UTF mode. It may sometimes +point into the middle of a UTF-8 or UTF-16 character. +.P +If \fBpcre_compile2()\fP is used instead of \fBpcre_compile()\fP, and the +\fIerrorcodeptr\fP argument is not NULL, a non-zero error code number is +returned via this argument in the event of an error. This is in addition to the +textual error message. Error codes and messages are listed below. +.P +If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of +character tables that are built when PCRE is compiled, using the default C +locale. Otherwise, \fItableptr\fP must be an address that is the result of a +call to \fBpcre_maketables()\fP. This value is stored with the compiled +pattern, and used again by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP when the +pattern is matched. For more discussion, see the section on locale support +below. 
+.P +This code fragment shows a typical straightforward call to \fBpcre_compile()\fP: +.sp + pcre *re; + const char *error; + int erroffset; + re = pcre_compile( + "^A.*Z", /* the pattern */ + 0, /* default options */ + &error, /* for error message */ + &erroffset, /* for error offset */ + NULL); /* use default character tables */ +.sp +The following names for option bits are defined in the \fBpcre.h\fP header +file: +.sp + PCRE_ANCHORED +.sp +If this bit is set, the pattern is forced to be "anchored", that is, it is +constrained to match only at the first matching point in the string that is +being searched (the "subject string"). This effect can also be achieved by +appropriate constructs in the pattern itself, which is the only way to do it in +Perl. +.sp + PCRE_AUTO_CALLOUT +.sp +If this bit is set, \fBpcre_compile()\fP automatically inserts callout items, +all with number 255, before each pattern item. For discussion of the callout +facility, see the +.\" HREF +\fBpcrecallout\fP +.\" +documentation. +.sp + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE +.sp +These options (which are mutually exclusive) control what the \eR escape +sequence matches. The choice is either to match only CR, LF, or CRLF, or to +match any Unicode newline sequence. The default is specified when PCRE is +built. It can be overridden from within the pattern, or by setting an option +when a compiled pattern is matched. +.sp + PCRE_CASELESS +.sp +If this bit is set, letters in the pattern match both upper and lower case +letters. It is equivalent to Perl's /i option, and it can be changed within a +pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the +concept of case for characters whose values are less than 128, so caseless +matching is always possible. For characters with higher values, the concept of +case is supported if PCRE is compiled with Unicode property support, but not +otherwise. 
If you want to use caseless matching for characters 128 and above, +you must ensure that PCRE is compiled with Unicode property support as well as +with UTF-8 support. +.sp + PCRE_DOLLAR_ENDONLY +.sp +If this bit is set, a dollar metacharacter in the pattern matches only at the +end of the subject string. Without this option, a dollar also matches +immediately before a newline at the end of the string (but not before any other +newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set. +There is no equivalent to this option in Perl, and no way to set it within a +pattern. +.sp + PCRE_DOTALL +.sp +If this bit is set, a dot metacharacter in the pattern matches a character of +any value, including one that indicates a newline. However, it only ever +matches one character, even if newlines are coded as CRLF. Without this option, +a dot does not match when the current position is at a newline. This option is +equivalent to Perl's /s option, and it can be changed within a pattern by a +(?s) option setting. A negative class such as [^a] always matches newline +characters, independent of the setting of this option. +.sp + PCRE_DUPNAMES +.sp +If this bit is set, names used to identify capturing subpatterns need not be +unique. This can be helpful for certain types of pattern when it is known that +only one instance of the named subpattern can ever be matched. There are more +details of named subpatterns below; see also the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. +.sp + PCRE_EXTENDED +.sp +If this bit is set, most white space characters in the pattern are totally +ignored except when escaped or inside a character class. However, white space +is not allowed within sequences such as (?> that introduce various +parenthesized subpatterns, nor within a numerical quantifier such as {1,3}. +However, ignorable white space is permitted between an item and a following +quantifier and between a quantifier and a following + that indicates +possessiveness. 
+.P +White space formerly did not include the VT character (code 11), because Perl +did not treat this character as white space. However, Perl changed at release +5.18, so PCRE followed at release 8.34, and VT is now treated as white space. +.P +PCRE_EXTENDED also causes characters between an unescaped # outside a character +class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is +equivalent to Perl's /x option, and it can be changed within a pattern by a +(?x) option setting. +.P +Which characters are interpreted as newlines is controlled by the options +passed to \fBpcre_compile()\fP or by a special sequence at the start of the +pattern, as described in the section entitled +.\" HTML <a href="pcrepattern.html#newlines"> +.\" </a> +"Newline conventions" +.\" +in the \fBpcrepattern\fP documentation. Note that the end of this type of +comment is a literal newline sequence in the pattern; escape sequences that +happen to represent a newline do not count. +.P +This option makes it possible to include comments inside complicated patterns. +Note, however, that this applies only to data characters. White space characters +may never appear within special character sequences in a pattern, for example +within the sequence (?( that introduces a conditional subpattern. +.sp + PCRE_EXTRA +.sp +This option was invented in order to turn on additional functionality of PCRE +that is incompatible with Perl, but it is currently of very little use. When +set, any backslash in a pattern that is followed by a letter that has no +special meaning causes an error, thus reserving these combinations for future +expansion. By default, as in Perl, a backslash followed by a letter with no +special meaning is treated as a literal. (Perl can, however, be persuaded to +give an error for this, by running it with the -w option.) There are at present +no other features controlled by this option. It can also be set by a (?X) +option setting within a pattern. 
+.sp + PCRE_FIRSTLINE +.sp +If this option is set, an unanchored pattern is required to match before or at +the first newline in the subject string, though the matched text may continue +over the newline. +.sp + PCRE_JAVASCRIPT_COMPAT +.sp +If this option is set, PCRE's behaviour is changed in some ways so that it is +compatible with JavaScript rather than Perl. The changes are as follows: +.P +(1) A lone closing square bracket in a pattern causes a compile-time error, +because this is illegal in JavaScript (by default it is treated as a data +character). Thus, the pattern AB]CD becomes illegal when this option is set. +.P +(2) At run time, a back reference to an unset subpattern group matches an empty +string (by default this causes the current matching alternative to fail). A +pattern such as (\e1)(a) succeeds when this option is set (assuming it can find +an "a" in the subject), whereas it fails by default, for Perl compatibility. +.P +(3) \eU matches an upper case "U" character; by default \eU causes a compile +time error (Perl uses \eU to upper case subsequent characters). +.P +(4) \eu matches a lower case "u" character unless it is followed by four +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, \eu causes a compile time error (Perl uses it to upper +case the following character). +.P +(5) \ex matches a lower case "x" character unless it is followed by two +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, as in Perl, a hexadecimal number is always expected after +\ex, but it may have zero, one, or two digits (so, for example, \exz matches a +binary zero character followed by z). +.sp + PCRE_MULTILINE +.sp +By default, for the purposes of matching "start of line" and "end of line", +PCRE treats the subject string as consisting of a single line of characters, +even if it actually contains newlines. 
The "start of line" metacharacter (^) +matches only at the start of the string, and the "end of line" metacharacter +($) matches only at the end of the string, or before a terminating newline +(except when PCRE_DOLLAR_ENDONLY is set). Note, however, that unless +PCRE_DOTALL is set, the "any character" metacharacter (.) does not match at a +newline. This behaviour (for ^, $, and dot) is the same as Perl. +.P +When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs +match immediately following or immediately before internal newlines in the +subject string, respectively, as well as at the very start and end. This is +equivalent to Perl's /m option, and it can be changed within a pattern by a +(?m) option setting. If there are no newlines in a subject string, or no +occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect. +.sp + PCRE_NEVER_UTF +.sp +This option locks out interpretation of the pattern as UTF-8 (or UTF-16 or +UTF-32 in the 16-bit and 32-bit libraries). In particular, it prevents the +creator of the pattern from switching to UTF interpretation by starting the +pattern with (*UTF). This may be useful in applications that process patterns +from external sources. The combination of PCRE_UTF8 and PCRE_NEVER_UTF also +causes an error. +.sp + PCRE_NEWLINE_CR + PCRE_NEWLINE_LF + PCRE_NEWLINE_CRLF + PCRE_NEWLINE_ANYCRLF + PCRE_NEWLINE_ANY +.sp +These options override the default newline definition that was chosen when PCRE +was built. Setting the first or the second specifies that a newline is +indicated by a single character (CR or LF, respectively). Setting +PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character +CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three +preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies +that any Unicode newline sequence should be recognized. 
+.P +In an ASCII/Unicode environment, the Unicode newline sequences are the three +just mentioned, plus the single characters VT (vertical tab, U+000B), FF (form +feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). For the 8-bit library, the last two are +recognized only in UTF-8 mode. +.P +When PCRE is compiled to run in an EBCDIC (mainframe) environment, the code for +CR is 0x0d, the same as ASCII. However, the character code for LF is normally +0x15, though in some EBCDIC environments 0x25 is used. Whichever of these is +not LF is made to correspond to Unicode's NEL character. EBCDIC codes are all +less than 256. For more details, see the +.\" HREF +\fBpcrebuild\fP +.\" +documentation. +.P +The newline setting in the options word uses three bits that are treated +as a number, giving eight possibilities. Currently only six are used (default +plus the five values above). This means that if you set more than one newline +option, the combination may or may not be sensible. For example, +PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but +other combinations may yield unused numbers and cause an error. +.P +The only time that a line break in a pattern is specially recognized when +compiling is when PCRE_EXTENDED is set. CR and LF are white space characters, +and so are ignored in this mode. Also, an unescaped # outside a character class +indicates a comment that lasts until after the next line break sequence. In +other circumstances, line break sequences in patterns are treated as literal +data. +.P +The newline option that is set at compile time becomes the default that is used +for \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, but it can be overridden. +.sp + PCRE_NO_AUTO_CAPTURE +.sp +If this option is set, it disables the use of numbered capturing parentheses in +the pattern. Any opening parenthesis that is not followed by ? 
behaves as if it +were followed by ?: but named parentheses can still be used for capturing (and +they acquire numbers in the usual way). There is no equivalent of this option +in Perl. +.sp + PCRE_NO_AUTO_POSSESS +.sp +If this option is set, it disables "auto-possessification". This is an +optimization that, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some of them are never taken. You can set +this option if you want the matching functions to do a full unoptimized search +and run all the callouts, but it is mainly provided for testing purposes. +.sp + PCRE_NO_START_OPTIMIZE +.sp +This is an option that acts at matching time; that is, it is really an option +for \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. If it is set at compile time, +it is remembered with the compiled pattern and assumed at matching time. This +is necessary if you want to use JIT execution, because the JIT compiler needs +to know whether or not this option is set. For details see the discussion of +PCRE_NO_START_OPTIMIZE +.\" HTML <a href="#execoptions"> +.\" </a> +below. +.\" +.sp + PCRE_UCP +.sp +This option changes the way PCRE processes \eB, \eb, \eD, \ed, \eS, \es, \eW, +\ew, and some of the POSIX character classes. By default, only ASCII characters +are recognized, but if PCRE_UCP is set, Unicode properties are used instead to +classify characters. More details are given in the section on +.\" HTML <a href="pcrepattern.html#genericchartypes"> +.\" </a> +generic character types +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +page. If you set PCRE_UCP, matching one of the items it affects takes much +longer. The option is available only if PCRE has been compiled with Unicode +property support. +.sp + PCRE_UNGREEDY +.sp +This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". 
It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. +.sp + PCRE_UTF8 +.sp +This option causes PCRE to regard both the pattern and the subject as strings +of UTF-8 characters instead of single-byte strings. However, it is available +only when PCRE is built to include UTF support. If not, the use of this option +provokes an error. Details of how this option changes the behaviour of PCRE are +given in the +.\" HREF +\fBpcreunicode\fP +.\" +page. +.sp + PCRE_NO_UTF8_CHECK +.sp +When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is +automatically checked. There is a discussion about the +.\" HTML <a href="pcreunicode.html#utf8strings"> +.\" </a> +validity of UTF-8 strings +.\" +in the +.\" HREF +\fBpcreunicode\fP +.\" +page. If an invalid UTF-8 sequence is found, \fBpcre_compile()\fP returns an +error. If you already know that your pattern is valid, and you want to skip +this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option. +When it is set, the effect of passing an invalid UTF-8 string as a pattern is +undefined. It may cause your program to crash or loop. Note that this option +can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress +the validity checking of subject strings only. If the same string is being +matched many times, the option can be safely set for the second and subsequent +matchings to improve performance. +. +. +.SH "COMPILATION ERROR CODES" +.rs +.sp +The following table lists the error codes that may be returned by +\fBpcre_compile2()\fP, along with the error messages that may be returned by +both compiling functions. Note that error messages are always 8-bit ASCII +strings, even in 16-bit or 32-bit mode. As PCRE has developed, some error codes +have fallen out of use. To avoid confusion, they have not been re-used.
+.sp + 0 no error + 1 \e at end of pattern + 2 \ec at end of pattern + 3 unrecognized character follows \e + 4 numbers out of order in {} quantifier + 5 number too big in {} quantifier + 6 missing terminating ] for character class + 7 invalid escape sequence in character class + 8 range out of order in character class + 9 nothing to repeat + 10 [this code is not in use] + 11 internal error: unexpected repeat + 12 unrecognized character after (? or (?- + 13 POSIX named classes are supported only within a class + 14 missing ) + 15 reference to non-existent subpattern + 16 erroffset passed as NULL + 17 unknown option bit(s) set + 18 missing ) after comment + 19 [this code is not in use] + 20 regular expression is too large + 21 failed to get memory + 22 unmatched parentheses + 23 internal error: code overflow + 24 unrecognized character after (?< + 25 lookbehind assertion is not fixed length + 26 malformed number or name after (?( + 27 conditional group contains more than two branches + 28 assertion expected after (?( + 29 (?R or (?[+-]digits must be followed by ) + 30 unknown POSIX class name + 31 POSIX collating elements are not supported + 32 this version of PCRE is compiled without UTF support + 33 [this code is not in use] + 34 character value in \ex{} or \eo{} is too large + 35 invalid condition (?(0) + 36 \eC not allowed in lookbehind assertion + 37 PCRE does not support \eL, \el, \eN{name}, \eU, or \eu + 38 number after (?C is > 255 + 39 closing ) for (?C expected + 40 recursive call could loop indefinitely + 41 unrecognized character after (?P + 42 syntax error in subpattern name (missing terminator) + 43 two named subpatterns have the same name + 44 invalid UTF-8 string (specifically UTF-8) + 45 support for \eP, \ep, and \eX has not been compiled + 46 malformed \eP or \ep sequence + 47 unknown property name after \eP or \ep + 48 subpattern name is too long (maximum 32 characters) + 49 too many named subpatterns (maximum 10000) + 50 [this code is not in use] 
+ 51 octal value is greater than \e377 in 8-bit non-UTF-8 mode + 52 internal error: overran compiling workspace + 53 internal error: previously-checked referenced subpattern + not found + 54 DEFINE group contains more than one branch + 55 repeating a DEFINE group is not allowed + 56 inconsistent NEWLINE options + 57 \eg is not followed by a braced, angle-bracketed, or quoted + name/number or by a plain number + 58 a numbered reference must not be zero + 59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) + 60 (*VERB) not recognized or malformed + 61 number is too big + 62 subpattern name expected + 63 digit expected after (?+ + 64 ] is an invalid data character in JavaScript compatibility mode + 65 different names for subpatterns of the same number are + not allowed + 66 (*MARK) must have an argument + 67 this version of PCRE is not compiled with Unicode property + support + 68 \ec must be followed by an ASCII character + 69 \ek is not followed by a braced, angle-bracketed, or quoted name + 70 internal error: unknown opcode in find_fixedlength() + 71 \eN is not supported in a class + 72 too many forward references + 73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + 74 invalid UTF-16 string (specifically UTF-16) + 75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + 76 character value in \eu.... sequence is too large + 77 invalid UTF-32 string (specifically UTF-32) + 78 setting UTF is disabled by the application + 79 non-hex character in \ex{} (closing brace missing?) + 80 non-octal character in \eo{} (closing brace missing?) + 81 missing opening brace after \eo + 82 parentheses are too deeply nested + 83 invalid range in character class + 84 group name must start with a non-digit + 85 parentheses are too deeply nested (stack check) +.sp +The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may +be used if the limits were changed when PCRE was built. +. +. 
+.\" HTML <a name="studyingapattern"></a> +.SH "STUDYING A PATTERN" +.rs +.sp +.nf +.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP, +.B " const char **\fIerrptr\fP);" +.fi +.PP +If a compiled pattern is going to be used several times, it is worth spending +more time analyzing it in order to speed up the time taken for matching. The +function \fBpcre_study()\fP takes a pointer to a compiled pattern as its first +argument. If studying the pattern produces additional information that will +help speed up matching, \fBpcre_study()\fP returns a pointer to a +\fBpcre_extra\fP block, in which the \fIstudy_data\fP field points to the +results of the study. +.P +The returned value from \fBpcre_study()\fP can be passed directly to +\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. However, a \fBpcre_extra\fP block +also contains other fields that can be set by the caller before the block is +passed; these are described +.\" HTML <a href="#extradata"> +.\" </a> +below +.\" +in the section on matching a pattern. +.P +If studying the pattern does not produce any useful information, +\fBpcre_study()\fP returns NULL by default. In that circumstance, if the +calling program wants to pass any of the other fields to \fBpcre_exec()\fP or +\fBpcre_dfa_exec()\fP, it must set up its own \fBpcre_extra\fP block. However, +if \fBpcre_study()\fP is called with the PCRE_STUDY_EXTRA_NEEDED option, it +returns a \fBpcre_extra\fP block even if studying did not find any additional +information. It may still return NULL, however, if an error occurs in +\fBpcre_study()\fP. +.P +The second argument of \fBpcre_study()\fP contains option bits. 
There are three +further options in addition to PCRE_STUDY_EXTRA_NEEDED: +.sp + PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +.sp +If any of these are set, and the just-in-time compiler is available, the +pattern is further compiled into machine code that executes much faster than +the \fBpcre_exec()\fP interpretive matching function. If the just-in-time +compiler is not available, these options are ignored. All undefined bits in the +\fIoptions\fP argument must be zero. +.P +JIT compilation is a heavyweight optimization. It can take some time for +patterns to be analyzed, and for one-off matches and simple patterns the +benefit of faster execution might be offset by a much slower study time. +Not all patterns can be optimized by the JIT compiler. For those that cannot be +handled, matching automatically falls back to the \fBpcre_exec()\fP +interpreter. For more details, see the +.\" HREF +\fBpcrejit\fP +.\" +documentation. +.P +The third argument for \fBpcre_study()\fP is a pointer for an error message. If +studying succeeds (even if no data is returned), the variable it points to is +set to NULL. Otherwise it is set to point to a textual error message. This is a +static string that is part of the library. You must not try to free it. You +should test the error pointer for NULL after calling \fBpcre_study()\fP, to be +sure that it has run successfully. +.P +When you are finished with a pattern, you can free the memory used for the +study data by calling \fBpcre_free_study()\fP. This function was added to the +API for release 8.20. For earlier versions, the memory could be freed with +\fBpcre_free()\fP, just like the pattern itself. This will still work in cases +where JIT optimization is not used, but it is advisable to change to the new +function when convenient. 
+.P +This is a typical way in which \fBpcre_study\fP() is used (except that in a +real application there should be tests for errors): +.sp + int rc; + pcre *re; + pcre_extra *sd; + re = pcre_compile("pattern", 0, &error, &erroroffset, NULL); + sd = pcre_study( + re, /* result of pcre_compile() */ + 0, /* no options */ + &error); /* set to NULL or points to a message */ + rc = pcre_exec( /* see below for details of pcre_exec() options */ + re, sd, "subject", 7, 0, 0, ovector, 30); + ... + pcre_free_study(sd); + pcre_free(re); +.sp +Studying a pattern does two things: first, a lower bound for the length of +subject string that is needed to match the pattern is computed. This does not +mean that there are any strings of that length that match, but it does +guarantee that no shorter strings match. The value is used to avoid wasting +time by trying to match strings that are shorter than the lower bound. You can +find out the value in a calling program via the \fBpcre_fullinfo()\fP function. +.P +Studying a pattern is also useful for non-anchored patterns that do not have a +single fixed starting character. A bitmap of possible starting bytes is +created. This speeds up finding a position in the subject at which to start +matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256. +In 32-bit mode, the bitmap is used for 32-bit values less than 256.) +.P +These two optimizations apply to both \fBpcre_exec()\fP and +\fBpcre_dfa_exec()\fP, and the information is also used by the JIT compiler. +The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option. +You might want to do this if your pattern contains callouts or (*MARK) and you +want to make use of these facilities in cases where matching fails. +.P +PCRE_NO_START_OPTIMIZE can be specified at either compile time or execution +time. However, if PCRE_NO_START_OPTIMIZE is passed to \fBpcre_exec()\fP, (that +is, after any JIT compilation has happened) JIT execution is disabled. 
For JIT +execution to work with PCRE_NO_START_OPTIMIZE, the option must be set at +compile time. +.P +There is a longer discussion of PCRE_NO_START_OPTIMIZE +.\" HTML <a href="#execoptions"> +.\" </a> +below. +.\" +. +. +.\" HTML <a name="localesupport"></a> +.SH "LOCALE SUPPORT" +.rs +.sp +PCRE handles caseless matching, and determines whether characters are letters, +digits, or whatever, by reference to a set of tables, indexed by character +code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this +applies only to characters with code points less than 256. By default, +higher-valued code points never match escapes such as \ew or \ed. However, if +PCRE is built with Unicode property support, all characters can be tested with +\ep and \eP, or, alternatively, the PCRE_UCP option can be set when a pattern +is compiled; this causes \ew and friends to use Unicode property support +instead of the built-in tables. +.P +The use of locales with Unicode is discouraged. If you are handling characters +with code points greater than 128, you should either use Unicode support, or +use locales, but not try to mix the two. +.P +PCRE contains an internal set of tables that are used when the final argument +of \fBpcre_compile()\fP is NULL. These are sufficient for many applications. +Normally, the internal tables recognize only ASCII characters. However, when +PCRE is built, it is possible to cause the internal tables to be rebuilt in the +default "C" locale of the local system, which may cause them to be different. +.P +The internal tables can always be overridden by tables supplied by the +application that calls PCRE. These may be created in a different locale from +the default. As more and more applications change to using Unicode, the need +for this locale support is expected to die away. +.P +External tables are built by calling the \fBpcre_maketables()\fP function, +which has no arguments, in the relevant locale. 
The result can then be passed +to \fBpcre_compile()\fP as often as necessary. For example, to build and use +tables that are appropriate for the French locale (where accented characters +with values greater than 128 are treated as letters), the following code could +be used: +.sp + setlocale(LC_CTYPE, "fr_FR"); + tables = pcre_maketables(); + re = pcre_compile(..., tables); +.sp +The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +.P +When \fBpcre_maketables()\fP runs, the tables are built in memory that is +obtained via \fBpcre_malloc\fP. It is the caller's responsibility to ensure +that the memory containing the tables remains available for as long as it is +needed. +.P +The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled +pattern, and the same tables are used via this pointer by \fBpcre_study()\fP +and also by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP. Thus, for any single +pattern, compilation, studying and matching all happen in the same locale, but +different patterns can be processed in different locales. +.P +It is possible to pass a table pointer or NULL (indicating the use of the +internal tables) to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP (see the +discussion below in the section on matching a pattern). This facility is +provided for use with pre-compiled patterns that have been saved and reloaded. +Character tables are not saved with patterns, so if a non-standard table was +used at compile time, it must be provided again when the reloaded pattern is +matched. Attempting to use this facility to match a pattern in a different +locale from the one in which it was compiled is likely to lead to anomalous +(usually incorrect) results. +. +. 
+.\" HTML <a name="infoaboutpattern"></a> +.SH "INFORMATION ABOUT A PATTERN" +.rs +.sp +.nf +.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " int \fIwhat\fP, void *\fIwhere\fP);" +.fi +.PP +The \fBpcre_fullinfo()\fP function returns information about a compiled +pattern. It replaces the \fBpcre_info()\fP function, which was removed from the +library at version 8.30, after more than 10 years of obsolescence. +.P +The first argument for \fBpcre_fullinfo()\fP is a pointer to the compiled +pattern. The second argument is the result of \fBpcre_study()\fP, or NULL if +the pattern was not studied. The third argument specifies which piece of +information is required, and the fourth argument is a pointer to a variable +to receive the data. The yield of the function is zero for success, or one of +the following negative numbers: +.sp + PCRE_ERROR_NULL the argument \fIcode\fP was NULL + the argument \fIwhere\fP was NULL + PCRE_ERROR_BADMAGIC the "magic number" was not found + PCRE_ERROR_BADENDIANNESS the pattern was compiled with different + endianness + PCRE_ERROR_BADOPTION the value of \fIwhat\fP was invalid + PCRE_ERROR_UNSET the requested field is not set +.sp +The "magic number" is placed at the start of each compiled pattern as a simple +check against passing an arbitrary memory pointer. The endianness error can +occur if a compiled pattern is saved and reloaded on a different host. Here is +a typical call of \fBpcre_fullinfo()\fP, to obtain the length of the compiled +pattern: +.sp + int rc; + size_t length; + rc = pcre_fullinfo( + re, /* result of pcre_compile() */ + sd, /* result of pcre_study(), or NULL */ + PCRE_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ +.sp +The possible values for the third argument are defined in \fBpcre.h\fP, and are +as follows: +.sp + PCRE_INFO_BACKREFMAX +.sp +Return the number of the highest back reference in the pattern.
The fourth +argument should point to an \fBint\fP variable. Zero is returned if there are +no back references. +.sp + PCRE_INFO_CAPTURECOUNT +.sp +Return the number of capturing subpatterns in the pattern. The fourth argument +should point to an \fBint\fP variable. +.sp + PCRE_INFO_DEFAULT_TABLES +.sp +Return a pointer to the internal default character tables within PCRE. The +fourth argument should point to an \fBunsigned char *\fP variable. This +information call is provided for internal use by the \fBpcre_study()\fP +function. External callers can cause PCRE to use its internal tables by passing +a NULL table pointer. +.sp + PCRE_INFO_FIRSTBYTE (deprecated) +.sp +Return information about the first data unit of any matched string, for a +non-anchored pattern. The name of this option refers to the 8-bit library, +where data units are bytes. The fourth argument should point to an \fBint\fP +variable. Negative values are used for special cases. However, this means that +when the 32-bit library is in non-UTF-32 mode, the full 32-bit range of +characters cannot be returned. For this reason, this value is deprecated; use +PCRE_INFO_FIRSTCHARACTERFLAGS and PCRE_INFO_FIRSTCHARACTER instead. +.P +If there is a fixed first value, for example, the letter "c" from a pattern +such as (cat|cow|coyote), its value is returned. In the 8-bit library, the +value is always less than 256. In the 16-bit library the value can be up to +0xffff. In the 32-bit library the value can be up to 0x10ffff. +.P +If there is no fixed first value, and if either +.sp +(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch +starts with "^", or +.sp +(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set +(if it were set, the pattern would be anchored), +.sp +-1 is returned, indicating that the pattern matches only at the start of a +subject string or after any newline within the string. Otherwise -2 is +returned. For anchored patterns, -2 is returned. 
+.sp + PCRE_INFO_FIRSTCHARACTER +.sp +Return the value of the first data unit (non-UTF character) of any matched +string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; +otherwise return 0. The fourth argument should point to a \fBuint32_t\fP +variable. +.P +In the 8-bit library, the value is always less than 256. In the 16-bit library +the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value +can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode. +.sp + PCRE_INFO_FIRSTCHARACTERFLAGS +.sp +Return information about the first data unit of any matched string, for a +non-anchored pattern. The fourth argument should point to an \fBint\fP +variable. +.P +If there is a fixed first value, for example, the letter "c" from a pattern +such as (cat|cow|coyote), 1 is returned, and the character value can be +retrieved using PCRE_INFO_FIRSTCHARACTER. If there is no fixed first value, and +if either +.sp +(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch +starts with "^", or +.sp +(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set +(if it were set, the pattern would be anchored), +.sp +2 is returned, indicating that the pattern matches only at the start of a +subject string or after any newline within the string. Otherwise 0 is +returned. For anchored patterns, 0 is returned. +.sp + PCRE_INFO_FIRSTTABLE +.sp +If the pattern was studied, and this resulted in the construction of a 256-bit +table indicating a fixed set of values for the first data unit in any matching +string, a pointer to the table is returned. Otherwise NULL is returned. The +fourth argument should point to an \fBunsigned char *\fP variable. +.sp + PCRE_INFO_HASCRORLF +.sp +Return 1 if the pattern contains any explicit matches for CR or LF characters, +otherwise 0. The fourth argument should point to an \fBint\fP variable. An +explicit match is either a literal CR or LF character, or \er or \en.
+.sp + PCRE_INFO_JCHANGED +.sp +Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise +0. The fourth argument should point to an \fBint\fP variable. (?J) and +(?-J) set and unset the local PCRE_DUPNAMES option, respectively. +.sp + PCRE_INFO_JIT +.sp +Return 1 if the pattern was studied with one of the JIT options, and +just-in-time compiling was successful. The fourth argument should point to an +\fBint\fP variable. A return value of 0 means that JIT support is not available +in this version of PCRE, or that the pattern was not studied with a JIT option, +or that the JIT compiler could not handle this particular pattern. See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for details of what can and cannot be handled. +.sp + PCRE_INFO_JITSIZE +.sp +If the pattern was successfully studied with a JIT option, return the size of +the JIT compiled code, otherwise return zero. The fourth argument should point +to a \fBsize_t\fP variable. +.sp + PCRE_INFO_LASTLITERAL +.sp +Return the value of the rightmost literal data unit that must exist in any +matched string, other than at its start, if such a value has been recorded. The +fourth argument should point to an \fBint\fP variable. If there is no such +value, -1 is returned. For anchored patterns, a last literal value is recorded +only if it follows something of variable length. For example, for the pattern +/^a\ed+z\ed+/ the returned value is "z", but for /^a\edz\ed/ the returned value +is -1. +.P +Since for the 32-bit library using the non-UTF-32 mode, this function is unable +to return the full 32-bit range of characters, this value is deprecated; +instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should +be used. +.sp + PCRE_INFO_MATCH_EMPTY +.sp +Return 1 if the pattern can match an empty string, otherwise 0. The fourth +argument should point to an \fBint\fP variable. 
+.sp + PCRE_INFO_MATCHLIMIT +.sp +If the pattern set a match limit by including an item of the form +(*LIMIT_MATCH=nnnn) at the start, the value is returned. The fourth argument +should point to an unsigned 32-bit integer. If no such value has been set, the +call to \fBpcre_fullinfo()\fP returns the error PCRE_ERROR_UNSET. +.sp + PCRE_INFO_MAXLOOKBEHIND +.sp +Return the number of characters (NB not data units) in the longest lookbehind +assertion in the pattern. This information is useful when doing multi-segment +matching using the partial matching facilities. Note that the simple assertions +\eb and \eB require a one-character lookbehind. \eA also registers a +one-character lookbehind, though it does not actually inspect the previous +character. This is to ensure that at least one character from the old segment +is retained when a new segment is processed. Otherwise, if there are no +lookbehinds in the pattern, \eA might match incorrectly at the start of a new +segment. +.sp + PCRE_INFO_MINLENGTH +.sp +If the pattern was studied and a minimum length for matching subject strings +was computed, its value is returned. Otherwise the returned value is -1. The +value is a number of characters, which in UTF mode may be different from the +number of data units. The fourth argument should point to an \fBint\fP +variable. A non-negative value is a lower bound to the length of any matching +string. There may not be any strings of that length that do actually match, but +every string that does match is at least that long. +.sp + PCRE_INFO_NAMECOUNT + PCRE_INFO_NAMEENTRYSIZE + PCRE_INFO_NAMETABLE +.sp +PCRE supports the use of named as well as numbered capturing parentheses. The +names are just an additional way of identifying the parentheses, which still +acquire numbers. Several convenience functions such as +\fBpcre_get_named_substring()\fP are provided for extracting captured +substrings by name. 
It is also possible to extract the data directly, by first +converting the name to a number in order to access the correct pointers in the +output vector (described with \fBpcre_exec()\fP below). To do the conversion, +you need to use the name-to-number map, which is described by these three +values. +.P +The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives +the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each +entry; both of these return an \fBint\fP value. The entry size depends on the +length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first +entry of the table. This is a pointer to \fBchar\fP in the 8-bit library, where +the first two bytes of each entry are the number of the capturing parenthesis, +most significant byte first. In the 16-bit library, the pointer points to +16-bit data units, the first of which contains the parenthesis number. In the +32-bit library, the pointer points to 32-bit data units, the first of which +contains the parenthesis number. The rest of the entry is the corresponding +name, zero terminated. +.P +The names are in alphabetical order. If (?| is used to create multiple groups +with the same number, as described in the +.\" HTML <a href="pcrepattern.html#dupsubpatternnumber"> +.\" </a> +section on duplicate subpattern numbers +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +page, the groups may be given the same name, but there is only one entry in the +table. Different names for groups of the same number are not permitted. +Duplicate names for subpatterns with different numbers are permitted, +but only if PCRE_DUPNAMES is set. They appear in the table in the order in +which they were found in the pattern. In the absence of (?| this is the order +of increasing number; when (?| is used this is not necessarily the case because +later subpatterns may have lower numbers. 
+.P +As a simple example of the name/number table, consider the following pattern +after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white +space - including newlines - is ignored): +.sp +.\" JOIN + (?<date> (?<year>(\ed\ed)?\ed\ed) - + (?<month>\ed\ed) - (?<day>\ed\ed) ) +.sp +There are four named subpatterns, so the table has four entries, and each entry +in the table is eight bytes long. The table is as follows, with non-printing +bytes shown in hexadecimal, and undefined bytes shown as ??: +.sp + 00 01 d a t e 00 ?? + 00 05 d a y 00 ?? ?? + 00 04 m o n t h 00 + 00 02 y e a r 00 ?? +.sp +When writing code to extract data from named subpatterns using the +name-to-number map, remember that the length of the entries is likely to be +different for each compiled pattern. +.sp + PCRE_INFO_OKPARTIAL +.sp +Return 1 if the pattern can be used for partial matching with +\fBpcre_exec()\fP, otherwise 0. The fourth argument should point to an +\fBint\fP variable. From release 8.00, this always returns 1, because the +restrictions that previously applied to partial matching have been lifted. The +.\" HREF +\fBpcrepartial\fP +.\" +documentation gives details of partial matching. +.sp + PCRE_INFO_OPTIONS +.sp +Return a copy of the options with which the pattern was compiled. The fourth +argument should point to an \fBunsigned long int\fP variable. These option bits +are those specified in the call to \fBpcre_compile()\fP, modified by any +top-level option settings at the start of the pattern itself. In other words, +they are the options that will be in force when matching starts. For example, +if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the +result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
+.P +A pattern is automatically anchored by PCRE if all of its top-level +alternatives begin with one of the following: +.sp + ^ unless PCRE_MULTILINE is set + \eA always + \eG always +.\" JOIN + .* if PCRE_DOTALL is set and there are no back + references to the subpattern in which .* appears +.sp +For such patterns, the PCRE_ANCHORED bit is set in the options returned by +\fBpcre_fullinfo()\fP. +.sp + PCRE_INFO_RECURSIONLIMIT +.sp +If the pattern set a recursion limit by including an item of the form +(*LIMIT_RECURSION=nnnn) at the start, the value is returned. The fourth +argument should point to an unsigned 32-bit integer. If no such value has been +set, the call to \fBpcre_fullinfo()\fP returns the error PCRE_ERROR_UNSET. +.sp + PCRE_INFO_SIZE +.sp +Return the size of the compiled pattern in bytes (for all three libraries). The +fourth argument should point to a \fBsize_t\fP variable. This value does not +include the size of the \fBpcre\fP structure that is returned by +\fBpcre_compile()\fP. The value that is passed as the argument to +\fBpcre_malloc()\fP when \fBpcre_compile()\fP is getting memory in which to +place the compiled data is the value returned by this option plus the size of +the \fBpcre\fP structure. Studying a compiled pattern, with or without JIT, +does not alter the value returned by this option. +.sp + PCRE_INFO_STUDYSIZE +.sp +Return the size in bytes (for all three libraries) of the data block pointed to +by the \fIstudy_data\fP field in a \fBpcre_extra\fP block. If \fBpcre_extra\fP +is NULL, or there is no study data, zero is returned. The fourth argument +should point to a \fBsize_t\fP variable. The \fIstudy_data\fP field is set by +\fBpcre_study()\fP to record information that will speed up matching (see the +section entitled +.\" HTML <a href="#studyingapattern"> +.\" </a> +"Studying a pattern" +.\" +above). 
The format of the \fIstudy_data\fP block is private, but its length +is made available via this option so that it can be saved and restored (see the +.\" HREF +\fBpcreprecompile\fP +.\" +documentation for details). +.sp + PCRE_INFO_REQUIREDCHARFLAGS +.sp +Returns 1 if there is a rightmost literal data unit that must exist in any +matched string, other than at its start. The fourth argument should point to +an \fBint\fP variable. If there is no such value, 0 is returned. If returning +1, the character value itself can be retrieved using PCRE_INFO_REQUIREDCHAR. +.P +For anchored patterns, a last literal value is recorded only if it follows +something of variable length. For example, for the pattern /^a\ed+z\ed+/ the +returned value is 1 (with "z" returned from PCRE_INFO_REQUIREDCHAR), but for +/^a\edz\ed/ the returned value is 0. +.sp + PCRE_INFO_REQUIREDCHAR +.sp +Return the value of the rightmost literal data unit that must exist in any +matched string, other than at its start, if such a value has been recorded. The +fourth argument should point to an \fBuint32_t\fP variable. If there is no such +value, 0 is returned. +. +. +.SH "REFERENCE COUNTS" +.rs +.sp +.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP); +.PP +The \fBpcre_refcount()\fP function is used to maintain a reference count in the +data block that contains a compiled pattern. It is provided for the benefit of +applications that operate in an object-oriented manner, where different parts +of the application may be using the same compiled pattern, but you want to free +the block when they are all done. +.P +When a pattern is compiled, the reference count field is initialized to zero. +It is changed only by calling this function, whose action is to add the +\fIadjust\fP value (which may be positive or negative) to it. The yield of the +function is the new value. However, the value of the count is constrained to +lie between 0 and 65535, inclusive.
If the new value is outside these limits, +it is forced to the appropriate limit value. +.P +Except when it is zero, the reference count is not correctly preserved if a +pattern is compiled on one host and then transferred to a host whose byte-order +is different. (This seems a highly unlikely scenario.) +. +. +.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION" +.rs +.sp +.nf +.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP, +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);" +.fi +.P +The function \fBpcre_exec()\fP is called to match a subject string against a +compiled pattern, which is passed in the \fIcode\fP argument. If the +pattern was studied, the result of the study should be passed in the +\fIextra\fP argument. You can call \fBpcre_exec()\fP with the same \fIcode\fP +and \fIextra\fP arguments as many times as you like, in order to match +different subject strings with the same pattern. +.P +This function is the main matching facility of the library, and it operates in +a Perl-like manner. For specialist use there is also an alternative matching +function, which is described +.\" HTML <a href="#dfamatch"> +.\" </a> +below +.\" +in the section about the \fBpcre_dfa_exec()\fP function. +.P +In most applications, the pattern will have been compiled (and optionally +studied) in the same process that calls \fBpcre_exec()\fP. However, it is +possible to save compiled patterns and study data, and then use them later +in different processes, possibly even on different hosts. For a discussion +about this, see the +.\" HREF +\fBpcreprecompile\fP +.\" +documentation. 
+.P +Here is an example of a simple call to \fBpcre_exec()\fP: +.sp + int rc; + int ovector[30]; + rc = pcre_exec( + re, /* result of pcre_compile() */ + NULL, /* we didn't study the pattern */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + ovector, /* vector of integers for substring information */ + 30); /* number of elements (NOT size in bytes) */ +. +. +.\" HTML <a name="extradata"></a> +.SS "Extra data for \fBpcre_exec()\fR" +.rs +.sp +If the \fIextra\fP argument is not NULL, it must point to a \fBpcre_extra\fP +data block. The \fBpcre_study()\fP function returns such a block (when it +doesn't return NULL), but you can also create one for yourself, and pass +additional information in it. The \fBpcre_extra\fP block contains the following +fields (not necessarily in this order): +.sp + unsigned long int \fIflags\fP; + void *\fIstudy_data\fP; + void *\fIexecutable_jit\fP; + unsigned long int \fImatch_limit\fP; + unsigned long int \fImatch_limit_recursion\fP; + void *\fIcallout_data\fP; + const unsigned char *\fItables\fP; + unsigned char **\fImark\fP; +.sp +In the 16-bit version of this structure, the \fImark\fP field has type +"PCRE_UCHAR16 **". +.sp +In the 32-bit version of this structure, the \fImark\fP field has type +"PCRE_UCHAR32 **". +.P +The \fIflags\fP field is used to specify which of the other fields are set. The +flag bits are: +.sp + PCRE_EXTRA_CALLOUT_DATA + PCRE_EXTRA_EXECUTABLE_JIT + PCRE_EXTRA_MARK + PCRE_EXTRA_MATCH_LIMIT + PCRE_EXTRA_MATCH_LIMIT_RECURSION + PCRE_EXTRA_STUDY_DATA + PCRE_EXTRA_TABLES +.sp +Other flag bits should be set to zero. The \fIstudy_data\fP field and sometimes +the \fIexecutable_jit\fP field are set in the \fBpcre_extra\fP block that is +returned by \fBpcre_study()\fP, together with the appropriate flag bits. 
You +should not set these yourself, but you may add to the block by setting other +fields and their corresponding flag bits. +.P +The \fImatch_limit\fP field provides a means of preventing PCRE from using up a +vast amount of resources when running patterns that are not going to match, +but which have a very large number of possibilities in their search trees. The +classic example is a pattern that uses nested unlimited repeats. +.P +Internally, \fBpcre_exec()\fP uses a function called \fBmatch()\fP, which it +calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is +imposed on the number of times this function is called during a match, which +has the effect of limiting the amount of backtracking that can take place. For +patterns that are not anchored, the count restarts from zero for each position +in the subject string. +.P +When \fBpcre_exec()\fP is called with a pattern that was successfully studied +with a JIT option, the way that the matching is executed is entirely different. +However, there is still the possibility of runaway matching that goes on for a +very long time, and so the \fImatch_limit\fP value is also used in this case +(but in a different way) to limit how long the matching can continue. +.P +The default value for the limit can be set when PCRE is built; the default +default is 10 million, which handles all but the most extreme cases. You can +override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP +block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in +the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns +PCRE_ERROR_MATCHLIMIT. +.P +A value for the match limit may also be supplied by an item at the start of a +pattern of the form +.sp + (*LIMIT_MATCH=d) +.sp +where d is a decimal number. However, such a setting is ignored unless d is +less than the limit set by the caller of \fBpcre_exec()\fP or, if no such limit +is set, less than the default. 
+.P +The \fImatch_limit_recursion\fP field is similar to \fImatch_limit\fP, but +instead of limiting the total number of times that \fBmatch()\fP is called, it +limits the depth of recursion. The recursion depth is a smaller number than the +total number of calls, because not all calls to \fBmatch()\fP are recursive. +This limit is of use only if it is set smaller than \fImatch_limit\fP. +.P +Limiting the recursion depth limits the amount of machine stack that can be +used, or, when PCRE has been compiled to use memory on the heap instead of the +stack, the amount of heap memory that can be used. This limit is not relevant, +and is ignored, when matching is done using JIT compiled code. +.P +The default value for \fImatch_limit_recursion\fP can be set when PCRE is +built; the default default is the same value as the default for +\fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP +with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and +PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit +is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT. +.P +A value for the recursion limit may also be supplied by an item at the start of +a pattern of the form +.sp + (*LIMIT_RECURSION=d) +.sp +where d is a decimal number. However, such a setting is ignored unless d is +less than the limit set by the caller of \fBpcre_exec()\fP or, if no such limit +is set, less than the default. +.P +The \fIcallout_data\fP field is used in conjunction with the "callout" feature, +and is described in the +.\" HREF +\fBpcrecallout\fP +.\" +documentation. +.P +The \fItables\fP field is provided for use with patterns that have been +pre-compiled using custom character tables, saved to disc or elsewhere, and +then reloaded, because the tables that were used to compile a pattern are not +saved with it. 
See the +.\" HREF +\fBpcreprecompile\fP +.\" +documentation for a discussion of saving compiled patterns for later use. If +NULL is passed using this mechanism, it forces PCRE's internal tables to be +used. +.P +\fBWarning:\fP The tables that \fBpcre_exec()\fP uses must be the same as those +that were used when the pattern was compiled. If this is not the case, the +behaviour of \fBpcre_exec()\fP is undefined. Therefore, when a pattern is +compiled and matched in the same process, this field should never be set. In +this (the most common) case, the correct table pointer is automatically passed +with the compiled pattern from \fBpcre_compile()\fP to \fBpcre_exec()\fP. +.P +If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must +be set to point to a suitable variable. If the pattern contains any +backtracking control verbs such as (*MARK:NAME), and the execution ends up with +a name to pass back, a pointer to the name string (zero terminated) is placed +in the variable pointed to by the \fImark\fP field. The names are within the +compiled pattern; if you wish to retain such a name you must copy it before +freeing the memory of a compiled pattern. If there is no name to pass back, the +variable pointed to by the \fImark\fP field is set to NULL. For details of the +backtracking control verbs, see the section entitled +.\" HTML <a href="pcrepattern#backtrackcontrol"> +.\" </a> +"Backtracking control" +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. +. +. +.\" HTML <a name="execoptions"></a> +.SS "Option bits for \fBpcre_exec()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre_exec()\fP must be +zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP, +PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, +PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, and +PCRE_PARTIAL_SOFT. 
+.P +If the pattern was successfully studied with one of the just-in-time (JIT) +compile options, the only supported options for JIT execution are +PCRE_NO_UTF8_CHECK, PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, +PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If an +unsupported option is used, JIT execution is disabled and the normal +interpretive code in \fBpcre_exec()\fP is run. +.sp + PCRE_ANCHORED +.sp +The PCRE_ANCHORED option limits \fBpcre_exec()\fP to matching at the first +matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out +to be anchored by virtue of its contents, it cannot be made unanchored at +matching time. +.sp + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE +.sp +These options (which are mutually exclusive) control what the \eR escape +sequence matches. The choice is either to match only CR, LF, or CRLF, or to +match any Unicode newline sequence. These options override the choice that was +made or defaulted when the pattern was compiled. +.sp + PCRE_NEWLINE_CR + PCRE_NEWLINE_LF + PCRE_NEWLINE_CRLF + PCRE_NEWLINE_ANYCRLF + PCRE_NEWLINE_ANY +.sp +These options override the newline definition that was chosen or defaulted when +the pattern was compiled. For details, see the description of +\fBpcre_compile()\fP above. During matching, the newline choice affects the +behaviour of the dot, circumflex, and dollar metacharacters. It may also alter +the way the match position is advanced after a match failure for an unanchored +pattern. +.P +When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a +match attempt for an unanchored pattern fails when the current position is at a +CRLF sequence, and the pattern contains no explicit matches for CR or LF +characters, the match position is advanced by two characters instead of one, in +other words, to after the CRLF. +.P +The above rule is a compromise that makes the most common cases work as +expected. 
For example, if the pattern is .+A (and the PCRE_DOTALL option is not +set), it does not match the string "\er\enA" because, after failing at the +start, it skips both the CR and the LF before retrying. However, the pattern +[\er\en]A does match that string, because it contains an explicit CR or LF +reference, and so advances only by one character after the first failure. +.P +An explicit match for CR or LF is either a literal appearance of one of those +characters, or one of the \er or \en escape sequences. Implicit matches such as +[^X] do not count, nor does \es (which includes CR and LF in the characters +that it matches). +.P +Notwithstanding the above, anomalous effects may still occur when CRLF is a +valid newline sequence and explicit \er or \en escapes appear in the pattern. +.sp + PCRE_NOTBOL +.sp +This option specifies that the first character of the subject string is not the +beginning of a line, so the circumflex metacharacter should not match before +it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex +never to match. This option affects only the behaviour of the circumflex +metacharacter. It does not affect \eA. +.sp + PCRE_NOTEOL +.sp +This option specifies that the end of the subject string is not the end of a +line, so the dollar metacharacter should not match it nor (except in multiline +mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at +compile time) causes dollar never to match. This option affects only the +behaviour of the dollar metacharacter. It does not affect \eZ or \ez. +.sp + PCRE_NOTEMPTY +.sp +An empty string is not considered to be a valid match if this option is set. If +there are alternatives in the pattern, they are tried. If all the alternatives +match the empty string, the entire match fails. For example, if the pattern +.sp + a?b? +.sp +is applied to a string not beginning with "a" or "b", it matches an empty +string at the start of the subject. 
With PCRE_NOTEMPTY set, this match is not +valid, so PCRE searches further into the string for occurrences of "a" or "b". +.sp + PCRE_NOTEMPTY_ATSTART +.sp +This is like PCRE_NOTEMPTY, except that an empty string match that is not at +the start of the subject is permitted. If the pattern is anchored, such a match +can occur only if the pattern contains \eK. +.P +Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it +does make a special case of a pattern match of the empty string within its +\fBsplit()\fP function, and when using the /g modifier. It is possible to +emulate Perl's behaviour after matching a null string by first trying the match +again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then +if that fails, by advancing the starting offset (see below) and trying an +ordinary match again. There is some code that demonstrates how to do this in +the +.\" HREF +\fBpcredemo\fP +.\" +sample program. In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +.sp + PCRE_NO_START_OPTIMIZE +.sp +There are a number of optimizations that \fBpcre_exec()\fP uses at the start of +a match, in order to speed up the process. For example, if it is known that an +unanchored match must start with a specific character, it searches the subject +for that character, and fails immediately if it cannot find it, without +actually running the main matching function. This means that a special item +such as (*COMMIT) at the start of a pattern is not considered until after a +suitable starting point for the match has been found. Also, when callouts or +(*MARK) items are in use, these "start-up" optimizations can cause them to be +skipped if the pattern is never actually used. 
The start-up optimizations are +in effect a pre-scan of the subject that takes place before the pattern is run. +.P +The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly +causing performance to suffer, but ensuring that in cases where the result is +"no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) +are considered at every possible starting position in the subject string. If +PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching +time. The use of PCRE_NO_START_OPTIMIZE at matching time (that is, passing it +to \fBpcre_exec()\fP) disables JIT execution; in this situation, matching is +always done interpretively. +.P +Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation. +Consider the pattern +.sp + (*COMMIT)ABC +.sp +When this is compiled, PCRE records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run with PCRE_NO_START_OPTIMIZE set, the initial scan along the +subject string does not happen. The first match attempt is run starting from +"D" and when this fails, (*COMMIT) prevents any further matches being tried, so +the overall result is "no match". If the pattern is studied, more start-up +optimizations may be used. For example, a minimum length for the subject may be +recorded. Consider the pattern +.sp + (*MARK:A)(X|Y) +.sp +The minimum length for a match is one character. If the subject is "ABC", there +will be attempts to match "ABC", "BC", "C", and then finally an empty string. +If the pattern is studied, the final attempt does not take place, because PCRE +knows that the subject is too short, and so the (*MARK) is never encountered. 
+In this case, studying the pattern does not affect the overall match result, +which is still "no match", but it does affect the auxiliary information that is +returned. +.sp + PCRE_NO_UTF8_CHECK +.sp +When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8 +string is automatically checked when \fBpcre_exec()\fP is subsequently called. +The entire string is checked before any other processing takes place. The value +of \fIstartoffset\fP is also checked to ensure that it points to the start of a +UTF-8 character. There is a discussion about the +.\" HTML <a href="pcreunicode.html#utf8strings"> +.\" </a> +validity of UTF-8 strings +.\" +in the +.\" HREF +\fBpcreunicode\fP +.\" +page. If an invalid sequence of bytes is found, \fBpcre_exec()\fP returns the +error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is a +truncated character at the end of the subject, PCRE_ERROR_SHORTUTF8. In both +cases, information about the precise nature of the error may also be returned +(see the descriptions of these errors in the section entitled \fIError return +values from\fP \fBpcre_exec()\fP +.\" HTML <a href="#errorlist"> +.\" </a> +below). +.\" +If \fIstartoffset\fP contains a value that does not point to the start of a +UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is +returned. +.P +If you already know that your subject is valid, and you want to skip these +checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when +calling \fBpcre_exec()\fP. You might want to do this for the second and +subsequent calls to \fBpcre_exec()\fP if you are making repeated calls to find +all the matches in a single subject string. However, you should be sure that +the value of \fIstartoffset\fP points to the start of a character (or the end +of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an +invalid string as a subject or an invalid value of \fIstartoffset\fP is +undefined. 
Your program may crash or loop. +.sp + PCRE_PARTIAL_HARD + PCRE_PARTIAL_SOFT +.sp +These options turn on the partial matching feature. For backwards +compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match +occurs if the end of the subject string is reached successfully, but there are +not enough subject characters to complete the match. If this happens when +PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set, matching continues by +testing any remaining alternatives. Only if no complete match can be found is +PCRE_ERROR_PARTIAL returned instead of PCRE_ERROR_NOMATCH. In other words, +PCRE_PARTIAL_SOFT says that the caller is prepared to handle a partial match, +but only if no complete match can be found. +.P +If PCRE_PARTIAL_HARD is set, it overrides PCRE_PARTIAL_SOFT. In this case, if a +partial match is found, \fBpcre_exec()\fP immediately returns +PCRE_ERROR_PARTIAL, without considering any other alternatives. In other words, +when PCRE_PARTIAL_HARD is set, a partial match is considered to be more +important than an alternative complete match. +.P +In both cases, the portion of the string that was inspected when the partial +match was found is set as the first matching string. There is a more detailed +discussion of partial and multi-segment matching, with examples, in the +.\" HREF +\fBpcrepartial\fP +.\" +documentation. +. +. +.SS "The string to be matched by \fBpcre_exec()\fP" +.rs +.sp +The subject string is passed to \fBpcre_exec()\fP as a pointer in +\fIsubject\fP, a length in \fIlength\fP, and a starting offset in +\fIstartoffset\fP. The units for \fIlength\fP and \fIstartoffset\fP are bytes +for the 8-bit library, 16-bit data items for the 16-bit library, and 32-bit +data items for the 32-bit library. +.P +If \fIstartoffset\fP is negative or greater than the length of the subject, +\fBpcre_exec()\fP returns PCRE_ERROR_BADOFFSET. 
When the starting offset is +zero, the search for a match starts at the beginning of the subject, and this +is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point +to the start of a character, or the end of the subject (in UTF-32 mode, one +data unit equals one character, so all offsets are valid). Unlike the pattern +string, the subject may contain binary zeroes. +.P +A non-zero starting offset is useful when searching for another match in the +same subject by calling \fBpcre_exec()\fP again after a previous success. +Setting \fIstartoffset\fP differs from just passing over a shortened string and +setting PCRE_NOTBOL in the case of a pattern that begins with any kind of +lookbehind. For example, consider the pattern +.sp + \eBiss\eB +.sp +which finds occurrences of "iss" in the middle of words. (\eB matches only if +the current position in the subject is not a word boundary.) When applied to +the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first +occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the +subject, namely "issippi", it does not match, because \eB is always false at the +start of the subject, which is deemed to be a word boundary. However, if +\fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP +set to 4, it finds the second occurrence of "iss" because it is able to look +behind the starting point to discover that it is preceded by a letter. +.P +Finding all the matches in a subject is tricky when the pattern can match an +empty string. It is possible to emulate Perl's /g behaviour by first trying the +match again at the same offset, with the PCRE_NOTEMPTY_ATSTART and +PCRE_ANCHORED options, and then if that fails, advancing the starting offset +and trying an ordinary match again. There is some code that demonstrates how to +do this in the +.\" HREF +\fBpcredemo\fP +.\" +sample program. 
In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +.P +If a non-zero starting offset is passed when the pattern is anchored, one +attempt to match at the given offset is made. This can only succeed if the +pattern does not require the match to be at the start of the subject. +. +. +.SS "How \fBpcre_exec()\fP returns captured substrings" +.rs +.sp +In general, a pattern matches a certain portion of the subject, and in +addition, further substrings from the subject may be picked out by parts of the +pattern. Following the usage in Jeffrey Friedl's book, this is called +"capturing" in what follows, and the phrase "capturing subpattern" is used for +a fragment of a pattern that picks out a substring. PCRE supports several other +kinds of parenthesized subpattern that do not cause substrings to be captured. +.P +Captured substrings are returned to the caller via a vector of integers whose +address is passed in \fIovector\fP. The number of elements in the vector is +passed in \fIovecsize\fP, which must be a non-negative number. \fBNote\fP: this +argument is NOT the size of \fIovector\fP in bytes. +.P +The first two-thirds of the vector is used to pass back captured substrings, +each substring using a pair of integers. The remaining third of the vector is +used as workspace by \fBpcre_exec()\fP while matching capturing subpatterns, +and is not available for passing back information. The number passed in +\fIovecsize\fP should always be a multiple of three. If it is not, it is +rounded down. +.P +When a match is successful, information about captured substrings is returned +in pairs of integers, starting at the beginning of \fIovector\fP, and +continuing up to two-thirds of its length at the most. 
The first element of +each pair is set to the offset of the first character in a substring, and the +second is set to the offset of the first character after the end of a +substring. These values are always data unit offsets, even in UTF mode. They +are byte offsets in the 8-bit library, 16-bit data item offsets in the 16-bit +library, and 32-bit data item offsets in the 32-bit library. \fBNote\fP: they +are not character counts. +.P +The first pair of integers, \fIovector[0]\fP and \fIovector[1]\fP, identify the +portion of the subject string matched by the entire pattern. The next pair is +used for the first capturing subpattern, and so on. The value returned by +\fBpcre_exec()\fP is one more than the highest numbered pair that has been set. +For example, if two substrings have been captured, the returned value is 3. If +there are no capturing subpatterns, the return value from a successful match is +1, indicating that just the first pair of offsets has been set. +.P +If a capturing subpattern is matched repeatedly, it is the last portion of the +string that it matched that is returned. +.P +If the vector is too small to hold all the captured substring offsets, it is +used as far as possible (up to two-thirds of its length), and the function +returns a value of zero. If neither the actual string matched nor any captured +substrings are of interest, \fBpcre_exec()\fP may be called with \fIovector\fP +passed as NULL and \fIovecsize\fP as zero. However, if the pattern contains +back references and the \fIovector\fP is not big enough to remember the related +substrings, PCRE has to get additional memory for use during matching. Thus it +is usually advisable to supply an \fIovector\fP of reasonable size. +.P +There are some cases where zero is returned (indicating vector overflow) when +in fact the vector is exactly the right size for the final match. 
For example, +consider the pattern +.sp + (a)(?:(b)c|bd) +.sp +If a vector of 6 elements (allowing for only 1 captured substring) is given +with subject string "abd", \fBpcre_exec()\fP will try to set the second +captured string, thereby recording a vector overflow, before failing to match +"c" and backing up to try the second alternative. The zero return, however, +does correctly indicate that the maximum number of slots (namely 2) have been +filled. In similar cases where there is temporary overflow, but the final +number of used slots is actually less than the maximum, a non-zero value is +returned. +.P +The \fBpcre_fullinfo()\fP function can be used to find out how many capturing +subpatterns there are in a compiled pattern. The smallest size for +\fIovector\fP that will allow for \fIn\fP captured substrings, in addition to +the offsets of the substring matched by the whole pattern, is (\fIn\fP+1)*3. +.P +It is possible for capturing subpattern number \fIn+1\fP to match some part of +the subject when subpattern \fIn\fP has not been used at all. For example, if +the string "abc" is matched against the pattern (a|(z))(bc) the return from the +function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this +happens, both values in the offset pairs corresponding to unused subpatterns +are set to -1. +.P +Offset values that correspond to unused subpatterns at the end of the +expression are also set to -1. For example, if the string "abc" is matched +against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The +return from the function is 2, because the highest used capturing subpattern +number is 1, and the offsets for the second and third capturing subpatterns +(assuming the vector is large enough, of course) are set to -1. +.P +\fBNote\fP: Elements in the first two-thirds of \fIovector\fP that do not +correspond to capturing parentheses in the pattern are never changed. 
That is, +if a pattern contains \fIn\fP capturing parentheses, no more than +\fIovector[0]\fP to \fIovector[2n+1]\fP are set by \fBpcre_exec()\fP. The other +elements (in the first two-thirds) retain whatever values they previously had. +.P +Some convenience functions are provided for extracting the captured substrings +as separate strings. These are described below. +. +. +.\" HTML <a name="errorlist"></a> +.SS "Error return values from \fBpcre_exec()\fP" +.rs +.sp +If \fBpcre_exec()\fP fails, it returns a negative number. The following are +defined in the header file: +.sp + PCRE_ERROR_NOMATCH (-1) +.sp +The subject string did not match the pattern. +.sp + PCRE_ERROR_NULL (-2) +.sp +Either \fIcode\fP or \fIsubject\fP was passed as NULL, or \fIovector\fP was +NULL and \fIovecsize\fP was not zero. +.sp + PCRE_ERROR_BADOPTION (-3) +.sp +An unrecognized bit was set in the \fIoptions\fP argument. +.sp + PCRE_ERROR_BADMAGIC (-4) +.sp +PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch +the case when it is passed a junk pointer and to detect when a pattern that was +compiled in an environment of one endianness is run in an environment with the +other endianness. This is the error that PCRE gives when the magic number is +not present. +.sp + PCRE_ERROR_UNKNOWN_OPCODE (-5) +.sp +While running the pattern match, an unknown item was encountered in the +compiled pattern. This error could be caused by a bug in PCRE or by overwriting +of the compiled pattern. +.sp + PCRE_ERROR_NOMEMORY (-6) +.sp +If a pattern contains back references, but the \fIovector\fP that is passed to +\fBpcre_exec()\fP is not big enough to remember the referenced substrings, PCRE +gets a block of memory at the start of matching to use for this purpose. If the +call via \fBpcre_malloc()\fP fails, this error is given. The memory is +automatically freed at the end of matching. +.P +This error is also given if \fBpcre_stack_malloc()\fP fails in +\fBpcre_exec()\fP. 
This can happen only when PCRE has been compiled with +\fB--disable-stack-for-recursion\fP. +.sp + PCRE_ERROR_NOSUBSTRING (-7) +.sp +This error is used by the \fBpcre_copy_substring()\fP, +\fBpcre_get_substring()\fP, and \fBpcre_get_substring_list()\fP functions (see +below). It is never returned by \fBpcre_exec()\fP. +.sp + PCRE_ERROR_MATCHLIMIT (-8) +.sp +The backtracking limit, as specified by the \fImatch_limit\fP field in a +\fBpcre_extra\fP structure (or defaulted) was reached. See the description +above. +.sp + PCRE_ERROR_CALLOUT (-9) +.sp +This error is never generated by \fBpcre_exec()\fP itself. It is provided for +use by callout functions that want to yield a distinctive error code. See the +.\" HREF +\fBpcrecallout\fP +.\" +documentation for details. +.sp + PCRE_ERROR_BADUTF8 (-10) +.sp +A string that contains an invalid UTF-8 byte sequence was passed as a subject, +and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector +(\fIovecsize\fP) is at least 2, the byte offset to the start of the invalid +UTF-8 character is placed in the first element, and a reason code is placed in +the second element. The reason codes are listed in the +.\" HTML <a href="#badutf8reasons"> +.\" </a> +following section. +.\" +For backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a +truncated UTF-8 character at the end of the subject (reason codes 1 to 5), +PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8. +.sp + PCRE_ERROR_BADUTF8_OFFSET (-11) +.sp +The UTF-8 byte sequence that was passed as a subject was checked and found to +be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of +\fIstartoffset\fP did not point to the beginning of a UTF-8 character or the +end of the subject. +.sp + PCRE_ERROR_PARTIAL (-12) +.sp +The subject string did not match, but it did match partially. See the +.\" HREF +\fBpcrepartial\fP +.\" +documentation for details of partial matching. 
+.sp + PCRE_ERROR_BADPARTIAL (-13) +.sp +This code is no longer in use. It was formerly returned when the PCRE_PARTIAL +option was used with a compiled pattern containing items that were not +supported for partial matching. From release 8.00 onwards, there are no +restrictions on partial matching. +.sp + PCRE_ERROR_INTERNAL (-14) +.sp +An unexpected internal error has occurred. This error could be caused by a bug +in PCRE or by overwriting of the compiled pattern. +.sp + PCRE_ERROR_BADCOUNT (-15) +.sp +This error is given if the value of the \fIovecsize\fP argument is negative. +.sp + PCRE_ERROR_RECURSIONLIMIT (-21) +.sp +The internal recursion limit, as specified by the \fImatch_limit_recursion\fP +field in a \fBpcre_extra\fP structure (or defaulted) was reached. See the +description above. +.sp + PCRE_ERROR_BADNEWLINE (-23) +.sp +An invalid combination of PCRE_NEWLINE_\fIxxx\fP options was given. +.sp + PCRE_ERROR_BADOFFSET (-24) +.sp +The value of \fIstartoffset\fP was negative or greater than the length of the +subject, that is, the value in \fIlength\fP. +.sp + PCRE_ERROR_SHORTUTF8 (-25) +.sp +This error is returned instead of PCRE_ERROR_BADUTF8 when the subject string +ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. +Information about the failure is returned as for PCRE_ERROR_BADUTF8. It is in +fact sufficient to detect this case, but this special error code for +PCRE_PARTIAL_HARD precedes the implementation of returned information; it is +retained for backwards compatibility. +.sp + PCRE_ERROR_RECURSELOOP (-26) +.sp +This error is returned when \fBpcre_exec()\fP detects a recursion loop within +the pattern. Specifically, it means that either the whole pattern or a +subpattern has been called recursively for the second time at the same position +in the subject string. 
Some simple patterns that might do this are detected and +faulted at compile time, but more complicated cases, in particular mutual +recursions between two different subpatterns, cannot be detected until run +time. +.sp + PCRE_ERROR_JIT_STACKLIMIT (-27) +.sp +This error is returned when a pattern that was successfully studied using a +JIT compile option is being matched, but the memory available for the +just-in-time processing stack is not large enough. See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for more details. +.sp + PCRE_ERROR_BADMODE (-28) +.sp +This error is given if a pattern that was compiled by the 8-bit library is +passed to a 16-bit or 32-bit library function, or vice versa. +.sp + PCRE_ERROR_BADENDIANNESS (-29) +.sp +This error is given if a pattern that was compiled and saved is reloaded on a +host with different endianness. The utility function +\fBpcre_pattern_to_host_byte_order()\fP can be used to convert such a pattern +so that it runs on the new host. +.sp + PCRE_ERROR_JIT_BADOPTION (-31) +.sp +This error is returned when a pattern that was successfully studied using a JIT +compile option is being matched, but the matching mode (partial or complete +match) does not correspond to any JIT compilation mode. When the JIT fast path +function is used, this error may be also given for invalid options. See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for more details. +.sp + PCRE_ERROR_BADLENGTH (-32) +.sp +This error is given if \fBpcre_exec()\fP is called with a negative value for +the \fIlength\fP argument. +.P +Error numbers -16 to -20, -22, and -30 are not used by \fBpcre_exec()\fP. +. +. +.\" HTML <a name="badutf8reasons"></a> +.SS "Reason codes for invalid UTF-8 strings" +.rs +.sp +This section applies only to the 8-bit library. The corresponding information +for the 16-bit and 32-bit libraries is given in the +.\" HREF +\fBpcre16\fP +.\" +and +.\" HREF +\fBpcre32\fP +.\" +pages. 
+.P +When \fBpcre_exec()\fP returns either PCRE_ERROR_BADUTF8 or +PCRE_ERROR_SHORTUTF8, and the size of the output vector (\fIovecsize\fP) is at +least 2, the offset of the start of the invalid UTF-8 character is placed in +the first output vector element (\fIovector[0]\fP) and a reason code is placed +in the second element (\fIovector[1]\fP). The reason codes are given names in +the \fBpcre.h\fP header file: +.sp + PCRE_UTF8_ERR1 + PCRE_UTF8_ERR2 + PCRE_UTF8_ERR3 + PCRE_UTF8_ERR4 + PCRE_UTF8_ERR5 +.sp +The string ends with a truncated UTF-8 character; the code specifies how many +bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be +no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279) +allows for up to 6 bytes, and this is checked first; hence the possibility of +4 or 5 missing bytes. +.sp + PCRE_UTF8_ERR6 + PCRE_UTF8_ERR7 + PCRE_UTF8_ERR8 + PCRE_UTF8_ERR9 + PCRE_UTF8_ERR10 +.sp +The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the +character do not have the binary value 0b10 (that is, either the most +significant bit is 0, or the next bit is 1). +.sp + PCRE_UTF8_ERR11 + PCRE_UTF8_ERR12 +.sp +A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; +these code points are excluded by RFC 3629. +.sp + PCRE_UTF8_ERR13 +.sp +A 4-byte character has a value greater than 0x10ffff; these code points are +excluded by RFC 3629. +.sp + PCRE_UTF8_ERR14 +.sp +A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of +code points is reserved by RFC 3629 for use with UTF-16, and so is excluded +from UTF-8. +.sp + PCRE_UTF8_ERR15 + PCRE_UTF8_ERR16 + PCRE_UTF8_ERR17 + PCRE_UTF8_ERR18 + PCRE_UTF8_ERR19 +.sp +A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a +value that can be represented by fewer bytes, which is invalid. For example, +the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just +one byte. 
+.sp + PCRE_UTF8_ERR20 +.sp +The two most significant bits of the first byte of a character have the binary +value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a +byte can only validly occur as the second or subsequent byte of a multi-byte +character. +.sp + PCRE_UTF8_ERR21 +.sp +The first byte of a character has the value 0xfe or 0xff. These values can +never occur in a valid UTF-8 string. +.sp + PCRE_UTF8_ERR22 +.sp +This error code was formerly used when the presence of a so-called +"non-character" caused an error. Unicode corrigendum #9 makes it clear that +such characters should not cause a string to be rejected, and so this code is +no longer in use and is never returned. +. +. +.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER" +.rs +.sp +.nf +.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP," +.B " int \fIbuffersize\fP);" +.sp +.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP, +.B " int \fIstringcount\fP, int \fIstringnumber\fP," +.B " const char **\fIstringptr\fP);" +.sp +.B int pcre_get_substring_list(const char *\fIsubject\fP, +.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);" +.fi +.PP +Captured substrings can be accessed directly by using the offsets returned by +\fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions +\fBpcre_copy_substring()\fP, \fBpcre_get_substring()\fP, and +\fBpcre_get_substring_list()\fP are provided for extracting captured substrings +as new, separate, zero-terminated strings. These functions identify substrings +by number. The next section describes functions for extracting named +substrings. +.P +A substring that contains a binary zero is correctly extracted and has a +further zero added on the end, but the result is not, of course, a C string. 
+However, you can process such a string by referring to the length that is +returned by \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP. +Unfortunately, the interface to \fBpcre_get_substring_list()\fP is not adequate +for handling strings containing binary zeros, because the end of the final +string is not independently indicated. +.P +The first three arguments are the same for all three of these functions: +\fIsubject\fP is the subject string that has just been successfully matched, +\fIovector\fP is a pointer to the vector of integer offsets that was passed to +\fBpcre_exec()\fP, and \fIstringcount\fP is the number of substrings that were +captured by the match, including the substring that matched the entire regular +expression. This is the value returned by \fBpcre_exec()\fP if it is greater +than zero. If \fBpcre_exec()\fP returned zero, indicating that it ran out of +space in \fIovector\fP, the value passed as \fIstringcount\fP should be the +number of elements in the vector divided by three. +.P +The functions \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP +extract a single substring, whose number is given as \fIstringnumber\fP. A +value of zero extracts the substring that matched the entire pattern, whereas +higher values extract the captured substrings. For \fBpcre_copy_substring()\fP, +the string is placed in \fIbuffer\fP, whose length is given by +\fIbuffersize\fP, while for \fBpcre_get_substring()\fP a new block of memory is +obtained via \fBpcre_malloc\fP, and its address is returned via +\fIstringptr\fP. The yield of the function is the length of the string, not +including the terminating zero, or one of these error codes: +.sp + PCRE_ERROR_NOMEMORY (-6) +.sp +The buffer was too small for \fBpcre_copy_substring()\fP, or the attempt to get +memory failed for \fBpcre_get_substring()\fP. +.sp + PCRE_ERROR_NOSUBSTRING (-7) +.sp +There is no substring whose number is \fIstringnumber\fP. 
+.P +The \fBpcre_get_substring_list()\fP function extracts all available substrings +and builds a list of pointers to them. All this is done in a single block of +memory that is obtained via \fBpcre_malloc\fP. The address of the memory block +is returned via \fIlistptr\fP, which is also the start of the list of string +pointers. The end of the list is marked by a NULL pointer. The yield of the +function is zero if all went well, or the error code +.sp + PCRE_ERROR_NOMEMORY (-6) +.sp +if the attempt to get the memory block failed. +.P +When any of these functions encounter a substring that is unset, which can +happen when capturing subpattern number \fIn+1\fP matches some part of the +subject, but subpattern \fIn\fP has not been used at all, they return an empty +string. This can be distinguished from a genuine zero-length substring by +inspecting the appropriate offset in \fIovector\fP, which is negative for unset +substrings. +.P +The two convenience functions \fBpcre_free_substring()\fP and +\fBpcre_free_substring_list()\fP can be used to free the memory returned by +a previous call of \fBpcre_get_substring()\fP or +\fBpcre_get_substring_list()\fP, respectively. They do nothing more than call +the function pointed to by \fBpcre_free\fP, which of course could be called +directly from a C program. However, PCRE is used in some situations where it is +linked via a special interface to another programming language that cannot use +\fBpcre_free\fP directly; it is for these cases that the functions are +provided. +. +. 
+.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME" +.rs +.sp +.nf +.B int pcre_get_stringnumber(const pcre *\fIcode\fP, +.B " const char *\fIname\fP);" +.sp +.B int pcre_copy_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " char *\fIbuffer\fP, int \fIbuffersize\fP);" +.sp +.B int pcre_get_named_substring(const pcre *\fIcode\fP, +.B " const char *\fIsubject\fP, int *\fIovector\fP," +.B " int \fIstringcount\fP, const char *\fIstringname\fP," +.B " const char **\fIstringptr\fP);" +.fi +.PP +To extract a substring by name, you first have to find its associated number. +For example, for this pattern +.sp + (a+)b(?<xxx>\ed+)... +.sp +the number of the subpattern called "xxx" is 2. If the name is known to be +unique (PCRE_DUPNAMES was not set), you can find the number from the name by +calling \fBpcre_get_stringnumber()\fP. The first argument is the compiled +pattern, and the second is the name. The yield of the function is the +subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of +that name. +.P +Given the number, you can extract the substring directly, or use one of the +functions described in the previous section. For convenience, there are also +two functions that do the whole job. +.P +Most of the arguments of \fBpcre_copy_named_substring()\fP and +\fBpcre_get_named_substring()\fP are the same as those for the similarly named +functions that extract by number. As these are described in the previous +section, they are not re-described here. There are just two differences: +.P +First, instead of a substring number, a substring name is given. Second, there +is an extra argument, given at the start, which is a pointer to the compiled +pattern. This is needed in order to gain access to the name-to-number +translation table. 
+.P +These functions call \fBpcre_get_stringnumber()\fP, and if it succeeds, they +then call \fBpcre_copy_substring()\fP or \fBpcre_get_substring()\fP, as +appropriate. \fBNOTE:\fP If PCRE_DUPNAMES is set and there are duplicate names, +the behaviour may not be what you want (see the next section). +.P +\fBWarning:\fP If the pattern uses the (?| feature to set up multiple +subpatterns with the same number, as described in the +.\" HTML <a href="pcrepattern.html#dupsubpatternnumber"> +.\" </a> +section on duplicate subpattern numbers +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +page, you cannot use names to distinguish the different subpatterns, because +names are not included in the compiled code. The matching process uses only +numbers. For this reason, the use of different names for subpatterns of the +same number causes an error at compile time. +. +. +.SH "DUPLICATE SUBPATTERN NAMES" +.rs +.sp +.nf +.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP, +.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);" +.fi +.PP +When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns +are not required to be unique. (Duplicate names are always allowed for +subpatterns with the same number, created by using the (?| feature. Indeed, if +such subpatterns are named, they are required to use the same names.) +.P +Normally, patterns with duplicate names are such that in any one match, only +one of the named subpatterns participates. An example is shown in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. +.P +When duplicates are present, \fBpcre_copy_named_substring()\fP and +\fBpcre_get_named_substring()\fP return the first substring corresponding to +the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is +returned; no data is returned. The \fBpcre_get_stringnumber()\fP function +returns one of the numbers that are associated with the name, but it is not +defined which it is. 
+.P +If you want to get full details of all captured substrings for a given name, +you must use the \fBpcre_get_stringtable_entries()\fP function. The first +argument is the compiled pattern, and the second is the name. The third and +fourth are pointers to variables which are updated by the function. After it +has run, they point to the first and last entries in the name-to-number table +for the given name. The function itself returns the length of each entry, or +PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is +described in the section entitled \fIInformation about a pattern\fP +.\" HTML <a href="#infoaboutpattern"> +.\" </a> +above. +.\" +Given all the relevant entries for the name, you can extract each of their +numbers, and hence the captured data, if any. +. +. +.SH "FINDING ALL POSSIBLE MATCHES" +.rs +.sp +The traditional matching function uses a similar algorithm to Perl, which stops +when it finds the first match, starting at a given point in the subject. If you +want to find all possible matches, or the longest possible match, consider +using the alternative matching function (see below) instead. If you cannot use +the alternative function, but still need to find all possible matches, you +can kludge it up by making use of the callout facility, which is described in +the +.\" HREF +\fBpcrecallout\fP +.\" +documentation. +.P +What you have to do is to insert a callout right at the end of the pattern. +When your callout function is called, extract and save the current matched +substring. Then return 1, which forces \fBpcre_exec()\fP to backtrack and try +other alternatives. Ultimately, when it runs out of matches, \fBpcre_exec()\fP +will yield PCRE_ERROR_NOMATCH. +. +. +.SH "OBTAINING AN ESTIMATE OF STACK USAGE" +.rs +.sp +Matching certain patterns using \fBpcre_exec()\fP can use a lot of process +stack, which in certain environments can be rather limited in size. 
Some users +find it helpful to have an estimate of the amount of stack that is used by +\fBpcre_exec()\fP, to help them set recursion limits, as described in the +.\" HREF +\fBpcrestack\fP +.\" +documentation. The estimate that is output by \fBpcretest\fP when called with +the \fB-m\fP and \fB-C\fP options is obtained by calling \fBpcre_exec\fP with +the values NULL, NULL, NULL, -999, and -999 for its first five arguments. +.P +Normally, if its first argument is NULL, \fBpcre_exec()\fP immediately returns +the negative error code PCRE_ERROR_NULL, but with this special combination of +arguments, it returns instead a negative number whose absolute value is the +approximate stack frame size in bytes. (A negative number is used so that it is +clear that no match has happened.) The value is approximate because in some +cases, recursive calls to \fBpcre_exec()\fP occur when there are one or two +additional variables on the stack. +.P +If PCRE has been compiled to use the heap instead of the stack for recursion, +the value returned is the size of each block that is obtained from the heap. +. +. +.\" HTML <a name="dfamatch"></a> +.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION" +.rs +.sp +.nf +.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," +.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP," +.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP," +.B " int *\fIworkspace\fP, int \fIwscount\fP);" +.fi +.P +The function \fBpcre_dfa_exec()\fP is called to match a subject string against +a compiled pattern, using a matching algorithm that scans the subject string +just once, and does not backtrack. This has different characteristics to the +normal algorithm, and is not compatible with Perl. Some of the features of PCRE +patterns are not supported. Nevertheless, there are times when this kind of +matching can be useful. 
For a discussion of the two matching algorithms, and a +list of features that \fBpcre_dfa_exec()\fP does not support, see the +.\" HREF +\fBpcrematching\fP +.\" +documentation. +.P +The arguments for the \fBpcre_dfa_exec()\fP function are the same as for +\fBpcre_exec()\fP, plus two extras. The \fIovector\fP argument is used in a +different way, and this is described below. The other common arguments are used +in the same way as for \fBpcre_exec()\fP, so their description is not repeated +here. +.P +The two additional arguments provide workspace for the function. The workspace +vector should contain at least 20 elements. It is used for keeping track of +multiple paths through the pattern tree. More workspace will be needed for +patterns and subjects where there are a lot of potential matches. +.P +Here is an example of a simple call to \fBpcre_dfa_exec()\fP: +.sp + int rc; + int ovector[10]; + int wspace[20]; + rc = pcre_dfa_exec( + re, /* result of pcre_compile() */ + NULL, /* we didn't study the pattern */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + ovector, /* vector of integers for substring information */ + 10, /* number of elements (NOT size in bytes) */ + wspace, /* working space vector */ + 20); /* number of elements (NOT size in bytes) */ +. +.SS "Option bits for \fBpcre_dfa_exec()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre_dfa_exec()\fP must be +zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP, +PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, +PCRE_NO_UTF8_CHECK, PCRE_BSR_ANYCRLF, PCRE_BSR_UNICODE, PCRE_NO_START_OPTIMIZE, +PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART. +All but the last four of these are exactly the same as for \fBpcre_exec()\fP, +so their description is not repeated here. 
+.sp + PCRE_PARTIAL_HARD + PCRE_PARTIAL_SOFT +.sp +These have the same general effect as they do for \fBpcre_exec()\fP, but the +details are slightly different. When PCRE_PARTIAL_HARD is set for +\fBpcre_dfa_exec()\fP, it returns PCRE_ERROR_PARTIAL if the end of the subject +is reached and there is still at least one matching possibility that requires +additional characters. This happens even if some complete matches have also +been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH +is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached, +there have been no complete matches, but there is still at least one matching +possibility. The portion of the string that was inspected when the longest +partial match was found is set as the first matching string in both cases. +There is a more detailed discussion of partial and multi-segment matching, with +examples, in the +.\" HREF +\fBpcrepartial\fP +.\" +documentation. +.sp + PCRE_DFA_SHORTEST +.sp +Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as +soon as it has found one match. Because of the way the alternative algorithm +works, this is necessarily the shortest possible match at the first possible +matching point in the subject string. +.sp + PCRE_DFA_RESTART +.sp +When \fBpcre_dfa_exec()\fP returns a partial match, it is possible to call it +again, with additional subject characters, and have it continue with the same +match. The PCRE_DFA_RESTART option requests this action; when it is set, the +\fIworkspace\fP and \fIwscount\fP options must reference the same vector as +before because data about the match so far is left in them after a partial +match. There is more discussion of this facility in the +.\" HREF +\fBpcrepartial\fP +.\" +documentation. +. +. +.SS "Successful returns from \fBpcre_dfa_exec()\fP" +.rs +.sp +When \fBpcre_dfa_exec()\fP succeeds, it may have matched more than one +substring in the subject. 
Note, however, that all the matches from one run of +the function start at the same point in the subject. The shorter matches are +all initial substrings of the longer matches. For example, if the pattern +.sp + <.*> +.sp +is matched against the string +.sp + This is <something> <something else> <something further> no more +.sp +the three matched strings are +.sp + <something> + <something> <something else> + <something> <something else> <something further> +.sp +On success, the yield of the function is a number greater than zero, which is +the number of matched substrings. The substrings themselves are returned in +\fIovector\fP. Each string uses two elements; the first is the offset to the +start, and the second is the offset to the end. In fact, all the strings have +the same start offset. (Space could have been saved by giving this only once, +but it was decided to retain some compatibility with the way \fBpcre_exec()\fP +returns data, even though the meaning of the strings is different.) +.P +The strings are returned in reverse order of length; that is, the longest +matching string is given first. If there were too many matches to fit into +\fIovector\fP, the yield of the function is zero, and the vector is filled with +the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use +the entire \fIovector\fP for returning matched strings. +.P +NOTE: PCRE's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling. +. +. 
+.SS "Error returns from \fBpcre_dfa_exec()\fP" +.rs +.sp +The \fBpcre_dfa_exec()\fP function returns a negative number when it fails. +Many of the errors are the same as for \fBpcre_exec()\fP, and these are +described +.\" HTML <a href="#errorlist"> +.\" </a> +above. +.\" +There are in addition the following errors that are specific to +\fBpcre_dfa_exec()\fP: +.sp + PCRE_ERROR_DFA_UITEM (-16) +.sp +This return is given if \fBpcre_dfa_exec()\fP encounters an item in the pattern +that it does not support, for instance, the use of \eC or a back reference. +.sp + PCRE_ERROR_DFA_UCOND (-17) +.sp +This return is given if \fBpcre_dfa_exec()\fP encounters a condition item that +uses a back reference for the condition, or a test for recursion in a specific +group. These are not supported. +.sp + PCRE_ERROR_DFA_UMLIMIT (-18) +.sp +This return is given if \fBpcre_dfa_exec()\fP is called with an \fIextra\fP +block that contains a setting of the \fImatch_limit\fP or +\fImatch_limit_recursion\fP fields. This is not supported (these fields are +meaningless for DFA matching). +.sp + PCRE_ERROR_DFA_WSSIZE (-19) +.sp +This return is given if \fBpcre_dfa_exec()\fP runs out of space in the +\fIworkspace\fP vector. +.sp + PCRE_ERROR_DFA_RECURSE (-20) +.sp +When a recursive subpattern is processed, the matching function calls itself +recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This +error is given if the output vector is not large enough. This should be +extremely rare, as a vector of size 1000 is used. +.sp + PCRE_ERROR_DFA_BADRESTART (-30) +.sp +When \fBpcre_dfa_exec()\fP is called with the \fBPCRE_DFA_RESTART\fP option, +some plausibility checks are made on the contents of the workspace, which +should contain data about the previous partial match. If any of these checks +fail, this error is given. +. +. 
+.SH "SEE ALSO" +.rs +.sp +\fBpcre16\fP(3), \fBpcre32\fP(3), \fBpcrebuild\fP(3), \fBpcrecallout\fP(3), +\fBpcrecpp\fP(3), \fBpcrematching\fP(3), \fBpcrepartial\fP(3), +\fBpcreposix\fP(3), \fBpcreprecompile\fP(3), \fBpcresample\fP(3), +\fBpcrestack\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 09 February 2014 +Copyright (c) 1997-2014 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrebuild.3 b/usr/share/man/man3/pcrebuild.3 new file mode 100755 index 000000000..403f2ae32 --- /dev/null +++ b/usr/share/man/man3/pcrebuild.3 @@ -0,0 +1,550 @@ +.TH PCREBUILD 3 "12 May 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +. +. +.SH "BUILDING PCRE" +.rs +.sp +PCRE is distributed with a \fBconfigure\fP script that can be used to build the +library in Unix-like environments using the applications known as Autotools. +Also in the distribution are files to support building using \fBCMake\fP +instead of \fBconfigure\fP. The text file +.\" HTML <a href="README.txt"> +.\" </a> +\fBREADME\fP +.\" +contains general information about building with Autotools (some of which is +repeated below), and also has some comments about building on various operating +systems. There is a lot more information about building PCRE without using +Autotools (including information about using \fBCMake\fP and building "by +hand") in the text file called +.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt"> +.\" </a> +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +You should consult this file as well as the +.\" HTML <a href="README.txt"> +.\" </a> +\fBREADME\fP +.\" +file if you are building in a non-Unix-like environment. +. +. +.SH "PCRE BUILD-TIME OPTIONS" +.rs +.sp +The rest of this document describes the optional features of PCRE that can be +selected when the library is compiled. 
It assumes use of the \fBconfigure\fP +script, where the optional features are selected or deselected by providing +options to \fBconfigure\fP before running the \fBmake\fP command. However, the +same options can be selected in both Unix-like and non-Unix-like environments +using the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead +of \fBconfigure\fP to build PCRE. +.P +If you are not using Autotools or \fBCMake\fP, option selection can be done by +editing the \fBconfig.h\fP file, or by passing parameter settings to the +compiler, as described in +.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt"> +.\" </a> +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +.P +The complete list of options for \fBconfigure\fP (which includes the standard +ones such as the selection of the installation directory) can be obtained by +running +.sp + ./configure --help +.sp +The following sections include descriptions of options whose names begin with +--enable or --disable. These settings specify changes to the defaults for the +\fBconfigure\fP command. Because of the way that \fBconfigure\fP works, +--enable and --disable always come in pairs, so the complementary option always +exists as well, but as it specifies the default, it is not described. +. +. +.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES" +.rs +.sp +By default, a library called \fBlibpcre\fP is built, containing functions that +take string arguments contained in vectors of bytes, either as single-byte +characters, or interpreted as UTF-8 strings. You can also build a separate +library, called \fBlibpcre16\fP, in which strings are contained in vectors of +16-bit data units and interpreted either as single-unit characters or UTF-16 +strings, by adding +.sp + --enable-pcre16 +.sp +to the \fBconfigure\fP command. 
You can also build yet another separate +library, called \fBlibpcre32\fP, in which strings are contained in vectors of +32-bit data units and interpreted either as single-unit characters or UTF-32 +strings, by adding +.sp + --enable-pcre32 +.sp +to the \fBconfigure\fP command. If you do not want the 8-bit library, add +.sp + --disable-pcre8 +.sp +as well. At least one of the three libraries must be built. Note that the C++ +and POSIX wrappers are for the 8-bit library only, and that \fBpcregrep\fP is +an 8-bit program. None of these are built if you select only the 16-bit or +32-bit libraries. +. +. +.SH "BUILDING SHARED AND STATIC LIBRARIES" +.rs +.sp +The Autotools PCRE building process uses \fBlibtool\fP to build both shared and +static libraries by default. You can suppress one of these by adding one of +.sp + --disable-shared + --disable-static +.sp +to the \fBconfigure\fP command, as required. +. +. +.SH "C++ SUPPORT" +.rs +.sp +By default, if the 8-bit library is being built, the \fBconfigure\fP script +will search for a C++ compiler and C++ header files. If it finds them, it +automatically builds the C++ wrapper library (which supports only 8-bit +strings). You can disable this by adding +.sp + --disable-cpp +.sp +to the \fBconfigure\fP command. +. +. +.SH "UTF-8, UTF-16 AND UTF-32 SUPPORT" +.rs +.sp +To build PCRE with support for UTF Unicode character strings, add +.sp + --enable-utf +.sp +to the \fBconfigure\fP command. This setting applies to all three libraries, +adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit +library, and support for UTF-32 to the 32-bit library. There are no +separate options for enabling UTF-8, UTF-16 and UTF-32 independently because +that would allow ridiculous settings such as requesting UTF-16 support while +building only the 8-bit library. It is not possible to build one library with +UTF support and another without in the same configuration. 
(For backwards +compatibility, --enable-utf8 is a synonym of --enable-utf.) +.P +Of itself, this setting does not make PCRE treat strings as UTF-8, UTF-16 or +UTF-32. As well as compiling PCRE with this option, you also have to set +the PCRE_UTF8, PCRE_UTF16 or PCRE_UTF32 option (as appropriate) when you call +one of the pattern compiling functions. +.P +If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects +its input to be either ASCII or UTF-8 (depending on the run-time option). It is +not possible to support both EBCDIC and UTF-8 codes in the same version of the +library. Consequently, --enable-utf and --enable-ebcdic are mutually +exclusive. +. +. +.SH "UNICODE CHARACTER PROPERTY SUPPORT" +.rs +.sp +UTF support allows the libraries to process character codepoints up to 0x10ffff +in the strings that they handle. On its own, however, it does not provide any +facilities for accessing the properties of such characters. If you want to be +able to use the pattern escapes \eP, \ep, and \eX, which refer to Unicode +character properties, you must add +.sp + --enable-unicode-properties +.sp +to the \fBconfigure\fP command. This implies UTF support, even if you have +not explicitly requested it. +.P +Including Unicode property support adds around 30K of tables to the PCRE +library. Only the general category properties such as \fILu\fP and \fINd\fP are +supported. Details are given in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. +. +. +.SH "JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time compiler support is included in the build by specifying +.sp + --enable-jit +.sp +This support is available only for certain hardware architectures. If this +option is set for an unsupported architecture, a compile time error occurs. +See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for a discussion of JIT usage.
When JIT support is enabled, +pcregrep automatically makes use of it, unless you add +.sp + --disable-pcregrep-jit +.sp +to the "configure" command. +. +. +.SH "CODE VALUE OF NEWLINE" +.rs +.sp +By default, PCRE interprets the linefeed (LF) character as indicating the end +of a line. This is the normal newline character on Unix-like systems. You can +compile PCRE to use carriage return (CR) instead, by adding +.sp + --enable-newline-is-cr +.sp +to the \fBconfigure\fP command. There is also a --enable-newline-is-lf option, +which explicitly specifies linefeed as the newline character. +.sp +Alternatively, you can specify that line endings are to be indicated by the two +character sequence CRLF. If you want this, add +.sp + --enable-newline-is-crlf +.sp +to the \fBconfigure\fP command. There is a fourth option, specified by +.sp + --enable-newline-is-anycrlf +.sp +which causes PCRE to recognize any of the three sequences CR, LF, or CRLF as +indicating a line ending. Finally, a fifth option, specified by +.sp + --enable-newline-is-any +.sp +causes PCRE to recognize any Unicode newline sequence. +.P +Whatever line ending convention is selected when PCRE is built can be +overridden when the library functions are called. At build time it is +conventional to use the standard for your operating system. +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +By default, the sequence \eR in a pattern matches any Unicode newline sequence, +whatever has been selected as the line ending sequence. If you specify +.sp + --enable-bsr-anycrlf +.sp +the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is +selected when PCRE is built can be overridden when the library functions are +called. +. +. 
+.SH "POSIX MALLOC USAGE" +.rs +.sp +When the 8-bit library is called through the POSIX interface (see the +.\" HREF +\fBpcreposix\fP +.\" +documentation), additional working storage is required for holding the pointers +to capturing substrings, because PCRE requires three integers per substring, +whereas the POSIX interface provides only two. If the number of expected +substrings is small, the wrapper function uses space on the stack, because this +is faster than using \fBmalloc()\fP for each call. The default threshold above +which the stack is no longer used is 10; it can be changed by adding a setting +such as +.sp + --with-posix-malloc-threshold=20 +.sp +to the \fBconfigure\fP command. +. +. +.SH "HANDLING VERY LARGE PATTERNS" +.rs +.sp +Within a compiled pattern, offset values are used to point from one part to +another (for example, from an opening parenthesis to an alternation +metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values +are used for these offsets, leading to a maximum size for a compiled pattern of +around 64K. This is sufficient to handle all but the most gigantic patterns. +Nevertheless, some people do want to process truly enormous patterns, so it is +possible to compile PCRE to use three-byte or four-byte offsets by adding a +setting such as +.sp + --with-link-size=3 +.sp +to the \fBconfigure\fP command. The value given must be 2, 3, or 4. For the +16-bit library, a value of 3 is rounded up to 4. In these libraries, using +longer offsets slows down the operation of PCRE because it has to load +additional data when handling them. For the 32-bit library the value is always +4 and cannot be overridden; the value of --with-link-size is ignored. +. +. +.SH "AVOIDING EXCESSIVE STACK USAGE" +.rs +.sp +When matching with the \fBpcre_exec()\fP function, PCRE implements backtracking +by making recursive calls to an internal function called \fBmatch()\fP. 
In +environments where the size of the stack is limited, this can severely limit +PCRE's operation. (The Unix environment does not usually suffer from this +problem, but it may sometimes be necessary to increase the maximum stack size. +There is a discussion in the +.\" HREF +\fBpcrestack\fP +.\" +documentation.) An alternative approach to recursion that uses memory from the +heap to remember data, instead of using recursive function calls, has been +implemented to work round the problem of limited stack size. If you want to +build a version of PCRE that works this way, add +.sp + --disable-stack-for-recursion +.sp +to the \fBconfigure\fP command. With this configuration, PCRE will use the +\fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP variables to call memory +management functions. By default these point to \fBmalloc()\fP and +\fBfree()\fP, but you can replace the pointers so that your own functions are +used instead. +.P +Separate functions are provided rather than using \fBpcre_malloc\fP and +\fBpcre_free\fP because the usage is very predictable: the block sizes +requested are always the same, and the blocks are always freed in reverse +order. A calling program might be able to implement optimized functions that +perform better than \fBmalloc()\fP and \fBfree()\fP. PCRE runs noticeably more +slowly when built in this way. This option affects only the \fBpcre_exec()\fP +function; it is not relevant for \fBpcre_dfa_exec()\fP. +. +. +.SH "LIMITING PCRE RESOURCE USAGE" +.rs +.sp +Internally, PCRE has a function called \fBmatch()\fP, which it calls repeatedly +(sometimes recursively) when matching a pattern with the \fBpcre_exec()\fP +function. By controlling the maximum number of times this function may be +called during a single matching operation, a limit can be placed on the +resources used by a single call to \fBpcre_exec()\fP. The limit can be changed +at run time, as described in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. 
The default is 10 million, but this can be changed by adding a +setting such as +.sp + --with-match-limit=500000 +.sp +to the \fBconfigure\fP command. This setting has no effect on the +\fBpcre_dfa_exec()\fP matching function. +.P +In some environments it is desirable to limit the depth of recursive calls of +\fBmatch()\fP more strictly than the total number of calls, in order to +restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion +is specified) that is used. A second limit controls this; it defaults to the +value that is set for --with-match-limit, which imposes no additional +constraints. However, you can set a lower limit by adding, for example, +.sp + --with-match-limit-recursion=10000 +.sp +to the \fBconfigure\fP command. This value can also be overridden at run time. +. +. +.SH "CREATING CHARACTER TABLES AT BUILD TIME" +.rs +.sp +PCRE uses fixed tables for processing characters whose code values are less +than 256. By default, PCRE is built with a set of tables that are distributed +in the file \fIpcre_chartables.c.dist\fP. These tables are for ASCII codes +only. If you add +.sp + --enable-rebuild-chartables +.sp +to the \fBconfigure\fP command, the distributed tables are no longer used. +Instead, a program called \fBdftables\fP is compiled and run. This outputs the +source for a new set of tables, created in the default locale of your C run-time +system. (This method of replacing the tables does not work if you are cross +compiling, because \fBdftables\fP is run on the local host. If you need to +create alternative tables when cross compiling, you will have to do so "by +hand".) +. +. +.SH "USING EBCDIC CODE" +.rs +.sp +PCRE assumes by default that it will run in an environment where the character +code is ASCII (or Unicode, which is a superset of ASCII). This is the case for +most computer operating systems.
PCRE can, however, be compiled to run in an +EBCDIC environment by adding +.sp + --enable-ebcdic +.sp +to the \fBconfigure\fP command. This setting implies +--enable-rebuild-chartables. You should only use it if you know that you are in +an EBCDIC environment (for example, an IBM mainframe operating system). The +--enable-ebcdic option is incompatible with --enable-utf. +.P +The EBCDIC character that corresponds to an ASCII LF is assumed to have the +value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In +such an environment you should use +.sp + --enable-ebcdic-nl25 +.sp +as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the +same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is \fInot\fP +chosen as LF is made to correspond to the Unicode NEL character (which, in +Unicode, is 0x85). +.P +The options that select newline behaviour, such as --enable-newline-is-cr, +and equivalent run-time options, refer to these character values in an EBCDIC +environment. +. +. +.SH "PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT" +.rs +.sp +By default, \fBpcregrep\fP reads all files as plain text. You can build it so +that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads +them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of +.sp + --enable-pcregrep-libz + --enable-pcregrep-libbz2 +.sp +to the \fBconfigure\fP command. These options naturally require that the +relevant libraries are installed on your system. Configuration will fail if +they are not. +. +. +.SH "PCREGREP BUFFER SIZE" +.rs +.sp +\fBpcregrep\fP uses an internal buffer to hold a "window" on the file it is +scanning, in order to be able to output "before" and "after" lines when it +finds a match. The size of the buffer is controlled by a parameter whose +default value is 20K. 
The buffer itself is three times this size, but because +of the way it is used for holding "before" lines, the longest line that is +guaranteed to be processable is the parameter size. You can change the default +parameter value by adding, for example, +.sp + --with-pcregrep-bufsize=50K +.sp +to the \fBconfigure\fP command. The caller of \fBpcregrep\fP can, however, +override this value by specifying a run-time option. +. +. +.SH "PCRETEST OPTION FOR LIBREADLINE SUPPORT" +.rs +.sp +If you add +.sp + --enable-pcretest-libreadline +.sp +to the \fBconfigure\fP command, \fBpcretest\fP is linked with the +\fBlibreadline\fP library, and when its input is from a terminal, it reads it +using the \fBreadline()\fP function. This provides line-editing and history +facilities. Note that \fBlibreadline\fP is GPL-licensed, so if you distribute a +binary of \fBpcretest\fP linked in this way, there may be licensing issues. +.P +Setting this option causes the \fB-lreadline\fP option to be added to the +\fBpcretest\fP build. In many operating environments with a system-installed +\fBlibreadline\fP this is sufficient. However, in some environments (e.g. +if an unmodified distribution version of readline is in use), some extra +configuration may be necessary. The INSTALL file for \fBlibreadline\fP says +this: +.sp + "Readline uses the termcap functions, but does not link with the + termcap or curses library itself, allowing applications which link + with readline the to choose an appropriate library." +.sp +If your environment has not been set up so that an appropriate library is +automatically included, you may need to add something like +.sp + LIBS="-lncurses" +.sp +immediately before the \fBconfigure\fP command. +. +. +.SH "DEBUGGING WITH VALGRIND SUPPORT" +.rs +.sp +By adding the +.sp + --enable-valgrind +.sp +option to the \fBconfigure\fP command, PCRE will use valgrind annotations +to mark certain memory regions as unaddressable.
This allows it to detect +invalid memory accesses, and is mostly useful for debugging PCRE itself. +. +. +.SH "CODE COVERAGE REPORTING" +.rs +.sp +If your C compiler is gcc, you can build a version of PCRE that can generate a +code coverage report for its test suite. To enable this, you must install +\fBlcov\fP version 1.6 or above. Then specify +.sp + --enable-coverage +.sp +to the \fBconfigure\fP command and build PCRE in the usual way. +.P +Note that using \fBccache\fP (a caching C compiler) is incompatible with code +coverage reporting. If you have configured \fBccache\fP to run automatically +on your system, you must set the environment variable +.sp + CCACHE_DISABLE=1 +.sp +before running \fBmake\fP to build PCRE, so that \fBccache\fP is not used. +.P +When --enable-coverage is used, the following additional targets are added to the +\fIMakefile\fP: +.sp + make coverage +.sp +This creates a fresh coverage report for the PCRE test suite. It is equivalent +to running "make coverage-reset", "make coverage-baseline", "make check", and +then "make coverage-report". +.sp + make coverage-reset +.sp +This zeroes the coverage counters, but does nothing else. +.sp + make coverage-baseline +.sp +This captures baseline coverage information. +.sp + make coverage-report +.sp +This creates the coverage report. +.sp + make coverage-clean-report +.sp +This removes the generated coverage report without cleaning the coverage data +itself. +.sp + make coverage-clean-data +.sp +This removes the captured coverage data without removing the coverage files +created at compile time (*.gcno). +.sp + make coverage-clean +.sp +This cleans all coverage data including the generated coverage report. For more +information about code coverage, see the \fBgcov\fP and \fBlcov\fP +documentation. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcreapi\fP(3), \fBpcre16\fP, \fBpcre32\fP, \fBpcre_config\fP(3). +. +.
+.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 May 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrecallout.3 b/usr/share/man/man3/pcrecallout.3 new file mode 100755 index 000000000..8ebc99595 --- /dev/null +++ b/usr/share/man/man3/pcrecallout.3 @@ -0,0 +1,255 @@ +.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH SYNOPSIS +.rs +.sp +.B #include <pcre.h> +.PP +.SM +.B int (*pcre_callout)(pcre_callout_block *); +.PP +.B int (*pcre16_callout)(pcre16_callout_block *); +.PP +.B int (*pcre32_callout)(pcre32_callout_block *); +. +.SH DESCRIPTION +.rs +.sp +PCRE provides a feature called "callout", which is a means of temporarily +passing control to the caller of PCRE in the middle of pattern matching. The +caller of PCRE provides an external function by putting its entry point in the +global variable \fIpcre_callout\fP (\fIpcre16_callout\fP for the 16-bit +library, \fIpcre32_callout\fP for the 32-bit library). By default, this +variable contains NULL, which disables all calling out. +.P +Within a regular expression, (?C) indicates the points at which the external +function is to be called. Different callout points can be identified by putting +a number less than 256 after the letter C. The default value is zero. +For example, this pattern has two callout points: +.sp + (?C1)abc(?C2)def +.sp +If the PCRE_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE +automatically inserts callouts, all with number 255, before each item in the +pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern +.sp + A(\ed{2}|--) +.sp +it is processed as if it were +.sp +(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) +.sp +Notice that there is a callout before and after each parenthesis and +alternation bar. 
If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the +condition. Such a callout may also be inserted explicitly, for example: +.sp + (?(?C9)(?=a)ab|de) +.sp +This applies only to assertion conditions (because they are themselves +independent groups). +.P +Automatic callouts can be used for tracking the progress of pattern matching. +The +.\" HREF +\fBpcretest\fP +.\" +program has a pattern qualifier (/C) that sets automatic callouts; when it is +used, the output indicates how the pattern is being matched. This is useful +information when you are trying to optimize the performance of a particular +pattern. +. +. +.SH "MISSING CALLOUTS" +.rs +.sp +You should be aware that, because of optimizations in the way PCRE compiles and +matches patterns, callouts sometimes do not happen exactly as you might expect. +.P +At compile time, PCRE "auto-possessifies" repeated items when it knows that +what follows cannot be part of the repeat. For example, a+[bc] is compiled as +if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and +then applied with automatic callouts to the string "aaaa" is: +.sp + --->aaaa + +0 ^ ^ + +1 ^ a+ + +3 ^ ^ [bc] + No match +.sp +This indicates that when matching [bc] fails, there is no backtracking into a+ +and therefore the callouts that would be taken for the backtracks do not occur. +You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS +to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If +this is done in \fBpcretest\fP (using the /O qualifier), the output changes to +this: +.sp + --->aaaa + +0 ^ ^ + +1 ^ a+ + +3 ^ ^ [bc] + +3 ^ ^ [bc] + +3 ^ ^ [bc] + +3 ^^ [bc] + No match +.sp +This time, when matching [bc] fails, the matcher backtracks into a+ and tries +again, repeatedly, until a+ itself fails. +.P +Other optimizations that provide fast "no match" results also affect callouts. 
+For example, if the pattern is +.sp + ab(?C4)cd +.sp +PCRE knows that any matching string must contain the letter "d". If the subject +string is "abyz", the lack of "d" means that matching doesn't ever start, and +the callout is never reached. However, with "abyd", though the result is still +no match, the callout is obeyed. +.P +If the pattern is studied, PCRE knows the minimum length of a matching string, +and will immediately give a "no match" return without actually running a match +if the subject is not long enough, or, for unanchored patterns, if it has +been scanned far enough. +.P +You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE +option to the matching function, or by starting the pattern with +(*NO_START_OPT). This slows down the matching process, but does ensure that +callouts such as the example above are obeyed. +. +. +.SH "THE CALLOUT INTERFACE" +.rs +.sp +During matching, when PCRE reaches a callout point, the external function +defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is +set). This applies to both normal and DFA matching. The only argument to the +callout function is a pointer to a \fBpcre_callout\fP or +\fBpcre[16|32]_callout\fP block. These structures contain the following +fields: +.sp + int \fIversion\fP; + int \fIcallout_number\fP; + int *\fIoffset_vector\fP; + const char *\fIsubject\fP; (8-bit version) + PCRE_SPTR16 \fIsubject\fP; (16-bit version) + PCRE_SPTR32 \fIsubject\fP; (32-bit version) + int \fIsubject_length\fP; + int \fIstart_match\fP; + int \fIcurrent_position\fP; + int \fIcapture_top\fP; + int \fIcapture_last\fP; + void *\fIcallout_data\fP; + int \fIpattern_position\fP; + int \fInext_item_length\fP; + const unsigned char *\fImark\fP; (8-bit version) + const PCRE_UCHAR16 *\fImark\fP; (16-bit version) + const PCRE_UCHAR32 *\fImark\fP; (32-bit version) +.sp +The \fIversion\fP field is an integer containing the version number of the +block format.
The initial version was 0; the current version is 2. The version +number will change again in future if additional fields are added, but the +intention is never to remove any of the existing fields. +.P +The \fIcallout_number\fP field contains the number of the callout, as compiled +into the pattern (that is, the number after ?C for manual callouts, and 255 for +automatically generated callouts). +.P +The \fIoffset_vector\fP field is a pointer to the vector of offsets that was +passed by the caller to the matching function. When \fBpcre_exec()\fP or +\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to +extract substrings that have been matched so far, in the same way as for +extracting substrings after a match has completed. For the DFA matching +functions, this field is not useful. +.P +The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values +that were passed to the matching function. +.P +The \fIstart_match\fP field normally contains the offset within the subject at +which the current match attempt started. However, if the escape sequence \eK +has been encountered, this value is changed to reflect the modified starting +point. If the pattern is not anchored, the callout function may be called +several times from the same point in the pattern for different starting points +in the subject. +.P +The \fIcurrent_position\fP field contains the offset within the subject of the +current match pointer. +.P +When the \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP is used, the +\fIcapture_top\fP field contains one more than the number of the highest +numbered captured substring so far. If no substrings have been captured, the +value of \fIcapture_top\fP is one. This is always the case when the DFA +functions are used, because they do not support captured substrings. +.P +The \fIcapture_last\fP field contains the number of the most recently captured +substring. 
However, when a recursion exits, the value reverts to what it was +outside the recursion, as do the values of all captured substrings. If no +substrings have been captured, the value of \fIcapture_last\fP is -1. This is +always the case for the DFA matching functions. +.P +The \fIcallout_data\fP field contains a value that is passed to a matching +function specifically so that it can be passed back in callouts. It is passed +in the \fIcallout_data\fP field of a \fBpcre_extra\fP or \fBpcre[16|32]_extra\fP +data structure. If no such data was passed, the value of \fIcallout_data\fP in +a callout block is NULL. There is a description of the \fBpcre_extra\fP +structure in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +.P +The \fIpattern_position\fP field is present from version 1 of the callout +structure. It contains the offset to the next item to be matched in the pattern +string. +.P +The \fInext_item_length\fP field is present from version 1 of the callout +structure. It contains the length of the next item to be matched in the pattern +string. When the callout immediately precedes an alternation bar, a closing +parenthesis, or the end of the pattern, the length is zero. When the callout +precedes an opening parenthesis, the length is that of the entire subpattern. +.P +The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to +help in distinguishing between different automatic callouts, which all have the +same callout number. However, they are set for all callouts. +.P +The \fImark\fP field is present from version 2 of the callout structure. In +callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a +pointer to the zero-terminated name of the most recently passed (*MARK), +(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been +passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a +previous (*MARK). In callouts from the DFA matching functions this field always +contains NULL. +. 
+. +.SH "RETURN VALUES" +.rs +.sp +The external callout function returns an integer to PCRE. If the value is zero, +matching proceeds as normal. If the value is greater than zero, matching fails +at the current point, but the testing of other matching possibilities goes +ahead, just as if a lookahead assertion had failed. If the value is less than +zero, the match is abandoned, and the matching function returns the negative value. +.P +Negative values should normally be chosen from the set of PCRE_ERROR_xxx +values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure. +The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions; +it will never be used by PCRE itself. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 November 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrecompat.3 b/usr/share/man/man3/pcrecompat.3 new file mode 100755 index 000000000..0cc401982 --- /dev/null +++ b/usr/share/man/man3/pcrecompat.3 @@ -0,0 +1,200 @@ +.TH PCRECOMPAT 3 "10 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "DIFFERENCES BETWEEN PCRE AND PERL" +.rs +.sp +This document describes the differences in the ways that PCRE and Perl handle +regular expressions. The differences described here are with respect to Perl +versions 5.10 and above. +.P +1. PCRE has only a subset of Perl's Unicode support. Details of what it does +have are given in the +.\" HREF +\fBpcreunicode\fP +.\" +page. +.P +2. PCRE allows repeat quantifiers only on parenthesized assertions, but they do +not mean what you might think. For example, (?!a){3} does not assert that the +next three characters are not "a". It just asserts that the next character is +not "a" three times (in principle: PCRE optimizes this to run the assertion +just once).
Perl allows repeat quantifiers on other assertions such as \eb, but +these do not seem to have any use. +.P +3. Capturing subpatterns that occur inside negative lookahead assertions are +counted, but their entries in the offsets vector are never set. Perl sometimes +(but not always) sets its numerical variables from inside negative assertions. +.P +4. Though binary zero characters are supported in the subject string, they are +not allowed in a pattern string because it is passed as a normal C string, +terminated by zero. The escape sequence \e0 can be used in the pattern to +represent a binary zero. +.P +5. The following Perl escape sequences are not supported: \el, \eu, \eL, +\eU, and \eN when followed by a character name or Unicode value. (\eN on its +own, matching a non-newline character, is supported.) In fact these are +implemented by Perl's general string-handling and are not part of its pattern +matching engine. If any of these are encountered by PCRE, an error is +generated by default. However, if the PCRE_JAVASCRIPT_COMPAT option is set, +\eU and \eu are interpreted as JavaScript interprets them. +.P +6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE is +built with Unicode character property support. The properties that can be +tested with \ep and \eP are limited to the general category properties such as +Lu and Nd, script names such as Greek or Han, and the derived properties Any +and L&. PCRE does support the Cs (surrogate) property, which Perl does not; the +Perl documentation says "Because Perl hides the need for the user to understand +the internal representation of Unicode characters, there is no need to +implement the somewhat messy concept of surrogates." +.P +7. PCRE does support the \eQ...\eE escape for quoting substrings. Characters in +between are treated as literals. This is slightly different from Perl in that $ +and @ are also handled as literals inside the quotes. 
In Perl, they cause +variable interpolation (but of course PCRE does not have variables). Note the +following examples: +.sp + Pattern PCRE matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes. +.P +8. Fairly obviously, PCRE does not support the (?{code}) and (??{code}) +constructions. However, there is support for recursive patterns. This is not +available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout" +feature allows an external function to be called during pattern matching. See +the +.\" HREF +\fBpcrecallout\fP +.\" +documentation for details. +.P +9. Subpatterns that are called as subroutines (whether or not recursively) are +always treated as atomic groups in PCRE. This is like Python, but unlike Perl. +Captured values that are set outside a subroutine call can be referenced from +inside in PCRE, but not in Perl. There is a discussion that explains these +differences in more detail in the +.\" HTML <a href="pcrepattern.html#recursiondifference"> +.\" </a> +section on recursion differences from Perl +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +page. +.P +10. If any of the backtracking control verbs are used in a subpattern that is +called as a subroutine (whether or not recursively), their effect is confined +to that subpattern; it does not extend to the surrounding pattern. This is not +always the case in Perl. In particular, if (*THEN) is present in a group that +is called as a subroutine, its action is limited to that group, even if the +group does not contain any | characters. Note that such subpatterns are +processed as anchored at the point where they are tested. +.P +11. If a pattern contains more than one backtracking control verb, the first +one that is backtracked onto acts.
For example, in the pattern +A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C +triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the +same as PCRE, but there are examples where it differs. +.P +12. Most backtracking verbs in assertions have their normal actions. They are +not confined to the assertion. +.P +13. There are some differences that are concerned with the settings of captured +strings when part of a pattern is repeated. For example, matching "aba" against +the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b". +.P +14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern +names is not as general as Perl's. This is a consequence of the fact that PCRE +works internally just with numbers, using an external table to translate +between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), +where the two capturing parentheses have the same number but different names, +is not supported, and causes an error at compile time. If it were allowed, it +would not be possible to distinguish which parentheses matched, because both +names map to capturing subpattern number 1. To avoid this confusing situation, +an error is given at compile time. +.P +15. Perl recognizes comments in some places that PCRE does not, for example, +between the ( and ? at the start of a subpattern. If the /x modifier is set, +Perl allows white space between ( and ? (though current Perls warn that this is +deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set. +.P +16. Perl, when in warning mode, gives warnings for character classes such as +[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no +warning features, so it gives an error in these cases because they are almost +certainly user mistakes. +.P +17. In PCRE, the upper/lower case character properties Lu and Ll are not +affected when case-independent matching is specified.
For example, \ep{Lu} +always matches an upper case letter. I think Perl has changed in this respect; +in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all +letters, regardless of case, when case independence is specified. +.P +18. PCRE provides some extensions to the Perl regular expression facilities. +Perl 5.10 includes new features that are not in earlier versions of Perl, some +of which (such as named parentheses) have been in PCRE for some time. This list +is with respect to Perl 5.10: +.sp +(a) Although lookbehind assertions in PCRE must match fixed length strings, +each alternative branch of a lookbehind assertion can match a different length +of string. Perl requires them all to have the same length. +.sp +(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $ +meta-character matches only at the very end of the string. +.sp +(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special +meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored. +(Perl can be made to issue a warning.) +.sp +(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. +.sp +(e) PCRE_ANCHORED can be used at matching time to force a pattern to be tried +only at the first matching position in the subject string. +.sp +(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and +PCRE_NO_AUTO_CAPTURE options for \fBpcre_exec()\fP have no Perl equivalents. +.sp +(g) The \eR escape sequence can be restricted to match only CR, LF, or CRLF +by the PCRE_BSR_ANYCRLF option. +.sp +(h) The callout facility is PCRE-specific. +.sp +(i) The partial matching facility is PCRE-specific. +.sp +(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on +different hosts that have the other endianness. 
However, this does not apply to +optimized data created by the just-in-time compiler. +.sp +(k) The alternative matching functions (\fBpcre_dfa_exec()\fP, +\fBpcre16_dfa_exec()\fP and \fBpcre32_dfa_exec()\fP) match in a different way +and are not Perl-compatible. +.sp +(l) PCRE recognizes some special sequences such as (*CR) at the start of +a pattern that set overall options that cannot be changed within the pattern. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 10 November 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrecpp.3 b/usr/share/man/man3/pcrecpp.3 new file mode 100755 index 000000000..fbddd86ab --- /dev/null +++ b/usr/share/man/man3/pcrecpp.3 @@ -0,0 +1,348 @@ +.TH PCRECPP 3 "08 January 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions. +.SH "SYNOPSIS OF C++ WRAPPER" +.rs +.sp +.B #include <pcrecpp.h> +. +.SH DESCRIPTION +.rs +.sp +The C++ wrapper for PCRE was provided by Google Inc. Some additional +functionality was added by Giuseppe Maxia. This brief man page was constructed +from the notes in the \fIpcrecpp.h\fP file, which should be consulted for +further details. Note that the C++ wrapper supports only the original 8-bit +PCRE library. There is no 16-bit or 32-bit support at present. +. +. +.SH "MATCHING INTERFACE" +.rs +.sp +The "FullMatch" operation checks that supplied text matches a supplied pattern +exactly. If pointer arguments are supplied, it copies matched sub-strings that +match sub-patterns into them. +.sp + Example: successful match + pcrecpp::RE re("h.*o"); + re.FullMatch("hello"); +.sp + Example: unsuccessful match (requires full match): + pcrecpp::RE re("e"); + !re.FullMatch("hello"); +.sp + Example: creating a temporary RE object: + pcrecpp::RE("h.*o").FullMatch("hello"); +.sp +You can pass in a "const char*" or a "string" for "text".
The examples below +tend to use a const char*. You can, as in the different examples above, store +the RE object explicitly in a variable or use a temporary RE object. The +examples below use one mode or the other arbitrarily. Either could correctly be +used for any of these examples. +.P +You must supply extra pointer arguments to extract matched subpieces. +.sp + Example: extracts "ruby" into "s" and 1234 into "i" + int i; + string s; + pcrecpp::RE re("(\e\ew+):(\e\ed+)"); + re.FullMatch("ruby:1234", &s, &i); +.sp + Example: does not try to extract any extra sub-patterns + re.FullMatch("ruby:1234", &s); +.sp + Example: does not try to extract into NULL + re.FullMatch("ruby:1234", NULL, &i); +.sp + Example: integer overflow causes failure + !re.FullMatch("ruby:1234567891234", NULL, &i); +.sp + Example: fails because there aren't enough sub-patterns: + !pcrecpp::RE("\e\ew+:\e\ed+").FullMatch("ruby:1234", &s); +.sp + Example: fails because string cannot be stored in integer + !pcrecpp::RE("(.*)").FullMatch("ruby", &i); +.sp +The provided pointer arguments can be pointers to any scalar numeric +type, or one of: +.sp + string (matched piece is copied to string) + StringPiece (StringPiece is mutated to point to matched piece) + T (where "bool T::ParseFrom(const char*, int)" exists) + NULL (the corresponding matched sub-pattern is not copied) +.sp +The function returns true iff all of the following conditions are satisfied: +.sp + a. "text" matches "pattern" exactly; +.sp + b. The number of matched sub-patterns is >= number of supplied + pointers; +.sp + c. The "i"th argument has a suitable type for holding the + string captured as the "i"th sub-pattern. If you pass in + void * NULL for the "i"th argument, or a non-void * NULL + of the correct type, or pass fewer arguments than the + number of sub-patterns, "i"th captured sub-pattern is + ignored. +.sp +CAVEAT: An optional sub-pattern that does not exist in the matched +string is assigned the empty string. 
Therefore, the following will +return false (because the empty string is not a valid number): +.sp + int number; + pcrecpp::RE::FullMatch("abc", "[a-z]+(\e\ed+)?", &number); +.sp +The matching interface supports at most 16 arguments per call. +If you need more, consider using the more general interface +\fBpcrecpp::RE::DoMatch\fP. See \fBpcrecpp.h\fP for the signature for +\fBDoMatch\fP. +.P +NOTE: Do not use \fBno_arg\fP, which is used internally to mark the end of a +list of optional arguments, as a placeholder for missing arguments, as this can +lead to segfaults. +. +. +.SH "QUOTING METACHARACTERS" +.rs +.sp +You can use the "QuoteMeta" operation to insert backslashes before all +potentially meaningful characters in a string. The returned string, used as a +regular expression, will exactly match the original string. +.sp + Example: + string quoted = RE::QuoteMeta(unquoted); +.sp +Note that it's legal to escape a character even if it has no special meaning in +a regular expression -- so this function does that. (This also makes it +identical to the perl function of the same name; see "perldoc -f quotemeta".) +For example, "1.5-2.0?" becomes "1\e.5\e-2\e.0\e?". +. +.SH "PARTIAL MATCHES" +.rs +.sp +You can use the "PartialMatch" operation when you want the pattern +to match any substring of the text. +.sp + Example: simple search for a string: + pcrecpp::RE("ell").PartialMatch("hello"); +.sp + Example: find first number in a string: + int number; + pcrecpp::RE re("(\e\ed+)"); + re.PartialMatch("x*100 + 20", &number); + assert(number == 100); +. +. +.SH "UTF-8 AND THE MATCHING INTERFACE" +.rs +.sp +By default, pattern and text are plain text, one byte per character. The UTF8 +flag, passed to the constructor, causes both pattern and string to be treated +as UTF-8 text, still a byte stream but potentially multiple bytes per +character. 
In practice, the text is likelier to be UTF-8 than the pattern, but +the match returned may depend on the UTF8 flag, so always use it when matching +UTF8 text. For example, "." will match one byte normally but with UTF8 set may +match up to three bytes of a multi-byte character. +.sp + Example: + pcrecpp::RE_Options options; + options.set_utf8(); + pcrecpp::RE re(utf8_pattern, options); + re.FullMatch(utf8_string); +.sp + Example: using the convenience function UTF8(): + pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8()); + re.FullMatch(utf8_string); +.sp +NOTE: The UTF8 flag is ignored if pcre was not configured with the + --enable-utf8 flag. +. +. +.SH "PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE" +.rs +.sp +PCRE defines some modifiers to change the behavior of the regular expression +engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to +pass such modifiers to a RE class. Currently, the following modifiers are +supported: +.sp + modifier description Perl corresponding +.sp + PCRE_CASELESS case insensitive match /i + PCRE_MULTILINE multiple lines match /m + PCRE_DOTALL dot matches newlines /s + PCRE_DOLLAR_ENDONLY $ matches only at end N/A + PCRE_EXTRA strict escape parsing N/A + PCRE_EXTENDED ignore white spaces /x + PCRE_UTF8 handles UTF8 chars built-in + PCRE_UNGREEDY reverses * and *? N/A + PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*) +.sp +(*) Both Perl and PCRE allow non capturing parentheses by means of the +"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not +capture, while (ab|cd) does. +.P +For a full account on how each modifier works, please check the +PCRE API reference page. +.P +For each modifier, there are two member functions whose name is made +out of the modifier in lowercase, without the "PCRE_" prefix. 
For +instance, PCRE_CASELESS is handled by +.sp + bool caseless() +.sp +which returns true if the modifier is set, and +.sp + RE_Options & set_caseless(bool) +.sp +which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can be +accessed through the \fBset_match_limit()\fP and \fBmatch_limit()\fP member +functions. Setting \fImatch_limit\fP to a non-zero value will limit the +execution of pcre to keep it from doing bad things like blowing the stack or +taking an eternity to return a result. A value of 5000 is good enough to stop +stack blowup in a 2MB thread stack. Setting \fImatch_limit\fP to zero disables +match limiting. Alternatively, you can call \fBmatch_limit_recursion()\fP +which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much PCRE +recurses. \fBmatch_limit()\fP limits the number of matches PCRE does; +\fBmatch_limit_recursion()\fP limits the depth of internal recursion, and +therefore the amount of stack that is used. +.P +Normally, to pass one or more modifiers to a RE class, you declare +a \fIRE_Options\fP object, set the appropriate options, and pass this +object to a RE constructor. Example: +.sp + RE_Options opt; + opt.set_caseless(true); + if (RE("HELLO", opt).PartialMatch("hello world")) ... +.sp +RE_Options has two constructors. The default constructor takes no arguments and +creates a set of flags that are off by default. The optional parameter +\fIoption_flags\fP is to facilitate transfer of legacy code from C programs. +This lets you do +.sp + RE(pattern, + RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); +.sp +However, new code is better off doing +.sp + RE(pattern, + RE_Options().set_caseless(true).set_multiline(true)) + .PartialMatch(str); +.sp +If you are going to pass one of the most used modifiers, there are some +convenience functions that return a RE_Options class with the +appropriate modifier already set: \fBCASELESS()\fP, \fBUTF8()\fP, +\fBMULTILINE()\fP, \fBDOTALL()\fP, and \fBEXTENDED()\fP.
+.P +If you need to set several options at once, and you don't want to go through +the pains of declaring a RE_Options object and setting several options, there +is a parallel method that gives you such ability on the fly. You can concatenate +several \fBset_xxxxx()\fP member functions, since each of them returns a +reference to its class object. For example, to pass PCRE_CASELESS, +PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write: +.sp + RE(" ^ xyz \e\es+ .* blah$", + RE_Options() + .set_caseless(true) + .set_extended(true) + .set_multiline(true)).PartialMatch(sometext); +.sp +. +. +.SH "SCANNING TEXT INCREMENTALLY" +.rs +.sp +The "Consume" operation may be useful if you want to repeatedly +match regular expressions at the front of a string and skip over +them as they match. This requires use of the "StringPiece" type, +which represents a sub-range of a real string. Like RE, StringPiece +is defined in the pcrecpp namespace. +.sp + Example: read lines of the form "var = value" from a string. + string contents = ...; // Fill string somehow + pcrecpp::StringPiece input(contents); // Wrap in a StringPiece +.sp + string var; + int value; + pcrecpp::RE re("(\e\ew+) = (\e\ed+)\en"); + while (re.Consume(&input, &var, &value)) { + ...; + } +.sp +Each successful call to "Consume" will set "var/value", and also +advance "input" so it points past the matched text. +.P +The "FindAndConsume" operation is similar to "Consume" but does not +anchor your match at the beginning of the string. For example, you +could extract all words from a string by repeatedly calling +.sp + pcrecpp::RE("(\e\ew+)").FindAndConsume(&input, &word) +. +. +.SH "PARSING HEX/OCTAL/C-RADIX NUMBERS" +.rs +.sp +By default, if you pass a pointer to a numeric value, the +corresponding text is interpreted as a base-10 number. You can +instead wrap the pointer with a call to one of the operators Hex(), +Octal(), or CRadix() to interpret the text in another base.
The +CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +prefixes, but defaults to base-10. +.sp + Example: + int a, b, c, d; + pcrecpp::RE re("(.*) (.*) (.*) (.*)"); + re.FullMatch("100 40 0100 0x40", + pcrecpp::Octal(&a), pcrecpp::Hex(&b), + pcrecpp::CRadix(&c), pcrecpp::CRadix(&d)); +.sp +will leave 64 in a, b, c, and d. +. +. +.SH "REPLACING PARTS OF STRINGS" +.rs +.sp +You can replace the first match of "pattern" in "str" with "rewrite". +Within "rewrite", backslash-escaped digits (\e1 to \e9) can be +used to insert text matching corresponding parenthesized group +from the pattern. \e0 in "rewrite" refers to the entire matching +text. For example: +.sp + string s = "yabba dabba doo"; + pcrecpp::RE("b+").Replace("d", &s); +.sp +will leave "s" containing "yada dabba doo". The result is true if the pattern +matches and a replacement occurs, false otherwise. +.P +\fBGlobalReplace\fP is like \fBReplace\fP except that it replaces all +occurrences of the pattern in the string with the rewrite. Replacements are +not subject to re-matching. For example: +.sp + string s = "yabba dabba doo"; + pcrecpp::RE("b+").GlobalReplace("d", &s); +.sp +will leave "s" containing "yada dada doo". It returns the number of +replacements made. +.P +\fBExtract\fP is like \fBReplace\fP, except that if the pattern matches, +"rewrite" is copied into "out" (an additional argument) with substitutions. +The non-matching portions of "text" are ignored. Returns true iff a match +occurred and the extraction happened successfully; if no match occurs, the +string is left unaffected. +. +. +.SH AUTHOR +.rs +.sp +.nf +The C++ wrapper was contributed by Google Inc. +Copyright (c) 2007 Google Inc. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 08 January 2012 +.fi diff --git a/usr/share/man/man3/pcredemo.3 b/usr/share/man/man3/pcredemo.3 new file mode 100755 index 000000000..194629b1f --- /dev/null +++ b/usr/share/man/man3/pcredemo.3 @@ -0,0 +1,424 @@ +.\" Start example. 
+.de EX +. nr mE \\n(.f +. nf +. nh +. ft CW +.. +. +. +.\" End example. +.de EE +. ft \\n(mE +. fi +. hy \\n(HY +.. +. +.EX +/************************************************* +* PCRE DEMONSTRATION PROGRAM * +*************************************************/ + +/* This is a demonstration program to illustrate the most straightforward ways +of calling the PCRE regular expression library from a C program. See the +pcresample documentation for a short discussion ("man pcresample" if you have +the PCRE man pages installed). + +In Unix-like environments, if PCRE is installed in your standard system +libraries, you should be able to compile this program using this command: + +gcc -Wall pcredemo.c -lpcre -o pcredemo + +If PCRE is not installed in a standard place, it is likely to be installed with +support for the pkg-config mechanism. If you have pkg-config, you can compile +this program using this command: + +gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo + +If you do not have pkg-config, you may have to use this: + +gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e + -R/usr/local/lib -lpcre -o pcredemo + +Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and +library files for PCRE are installed on your system. Only some operating +systems (e.g. Solaris) use the -R option. + +Building under Windows: + +If you want to statically link this program against a non-dll .a file, you must +define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and +pcre_free() exported functions will be declared __declspec(dllimport), with +unwanted results. So in this environment, uncomment the following line. 
*/ + +/* #define PCRE_STATIC */ + +#include <stdio.h> +#include <string.h> +#include <pcre.h> + +#define OVECCOUNT 30 /* should be a multiple of 3 */ + + +int main(int argc, char **argv) +{ +pcre *re; +const char *error; +char *pattern; +char *subject; +unsigned char *name_table; +unsigned int option_bits; +int erroffset; +int find_all; +int crlf_is_newline; +int namecount; +int name_entry_size; +int ovector[OVECCOUNT]; +int subject_length; +int rc, i; +int utf8; + + +/************************************************************************** +* First, sort out the command line. There is only one possible option at * +* the moment, "-g" to request repeated matching to find all occurrences, * +* like Perl's /g option. We set the variable find_all to a non-zero value * +* if the -g option is present. Apart from that, there must be exactly two * +* arguments. * +**************************************************************************/ + +find_all = 0; +for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-g") == 0) find_all = 1; + else break; + } + +/* After the options, we require exactly two arguments, which are the pattern, +and the subject string. */ + +if (argc - i != 2) + { + printf("Two arguments required: a regex and a subject string\en"); + return 1; + } + +pattern = argv[i]; +subject = argv[i+1]; +subject_length = (int)strlen(subject); + + +/************************************************************************* +* Now we are going to compile the regular expression pattern, and handle * +* and errors that are detected. 
* +*************************************************************************/ + +re = pcre_compile( + pattern, /* the pattern */ + 0, /* default options */ + &error, /* for error message */ + &erroffset, /* for error offset */ + NULL); /* use default character tables */ + +/* Compilation failed: print the error message and exit */ + +if (re == NULL) + { + printf("PCRE compilation failed at offset %d: %s\en", erroffset, error); + return 1; + } + + +/************************************************************************* +* If the compilation succeeded, we call PCRE again, in order to do a * +* pattern match against the subject string. This does just ONE match. If * +* further matching is needed, it will be done below. * +*************************************************************************/ + +rc = pcre_exec( + re, /* the compiled pattern */ + NULL, /* no extra data - we didn't study the pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + ovector, /* output vector for substring information */ + OVECCOUNT); /* number of elements in the output vector */ + +/* Matching failed: handle error cases */ + +if (rc < 0) + { + switch(rc) + { + case PCRE_ERROR_NOMATCH: printf("No match\en"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\en", rc); break; + } + pcre_free(re); /* Release memory used for the compiled pattern */ + return 1; + } + +/* Match succeeded */ + +printf("\enMatch succeeded at offset %d\en", ovector[0]); + + +/************************************************************************* +* We have found the first match within the subject string. If the output * +* vector wasn't big enough, say so. Then output any substrings that were * +* captured.
* +*************************************************************************/ + +/* The output vector wasn't big enough */ + +if (rc == 0) + { + rc = OVECCOUNT/3; + printf("ovector only has room for %d captured substrings\en", rc - 1); + } + +/* Show substrings stored in the output vector by number. Obviously, in a real +application you might want to do things other than print them. */ + +for (i = 0; i < rc; i++) + { + char *substring_start = subject + ovector[2*i]; + int substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, substring_length, substring_start); + } + + +/************************************************************************** +* That concludes the basic part of this demonstration program. We have * +* compiled a pattern, and performed a single match. The code that follows * +* shows first how to access named substrings, and then how to code for * +* repeated matches on the same subject. * +**************************************************************************/ + +/* See if there are any named substrings, and if so, show them by name. First +we have to extract the count of named parentheses from the pattern. */ + +(void)pcre_fullinfo( + re, /* the compiled pattern */ + NULL, /* no extra data - we didn't study the pattern */ + PCRE_INFO_NAMECOUNT, /* number of named substrings */ + &namecount); /* where to put the answer */ + +if (namecount <= 0) printf("No named substrings\en"); else + { + unsigned char *tabptr; + printf("Named substrings\en"); + + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. 
*/ + + (void)pcre_fullinfo( + re, /* the compiled pattern */ + NULL, /* no extra data - we didn't study the pattern */ + PCRE_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void)pcre_fullinfo( + re, /* the compiled pattern */ + NULL, /* no extra data - we didn't study the pattern */ + PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. */ + + tabptr = name_table; + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + + +/************************************************************************* +* If the "-g" option was given on the command line, we want to continue * +* to search for additional matches in the subject string, in a similar * +* way to the /g option in Perl. This turns out to be trickier than you * +* might think because of the possibility of matching an empty string. * +* What happens is as follows: * +* * +* If the previous match was NOT for an empty string, we can just start * +* the next match at the end of the previous one. * +* * +* If the previous match WAS for an empty string, we can't do that, as it * +* would lead to an infinite loop. Instead, a special call of pcre_exec() * +* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. * +* The first of these tells PCRE that an empty string at the start of the * +* subject is not a valid match; other possibilities must be tried. The * +* second flag restricts PCRE to one match attempt at the initial string * +* position. 
If this match succeeds, an alternative to the empty string * +* match has been found, and we can print it and proceed round the loop, * +* advancing by the length of whatever was found. If this match does not * +* succeed, we still stay in the loop, advancing by just one character. * +* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be * +* more than one byte. * +* * +* However, there is a complication concerned with newlines. When the * +* newline convention is such that CRLF is a valid newline, we must * +* advance by two characters rather than one. The newline convention can * +* be set in the regex by (*CR), etc.; if not, we must find the default. * +*************************************************************************/ + +if (!find_all) /* Check for -g */ + { + pcre_free(re); /* Release the memory used for the compiled pattern */ + return 0; /* Finish unless -g was given */ + } + +/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline +sequence. First, find the options with which the regex was compiled; extract +the UTF-8 state, and mask off all but the newline options. */ + +(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits); +utf8 = option_bits & PCRE_UTF8; +option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF| + PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF; + +/* If no newline options were set, find the default newline convention from the +build configuration. */ + +if (option_bits == 0) + { + int d; + (void)pcre_config(PCRE_CONFIG_NEWLINE, &d); + /* Note that these values are always the ASCII ones, even in + EBCDIC environments. CR = 13, NL = 10. */ + option_bits = (d == 13)? PCRE_NEWLINE_CR : + (d == 10)? PCRE_NEWLINE_LF : + (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF : + (d == -2)? PCRE_NEWLINE_ANYCRLF : + (d == -1)? PCRE_NEWLINE_ANY : 0; + } + +/* See if CRLF is a valid newline sequence. 
*/ + +crlf_is_newline = + option_bits == PCRE_NEWLINE_ANY || + option_bits == PCRE_NEWLINE_CRLF || + option_bits == PCRE_NEWLINE_ANYCRLF; + +/* Loop for second and subsequent matches */ + +for (;;) + { + int options = 0; /* Normally no options */ + int start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == subject_length) break; + options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; + } + + /* Run the next matching operation */ + + rc = pcre_exec( + re, /* the compiled pattern */ + NULL, /* no extra data - we didn't study the pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + ovector, /* output vector for substring information */ + OVECCOUNT); /* number of elements in the output vector */ + + /* This time, a result of NOMATCH isn't an error. If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF-8 character if we are in + UTF-8 mode. 
*/ + + if (rc == PCRE_ERROR_NOMATCH) + { + if (options == 0) break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one byte */ + if (crlf_is_newline && /* If CRLF is newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\er' && + subject[start_offset + 1] == '\en') + ovector[1] += 1; /* Advance by one more. */ + else if (utf8) /* Otherwise, ensure we */ + { /* advance a whole UTF-8 */ + while (ovector[1] < subject_length) /* character. */ + { + if ((subject[ovector[1]] & 0xc0) != 0x80) break; + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. */ + + if (rc < 0) + { + printf("Matching error %d\en", rc); + pcre_free(re); /* Release memory used for the compiled pattern */ + return 1; + } + + /* Match succeeded */ + + printf("\enMatch succeeded again at offset %d\en", ovector[0]); + + /* The match succeeded, but the output vector wasn't big enough. */ + + if (rc == 0) + { + rc = OVECCOUNT/3; + printf("ovector only has room for %d captured substrings\en", rc - 1); + } + + /* As before, show substrings stored in the output vector by number, and then + also any named substrings.
*/ + + for (i = 0; i < rc; i++) + { + char *substring_start = subject + ovector[2*i]; + int substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, substring_length, substring_start); + } + + if (namecount <= 0) printf("No named substrings\en"); else + { + unsigned char *tabptr = name_table; + printf("Named substrings\en"); + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + } /* End of loop to find second and subsequent matches */ + +printf("\en"); +pcre_free(re); /* Release memory used for the compiled pattern */ +return 0; +} + +/* End of pcredemo.c */ +.EE diff --git a/usr/share/man/man3/pcrejit.3 b/usr/share/man/man3/pcrejit.3 new file mode 100755 index 000000000..341403f7c --- /dev/null +++ b/usr/share/man/man3/pcrejit.3 @@ -0,0 +1,431 @@ +.TH PCREJIT 3 "17 March 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time compiling is a heavyweight optimization that can greatly speed up +pattern matching. However, it comes at the cost of extra processing before the +match is performed. Therefore, it is of most benefit when the same pattern is +going to be matched many times. This does not necessarily mean many calls of a +matching function; if the pattern is not anchored, matching attempts may take +place many times at various positions in the subject, even for a single call. +Therefore, if the subject string is very long, it may still pay to use JIT for +one-off matches. +.P +JIT support applies only to the traditional Perl-compatible matching function. +It does not apply when the DFA matching function is being used. The code for +this support was written by Zoltan Herczeg. +. +. 
+.SH "8-BIT, 16-BIT AND 32-BIT SUPPORT" +.rs +.sp +JIT support is available for all of the 8-bit, 16-bit and 32-bit PCRE +libraries. To keep this documentation simple, only the 8-bit interface is +described in what follows. If you are using the 16-bit library, substitute the +16-bit functions and 16-bit structures (for example, \fIpcre16_jit_stack\fP +instead of \fIpcre_jit_stack\fP). If you are using the 32-bit library, +substitute the 32-bit functions and 32-bit structures (for example, +\fIpcre32_jit_stack\fP instead of \fIpcre_jit_stack\fP). +. +. +.SH "AVAILABILITY OF JIT SUPPORT" +.rs +.sp +JIT support is an optional feature of PCRE. The "configure" option --enable-jit +(or equivalent CMake option) must be set when PCRE is built if you want to use +JIT. The support is limited to the following hardware platforms: +.sp + ARM v5, v7, and Thumb2 + Intel x86 32-bit and 64-bit + MIPS 32-bit + Power PC 32-bit and 64-bit + SPARC 32-bit (experimental) +.sp +If --enable-jit is set on an unsupported platform, compilation fails. +.P +A program that is linked with PCRE 8.20 or later can tell if JIT support is +available by calling \fBpcre_config()\fP with the PCRE_CONFIG_JIT option. The +result is 1 when JIT is available, and 0 otherwise. However, a simple program +does not need to check this in order to use JIT. The normal API is implemented +in a way that falls back to the interpretive code if JIT is not available. For +programs that need the best possible performance, there is also a "fast path" +API that is JIT-specific. +.P +If your program may sometimes be linked with versions of PCRE that are older +than 8.20, but you want to use JIT when it is available, you can test +the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT macro such +as PCRE_CONFIG_JIT, for compile-time control of your code. +. +. 
+.SH "SIMPLE USE OF JIT" +.rs +.sp +You have to do two things to make use of the JIT support in the simplest way: +.sp + (1) Call \fBpcre_study()\fP with the PCRE_STUDY_JIT_COMPILE option for + each compiled pattern, and pass the resulting \fBpcre_extra\fP block to + \fBpcre_exec()\fP. +.sp + (2) Use \fBpcre_free_study()\fP to free the \fBpcre_extra\fP block when it is + no longer needed, instead of just freeing it yourself. This ensures that + any JIT data is also freed. +.sp +For a program that may be linked with pre-8.20 versions of PCRE, you can insert +.sp + #ifndef PCRE_STUDY_JIT_COMPILE + #define PCRE_STUDY_JIT_COMPILE 0 + #endif +.sp +so that no option is passed to \fBpcre_study()\fP, and then use something like +this to free the study data: +.sp + #ifdef PCRE_CONFIG_JIT + pcre_free_study(study_ptr); + #else + pcre_free(study_ptr); + #endif +.sp +PCRE_STUDY_JIT_COMPILE requests the JIT compiler to generate code for complete +matches. If you want to run partial matches using the PCRE_PARTIAL_HARD or +PCRE_PARTIAL_SOFT options of \fBpcre_exec()\fP, you should set one or both of +the following options in addition to, or instead of, PCRE_STUDY_JIT_COMPILE +when you call \fBpcre_study()\fP: +.sp + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +.sp +The JIT compiler generates different optimized code for each of the three +modes (normal, soft partial, hard partial). When \fBpcre_exec()\fP is called, +the appropriate code is run if it is available. Otherwise, the pattern is +matched using interpretive code. +.P +In some circumstances you may need to call additional functions. These are +described in the section entitled +.\" HTML <a href="#stackcontrol"> +.\" </a> +"Controlling the JIT stack" +.\" +below. +.P +If JIT support is not available, PCRE_STUDY_JIT_COMPILE etc. are ignored, and +no JIT data is created. 
Otherwise, the compiled pattern is passed to the JIT +compiler, which turns it into machine code that executes much faster than the +normal interpretive code. When \fBpcre_exec()\fP is passed a \fBpcre_extra\fP +block containing a pointer to JIT code of the appropriate mode (normal or +hard/soft partial), it obeys that code instead of running the interpreter. The +result is identical, but the compiled JIT code runs much faster. +.P +There are some \fBpcre_exec()\fP options that are not supported for JIT +execution. There are also some pattern items that JIT cannot handle. Details +are given below. In both cases, execution automatically falls back to the +interpretive code. If you want to know whether JIT was actually used for a +particular match, you should arrange for a JIT callback function to be set up +as described in the section entitled +.\" HTML <a href="#stackcontrol"> +.\" </a> +"Controlling the JIT stack" +.\" +below, even if you do not need to supply a non-default JIT stack. Such a +callback function is called whenever JIT code is about to be obeyed. If the +execution options are not right for JIT execution, the callback function is not +obeyed. +.P +If the JIT compiler finds an unsupported item, no JIT data is generated. You +can find out if JIT execution is available after studying a pattern by calling +\fBpcre_fullinfo()\fP with the PCRE_INFO_JIT option. A result of 1 means that +JIT compilation was successful. A result of 0 means that JIT support is not +available, or the pattern was not studied with PCRE_STUDY_JIT_COMPILE etc., or +the JIT compiler was not able to handle the pattern. +.P +Once a pattern has been studied, with or without JIT, it can be used as many +times as you like for matching different subject strings. +. +. 
+.SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS" +.rs +.sp +The only \fBpcre_exec()\fP options that are supported for JIT execution are +PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NO_UTF32_CHECK, PCRE_NOTBOL, +PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and +PCRE_PARTIAL_SOFT. +.P +The only unsupported pattern items are \eC (match a single data unit) when +running in a UTF mode, and a callout immediately before an assertion condition +in a conditional group. +. +. +.SH "RETURN VALUES FROM JIT EXECUTION" +.rs +.sp +When a pattern is matched using JIT execution, the return values are the same +as those given by the interpretive \fBpcre_exec()\fP code, with the addition of +one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means that the memory used +for the JIT stack was insufficient. See +.\" HTML <a href="#stackcontrol"> +.\" </a> +"Controlling the JIT stack" +.\" +below for a discussion of JIT stack usage. For compatibility with the +interpretive \fBpcre_exec()\fP code, no more than two-thirds of the +\fIovector\fP argument is used for passing back captured substrings. +.P +The error code PCRE_ERROR_MATCHLIMIT is returned by the JIT code if searching a +very large pattern tree goes on for too long, as it is in the same circumstance +when JIT is not used, but the details of exactly what is counted are not the +same. The PCRE_ERROR_RECURSIONLIMIT error code is never returned by JIT +execution. +. +. +.SH "SAVING AND RESTORING COMPILED PATTERNS" +.rs +.sp +The code that is generated by the JIT compiler is architecture-specific, and is +also position dependent. For those reasons it cannot be saved (in a file or +database) and restored later like the bytecode and other data of a compiled +pattern. Saving and restoring compiled patterns is not something many people +do. More detail about this facility is given in the +.\" HREF +\fBpcreprecompile\fP +.\" +documentation. 
It should be possible to run \fBpcre_study()\fP on a saved and +restored pattern, and thereby recreate the JIT data, but because JIT +compilation uses significant resources, it is probably not worth doing this; +you might as well recompile the original pattern. +. +. +.\" HTML <a name="stackcontrol"></a> +.SH "CONTROLLING THE JIT STACK" +.rs +.sp +When the compiled JIT code runs, it needs a block of memory to use as a stack. +By default, it uses 32K on the machine stack. However, some large or +complicated patterns need more than this. The error PCRE_ERROR_JIT_STACKLIMIT +is given when there is not enough stack. Three functions are provided for +managing blocks of memory for use as JIT stacks. There is further discussion +about the use of JIT stacks in the section entitled +.\" HTML <a href="#stackfaq"> +.\" </a> +"JIT stack FAQ" +.\" +below. +.P +The \fBpcre_jit_stack_alloc()\fP function creates a JIT stack. Its arguments +are a starting size and a maximum size, and it returns a pointer to an opaque +structure of type \fBpcre_jit_stack\fP, or NULL if there is an error. The +\fBpcre_jit_stack_free()\fP function can be used to free a stack that is no +longer needed. (For the technically minded: the address space is allocated by +mmap or VirtualAlloc.) +.P +JIT uses far less memory for recursion than the interpretive code, +and a maximum stack size of 512K to 1M should be more than enough for any +pattern. +.P +The \fBpcre_assign_jit_stack()\fP function specifies which stack JIT code +should use. Its arguments are as follows: +.sp + pcre_extra *extra + pcre_jit_callback callback + void *data +.sp +The \fIextra\fP argument must be the result of studying a pattern with +PCRE_STUDY_JIT_COMPILE etc. There are three cases for the values of the other +two options: +.sp + (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block + on the machine stack is used.
+.sp + (2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be + a valid JIT stack, the result of calling \fBpcre_jit_stack_alloc()\fP. +.sp + (3) If \fIcallback\fP is not NULL, it must point to a function that is + called with \fIdata\fP as an argument at the start of matching, in + order to set up a JIT stack. If the return from the callback + function is NULL, the internal 32K stack is used; otherwise the + return value must be a valid JIT stack, the result of calling + \fBpcre_jit_stack_alloc()\fP. +.sp +A callback function is obeyed whenever JIT code is about to be run; it is not +obeyed when \fBpcre_exec()\fP is called with options that are incompatible for +JIT execution. A callback function can therefore be used to determine whether a +match operation was executed by JIT or by the interpreter. +.P +You may safely use the same JIT stack for more than one pattern (either by +assigning directly or by callback), as long as the patterns are all matched +sequentially in the same thread. In a multithread application, if you do not +specify a JIT stack, or if you assign or pass back NULL from a callback, that +is thread-safe, because each thread has its own machine stack. However, if you +assign or pass back a non-NULL JIT stack, this must be a different stack for +each thread so that the application is thread-safe. +.P +Strictly speaking, even more is allowed. You can assign the same non-NULL stack +to any number of patterns as long as they are not used for matching by multiple +threads at the same time. For example, you can assign the same stack to all +compiled patterns, and use a global mutex in the callback to wait until the +stack is available for use. However, this is an inefficient solution, and not +recommended. +.P +This is a suggestion for how a multithreaded program that needs to set up +non-default JIT stacks might operate: +.sp + During thread initialization + thread_local_var = pcre_jit_stack_alloc(...)
+.sp + During thread exit + pcre_jit_stack_free(thread_local_var) +.sp + Use a one-line callback function + return thread_local_var +.sp +All the functions described in this section do nothing if JIT is not available, +and \fBpcre_assign_jit_stack()\fP does nothing unless the \fBextra\fP argument +is non-NULL and points to a \fBpcre_extra\fP block that is the result of a +successful study with PCRE_STUDY_JIT_COMPILE etc. +. +. +.\" HTML <a name="stackfaq"></a> +.SH "JIT STACK FAQ" +.rs +.sp +(1) Why do we need JIT stacks? +.sp +PCRE (and JIT) is a recursive, depth-first engine, so it needs a stack where +the local data of the current node is pushed before checking its child nodes. +Allocating real machine stack on some platforms is difficult. For example, the +stack chain needs to be updated every time if we extend the stack on PowerPC. +Although it is possible, its updating time overhead decreases performance. So +we do the recursion in memory. +.P +(2) Why don't we simply allocate blocks of memory with \fBmalloc()\fP? +.sp +Modern operating systems have a nice feature: they can reserve an address space +instead of allocating memory. We can safely allocate memory pages inside this +address space, so the stack could grow without moving memory data (this is +important because of pointers). Thus we can allocate 1M address space, and use +only a single memory page (usually 4K) if that is enough. However, we can still +grow up to 1M anytime if needed. +.P +(3) Who "owns" a JIT stack? +.sp +The owner of the stack is the user program, not the JIT studied pattern or +anything else. The user program must ensure that if a stack is used by +\fBpcre_exec()\fP, (that is, it is assigned to the pattern currently running), +that stack must not be used by any other threads (to avoid overwriting the same +memory area). The best practice for multithreaded programs is to allocate a +stack for each thread, and return this stack through the JIT callback function. 
+.P +(4) When should a JIT stack be freed? +.sp +You can free a JIT stack at any time, as long as it will not be used by +\fBpcre_exec()\fP again. When you assign the stack to a pattern, only a pointer +is set. There is no reference counting or any other magic. You can free the +patterns and stacks in any order, anytime. Just \fIdo not\fP call +\fBpcre_exec()\fP with a pattern pointing to an already freed stack, as that +will cause SEGFAULT. (Also, do not free a stack currently used by +\fBpcre_exec()\fP in another thread). You can also replace the stack for a +pattern at any time. You can even free the previous stack before assigning a +replacement. +.P +(5) Should I allocate/free a stack every time before/after calling +\fBpcre_exec()\fP? +.sp +No, because this is too costly in terms of resources. However, you could +implement some clever idea which releases the stack if it is not used in let's +say two minutes. The JIT callback can help to achieve this without keeping a +list of the currently JIT studied patterns. +.P +(6) OK, the stack is for long term memory allocation. But what happens if a +pattern causes stack overflow with a stack of 1M? Is that 1M kept until the +stack is freed? +.sp +Especially on embedded systems, it might be a good idea to release memory +sometimes without freeing the stack. There is no API for this at the moment. +Probably a function call which returns with the currently allocated memory for +any stack and another which allows releasing memory (shrinking the stack) would +be a good idea if someone needs this. +.P +(7) This is too much of a headache. Isn't there any better solution for JIT +stack handling? +.sp +No, thanks to Windows. If POSIX threads were used everywhere, we could throw +out this complicated API. +. +. +.SH "EXAMPLE CODE" +.rs +.sp +This is a single-threaded example that specifies a JIT stack without using a +callback.
+.sp + int rc; + int ovector[30]; + pcre *re; + pcre_extra *extra; + pcre_jit_stack *jit_stack; +.sp + re = pcre_compile(pattern, 0, &error, &erroffset, NULL); + /* Check for errors */ + extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error); + jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024); + /* Check for error (NULL) */ + pcre_assign_jit_stack(extra, NULL, jit_stack); + rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30); + /* Check results */ + pcre_free(re); + pcre_free_study(extra); + pcre_jit_stack_free(jit_stack); +.sp +. +. +.SH "JIT FAST PATH API" +.rs +.sp +Because the API described above falls back to interpreted execution when JIT is +not available, it is convenient for programs that are written for general use +in many environments. However, calling JIT via \fBpcre_exec()\fP does have a +performance impact. Programs that are written for use where JIT is known to be +available, and which need the best possible performance, can instead use a +"fast path" API to call JIT execution directly instead of calling +\fBpcre_exec()\fP (obviously only for patterns that have been successfully +studied by JIT). +.P +The fast path function is called \fBpcre_jit_exec()\fP, and it takes exactly +the same arguments as \fBpcre_exec()\fP, plus one additional argument that +must point to a JIT stack. The JIT stack arrangements described above do not +apply. The return values are the same as for \fBpcre_exec()\fP. +.P +When you call \fBpcre_exec()\fP, as well as testing for invalid options, a +number of other sanity checks are performed on the arguments. For example, if +the subject pointer is NULL, or its length is negative, an immediate error is +given. Also, unless PCRE_NO_UTF[8|16|32] is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path, and if invalid data is passed, the result is undefined. 
+.P +Bypassing the sanity checks and the \fBpcre_exec()\fP wrapping can give +speedups of more than 10%. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcreapi\fP(3) +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel (FAQ by Zoltan Herczeg) +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 17 March 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrelimits.3 b/usr/share/man/man3/pcrelimits.3 new file mode 100755 index 000000000..423d6a276 --- /dev/null +++ b/usr/share/man/man3/pcrelimits.3 @@ -0,0 +1,71 @@ +.TH PCRELIMITS 3 "05 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "SIZE AND OTHER LIMITATIONS" +.rs +.sp +There are some size limitations in PCRE but it is hoped that they will never in +practice be relevant. +.P +The maximum length of a compiled pattern is approximately 64K data units (bytes +for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for +the 32-bit library) if PCRE is compiled with the default internal linkage size, +which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit +library. If you want to process regular expressions that are truly enormous, +you can compile PCRE with an internal linkage size of 3 or 4 (when building the +16-bit or 32-bit library, 3 is rounded up to 4). See the \fBREADME\fP file in +the source distribution and the +.\" HREF +\fBpcrebuild\fP +.\" +documentation for details. In these cases the limit is substantially larger. +However, the speed of execution is slower. +.P +All values in repeating quantifiers must be less than 65536. +.P +There is no limit to the number of parenthesized subpatterns, but there can be +no more than 65535 capturing subpatterns. There is, however, a limit to the +depth of nesting of parenthesized subpatterns of all kinds. This is imposed in +order to limit the amount of system stack used at compile time. 
The limit can +be specified when PCRE is built; the default is 250. +.P +There is a limit to the number of forward references to subsequent subpatterns +of around 200,000. Repeated forward references with fixed upper limits, for +example, (?2){0,100} when subpattern number 2 is to the right, are included in +the count. There is no limit to the number of backward references. +.P +The maximum length of name for a named subpattern is 32 characters, and the +maximum number of named subpatterns is 10000. +.P +The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries. +.P +The maximum length of a subject string is the largest positive number that an +integer variable can hold. However, when using the traditional matching +function, PCRE uses recursion to handle subpatterns and indefinite repetition. +This means that the available stack space may limit the size of a subject +string that can be processed by certain patterns. For a discussion of stack +issues, see the +.\" HREF +\fBpcrestack\fP +.\" +documentation. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 05 November 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrematching.3 b/usr/share/man/man3/pcrematching.3 new file mode 100755 index 000000000..268baf9b8 --- /dev/null +++ b/usr/share/man/man3/pcrematching.3 @@ -0,0 +1,214 @@ +.TH PCREMATCHING 3 "12 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE MATCHING ALGORITHMS" +.rs +.sp +This document describes the two different algorithms that are available in PCRE +for matching a compiled regular expression against a given subject string. The +"standard" algorithm is the one provided by the \fBpcre_exec()\fP, +\fBpcre16_exec()\fP and \fBpcre32_exec()\fP functions. 
These work in the same +way as Perl's matching function, and provide a Perl-compatible matching operation. +The just-in-time (JIT) optimization that is described in the +.\" HREF +\fBpcrejit\fP +.\" +documentation is compatible with these functions. +.P +An alternative algorithm is provided by the \fBpcre_dfa_exec()\fP, +\fBpcre16_dfa_exec()\fP and \fBpcre32_dfa_exec()\fP functions; they operate in +a different way, and are not Perl-compatible. This alternative has advantages +and disadvantages compared with the standard algorithm, and these are described +below. +.P +When there is only one possible way in which a given subject string can match a +pattern, the two algorithms give the same answer. A difference arises, however, +when there are multiple possibilities. For example, if the pattern +.sp + ^<.*> +.sp +is matched against the string +.sp + <something> <something else> <something further> +.sp +there are three possible answers. The standard algorithm finds only one of +them, whereas the alternative algorithm finds all three. +. +. +.SH "REGULAR EXPRESSIONS AS TREES" +.rs +.sp +The set of strings that are matched by a regular expression can be represented +as a tree structure. An unlimited repetition in the pattern makes the tree of +infinite size, but it is still a tree. Matching the pattern to a given subject +string (from a given starting point) can be thought of as a search of the tree. +There are two ways to search a tree: depth-first and breadth-first, and these +correspond to the two matching algorithms provided by PCRE. +. +. +.SH "THE STANDARD MATCHING ALGORITHM" +.rs +.sp +In the terminology of Jeffrey Friedl's book "Mastering Regular +Expressions", the standard algorithm is an "NFA algorithm". It conducts a +depth-first search of the pattern tree. That is, it proceeds along a single +path through the tree, checking that the subject matches what is required.
When +there is a mismatch, the algorithm tries any alternatives at the current point, +and if they all fail, it backs up to the previous branch point in the tree, and +tries the next alternative branch at that level. This often involves backing up +(moving to the left) in the subject string as well. The order in which +repetition branches are tried is controlled by the greedy or ungreedy nature of +the quantifier. +.P +If a leaf node is reached, a matching string has been found, and at that point +the algorithm stops. Thus, if there is more than one possible match, this +algorithm returns the first one that it finds. Whether this is the shortest, +the longest, or some intermediate length depends on the way the greedy and +ungreedy repetition quantifiers are specified in the pattern. +.P +Because it ends up with a single path through the tree, it is relatively +straightforward for this algorithm to keep track of the substrings that are +matched by portions of the pattern in parentheses. This provides support for +capturing parentheses and back references. +. +. +.SH "THE ALTERNATIVE MATCHING ALGORITHM" +.rs +.sp +This algorithm conducts a breadth-first search of the tree. Starting from the +first matching point in the subject, it scans the subject string from left to +right, once, character by character, and as it does this, it remembers all the +paths through the tree that represent valid matches. In Friedl's terminology, +this is a kind of "DFA algorithm", though it is not implemented as a +traditional finite state machine (it keeps multiple states active +simultaneously). +.P +Although the general principle of this matching algorithm is that it scans the +subject string only once, without backtracking, there is one exception: when a +lookaround assertion is encountered, the characters following or preceding the +current point have to be independently inspected. 
+.P +The scan continues until either the end of the subject is reached, or there are +no more unterminated paths. At this point, terminated paths represent the +different matching possibilities (if there are none, the match has failed). +Thus, if there is more than one possible match, this algorithm finds all of +them, and in particular, it finds the longest. The matches are returned in +decreasing order of length. There is an option to stop the algorithm after the +first match (which is necessarily the shortest) is found. +.P +Note that all the matches that are found start at the same point in the +subject. If the pattern +.sp + cat(er(pillar)?)? +.sp +is matched against the string "the caterpillar catchment", the result will be +the three strings "caterpillar", "cater", and "cat" that start at the fifth +character of the subject. The algorithm does not automatically move on to find +matches that start at later positions. +.P +PCRE's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling. +.P +There are a number of features of PCRE regular expressions that are not +supported by the alternative matching algorithm. They are as follows: +.P +1. Because the algorithm finds all possible matches, the greedy or ungreedy +nature of repetition quantifiers is not relevant. Greedy and ungreedy +quantifiers are treated in exactly the same way. However, possessive +quantifiers can make a difference when what follows could also match what is +quantified, for example in a pattern like this: +.sp + ^a++\ew! 
+.sp +This pattern matches "aaab!" but not "aaa!", which would be matched by a +non-possessive quantifier. Similarly, if an atomic group is present, it is +matched as if it were a standalone pattern at the current point, and the +longest match is then "locked in" for the rest of the overall pattern. +.P +2. When dealing with multiple paths through the tree simultaneously, it is not +straightforward to keep track of captured substrings for the different matching +possibilities, and PCRE's implementation of this algorithm does not attempt to +do this. This means that no captured substrings are available. +.P +3. Because no substrings are captured, back references within the pattern are +not supported, and cause errors if encountered. +.P +4. For the same reason, conditional expressions that use a backreference as the +condition or test for a specific group recursion are not supported. +.P +5. Because many paths through the tree may be active, the \eK escape sequence, +which resets the start of the match when encountered (but may be on some paths +and not on others), is not supported. It causes an error if encountered. +.P +6. Callouts are supported, but the value of the \fIcapture_top\fP field is +always 1, and the value of the \fIcapture_last\fP field is always -1. +.P +7. The \eC escape sequence, which (in the standard algorithm) always matches a +single data unit, even in UTF-8, UTF-16 or UTF-32 modes, is not supported in +these modes, because the alternative algorithm moves through the subject string +one character (not data unit) at a time, for all active paths through the tree. +.P +8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +supported. (*FAIL) is supported, and behaves like a failing negative assertion. +. +. +.SH "ADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +Using the alternative matching algorithm provides the following advantages: +.P +1. 
All possible matches (at a single point in the subject) are automatically +found, and in particular, the longest match is found. To find more than one +match using the standard algorithm, you have to do kludgy things with +callouts. +.P +2. Because the alternative algorithm scans the subject string just once, and +never needs to backtrack (except for lookbehinds), it is possible to pass very +long subject strings to the matching function in several pieces, checking for +partial matching each time. Although it is possible to do multi-segment +matching using the standard algorithm by retaining partially matched +substrings, it is more complicated. The +.\" HREF +\fBpcrepartial\fP +.\" +documentation gives details of partial matching and discusses multi-segment +matching. +. +. +.SH "DISADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +The alternative algorithm suffers from a number of disadvantages: +.P +1. It is substantially slower than the standard algorithm. This is partly +because it has to search for all possible matches, but is also because it is +less susceptible to optimization. +.P +2. Capturing parentheses and back references are not supported. +.P +3. Although atomic groups are supported, their use does not provide the +performance advantage that it does for the standard algorithm. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 November 2013 +Copyright (c) 1997-2012 University of Cambridge. 
+.fi diff --git a/usr/share/man/man3/pcrepartial.3 b/usr/share/man/man3/pcrepartial.3 new file mode 100755 index 000000000..14d0124f1 --- /dev/null +++ b/usr/share/man/man3/pcrepartial.3 @@ -0,0 +1,476 @@ +.TH PCREPARTIAL 3 "02 July 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PARTIAL MATCHING IN PCRE" +.rs +.sp +In normal use of PCRE, if the subject string that is passed to a matching +function matches as far as it goes, but is too short to match the entire +pattern, PCRE_ERROR_NOMATCH is returned. There are circumstances where it might +be helpful to distinguish this case from other cases in which there is no +match. +.P +Consider, for example, an application where a human is required to type in data +for a field with specific formatting requirements. An example might be a date +in the form \fIddmmmyy\fP, defined by this pattern: +.sp + ^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$ +.sp +If the application sees the user's keystrokes one by one, and can check that +what has been typed so far is potentially valid, it is able to raise an error +as soon as a mistake is made, by beeping and not reflecting the character that +has been typed, for example. This immediate feedback is likely to be a better +user interface than a check that is delayed until the entire string has been +entered. Partial matching can also be useful when the subject string is very +long and is not all available at once. +.P +PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and +PCRE_PARTIAL_HARD options, which can be set when calling any of the matching +functions. For backwards compatibility, PCRE_PARTIAL is a synonym for +PCRE_PARTIAL_SOFT. The essential difference between the two options is whether +or not a partial match is preferred to an alternative complete match, though +the details differ between the two types of matching function. If both options +are set, PCRE_PARTIAL_HARD takes precedence. 
+.P +If you want to use partial matching with just-in-time optimized code, you must +call \fBpcre_study()\fP, \fBpcre16_study()\fP or \fBpcre32_study()\fP with one +or both of these options: +.sp + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE +.sp +PCRE_STUDY_JIT_COMPILE should also be set if you are going to run non-partial +matches on the same pattern. If the appropriate JIT study mode has not been set +for a match, the interpretive matching code is used. +.P +Setting a partial matching option disables two of PCRE's standard +optimizations. PCRE remembers the last literal data unit in a pattern, and +abandons matching immediately if it is not present in the subject string. This +optimization cannot be used for a subject string that might match only +partially. If the pattern was studied, PCRE knows the minimum length of a +matching string, and does not bother to run the matching function on shorter +strings. This optimization is also disabled for partial matching. +. +. +.SH "PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()" +.rs +.sp +A partial match occurs during a call to \fBpcre_exec()\fP or +\fBpcre[16|32]_exec()\fP when the end of the subject string is reached +successfully, but matching cannot continue because more characters are needed. +However, at least one character in the subject must have been inspected. This +character need not form part of the final matched string; lookbehind assertions +and the \eK escape sequence provide ways of inspecting characters before the +start of a matched substring. The requirement for inspecting at least one +character exists because an empty string can always be matched; without such a +restriction there would always be a partial match of an empty string at the end +of the subject. +.P +If there are at least two slots in the offsets vector when a partial match is +returned, the first slot is set to the offset of the earliest character that +was inspected. 
For convenience, the second offset points to the end of the +subject so that a substring can easily be identified. If there are at least +three slots in the offsets vector, the third slot is set to the offset of the +character where matching started. +.P +For the majority of patterns, the contents of the first and third slots will be +the same. However, for patterns that contain lookbehind assertions, or begin +with \eb or \eB, characters before the one where matching started may have been +inspected while carrying out the match. For example, consider this pattern: +.sp + /(?<=abc)123/ +.sp +This pattern matches "123", but only if it is preceded by "abc". If the subject +string is "xyzabc12", the first two offsets after a partial match are for the +substring "abc12", because all these characters were inspected. However, the +third offset is set to 6, because that is the offset where matching began. +.P +What happens when a partial match is identified depends on which of the two +partial matching options are set. +. +. +.SS "PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre[16|32]_exec()" +.rs +.sp +If PCRE_PARTIAL_SOFT is set when \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP +identifies a partial match, the partial match is remembered, but matching +continues as normal, and other alternatives in the pattern are tried. If no +complete match can be found, PCRE_ERROR_PARTIAL is returned instead of +PCRE_ERROR_NOMATCH. +.P +This option is "soft" because it prefers a complete match over a partial match. +All the various matching items in a pattern behave as if the subject string is +potentially complete. For example, \ez, \eZ, and $ match at the end of the +subject, as normal, and for \eb and \eB the end of the subject is treated as a +non-alphanumeric. +.P +If there is more than one partial match, the first one that was found provides +the data that is returned. 
Consider this pattern: +.sp + /123\ew+X|dogY/ +.sp +If this is matched against the subject string "abc123dog", both +alternatives fail to match, but the end of the subject is reached during +matching, so PCRE_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, +identifying "123dog" as the first partial match that was found. (In this +example, there are two partial matches, because "dog" on its own partially +matches the second alternative.) +. +. +.SS "PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre[16|32]_exec()" +.rs +.sp +If PCRE_PARTIAL_HARD is set for \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP, +PCRE_ERROR_PARTIAL is returned as soon as a partial match is found, without +continuing to search for possible complete matches. This option is "hard" +because it prefers an earlier partial match over a later complete match. For +this reason, the assumption is made that the end of the supplied subject string +may not be the true end of the available data, and so, if \ez, \eZ, \eb, \eB, +or $ are encountered at the end of the subject, the result is +PCRE_ERROR_PARTIAL, provided that at least one character in the subject has +been inspected. +.P +Setting PCRE_PARTIAL_HARD also affects the way UTF-8 and UTF-16 +subject strings are checked for validity. Normally, an invalid sequence +causes the error PCRE_ERROR_BADUTF8 or PCRE_ERROR_BADUTF16. However, in the +special case of a truncated character at the end of the subject, +PCRE_ERROR_SHORTUTF8 or PCRE_ERROR_SHORTUTF16 is returned when +PCRE_PARTIAL_HARD is set. +. +. +.SS "Comparing hard and soft partial matching" +.rs +.sp +The difference between the two partial matching options can be illustrated by a +pattern such as: +.sp + /dog(sbody)?/ +.sp +This matches either "dog" or "dogsbody", greedily (that is, it prefers the +longer string if possible). If it is matched against the string "dog" with +PCRE_PARTIAL_SOFT, it yields a complete match for "dog". 
However, if +PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand, +if the pattern is made ungreedy the result is different: +.sp + /dog(sbody)??/ +.sp +In this case the result is always a complete match because that is found first, +and matching never continues after finding a complete match. It might be easier +to follow this explanation by thinking of the two patterns like this: +.sp + /dog(sbody)?/ is the same as /dogsbody|dog/ + /dog(sbody)??/ is the same as /dog|dogsbody/ +.sp +The second pattern will never match "dogsbody", because it will always find the +shorter match first. +. +. +.SH "PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()" +.rs +.sp +The DFA functions move along the subject string character by character, without +backtracking, searching for all possible matches simultaneously. If the end of +the subject is reached before the end of the pattern, there is the possibility +of a partial match, again provided that at least one character has been +inspected. +.P +When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there +have been no complete matches. Otherwise, the complete matches are returned. +However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any +complete matches. The portion of the string that was inspected when the longest +partial match was found is set as the first matching string, provided there are +at least two slots in the offsets vector. +.P +Because the DFA functions always search for all possible matches, and there is +no difference between greedy and ungreedy repetition, their behaviour is +different from the standard functions when PCRE_PARTIAL_HARD is set. 
Consider +the string "dog" matched against the ungreedy pattern shown above: +.sp + /dog(sbody)??/ +.sp +Whereas the standard functions stop as soon as they find the complete match for +"dog", the DFA functions also find the partial match for "dogsbody", and so +return that when PCRE_PARTIAL_HARD is set. +. +. +.SH "PARTIAL MATCHING AND WORD BOUNDARIES" +.rs +.sp +If a pattern ends with one of the sequences \eb or \eB, which test for word +boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive +results. Consider this pattern: +.sp + /\ebcat\eb/ +.sp +This matches "cat", provided there is a word boundary at either end. If the +subject string is "the cat", the comparison of the final "t" with a following +character cannot take place, so a partial match is found. However, normal +matching carries on, and \eb matches at the end of the subject when the last +character is a letter, so a complete match is found. The result, therefore, is +\fInot\fP PCRE_ERROR_PARTIAL. Using PCRE_PARTIAL_HARD in this case does yield +PCRE_ERROR_PARTIAL, because then the partial match takes precedence. +. +. +.SH "FORMERLY RESTRICTED PATTERNS" +.rs +.sp +For releases of PCRE prior to 8.00, because of the way certain internal +optimizations were implemented in the \fBpcre_exec()\fP function, the +PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with +all patterns. From release 8.00 onwards, the restrictions no longer apply, and +partial matching can be requested for any pattern. +.P +Items that were formerly restricted were repeated single characters and +repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not +conform to the restrictions, \fBpcre_exec()\fP returned the error code +PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The +PCRE_INFO_OKPARTIAL call to \fBpcre_fullinfo()\fP to find out if a compiled +pattern can be used for partial matching now always returns 1. +. +. 
+.SH "EXAMPLE OF PARTIAL MATCHING USING PCRETEST" +.rs +.sp +If the escape sequence \eP is present in a \fBpcretest\fP data line, the +PCRE_PARTIAL_SOFT option is used for the match. Here is a run of \fBpcretest\fP +that uses the date example quoted above: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 25jun04\eP + 0: 25jun04 + 1: jun + data> 25dec3\eP + Partial match: 25dec3 + data> 3ju\eP + Partial match: 3ju + data> 3juj\eP + No match + data> j\eP + No match +.sp +The first data string is matched completely, so \fBpcretest\fP shows the +matched substrings. The remaining four strings do not match the complete +pattern, but the first two are partial matches. Similar output is obtained +if DFA matching is used. +.P +If the escape sequence \eP is present more than once in a \fBpcretest\fP data +line, the PCRE_PARTIAL_HARD option is set for the match. +. +. +.SH "MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()" +.rs +.sp +When a partial match has been found using a DFA matching function, it is +possible to continue the match by providing additional subject data and calling +the function again with the same compiled regular expression, this time setting +the PCRE_DFA_RESTART option. You must pass the same working space as before, +because this is where details of the previous partial match are stored. Here is +an example using \fBpcretest\fP, using the \eR escape sequence to set the +PCRE_DFA_RESTART option (\eD specifies the use of the DFA matching function): +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 23ja\eP\eD + Partial match: 23ja + data> n05\eR\eD + 0: n05 +.sp +The first call has "23ja" as the subject, and requests partial matching; the +second call has "n05" as the subject for the continued (restarted) match. +Notice that when the match is complete, only the last part is shown; PCRE does +not retain the previously partially-matched string. 
It is up to the calling +program to do that if it needs to. +.P +That means that, for an unanchored pattern, if a continued match fails, it is +not possible to try again at a new starting point. All this facility is capable +of doing is continuing with the previous match attempt. In the previous +example, if the second set of data is "ug23" the result is no match, even +though there would be a match for "aug23" if the entire string were given at +once. Depending on the application, this may or may not be what you want. +The only way to allow for starting again at the next character is to retain the +matched part of the subject and try a new complete match. +.P +You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with +PCRE_DFA_RESTART to continue partial matching over multiple segments. This +facility can be used to pass very long subject strings to the DFA matching +functions. +. +. +.SH "MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()" +.rs +.sp +From release 8.00, the standard matching functions can also be used to do +multi-segment matching. Unlike the DFA functions, it is not possible to +restart the previous match with a new segment of data. Instead, new data must +be added to the previous subject string, and the entire match re-run, starting +from the point where the partial match occurred. Earlier data can be discarded. +.P +It is best to use PCRE_PARTIAL_HARD in this situation, because it does not +treat the end of a segment as the end of the subject when matching \ez, \eZ, +\eb, \eB, and $. Consider an unanchored pattern that matches dates: +.sp + re> /\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed/ + data> The date is 23ja\eP\eP + Partial match: 23ja +.sp +At this stage, an application could discard the text preceding "23ja", add on +text from the next segment, and call the matching function again. 
Unlike the +DFA matching functions, the entire matching string must always be available, +and the complete matching process occurs for each call, so more memory and more +processing time is needed. +.P +\fBNote:\fP If the pattern contains lookbehind assertions, or \eK, or starts +with \eb or \eB, the string that is returned for a partial match includes +characters that precede the start of what would be returned for a complete +match, because it contains all the characters that were inspected during the +partial match. +. +. +.SH "ISSUES WITH MULTI-SEGMENT MATCHING" +.rs +.sp +Certain types of pattern may give problems with multi-segment matching, +whichever matching function is used. +.P +1. If the pattern contains a test for the beginning of a line, you need to pass +the PCRE_NOTBOL option when the subject string for any call does not start at the +beginning of a line. There is also a PCRE_NOTEOL option, but in practice when +doing multi-segment matching you should be using PCRE_PARTIAL_HARD, which +includes the effect of PCRE_NOTEOL. +.P +2. Lookbehind assertions that have already been obeyed are catered for in the +offsets that are returned for a partial match. However a lookbehind assertion +later in the pattern could require even earlier characters to be inspected. You +can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the +\fBpcre_fullinfo()\fP or \fBpcre[16|32]_fullinfo()\fP functions to obtain the +length of the longest lookbehind in the pattern. This length is given in +characters, not bytes. If you always retain at least that many characters +before the partially matched string, all should be well. (Of course, near the +start of the subject, fewer characters may be present; in that case all +characters should be retained.) +.P +From release 8.33, there is a more accurate way of deciding which characters to +retain. 
Instead of subtracting the length of the longest lookbehind from the +earliest inspected character (\fIoffsets[0]\fP), the match start position +(\fIoffsets[2]\fP) should be used, and the next match attempt started at the +\fIoffsets[2]\fP character by setting the \fIstartoffset\fP argument of +\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. +.P +For example, if the pattern "(?<=123)abc" is partially +matched against the string "xx123a", the three offset values returned are 2, 6, +and 5. This indicates that the matching process that gave a partial match +started at offset 5, but the characters "123a" were all inspected. The maximum +lookbehind for that pattern is 3, so taking that away from 5 shows that we need +only keep "123a", and the next match attempt can be started at offset 3 (that +is, at "a") when further characters have been added. When the match start is +not the earliest inspected character, \fBpcretest\fP shows it explicitly: +.sp + re> "(?<=123)abc" + data> xx123a\eP\eP + Partial match at offset 5: 123a +.P +3. Because a partial match must always contain at least one character, what +might be considered a partial match of an empty string actually gives a "no +match" result. For example: +.sp + re> /c(?<=abc)x/ + data> ab\eP + No match +.sp +If the next segment begins "cx", a match should be found, but this will only +happen if characters from the previous segment are retained. For this reason, a +"no match" result should be interpreted as "partial match of an empty string" +when the pattern contains lookbehinds. +.P +4. Matching a subject string that is split into multiple segments may not +always produce exactly the same result as matching over one single long string, +especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and +Word Boundaries" above describes an issue that arises if the pattern ends with +\eb or \eB. 
Another kind of difference may occur when there are multiple +matching possibilities, because (for PCRE_PARTIAL_SOFT) a partial match result +is given only when there are no completed matches. This means that as soon as +the shortest match has been found, continuation to a new subject segment is no +longer possible. Consider again this \fBpcretest\fP example: +.sp + re> /dog(sbody)?/ + data> dogsb\eP + 0: dog + data> do\eP\eD + Partial match: do + data> gsb\eR\eP\eD + 0: g + data> dogsbody\eD + 0: dogsbody + 1: dog +.sp +The first data line passes the string "dogsb" to a standard matching function, +setting the PCRE_PARTIAL_SOFT option. Although the string is a partial match +for "dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter +string "dog" is a complete match. Similarly, when the subject is presented to +a DFA matching function in several parts ("do" and "gsb" being the first two) +the match stops when "dog" has been found, and it is not possible to continue. +On the other hand, if "dogsbody" is presented as a single string, a DFA +matching function finds both matches. +.P +Because of these problems, it is best to use PCRE_PARTIAL_HARD when matching +multi-segment data. The example above then behaves differently: +.sp + re> /dog(sbody)?/ + data> dogsb\eP\eP + Partial match: dogsb + data> do\eP\eD + Partial match: do + data> gsb\eR\eP\eP\eD + Partial match: gsb +.sp +5. Patterns that contain alternatives at the top level which do not all start +with the same pattern item may not work as expected when PCRE_DFA_RESTART is +used. For example, consider this pattern: +.sp + 1234|3789 +.sp +If the first part of the subject is "ABC123", a partial match of the first +alternative is found at offset 3. There is no partial match for the second +alternative, because such a match does not start at the same point in the +subject string. 
Attempting to continue with the string "7890" does not yield a +match because only those alternatives that match at one point in the subject +are remembered. The problem arises because the start of the second alternative +matches within the first alternative. There is no problem with anchored +patterns or patterns such as: +.sp + 1234|ABCD +.sp +where no string can be a partial match for both alternatives. This is not a +problem if a standard matching function is used, because the entire match has +to be rerun each time: +.sp + re> /1234|3789/ + data> ABC123\eP\eP + Partial match: 123 + data> 1237890 + 0: 3789 +.sp +Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running +the entire match can also be used with the DFA matching functions. Another +possibility is to work with two buffers. If a partial match at offset \fIn\fP +in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on +the second buffer, you can then try a new match starting at offset \fIn+1\fP in +the first buffer. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 02 July 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrepattern.3 b/usr/share/man/man3/pcrepattern.3 new file mode 100755 index 000000000..f1c45cda5 --- /dev/null +++ b/usr/share/man/man3/pcrepattern.3 @@ -0,0 +1,3265 @@ +.TH PCREPATTERN 3 "08 January 2014" "PCRE 8.35" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE REGULAR EXPRESSION DETAILS" +.rs +.sp +The syntax and semantics of the regular expressions that are supported by PCRE +are described in detail below. There is a quick-reference syntax summary in the +.\" HREF +\fBpcresyntax\fP +.\" +page. PCRE tries to match Perl syntax and semantics as closely as it can. 
PCRE +also supports some alternative regular expression syntax (which does not +conflict with the Perl syntax) in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. +.P +Perl's regular expressions are described in its own documentation, and +regular expressions in general are covered in a number of books, some of which +have copious examples. Jeffrey Friedl's "Mastering Regular Expressions", +published by O'Reilly, covers regular expressions in great detail. This +description of PCRE's regular expressions is intended as reference material. +.P +This document discusses the patterns that are supported by PCRE when one of its +main matching functions, \fBpcre_exec()\fP (8-bit) or \fBpcre[16|32]_exec()\fP +(16- or 32-bit), is used. PCRE also has alternative matching functions, +\fBpcre_dfa_exec()\fP and \fBpcre[16|32]_dfa_exec()\fP, which match using a +different algorithm that is not Perl-compatible. Some of the features discussed +below are not available when DFA matching is used. The advantages and +disadvantages of the alternative functions, and how they differ from the normal +functions, are discussed in the +.\" HREF +\fBpcrematching\fP +.\" +page. +. +. +.SH "SPECIAL START-OF-PATTERN ITEMS" +.rs +.sp +A number of options that can be passed to \fBpcre_compile()\fP can also be set +by special items at the start of a pattern. These are not Perl-compatible, but +are provided to make these options accessible to pattern writers who are not +able to change the program that processes the pattern. Any number of these +items may appear, but they must all be together right at the start of the +pattern string, and the letters must be in upper case. +. +. +.SS "UTF support" +.rs +.sp +The original operation of PCRE was on strings of one-byte characters. 
However, +there is now also support for UTF-8 strings in the original library, an +extra library that supports 16-bit and UTF-16 character strings, and a +third library that supports 32-bit and UTF-32 character strings. To use these +features, PCRE must be built to include appropriate support. When using UTF +strings you must either call the compiling function with the PCRE_UTF8, +PCRE_UTF16, or PCRE_UTF32 option, or the pattern must start with one of +these special sequences: +.sp + (*UTF8) + (*UTF16) + (*UTF32) + (*UTF) +.sp +(*UTF) is a generic sequence that can be used with any of the libraries. +Starting a pattern with such a sequence is equivalent to setting the relevant +option. How setting a UTF mode affects pattern matching is mentioned in several +places below. There is also a summary of features in the +.\" HREF +\fBpcreunicode\fP +.\" +page. +.P +Some applications that allow their users to supply patterns may wish to +restrict them to non-UTF data for security reasons. If the PCRE_NEVER_UTF +option is set at compile time, (*UTF) etc. are not allowed, and their +appearance causes an error. +. +. +.SS "Unicode property support" +.rs +.sp +Another special sequence that may appear at the start of a pattern is (*UCP). +This has the same effect as setting the PCRE_UCP option: it causes sequences +such as \ed and \ew to use Unicode properties to determine character types, +instead of recognizing only characters with codes less than 128 via a lookup +table. +. +. +.SS "Disabling auto-possessification" +.rs +.sp +If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting +the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making +quantifiers possessive when what follows cannot match the repeated item. For +example, by default a+b is treated as a++b. For more details, see the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +. +. 
+.SS "Disabling start-up optimizations" +.rs +.sp +If a pattern starts with (*NO_START_OPT), it has the same effect as setting the +PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables +several optimizations for quickly reaching "no match" results. For more +details, see the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +. +. +.\" HTML <a name="newlines"></a> +.SS "Newline conventions" +.rs +.sp +PCRE supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The +.\" HREF +\fBpcreapi\fP +.\" +page has +.\" HTML <a href="pcreapi.html#newlines"> +.\" </a> +further discussion +.\" +about newlines, and shows how to set the newline convention in the +\fIoptions\fP arguments for the compiling and matching functions. +.P +It is also possible to specify a newline convention by starting a pattern +string with one of the following five sequences: +.sp + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences +.sp +These override the default and the options given to the compiling function. For +example, on a Unix system where LF is the default newline sequence, the pattern +.sp + (*CR)a.b +.sp +changes the convention to CR. That pattern matches "a\enb" because LF is no +longer a newline. If more than one of these settings is present, the last one +is used. +.P +The newline convention affects where the circumflex and dollar assertions are +true. It also affects the interpretation of the dot metacharacter when +PCRE_DOTALL is not set, and the behaviour of \eN. However, it does not affect +what the \eR escape sequence matches. By default, this is any Unicode newline +sequence, for Perl compatibility. 
However, this can be changed; see the +description of \eR in the section entitled +.\" HTML <a href="#newlineseq"> +.\" </a> +"Newline sequences" +.\" +below. A change of \eR setting can be combined with a change of newline +convention. +. +. +.SS "Setting match and recursion limits" +.rs +.sp +The caller of \fBpcre_exec()\fP can set a limit on the number of times the +internal \fBmatch()\fP function is called and on the maximum depth of +recursive calls. These facilities are provided to catch runaway matches that +are provoked by patterns with huge matching trees (a typical example is a +pattern with nested unlimited repeats) and to avoid running out of system stack +by too much recursion. When one of these limits is reached, \fBpcre_exec()\fP +gives an error return. The limits can also be set by items at the start of the +pattern of the form +.sp + (*LIMIT_MATCH=d) + (*LIMIT_RECURSION=d) +.sp +where d is any number of decimal digits. However, the value of the setting must +be less than the value set (or defaulted) by the caller of \fBpcre_exec()\fP +for it to have any effect. In other words, the pattern writer can lower the +limits set by the programmer, but not raise them. If there is more than one +setting of one of these limits, the lower value is used. +. +. +.SH "EBCDIC CHARACTER CODES" +.rs +.sp +PCRE can be compiled to run in an environment that uses EBCDIC as its character +code rather than ASCII or Unicode (typically a mainframe system). In the +sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. +. +. +.SH "CHARACTERS AND METACHARACTERS" +.rs +.sp +A regular expression is a pattern that is matched against a subject string from +left to right. Most characters stand for themselves in a pattern, and match the +corresponding characters in the subject. 
As a trivial example, the pattern +.sp + The quick brown fox +.sp +matches a portion of a subject string that is identical to itself. When +caseless matching is specified (the PCRE_CASELESS option), letters are matched +independently of case. In a UTF mode, PCRE always understands the concept of +case for characters whose values are less than 128, so caseless matching is +always possible. For characters with higher values, the concept of case is +supported if PCRE is compiled with Unicode property support, but not otherwise. +If you want to use caseless matching for characters 128 and above, you must +ensure that PCRE is compiled with Unicode property support as well as with +UTF support. +.P +The power of regular expressions comes from the ability to include alternatives +and repetitions in the pattern. These are encoded in the pattern by the use of +\fImetacharacters\fP, which do not stand for themselves but instead are +interpreted in some special way. +.P +There are two different sets of metacharacters: those that are recognized +anywhere in the pattern except within square brackets, and those that are +recognized within square brackets. Outside square brackets, the metacharacters +are as follows: +.sp + \e general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start subpattern + ) end subpattern + ? extends the meaning of ( + also 0 or 1 quantifier + also quantifier minimizer + * 0 or more quantifier + + 1 or more quantifier + also "possessive quantifier" + { start min/max quantifier +.sp +Part of a pattern that is in square brackets is called a "character class". 
In +a character class the only metacharacters are: +.sp + \e general escape character + ^ negate the class, but only if the first character + - indicates character range +.\" JOIN + [ POSIX character class (only if followed by POSIX + syntax) + ] terminates the character class +.sp +The following sections describe the use of each of the metacharacters. +. +. +.SH BACKSLASH +.rs +.sp +The backslash character has several uses. Firstly, if it is followed by a +character that is not a number or a letter, it takes away any special meaning +that character may have. This use of backslash as an escape character applies +both inside and outside character classes. +.P +For example, if you want to match a * character, you write \e* in the pattern. +This escaping action applies whether or not the following character would +otherwise be interpreted as a metacharacter, so it is always safe to precede a +non-alphanumeric with backslash to specify that it stands for itself. In +particular, if you want to match a backslash, you write \e\e. +.P +In a UTF mode, only ASCII numbers and letters have any special meaning after a +backslash. All other characters (in particular, those whose codepoints are +greater than 127) are treated as literals. +.P +If a pattern is compiled with the PCRE_EXTENDED option, most white space in the +pattern (other than in a character class), and characters between a # outside a +character class and the next newline, inclusive, are ignored. An escaping +backslash can be used to include a white space or # character as part of the +pattern. +.P +If you want to remove the special meaning from a sequence of characters, you +can do so by putting them between \eQ and \eE. This is different from Perl in +that $ and @ are handled as literals in \eQ...\eE sequences in PCRE, whereas in +Perl, $ and @ cause variable interpolation. 
Note the following examples: +.sp + Pattern PCRE matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes. +An isolated \eE that is not preceded by \eQ is ignored. If \eQ is not followed +by \eE later in the pattern, the literal interpretation continues to the end of +the pattern (that is, \eE is assumed at the end). If the isolated \eQ is inside +a character class, this causes an error, because the character class is not +terminated. +. +. +.\" HTML <a name="digitsafterbackslash"></a> +.SS "Non-printing characters" +.rs +.sp +A second use of backslash provides a way of encoding non-printing characters +in patterns in a visible manner. There is no restriction on the appearance of +non-printing characters, apart from the binary zero that terminates a pattern, +but when a pattern is being prepared by text editing, it is often easier to use +one of the following escape sequences than the binary character it represents: +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is any ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en linefeed (hex 0A) + \er carriage return (hex 0D) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or back reference + \eo{ddd..} character with octal code ddd.. + \exhh character with hex code hh + \ex{hhh..} character with hex code hhh.. (non-JavaScript mode) + \euhhhh character with hex code hhhh (JavaScript mode only) +.sp +The precise effect of \ecx on ASCII characters is as follows: if x is a lower +case letter, it is converted to upper case. Then bit 6 of the character (hex +40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is 5A), +but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). 
If the +data item (byte or 16-bit value) following \ec has a value greater than 127, a +compile-time error occurs. This locks out non-ASCII characters in all modes. +.P +The \ec facility was designed for use with ASCII characters, but with the +extension to Unicode it is even less useful than it once was. It is, however, +recognized when PCRE is compiled in EBCDIC mode, where data items are always +bytes. In this mode, all values are valid after \ec. If the next character is a +lower case letter, it is converted to upper case. Then the 0xc0 bits of the +byte are inverted. Thus \ecA becomes hex 01, as in ASCII (A is C1), but because +the EBCDIC letters are disjoint, \ecZ becomes hex 29 (Z is E9), and other +characters also generate different values. +.P +After \e0 up to two further octal digits are read. If there are fewer than two +digits, just those that are present are used. Thus the sequence \e0\ex\e07 +specifies two binary zeros followed by a BEL character (code value 7). Make +sure you supply two digits after the initial zero if the pattern character that +follows is itself an octal digit. +.P +The escape \eo must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape is a recent +addition to Perl; it provides a way of specifying character code points as octal +numbers greater than 0777, and it also allows octal numbers and back references +to be unambiguously specified. +.P +For greater clarity and unambiguity, it is best to avoid following \e by a +digit greater than zero. Instead, use \eo{} or \ex{} to specify character +numbers, and \eg{} to specify back references. The following paragraphs +describe the old, ambiguous syntax. +.P +The handling of a backslash followed by a digit other than 0 is complicated, +and Perl has changed in recent releases, causing PCRE also to change. Outside a +character class, PCRE reads the digit and any following digits as a decimal +number.
If the number is less than 8, or if there have been at least that many +previous capturing left parentheses in the expression, the entire sequence is +taken as a \fIback reference\fP. A description of how this works is given +.\" HTML <a href="#backreferences"> +.\" </a> +later, +.\" +following the discussion of +.\" HTML <a href="#subpattern"> +.\" </a> +parenthesized subpatterns. +.\" +.P +Inside a character class, or if the decimal number following \e is greater than +7 and there have not been that many capturing subpatterns, PCRE handles \e8 and +\e9 as the literal characters "8" and "9", and otherwise re-reads up to three +octal digits following the backslash, using them to generate a data character. +Any subsequent digits stand for themselves. For example: +.sp + \e040 is another way of writing an ASCII space +.\" JOIN + \e40 is the same, provided there are fewer than 40 + previous capturing subpatterns + \e7 is always a back reference +.\" JOIN + \e11 might be a back reference, or another way of + writing a tab + \e011 is always a tab + \e0113 is a tab followed by the character "3" +.\" JOIN + \e113 might be a back reference, otherwise the + character with octal code 113 +.\" JOIN + \e377 might be a back reference, otherwise + the value 255 (decimal) +.\" JOIN + \e81 is either a back reference, or the two + characters "8" and "1" +.sp +Note that octal values of 100 or greater that are specified using this syntax +must not be introduced by a leading zero, because no more than three octal +digits are ever read. +.P +By default, after \ex that is not followed by {, from zero to two hexadecimal +digits are read (letters can be in upper or lower case). Any number of +hexadecimal digits may appear between \ex{ and }. If a character other than +a hexadecimal digit appears between \ex{ and }, or if there is no terminating +}, an error occurs. 
+.P +If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is +as just described only when it is followed by two hexadecimal digits. +Otherwise, it matches a literal "x" character. In JavaScript mode, support for +code points greater than 256 is provided by \eu, which must be followed by +four hexadecimal digits; otherwise it matches a literal "u" character. +.P +Characters whose value is less than 256 can be defined by either of the two +syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the +way they are handled. For example, \exdc is exactly the same as \ex{dc} (or +\eu00dc in JavaScript mode). +. +. +.SS "Constraints on character values" +.rs +.sp +Characters that are specified using octal or hexadecimal numbers are +limited to certain values, as follows: +.sp + 8-bit non-UTF mode less than 0x100 + 8-bit UTF-8 mode less than 0x10ffff and a valid codepoint + 16-bit non-UTF mode less than 0x10000 + 16-bit UTF-16 mode less than 0x10ffff and a valid codepoint + 32-bit non-UTF mode less than 0x100000000 + 32-bit UTF-32 mode less than 0x10ffff and a valid codepoint +.sp +Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called +"surrogate" codepoints), and 0xffef. +. +. +.SS "Escape sequences in character classes" +.rs +.sp +All the sequences that define a single character value can be used both inside +and outside character classes. In addition, inside a character class, \eb is +interpreted as the backspace character (hex 08). +.P +\eN is not allowed in a character class. \eB, \eR, and \eX are not special +inside a character class. Like other unrecognized escape sequences, they are +treated as the literal characters "B", "R", and "X" by default, but cause an +error if the PCRE_EXTRA option is set. Outside a character class, these +sequences have different meanings. +. +. 
+.SS "Unsupported escape sequences" +.rs +.sp +In Perl, the sequences \el, \eL, \eu, and \eU are recognized by its string +handler and used to modify the case of following characters. By default, PCRE +does not support these escape sequences. However, if the PCRE_JAVASCRIPT_COMPAT +option is set, \eU matches a "U" character, and \eu can be used to define a +character by code point, as described in the previous section. +. +. +.SS "Absolute and relative back references" +.rs +.sp +The sequence \eg followed by an unsigned or a negative number, optionally +enclosed in braces, is an absolute or relative back reference. A named back +reference can be coded as \eg{name}. Back references are discussed +.\" HTML <a href="#backreferences"> +.\" </a> +later, +.\" +following the discussion of +.\" HTML <a href="#subpattern"> +.\" </a> +parenthesized subpatterns. +.\" +. +. +.SS "Absolute and relative subroutine calls" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for referencing a subpattern as a "subroutine". Details are discussed +.\" HTML <a href="#onigurumasubroutines"> +.\" </a> +later. +.\" +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. The former is a back reference; the latter is a +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +subroutine +.\" +call. +. +. 
+.\" HTML <a name="genericchartypes"></a> +.SS "Generic character types" +.rs +.sp +Another use of backslash is for specifying generic character types: +.sp + \ed any decimal digit + \eD any character that is not a decimal digit + \eh any horizontal white space character + \eH any character that is not a horizontal white space character + \es any white space character + \eS any character that is not a white space character + \ev any vertical white space character + \eV any character that is not a vertical white space character + \ew any "word" character + \eW any "non-word" character +.sp +There is also the single sequence \eN, which matches a non-newline character. +This is the same as +.\" HTML <a href="#fullstopdot"> +.\" </a> +the "." metacharacter +.\" +when PCRE_DOTALL is not set. Perl also uses \eN to match characters by name; +PCRE does not support this. +.P +Each pair of lower and upper case escape sequences partitions the complete set +of characters into two disjoint sets. Any given character matches one, and only +one, of each pair. The sequences can appear both inside and outside character +classes. They each match one character of the appropriate type. If the current +matching point is at the end of the subject string, all of them fail, because +there is no character to match. +.P +For compatibility with Perl, \es did not use to match the VT character (code +11), which made it different from the POSIX "space" class. However, Perl +added VT at release 5.18, and PCRE followed suit at release 8.34. The default +\es characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space +(32), which are defined as white space in the "C" locale. This list may vary if +locale-specific matching is taking place. For example, in some locales the +"non-breaking space" character (\exA0) is recognized as white space, and in +others the VT character is not. +.P +A "word" character is an underscore or any character that is a letter or digit.
+By default, the definition of letters and digits is controlled by PCRE's +low-valued character tables, and may vary if locale-specific matching is taking +place (see +.\" HTML <a href="pcreapi.html#localesupport"> +.\" </a> +"Locale support" +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 127 are used for +accented letters, and these are then matched by \ew. The use of locales with +Unicode is discouraged. +.P +By default, characters whose code points are greater than 127 never match \ed, +\es, or \ew, and always match \eD, \eS, and \eW, although this may vary for +characters in the range 128-255 when locale-specific matching is happening. +These escape sequences retain their original meanings from before Unicode +support was available, mainly for efficiency reasons. If PCRE is compiled with +Unicode property support, and the PCRE_UCP option is set, the behaviour is +changed so that Unicode properties are used to determine character types, as +follows: +.sp + \ed any character that matches \ep{Nd} (decimal digit) + \es any character that matches \ep{Z} or \eh or \ev + \ew any character that matches \ep{L} or \ep{N}, plus underscore +.sp +The upper case escapes match the inverse sets of characters. Note that \ed +matches only decimal digits, whereas \ew matches any Unicode digit, as well as +any Unicode letter, and underscore. Note also that PCRE_UCP affects \eb, and +\eB because they are defined in terms of \ew and \eW. Matching these sequences +is noticeably slower when PCRE_UCP is set. +.P +The sequences \eh, \eH, \ev, and \eV are features that were added to Perl at +release 5.10. In contrast to the other sequences, which match only ASCII +characters by default, these always match certain high-valued code points, +whether or not PCRE_UCP is set. 
The horizontal space characters are: +.sp + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space +.sp +The vertical space characters are: +.sp + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator +.sp +In 8-bit, non-UTF-8 mode, only the characters with codepoints less than 256 are +relevant. +. +. +.\" HTML <a name="newlineseq"></a> +.SS "Newline sequences" +.rs +.sp +Outside a character class, by default, the escape sequence \eR matches any +Unicode newline sequence. In 8-bit non-UTF-8 mode \eR is equivalent to the +following: +.sp + (?>\er\en|\en|\ex0b|\ef|\er|\ex85) +.sp +This is an example of an "atomic group", details of which are given +.\" HTML <a href="#atomicgroup"> +.\" </a> +below. +.\" +This particular group matches either the two-character sequence CR followed by +LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, +U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next +line, U+0085). The two-character sequence is treated as a single unit that +cannot be split. +.P +In other modes, two additional characters whose codepoints are greater than 255 +are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). +Unicode character property support is not needed for these characters to be +recognized. 
+.P +It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF +either at compile time or when the pattern is matched. (BSR is an abbreviation +for "backslash R".) This can be made the default when PCRE is built; if this is +the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option. +It is also possible to specify these settings by starting a pattern string with +one of the following sequences: +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence +.sp +These override the default and the options given to the compiling function, but +they can themselves be overridden by options given to a matching function. Note +that these special settings, which are not Perl-compatible, are recognized only +at the very start of a pattern, and that they must be in upper case. If more +than one of them is present, the last one is used. They can be combined with a +change of newline convention; for example, a pattern can start with: +.sp + (*ANY)(*BSR_ANYCRLF) +.sp +They can also be combined with the (*UTF8), (*UTF16), (*UTF32), (*UTF) or +(*UCP) special sequences. Inside a character class, \eR is treated as an +unrecognized escape sequence, and so matches the letter "R" by default, but +causes an error if PCRE_EXTRA is set. +. +. +.\" HTML <a name="uniextseq"></a> +.SS Unicode character properties +.rs +.sp +When PCRE is built with Unicode character property support, three additional +escape sequences that match characters with specific properties are available. +When in 8-bit non-UTF-8 mode, these sequences are of course limited to testing +characters whose codepoints are less than 256, but they do work in this mode.
+The extra escape sequences are: +.sp + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eX a Unicode extended grapheme cluster +.sp +The property names represented by \fIxx\fP above are limited to the Unicode +script names, the general category properties, "Any", which matches any +character (including newline), and some special PCRE properties (described +in the +.\" HTML <a href="#extraprops"> +.\" </a> +next section). +.\" +Other Perl properties such as "InMusicalSymbols" are not currently supported by +PCRE. Note that \eP{Any} does not match any characters, so always causes a +match failure. +.P +Sets of Unicode characters are defined as belonging to certain scripts. A +character from one of these sets can be matched using a script name. For +example: +.sp + \ep{Greek} + \eP{Han} +.sp +Those that are not part of an identified script are lumped together as +"Common". The current list of scripts is: +.P +Arabic, +Armenian, +Avestan, +Balinese, +Bamum, +Batak, +Bengali, +Bopomofo, +Brahmi, +Braille, +Buginese, +Buhid, +Canadian_Aboriginal, +Carian, +Chakma, +Cham, +Cherokee, +Common, +Coptic, +Cuneiform, +Cypriot, +Cyrillic, +Deseret, +Devanagari, +Egyptian_Hieroglyphs, +Ethiopic, +Georgian, +Glagolitic, +Gothic, +Greek, +Gujarati, +Gurmukhi, +Han, +Hangul, +Hanunoo, +Hebrew, +Hiragana, +Imperial_Aramaic, +Inherited, +Inscriptional_Pahlavi, +Inscriptional_Parthian, +Javanese, +Kaithi, +Kannada, +Katakana, +Kayah_Li, +Kharoshthi, +Khmer, +Lao, +Latin, +Lepcha, +Limbu, +Linear_B, +Lisu, +Lycian, +Lydian, +Malayalam, +Mandaic, +Meetei_Mayek, +Meroitic_Cursive, +Meroitic_Hieroglyphs, +Miao, +Mongolian, +Myanmar, +New_Tai_Lue, +Nko, +Ogham, +Old_Italic, +Old_Persian, +Old_South_Arabian, +Old_Turkic, +Ol_Chiki, +Oriya, +Osmanya, +Phags_Pa, +Phoenician, +Rejang, +Runic, +Samaritan, +Saurashtra, +Sharada, +Shavian, +Sinhala, +Sora_Sompeng, +Sundanese, +Syloti_Nagri, +Syriac, +Tagalog, +Tagbanwa, +Tai_Le, 
+Tai_Tham, +Tai_Viet, +Takri, +Tamil, +Telugu, +Thaana, +Thai, +Tibetan, +Tifinagh, +Ugaritic, +Vai, +Yi. +.P +Each character has exactly one Unicode general category property, specified by +a two-letter abbreviation. For compatibility with Perl, negation can be +specified by including a circumflex between the opening brace and the property +name. For example, \ep{^Lu} is the same as \eP{Lu}. +.P +If only one letter is specified with \ep or \eP, it includes all the general +category properties that start with that letter. In this case, in the absence +of negation, the curly brackets in the escape sequence are optional; these two +examples have the same effect: +.sp + \ep{L} + \epL +.sp +The following general category property codes are supported: +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +.sp +The special property L& is also supported: it matches a character that has +the Lu, Ll, or Lt property, in other words, a letter that is not classified as +a modifier or "other". +.P +The Cs (Surrogate) property applies only to characters in the range U+D800 to +U+DFFF. 
Such characters are not valid in Unicode strings and so +cannot be tested by PCRE, unless UTF validity checking has been turned off +(see the discussion of PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK and +PCRE_NO_UTF32_CHECK in the +.\" HREF +\fBpcreapi\fP +.\" +page). Perl does not support the Cs property. +.P +The long synonyms for property names that Perl supports (such as \ep{Letter}) +are not supported by PCRE, nor is it permitted to prefix any of these +properties with "Is". +.P +No character that is in the Unicode table has the Cn (unassigned) property. +Instead, this property is assumed for any code point that is not in the +Unicode table. +.P +Specifying caseless matching does not affect these escape sequences. For +example, \ep{Lu} always matches only upper case letters. This is different from +the behaviour of current versions of Perl. +.P +Matching characters by Unicode property is not fast, because PCRE has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \ed and \ew do not use Unicode +properties in PCRE by default, though you can make them do so by setting the +PCRE_UCP option or by starting the pattern with (*UCP). +. +. +.SS Extended grapheme clusters +.rs +.sp +The \eX escape matches any number of Unicode characters that form an "extended +grapheme cluster", and treats the sequence as an atomic group +.\" HTML <a href="#atomicgroup"> +.\" </a> +(see below). +.\" +Up to and including release 8.31, PCRE matched an earlier, simpler definition +that was equivalent to +.sp + (?>\ePM\epM*) +.sp +That is, it matched a character without the "mark" property, followed by zero +or more characters with the "mark" property. Characters with the "mark" +property are typically non-spacing accents that affect the preceding character. 
+.P +This simple definition was extended in Unicode to include more complicated +kinds of composite character by giving each character a grapheme breaking +property, and creating rules that use these properties to define the boundaries +of extended grapheme clusters. In releases of PCRE later than 8.31, \eX matches +one of these clusters. +.P +\eX always matches at least one character. Then it decides whether to add +additional characters according to the following rules for ending a cluster: +.P +1. End at the end of the subject string. +.P +2. Do not end between CR and LF; otherwise end after any control character. +.P +3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters +are of five types: L, V, T, LV, and LVT. An L character may be followed by an +L, V, LV, or LVT character; an LV or V character may be followed by a V or T +character; an LVT or T character may be followed only by a T character. +.P +4. Do not end before extending characters or spacing marks. Characters with +the "mark" property always have the "extend" grapheme breaking property. +.P +5. Do not end after prepend characters. +.P +6. Otherwise, end the cluster. +. +. +.\" HTML <a name="extraprops"></a> +.SS PCRE's additional properties +.rs +.sp +As well as the standard Unicode properties described above, PCRE supports four +more that make it possible to convert traditional escape sequences such as \ew +and \es to use Unicode properties. PCRE uses these non-standard, non-Perl +properties internally when PCRE_UCP is set. However, they may also be used +explicitly. These properties are: +.sp + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character +.sp +Xan matches characters that have either the L (letter) or the N (number) +property. Xps matches the characters tab, linefeed, vertical tab, form feed, or +carriage return, and any other character that has the Z (separator) property.
+Xsp is the same as Xps; it used to exclude vertical tab, for Perl +compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd +matches the same characters as Xan, plus underscore. +.P +There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH +where H is a hexadecimal digit. Note that the Xuc property does not match these +sequences but the characters that they represent.) +. +. +.\" HTML <a name="resetmatchstart"></a> +.SS "Resetting the match start" +.rs +.sp +The escape sequence \eK causes any previously matched characters not to be +included in the final matched sequence. For example, the pattern: +.sp + foo\eKbar +.sp +matches "foobar", but reports that it has matched "bar". This feature is +similar to a lookbehind assertion +.\" HTML <a href="#lookbehind"> +.\" </a> +(described below). +.\" +However, in this case, the part of the subject before the real match does not +have to be of fixed length, as lookbehind assertions do. The use of \eK does +not interfere with the setting of +.\" HTML <a href="#subpattern"> +.\" </a> +captured substrings. +.\" +For example, when the pattern +.sp + (foo)\eKbar +.sp +matches "foobar", the first substring is still set to "foo". +.P +Perl documents that the use of \eK within assertions is "not well defined". In +PCRE, \eK is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. Note that when a pattern such as (?=ab\eK) +matches, the reported start of the match can be greater than the end of the +match. +. +. 
+.\" HTML <a name="smallassertions"></a> +.SS "Simple assertions" +.rs +.sp +The final use of backslash is for certain simple assertions. An assertion +specifies a condition that has to be met at a particular point in a match, +without consuming any characters from the subject string. The use of +subpatterns for more complicated assertions is described +.\" HTML <a href="#bigassertions"> +.\" </a> +below. +.\" +The backslashed assertions are: +.sp + \eb matches at a word boundary + \eB matches when not at a word boundary + \eA matches at the start of the subject + \eZ matches at the end of the subject + also matches before a newline at the end of the subject + \ez matches only at the end of the subject + \eG matches at the first matching position in the subject +.sp +Inside a character class, \eb has a different meaning; it matches the backspace +character. If any other of these assertions appears in a character class, by +default it matches the corresponding literal character (for example, \eB +matches the letter B). However, if the PCRE_EXTRA option is set, an "invalid +escape sequence" error is generated instead. +.P +A word boundary is a position in the subject string where the current character +and the previous character do not both match \ew or \eW (i.e. one matches +\ew and the other matches \eW), or the start or end of the string if the +first or last character matches \ew, respectively. In a UTF mode, the meanings +of \ew and \eW can be changed by setting the PCRE_UCP option. When this is +done, it also affects \eb and \eB. Neither PCRE nor Perl has a separate "start +of word" or "end of word" metasequence. However, whatever follows \eb normally +determines which it is. For example, the fragment \eba matches "a" at the start +of a word. 
+.P +The \eA, \eZ, and \ez assertions differ from the traditional circumflex and +dollar (described in the next section) in that they only ever match at the very +start and end of the subject string, whatever options are set. Thus, they are +independent of multiline mode. These three assertions are not affected by the +PCRE_NOTBOL or PCRE_NOTEOL options, which affect only the behaviour of the +circumflex and dollar metacharacters. However, if the \fIstartoffset\fP +argument of \fBpcre_exec()\fP is non-zero, indicating that matching is to start +at a point other than the beginning of the subject, \eA can never match. The +difference between \eZ and \ez is that \eZ matches before a newline at the end +of the string as well as at the very end, whereas \ez matches only at the end. +.P +The \eG assertion is true only when the current matching position is at the +start point of the match, as specified by the \fIstartoffset\fP argument of +\fBpcre_exec()\fP. It differs from \eA when the value of \fIstartoffset\fP is +non-zero. By calling \fBpcre_exec()\fP multiple times with appropriate +arguments, you can mimic Perl's /g option, and it is in this kind of +implementation where \eG can be useful. +.P +Note, however, that PCRE's interpretation of \eG, as the start of the current +match, is subtly different from Perl's, which defines it as the end of the +previous match. In Perl, these can be different when the previously matched +string was empty. Because PCRE does just one match at a time, it cannot +reproduce this behaviour. +.P +If all the alternatives of a pattern begin with \eG, the expression is anchored +to the starting match position, and the "anchored" flag is set in the compiled +regular expression. +. +. +.SH "CIRCUMFLEX AND DOLLAR" +.rs +.sp +The circumflex and dollar metacharacters are zero-width assertions. That is, +they test for a particular condition being true without consuming any +characters from the subject string. 
+.P +Outside a character class, in the default matching mode, the circumflex +character is an assertion that is true only if the current matching point is at +the start of the subject string. If the \fIstartoffset\fP argument of +\fBpcre_exec()\fP is non-zero, circumflex can never match if the PCRE_MULTILINE +option is unset. Inside a character class, circumflex has an entirely different +meaning +.\" HTML <a href="#characterclass"> +.\" </a> +(see below). +.\" +.P +Circumflex need not be the first character of the pattern if a number of +alternatives are involved, but it should be the first thing in each alternative +in which it appears if the pattern is ever to match that branch. If all +possible alternatives start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is said to be an +"anchored" pattern. (There are also other constructs that can cause a pattern +to be anchored.) +.P +The dollar character is an assertion that is true only if the current matching +point is at the end of the subject string, or immediately before a newline at +the end of the string (by default). Note, however, that it does not actually +match the newline. Dollar need not be the last character of the pattern if a +number of alternatives are involved, but it should be the last item in any +branch in which it appears. Dollar has no special meaning in a character class. +.P +The meaning of dollar can be changed so that it matches only at the very end of +the string, by setting the PCRE_DOLLAR_ENDONLY option at compile time. This +does not affect the \eZ assertion. +.P +The meanings of the circumflex and dollar characters are changed if the +PCRE_MULTILINE option is set. When this is the case, a circumflex matches +immediately after internal newlines as well as at the start of the subject +string. It does not match after a newline that ends the string. 
A dollar +matches before any newlines in the string, as well as at the very end, when +PCRE_MULTILINE is set. When newline is specified as the two-character +sequence CRLF, isolated CR and LF characters do not indicate newlines. +.P +For example, the pattern /^abc$/ matches the subject string "def\enabc" (where +\en represents a newline) in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode because all branches start with +^ are not anchored in multiline mode, and a match for circumflex is possible +when the \fIstartoffset\fP argument of \fBpcre_exec()\fP is non-zero. The +PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set. +.P +Note that the sequences \eA, \eZ, and \ez can be used to match the start and +end of the subject in both modes, and if all branches of a pattern start with +\eA it is always anchored, whether or not PCRE_MULTILINE is set. +. +. +.\" HTML <a name="fullstopdot"></a> +.SH "FULL STOP (PERIOD, DOT) AND \eN" +.rs +.sp +Outside a character class, a dot in the pattern matches any one character in +the subject string except (by default) a character that signifies the end of a +line. +.P +When a line ending is defined as a single character, dot never matches that +character; when the two-character sequence CRLF is used, dot does not match CR +if it is immediately followed by LF, but otherwise it matches all characters +(including isolated CRs and LFs). When any Unicode line endings are being +recognized, dot does not match CR or LF or any of the other line ending +characters. +.P +The behaviour of dot with regard to newlines can be changed. If the PCRE_DOTALL +option is set, a dot matches any one character, without exception. If the +two-character sequence CRLF is present in the subject string, it takes two dots +to match it. +.P +The handling of dot is entirely independent of the handling of circumflex and +dollar, the only relationship being that they both involve newlines. 
Dot has no +special meaning in a character class. +.P +The escape sequence \eN behaves like a dot, except that it is not affected by +the PCRE_DOTALL option. In other words, it matches any character except one +that signifies the end of a line. Perl also uses \eN to match characters by +name; PCRE does not support this. +. +. +.SH "MATCHING A SINGLE DATA UNIT" +.rs +.sp +Outside a character class, the escape sequence \eC matches any one data unit, +whether or not a UTF mode is set. In the 8-bit library, one data unit is one +byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is +a 32-bit unit. Unlike a dot, \eC always +matches line-ending characters. The feature is provided in Perl in order to +match individual bytes in UTF-8 mode, but it is unclear how it can usefully be +used. Because \eC breaks up characters into individual data units, matching one +unit with \eC in a UTF mode means that the rest of the string may start with a +malformed UTF character. This has undefined results, because PCRE assumes that +it is dealing with valid UTF strings (and by default it checks this at the +start of processing unless the PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or +PCRE_NO_UTF32_CHECK option is used). +.P +PCRE does not allow \eC to appear in lookbehind assertions +.\" HTML <a href="#lookbehind"> +.\" </a> +(described below) +.\" +in a UTF mode, because this would make it impossible to calculate the length of +the lookbehind. +.P +In general, the \eC escape sequence is best avoided. 
However, one +way of using it that avoids the problem of malformed UTF characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): +.sp + (?| (?=[\ex00-\ex7f])(\eC) | + (?=[\ex80-\ex{7ff}])(\eC)(\eC) | + (?=[\ex{800}-\ex{ffff}])(\eC)(\eC)(\eC) | + (?=[\ex{10000}-\ex{1fffff}])(\eC)(\eC)(\eC)(\eC)) +.sp +A group that starts with (?| resets the capturing parentheses numbers in each +alternative (see +.\" HTML <a href="#dupsubpatternnumber"> +.\" </a> +"Duplicate Subpattern Numbers" +.\" +below). The assertions at the start of each branch check the next UTF-8 +character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The +character's individual bytes are then captured by the appropriate number of +groups. +. +. +.\" HTML <a name="characterclass"></a> +.SH "SQUARE BRACKETS AND CHARACTER CLASSES" +.rs +.sp +An opening square bracket introduces a character class, terminated by a closing +square bracket. A closing square bracket on its own is not special by default. +However, if the PCRE_JAVASCRIPT_COMPAT option is set, a lone closing square +bracket causes a compile-time error. If a closing square bracket is required as +a member of the class, it should be the first data character in the class +(after an initial circumflex, if present) or escaped with a backslash. +.P +A character class matches a single character in the subject. In a UTF mode, the +character may be more than one data unit long. A matched character must be in +the set of characters defined by the class, unless the first character in the +class definition is a circumflex, in which case the subject character must not +be in the set defined by the class. If a circumflex is actually required as a +member of the class, ensure it is not the first character, or escape it with a +backslash. 
+.P +For example, the character class [aeiou] matches any lower case vowel, while +[^aeiou] matches any character that is not a lower case vowel. Note that a +circumflex is just a convenient notation for specifying the characters that +are in the class by enumerating those that are not. A class that starts with a +circumflex is not an assertion; it still consumes a character from the subject +string, and therefore it fails if the current pointer is at the end of the +string. +.P +In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255 (0xffff) +can be included in a class as a literal string of data units, or by using the +\ex{ escaping mechanism. +.P +When caseless matching is set, any letters in a class represent both their +upper case and lower case versions, so for example, a caseless [aeiou] matches +"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a +caseful version would. In a UTF mode, PCRE always understands the concept of +case for characters whose values are less than 128, so caseless matching is +always possible. For characters with higher values, the concept of case is +supported if PCRE is compiled with Unicode property support, but not otherwise. +If you want to use caseless matching in a UTF mode for characters 128 and +above, you must ensure that PCRE is compiled with Unicode property support as +well as with UTF support. +.P +Characters that might indicate line breaks are never treated in any special way +when matching character classes, whatever line-ending sequence is in use, and +whatever setting of the PCRE_DOTALL and PCRE_MULTILINE options is used. A class +such as [^a] always matches one of these characters. +.P +The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. 
If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, or +immediately after a range. For example, [b-d-z] matches letters in the range b +to d, a hyphen character, or z. +.P +It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of range, so [W-\e]46] is interpreted as a class containing a range +followed by two other characters. The octal or hexadecimal representation of +"]" can also be used to end a range. +.P +An error is generated if a POSIX character class (see below) or an escape +sequence other than one that defines a single character appears at a point +where a range ending character is expected. For example, [z-\exff] is valid, +but [A-\ed] and [A-[:digit:]] are not. +.P +Ranges operate in the collating sequence of character values. They can also be +used for characters specified numerically, for example [\e000-\e037]. Ranges +can include any characters that are valid for the current mode. +.P +If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\exc8-\excb] matches accented E +characters in both cases. In UTF modes, PCRE supports the concept of case for +characters with values greater than 127 only when it is compiled with Unicode +property support.
+.P +The character escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, \eS, \ev, +\eV, \ew, and \eW may appear in a character class, and add the characters that +they match to the class. For example, [\edABCDEF] matches any hexadecimal +digit. In UTF modes, the PCRE_UCP option affects the meanings of \ed, \es, \ew +and their upper case partners, just as it does when they appear outside a +character class, as described in the section entitled +.\" HTML <a href="#genericchartypes"> +.\" </a> +"Generic character types" +.\" +above. The escape sequence \eb has a different meaning inside a character +class; it matches the backspace character. The sequences \eB, \eN, \eR, and \eX +are not special inside a character class. Like any other unrecognized escape +sequences, they are treated as the literal characters "B", "N", "R", and "X" by +default, but cause an error if the PCRE_EXTRA option is set. +.P +A circumflex can conveniently be used with the upper case character types to +specify a more restricted set of characters than the matching lower case type. +For example, the class [^\eW_] matches any letter or digit, but not underscore, +whereas [\ew] includes underscore. A positive character class should be read as +"something OR something OR ..." and a negative class as "NOT something AND NOT +something AND NOT ...". +.P +The only metacharacters that are recognized in character classes are backslash, +hyphen (only where it can be interpreted as specifying a range), circumflex +(only at the start), opening square bracket (only when it can be interpreted as +introducing a POSIX class name, or for a special compatibility feature - see +the next two sections), and the terminating closing square bracket. However, +escaping other non-alphanumeric characters does no harm. +. +. +.SH "POSIX CHARACTER CLASSES" +.rs +.sp +Perl supports the POSIX notation for character classes. This uses names +enclosed by [: and :] within the enclosing square brackets. 
PCRE also supports +this notation. For example, +.sp + [01[:alpha:]%] +.sp +matches "0", "1", any alphabetic character, or "%". The supported class names +are: +.sp + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \ed) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \es from PCRE 8.34) + upper upper case letters + word "word" characters (same as \ew) + xdigit hexadecimal digits +.sp +The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), +and space (32). If locale-specific matching is taking place, the list of space +characters may be different; there may be fewer or more of them. "Space" used +to be different to \es, which did not include VT, for Perl compatibility. +However, Perl changed at release 5.18, and PCRE followed at release 8.34. +"Space" and \es now match the same set of characters. +.P +The name "word" is a Perl extension, and "blank" is a GNU extension from Perl +5.8. Another Perl extension is negation, which is indicated by a ^ character +after the colon. For example, +.sp + [12[:^digit:]] +.sp +matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX +syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not +supported, and an error is given if they are encountered. +.P +By default, characters with values greater than 127 do not match any of the +POSIX character classes. However, if the PCRE_UCP option is passed to +\fBpcre_compile()\fP, some of the classes are changed so that Unicode character +properties are used.
This is achieved by replacing certain POSIX classes by +other sequences, as follows: +.sp + [:alnum:] becomes \ep{Xan} + [:alpha:] becomes \ep{L} + [:blank:] becomes \eh + [:digit:] becomes \ep{Nd} + [:lower:] becomes \ep{Ll} + [:space:] becomes \ep{Xps} + [:upper:] becomes \ep{Lu} + [:word:] becomes \ep{Xwd} +.sp +Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX +classes are handled specially in UCP mode: +.TP 10 +[:graph:] +This matches characters that have glyphs that mark the page when printed. In +Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf +properties, except for: +.sp + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s +.sp +.TP 10 +[:print:] +This matches the same characters as [:graph:] plus space characters that are +not controls, that is, characters with the Zs property. +.TP 10 +[:punct:] +This matches all characters that have the Unicode P (punctuation) property, +plus those characters whose code points are less than 128 that have the S +(Symbol) property. +.P +The other POSIX classes are unchanged, and match only characters with code +points less than 128. +. +. +.SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES" +.rs +.sp +In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly +syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of +word". PCRE treats these items as follows: +.sp + [[:<:]] is converted to \eb(?=\ew) + [[:>:]] is converted to \eb(?<=\ew) +.sp +Only these exact character sequences are recognized. A sequence such as +[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is +not compatible with Perl. It is provided to help migrations from other +environments, and is best not used in any new patterns.
Note that \eb matches +at the start and the end of a word (see +.\" HTML <a href="#smallassertions"> +.\" </a> +"Simple assertions" +.\" +above), and in a Perl-style pattern the preceding or following character +normally shows which is wanted, without the need for the assertions that are +used above in order to give exactly the POSIX behaviour. +. +. +.SH "VERTICAL BAR" +.rs +.sp +Vertical bar characters are used to separate alternative patterns. For example, +the pattern +.sp + gilbert|sullivan +.sp +matches either "gilbert" or "sullivan". Any number of alternatives may appear, +and an empty alternative is permitted (matching the empty string). The matching +process tries each alternative in turn, from left to right, and the first one +that succeeds is used. If the alternatives are within a subpattern +.\" HTML <a href="#subpattern"> +.\" </a> +(defined below), +.\" +"succeeds" means matching the rest of the main pattern as well as the +alternative in the subpattern. +. +. +.SH "INTERNAL OPTION SETTING" +.rs +.sp +The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and +PCRE_EXTENDED options (which are Perl-compatible) can be changed from within +the pattern by a sequence of Perl option letters enclosed between "(?" and ")". +The option letters are +.sp + i for PCRE_CASELESS + m for PCRE_MULTILINE + s for PCRE_DOTALL + x for PCRE_EXTENDED +.sp +For example, (?im) sets caseless, multiline matching. It is also possible to +unset these options by preceding the letter with a hyphen, and a combined +setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and +PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also +permitted. If a letter appears both before and after the hyphen, the option is +unset. +.P +The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA can be +changed in the same way as the Perl-compatible options by using the characters +J, U and X respectively. 
+.P +When one of these option changes occurs at top level (that is, not inside +subpattern parentheses), the change applies to the remainder of the pattern +that follows. If the change is placed right at the start of a pattern, PCRE +extracts it into the global options (and it will therefore show up in data +extracted by the \fBpcre_fullinfo()\fP function). +.P +An option change within a subpattern (see below for a description of +subpatterns) affects only that part of the subpattern that follows it, so +.sp + (a(?i)b)c +.sp +matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used). +By this means, options can be made to have different settings in different +parts of the pattern. Any changes made in one alternative do carry on +into subsequent branches within the same subpattern. For example, +.sp + (a(?i)b|c) +.sp +matches "ab", "aB", "c", and "C", even though when matching "C" the first +branch is abandoned before the option setting. This is because the effects of +option settings happen at compile time. There would be some very weird +behaviour otherwise. +.P +\fBNote:\fP There are other PCRE-specific options that can be set by the +application when the compiling or matching functions are called. In some cases +the pattern can contain special leading sequences such as (*CRLF) to override +what the application has set or what has been defaulted. Details are given in +the section entitled +.\" HTML <a href="#newlineseq"> +.\" </a> +"Newline sequences" +.\" +above. There are also the (*UTF8), (*UTF16), (*UTF32), and (*UCP) leading +sequences that can be used to set UTF and Unicode property modes; they are +equivalent to setting the PCRE_UTF8, PCRE_UTF16, PCRE_UTF32 and the PCRE_UCP +options, respectively. The (*UTF) sequence is a generic version that can be +used with any of the libraries. However, the application can set the +PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences. +. +.
+.\" HTML <a name="subpattern"></a> +.SH SUBPATTERNS +.rs +.sp +Subpatterns are delimited by parentheses (round brackets), which can be nested. +Turning part of a pattern into a subpattern does two things: +.sp +1. It localizes a set of alternatives. For example, the pattern +.sp + cat(aract|erpillar|) +.sp +matches "cataract", "caterpillar", or "cat". Without the parentheses, it would +match "cataract", "erpillar" or an empty string. +.sp +2. It sets up the subpattern as a capturing subpattern. This means that, when +the whole pattern matches, that portion of the subject string that matched the +subpattern is passed back to the caller via the \fIovector\fP argument of the +matching function. (This applies only to the traditional matching functions; +the DFA matching functions do not support capturing.) +.P +Opening parentheses are counted from left to right (starting from 1) to obtain +numbers for the capturing subpatterns. For example, if the string "the red +king" is matched against the pattern +.sp + the ((red|white) (king|queen)) +.sp +the captured substrings are "red king", "red", and "king", and are numbered 1, +2, and 3, respectively. +.P +The fact that plain parentheses fulfil two functions is not always helpful. +There are often times when a grouping subpattern is required without a +capturing requirement. If an opening parenthesis is followed by a question mark +and a colon, the subpattern does not do any capturing, and is not counted when +computing the number of any subsequent capturing subpatterns. For example, if +the string "the white queen" is matched against the pattern +.sp + the ((?:red|white) (king|queen)) +.sp +the captured substrings are "white queen" and "queen", and are numbered 1 and +2. The maximum number of capturing subpatterns is 65535. +.P +As a convenient shorthand, if any option settings are required at the start of +a non-capturing subpattern, the option letters may appear between the "?" and +the ":". 
Thus the two patterns +.sp + (?i:saturday|sunday) + (?:(?i)saturday|sunday) +.sp +match exactly the same set of strings. Because alternative branches are tried +from left to right, and options are not reset until the end of the subpattern +is reached, an option setting in one branch does affect subsequent branches, so +the above patterns match "SUNDAY" as well as "Saturday". +. +. +.\" HTML <a name="dupsubpatternnumber"></a> +.SH "DUPLICATE SUBPATTERN NUMBERS" +.rs +.sp +Perl 5.10 introduced a feature whereby each alternative in a subpattern uses +the same numbers for its capturing parentheses. Such a subpattern starts with +(?| and is itself a non-capturing subpattern. For example, consider this +pattern: +.sp + (?|(Sat)ur|(Sun))day +.sp +Because the two alternatives are inside a (?| group, both sets of capturing +parentheses are numbered one. Thus, when the pattern matches, you can look +at captured substring number one, whichever alternative matched. This construct +is useful when you want to capture part, but not all, of one of a number of +alternatives. Inside a (?| group, parentheses are numbered as usual, but the +number is reset at the start of each branch. The numbers of any capturing +parentheses that follow the subpattern start after the highest number used in +any branch. The following example is taken from the Perl documentation. The +numbers underneath show in which buffer the captured content will be stored. +.sp + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 +.sp +A back reference to a numbered subpattern uses the most recent value that is +set for that number by any subpattern. The following pattern matches "abcabc" +or "defdef": +.sp + /(?|(abc)|(def))\e1/ +.sp +In contrast, a subroutine call to a numbered subpattern always refers to the +first one in the pattern with the given number. 
The following pattern matches +"abcabc" or "defabc": +.sp + /(?|(abc)|(def))(?1)/ +.sp +If a +.\" HTML <a href="#conditions"> +.\" </a> +condition test +.\" +for a subpattern's having matched refers to a non-unique number, the test is +true if any of the subpatterns of that number have matched. +.P +An alternative approach to using this "branch reset" feature is to use +duplicate named subpatterns, as described in the next section. +. +. +.SH "NAMED SUBPATTERNS" +.rs +.sp +Identifying capturing parentheses by number is simple, but it can be very hard +to keep track of the numbers in complicated regular expressions. Furthermore, +if an expression is modified, the numbers may change. To help with this +difficulty, PCRE supports the naming of subpatterns. This feature was not +added to Perl until release 5.10. Python had the feature earlier, and PCRE +introduced it at release 4.0, using the Python syntax. PCRE now supports both +the Perl and the Python syntax. Perl allows identically numbered subpatterns to +have different names, but PCRE does not. +.P +In PCRE, a subpattern can be named in one of three ways: (?<name>...) or +(?'name'...) as in Perl, or (?P<name>...) as in Python. References to capturing +parentheses from other parts of the pattern, such as +.\" HTML <a href="#backreferences"> +.\" </a> +back references, +.\" +.\" HTML <a href="#recursion"> +.\" </a> +recursion, +.\" +and +.\" HTML <a href="#conditions"> +.\" </a> +conditions, +.\" +can be made by name as well as by number. +.P +Names consist of up to 32 alphanumeric characters and underscores, but must +start with a non-digit. Named capturing parentheses are still allocated numbers +as well as names, exactly as if the names were not present. The PCRE API +provides function calls for extracting the name-to-number translation table +from a compiled pattern. There is also a convenience function for extracting a +captured substring by name. 
+.P +By default, a name must be unique within a pattern, but it is possible to relax +this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate +names are also always permitted for subpatterns with the same number, set up as +described in the previous section.) Duplicate names can be useful for patterns +where only one instance of the named parentheses can match. Suppose you want to +match the name of a weekday, either as a 3-letter abbreviation or as the full +name, and in both cases you want to extract the abbreviation. This pattern +(ignoring the line breaks) does the job: +.sp + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? +.sp +There are five capturing substrings, but only one is ever set after a match. +(An alternative way of solving this problem is to use a "branch reset" +subpattern, as described in the previous section.) +.P +The convenience function for extracting the data by name returns the substring +for the first (and in this example, the only) subpattern of that name that +matched. This saves searching to find which numbered subpattern it was. +.P +If you make a back reference to a non-unique named subpattern from elsewhere in +the pattern, the subpatterns to which the name refers are checked in the order +in which they appear in the overall pattern. The first one that is set is used +for the reference. For example, this pattern matches both "foofoo" and +"barbar" but not "foobar" or "barfoo": +.sp + (?:(?<n>foo)|(?<n>bar))\ek<n> +.sp +.P +If you make a subroutine call to a non-unique named subpattern, the one that +corresponds to the first occurrence of the name is used. In the absence of +duplicate numbers (see the previous section) this is the one with the lowest +number. 
+.P +If you use a named reference in a condition +test (see the +.\" +.\" HTML <a href="#conditions"> +.\" </a> +section about conditions +.\" +below), either to check whether a subpattern has matched, or to check for +recursion, all subpatterns with the same name are tested. If the condition is +true for any one of them, the overall condition is true. This is the same +behaviour as testing by number. For further details of the interfaces for +handling named subpatterns, see the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +.P +\fBWarning:\fP You cannot use different names to distinguish between two +subpatterns with the same number because PCRE uses only the numbers when +matching. For this reason, an error is given at compile time if different names +are given to subpatterns with the same number. However, you can always give the +same name to subpatterns with the same number, even when PCRE_DUPNAMES is not +set. +. +. +.SH REPETITION +.rs +.sp +Repetition is specified by quantifiers, which can follow any of the following +items: +.sp + a literal data character + the dot metacharacter + the \eC escape sequence + the \eX escape sequence + the \eR escape sequence + an escape such as \ed or \epL that matches a single character + a character class + a back reference (see next section) + a parenthesized subpattern (including assertions) + a subroutine call to a subpattern (recursive or otherwise) +.sp +The general repetition quantifier specifies a minimum and maximum number of +permitted matches, by giving the two numbers in curly brackets (braces), +separated by a comma. The numbers must be less than 65536, and the first must +be less than or equal to the second. For example: +.sp + z{2,4} +.sp +matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special +character. 
If the second number is omitted, but the comma is present, there is +no upper limit; if the second number and the comma are both omitted, the +quantifier specifies an exact number of required matches. Thus +.sp + [aeiou]{3,} +.sp +matches at least 3 successive vowels, but may match many more, while +.sp + \ed{8} +.sp +matches exactly 8 digits. An opening curly bracket that appears in a position +where a quantifier is not allowed, or one that does not match the syntax of a +quantifier, is taken as a literal character. For example, {,6} is not a +quantifier, but a literal string of four characters. +.P +In UTF modes, quantifiers apply to characters rather than to individual data +units. Thus, for example, \ex{100}{2} matches two characters, each of +which is represented by a two-byte sequence in a UTF-8 string. Similarly, +\eX{3} matches three Unicode extended grapheme clusters, each of which may be +several data units long (and they may be of different lengths). +.P +The quantifier {0} is permitted, causing the expression to behave as if the +previous item and the quantifier were not present. This may be useful for +subpatterns that are referenced as +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +subroutines +.\" +from elsewhere in the pattern (but see also the section entitled +.\" HTML <a href="#subdefine"> +.\" </a> +"Defining subpatterns for use by reference only" +.\" +below). Items other than subpatterns that have a {0} quantifier are omitted +from the compiled pattern. +.P +For convenience, the three most common quantifiers have single-character +abbreviations: +.sp + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} +.sp +It is possible to construct infinite loops by following a subpattern that can +match no characters with a quantifier that has no upper limit, for example: +.sp + (a?)* +.sp +Earlier versions of Perl and PCRE used to give an error at compile time for +such patterns. 
However, because there are cases where this can be useful, such +patterns are now accepted, but if any repetition of the subpattern does in fact +match no characters, the loop is forcibly broken. +.P +By default, the quantifiers are "greedy", that is, they match as much as +possible (up to the maximum number of permitted times), without causing the +rest of the pattern to fail. The classic example of where this gives problems +is in trying to match comments in C programs. These appear between /* and */ +and within the comment, individual * and / characters may appear. An attempt to +match C comments by applying the pattern +.sp + /\e*.*\e*/ +.sp +to the string +.sp + /* first comment */ not comment /* second comment */ +.sp +fails, because it matches the entire string owing to the greediness of the .* +item. +.P +However, if a quantifier is followed by a question mark, it ceases to be +greedy, and instead matches the minimum number of times possible, so the +pattern +.sp + /\e*.*?\e*/ +.sp +does the right thing with the C comments. The meaning of the various +quantifiers is not otherwise changed, just the preferred number of matches. +Do not confuse this use of question mark with its use as a quantifier in its +own right. Because it has two uses, it can sometimes appear doubled, as in +.sp + \ed??\ed +.sp +which matches one digit by preference, but can match two if that is the only +way the rest of the pattern matches. +.P +If the PCRE_UNGREEDY option is set (an option that is not available in Perl), +the quantifiers are not greedy by default, but individual ones can be made +greedy by following them with a question mark. In other words, it inverts the +default behaviour. +.P +When a parenthesized subpattern is quantified with a minimum repeat count that +is greater than 1 or with a limited maximum, more memory is required for the +compiled pattern, in proportion to the size of the minimum or maximum. 
+.P +If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent +to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is +implicitly anchored, because whatever follows will be tried against every +character position in the subject string, so there is no point in retrying the +overall match at any position after the first. PCRE normally treats such a +pattern as though it were preceded by \eA. +.P +In cases where it is known that the subject string contains no newlines, it is +worth setting PCRE_DOTALL in order to obtain this optimization, or +alternatively using ^ to indicate anchoring explicitly. +.P +However, there are some cases where the optimization cannot be used. When .* +is inside capturing parentheses that are the subject of a back reference +elsewhere in the pattern, a match at the start may fail where a later one +succeeds. Consider, for example: +.sp + (.*)abc\e1 +.sp +If the subject is "xyz123abc123" the match point is the fourth character. For +this reason, such a pattern is not implicitly anchored. +.P +Another case where implicit anchoring is not applied is when the leading .* is +inside an atomic group. Once again, a match at the start may fail where a later +one succeeds. Consider this pattern: +.sp + (?>.*?a)b +.sp +It matches "ab" in the subject "aab". The use of the backtracking control verbs +(*PRUNE) and (*SKIP) also disables this optimization. +.P +When a capturing subpattern is repeated, the value captured is the substring +that matched the final iteration. For example, after +.sp + (tweedle[dume]{3}\es*)+ +.sp +has matched "tweedledum tweedledee" the value of the captured substring is +"tweedledee". However, if there are nested capturing subpatterns, the +corresponding captured values may have been set in previous iterations. For +example, after +.sp + /(a|(b))+/ +.sp +matches "aba" the value of the second captured substring is "b". +. +.
+.\" HTML <a name="atomicgroup"></a> +.SH "ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS" +.rs +.sp +With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") +repetition, failure of what follows normally causes the repeated item to be +re-evaluated to see if a different number of repeats allows the rest of the +pattern to match. Sometimes it is useful to prevent this, either to change the +nature of the match, or to cause it to fail earlier than it otherwise might, when +the author of the pattern knows there is no point in carrying on. +.P +Consider, for example, the pattern \ed+foo when applied to the subject line +.sp + 123456bar +.sp +After matching all 6 digits and then failing to match "foo", the normal +action of the matcher is to try again with only 5 digits matching the \ed+ +item, and then with 4, and so on, before ultimately failing. "Atomic grouping" +(a term taken from Jeffrey Friedl's book) provides the means for specifying +that once a subpattern has matched, it is not to be re-evaluated in this way. +.P +If we use atomic grouping for the previous example, the matcher gives up +immediately on failing to match "foo" the first time. The notation is a kind of +special parenthesis, starting with (?> as in this example: +.sp + (?>\ed+)foo +.sp +This kind of parenthesis "locks up" the part of the pattern it contains once +it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. +.P +An alternative description is that a subpattern of this type matches the string +of characters that an identical standalone pattern would match, if anchored at +the current point in the subject string. +.P +Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as +the above example can be thought of as a maximizing repeat that must swallow +everything it can. So, while both \ed+ and \ed+?
are prepared to adjust the +number of digits they match in order to make the rest of the pattern match, +(?>\ed+) can only match an entire sequence of digits. +.P +Atomic groups in general can of course contain arbitrarily complicated +subpatterns, and can be nested. However, when the subpattern for an atomic +group is just a single repeated item, as in the example above, a simpler +notation, called a "possessive quantifier" can be used. This consists of an +additional + character following a quantifier. Using this notation, the +previous example can be rewritten as +.sp + \ed++foo +.sp +Note that a possessive quantifier can be used with an entire group, for +example: +.sp + (abc|xyz){2,3}+ +.sp +Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY +option is ignored. They are a convenient notation for the simpler forms of +atomic group. However, there is no difference in the meaning of a possessive +quantifier and the equivalent atomic group, though there may be a performance +difference; possessive quantifiers should be slightly faster. +.P +The possessive quantifier syntax is an extension to the Perl 5.8 syntax. +Jeffrey Friedl originated the idea (and the name) in the first edition of his +book. Mike McCloskey liked it, so implemented it when he built Sun's Java +package, and PCRE copied it from there. It ultimately found its way into Perl +at release 5.10. +.P +PCRE has an optimization that automatically "possessifies" certain simple +pattern constructs. For example, the sequence A+B is treated as A++B because +there is no point in backtracking into a sequence of A's when B must follow. +.P +When a pattern contains an unlimited repeat inside a subpattern that can itself +be repeated an unlimited number of times, the use of an atomic group is the +only way to avoid some failing matches taking a very long time indeed. The +pattern +.sp + (\eD+|<\ed+>)*[!?] 
+.sp +matches an unlimited number of substrings that either consist of non-digits, or +digits enclosed in <>, followed by either ! or ?. When it matches, it runs +quickly. However, if it is applied to +.sp + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +.sp +it takes a long time before reporting failure. This is because the string can +be divided between the internal \eD+ repeat and the external * repeat in a +large number of ways, and all have to be tried. (The example uses [!?] rather +than a single character at the end, because both PCRE and Perl have an +optimization that allows for fast failure when a single character is used. They +remember the last single character that is required for a match, and fail early +if it is not present in the string.) If the pattern is changed so that it uses +an atomic group, like this: +.sp + ((?>\eD+)|<\ed+>)*[!?] +.sp +sequences of non-digits cannot be broken, and failure happens quickly. +. +. +.\" HTML <a name="backreferences"></a> +.SH "BACK REFERENCES" +.rs +.sp +Outside a character class, a backslash followed by a digit greater than 0 (and +possibly further digits) is a back reference to a capturing subpattern earlier +(that is, to its left) in the pattern, provided there have been that many +previous capturing left parentheses. +.P +However, if the decimal number following the backslash is less than 10, it is +always taken as a back reference, and causes an error only if there are not +that many capturing left parentheses in the entire pattern. In other words, the +parentheses that are referenced need not be to the left of the reference for +numbers less than 10. A "forward back reference" of this type can make sense +when a repetition is involved and the subpattern to the right has participated +in an earlier iteration. 
+.P +It is not possible to have a numerical "forward back reference" to a subpattern +whose number is 10 or more using this syntax because a sequence such as \e50 is +interpreted as a character defined in octal. See the subsection entitled +"Non-printing characters" +.\" HTML <a href="#digitsafterbackslash"> +.\" </a> +above +.\" +for further details of the handling of digits following a backslash. There is +no such problem when named parentheses are used. A back reference to any +subpattern is possible using named parentheses (see below). +.P +Another way of avoiding the ambiguity inherent in the use of digits following a +backslash is to use the \eg escape sequence. This escape must be followed by an +unsigned number or a negative number, optionally enclosed in braces. These +examples are all identical: +.sp + (ring), \e1 + (ring), \eg1 + (ring), \eg{1} +.sp +An unsigned number specifies an absolute reference without the ambiguity that +is present in the older syntax. It is also useful when literal digits follow +the reference. A negative number is a relative reference. Consider this +example: +.sp + (abc(def)ghi)\eg{-1} +.sp +The sequence \eg{-1} is a reference to the most recently started capturing +subpattern before \eg, that is, it is equivalent to \e2 in this example. +Similarly, \eg{-2} would be equivalent to \e1. The use of relative references +can be helpful in long patterns, and also in patterns that are created by +joining together fragments that contain references within themselves. +.P +A back reference matches whatever actually matched the capturing subpattern in +the current subject string, rather than anything matching the subpattern +itself (see +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +"Subpatterns as subroutines" +.\" +below for a way of doing that). So the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". 
If caseful matching is in force at the time of the +back reference, the case of letters is relevant. For example, +.sp + ((?i)rah)\es+\e1 +.sp +matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original +capturing subpattern is matched caselessly. +.P +There are several different ways of writing back references to named +subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or +\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified +back reference syntax, in which \eg can be used for both numeric and named +references, is also supported. We could rewrite the above example in any of +the following ways: +.sp + (?<p1>(?i)rah)\es+\ek<p1> + (?'p1'(?i)rah)\es+\ek{p1} + (?P<p1>(?i)rah)\es+(?P=p1) + (?<p1>(?i)rah)\es+\eg{p1} +.sp +A subpattern that is referenced by name may appear in the pattern before or +after the reference. +.P +There may be more than one back reference to the same subpattern. If a +subpattern has not actually been used in a particular match, any back +references to it always fail by default. For example, the pattern +.sp + (a|(bc))\e2 +.sp +always fails if it starts to match "a" rather than "bc". However, if the +PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back reference to an +unset value matches an empty string. +.P +Because there may be many capturing parentheses in a pattern, all digits +following a backslash are taken as part of a potential back reference number. +If the pattern continues with a digit character, some delimiter must be used to +terminate the back reference. If the PCRE_EXTENDED option is set, this can be +white space. Otherwise, the \eg{ syntax or an empty comment (see +.\" HTML <a href="#comments"> +.\" </a> +"Comments" +.\" +below) can be used. +. +.SS "Recursive back references" +.rs +.sp +A back reference that occurs inside the parentheses to which it refers fails +when the subpattern is first used, so, for example, (a\e1) never matches. 
+However, such references can be useful inside repeated subpatterns. For +example, the pattern +.sp + (a|b\e1)+ +.sp +matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of +the subpattern, the back reference matches the character string corresponding +to the previous iteration. In order for this to work, the pattern must be such +that the first iteration does not need to match the back reference. This can be +done using alternation, as in the example above, or by a quantifier with a +minimum of zero. +.P +Back references of this type cause the group that they reference to be treated +as an +.\" HTML <a href="#atomicgroup"> +.\" </a> +atomic group. +.\" +Once the whole group has been matched, a subsequent matching failure cannot +cause backtracking into the middle of the group. +. +. +.\" HTML <a name="bigassertions"></a> +.SH ASSERTIONS +.rs +.sp +An assertion is a test on the characters following or preceding the current +matching point that does not actually consume any characters. The simple +assertions coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described +.\" HTML <a href="#smallassertions"> +.\" </a> +above. +.\" +.P +More complicated assertions are coded as subpatterns. There are two kinds: +those that look ahead of the current position in the subject string, and those +that look behind it. An assertion subpattern is matched in the normal way, +except that it does not cause the current matching position to be changed. +.P +Assertion subpatterns are not capturing subpatterns. If such an assertion +contains capturing subpatterns within it, these are counted for the purposes of +numbering the capturing subpatterns in the whole pattern. However, substring +capturing is carried out only for positive assertions. (Perl sometimes, but not +always, does do capturing in negative assertions.) 
+.P +For compatibility with Perl, assertion subpatterns may be repeated; though +it makes no sense to assert the same thing several times, the side effect of +capturing parentheses may occasionally be useful. In practice, there are only three +cases: +.sp +(1) If the quantifier is {0}, the assertion is never obeyed during matching. +However, it may contain internal capturing parenthesized groups that are called +from elsewhere via the +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +subroutine mechanism. +.\" +.sp +(2) If the quantifier is {0,n} where n is greater than zero, it is treated as if it +were {0,1}. At run time, the rest of the pattern match is tried with and +without the assertion, the order depending on the greediness of the quantifier. +.sp +(3) If the minimum repetition is greater than zero, the quantifier is ignored. +The assertion is obeyed just once when encountered during matching. +. +. +.SS "Lookahead assertions" +.rs +.sp +Lookahead assertions start with (?= for positive assertions and (?! for +negative assertions. For example, +.sp + \ew+(?=;) +.sp +matches a word followed by a semicolon, but does not include the semicolon in +the match, and +.sp + foo(?!bar) +.sp +matches any occurrence of "foo" that is not followed by "bar". Note that the +apparently similar pattern +.sp + (?!foo)bar +.sp +does not find an occurrence of "bar" that is preceded by something other than +"foo"; it finds any occurrence of "bar" whatsoever, because the assertion +(?!foo) is always true when the next three characters are "bar". A +lookbehind assertion is needed to achieve the other effect. +.P +If you want to force a matching failure at some point in a pattern, the most +convenient way to do it is with (?!) because an empty string always matches, so +an assertion that requires there not to be an empty string must always fail. +The backtracking control verb (*FAIL) or (*F) is a synonym for (?!). +. +. 
+.\" HTML <a name="lookbehind"></a> +.SS "Lookbehind assertions" +.rs +.sp +Lookbehind assertions start with (?<= for positive assertions and (?<! for +negative assertions. For example, +.sp + (?<!foo)bar +.sp +does find an occurrence of "bar" that is not preceded by "foo". The contents of +a lookbehind assertion are restricted such that all the strings it matches must +have a fixed length. However, if there are several top-level alternatives, they +do not all have to have the same fixed length. Thus +.sp + (?<=bullock|donkey) +.sp +is permitted, but +.sp + (?<!dogs?|cats?) +.sp +causes an error at compile time. Branches that match different length strings +are permitted only at the top level of a lookbehind assertion. This is an +extension compared with Perl, which requires all branches to match the same +length of string. An assertion such as +.sp + (?<=ab(c|de)) +.sp +is not permitted, because its single top-level branch can match two different +lengths, but it is acceptable to PCRE if rewritten to use two top-level +branches: +.sp + (?<=abc|abde) +.sp +In some cases, the escape sequence \eK +.\" HTML <a href="#resetmatchstart"> +.\" </a> +(see above) +.\" +can be used instead of a lookbehind assertion to get round the fixed-length +restriction. +.P +The implementation of lookbehind assertions is, for each alternative, to +temporarily move the current position back by the fixed length and then try to +match. If there are insufficient characters before the current position, the +assertion fails. +.P +In a UTF mode, PCRE does not allow the \eC escape (which matches a single data +unit even in a UTF mode) to appear in lookbehind assertions, because it makes +it impossible to calculate the length of the lookbehind. The \eX and \eR +escapes, which can match different numbers of data units, are also not +permitted. 
+.P +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +"Subroutine" +.\" +calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long +as the subpattern matches a fixed-length string. +.\" HTML <a href="#recursion"> +.\" </a> +Recursion, +.\" +however, is not supported. +.P +Possessive quantifiers can be used in conjunction with lookbehind assertions to +specify efficient matching of fixed-length strings at the end of subject +strings. Consider a simple pattern such as +.sp + abcd$ +.sp +when applied to a long string that does not match. Because matching proceeds +from left to right, PCRE will look for each "a" in the subject and then see if +what follows matches the rest of the pattern. If the pattern is specified as +.sp + ^.*abcd$ +.sp +the initial .* matches the entire string at first, but when this fails (because +there is no following "a"), it backtracks to match all but the last character, +then all but the last two characters, and so on. Once again the search for "a" +covers the entire string, from right to left, so we are no better off. However, +if the pattern is written as +.sp + ^.*+(?<=abcd) +.sp +there can be no backtracking for the .*+ item; it can match only the entire +string. The subsequent lookbehind assertion does a single test on the last four +characters. If it fails, the match fails immediately. For long strings, this +approach makes a significant difference to the processing time. +. +. +.SS "Using multiple assertions" +.rs +.sp +Several assertions (of any sort) may occur in succession. For example, +.sp + (?<=\ed{3})(?<!999)foo +.sp +matches "foo" preceded by three digits that are not "999". Notice that each of +the assertions is applied independently at the same point in the subject +string. First there is a check that the previous three characters are all +digits, and then there is a check that the same three characters are not "999". 
+This pattern does \fInot\fP match "foo" preceded by six characters, the first three +of which are digits and the last three of which are not "999". For example, it +doesn't match "123abcfoo". A pattern to do that is +.sp + (?<=\ed{3}...)(?<!999)foo +.sp +This time the first assertion looks at the preceding six characters, checking +that the first three are digits, and then the second assertion checks that the +preceding three characters are not "999". +.P +Assertions can be nested in any combination. For example, +.sp + (?<=(?<!foo)bar)baz +.sp +matches an occurrence of "baz" that is preceded by "bar" which in turn is not +preceded by "foo", while +.sp + (?<=\ed{3}(?!999)...)foo +.sp +is another pattern that matches "foo" preceded by three digits and any three +characters that are not "999". +. +. +.\" HTML <a name="conditions"></a> +.SH "CONDITIONAL SUBPATTERNS" +.rs +.sp +It is possible to cause the matching process to obey a subpattern +conditionally or to choose between two alternative subpatterns, depending on +the result of an assertion, or whether a specific capturing subpattern has +already been matched. The two possible forms of conditional subpattern are: +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp +If the condition is satisfied, the yes-pattern is used; otherwise the +no-pattern (if present) is used. If there are more than two alternatives in the +subpattern, a compile-time error occurs. Each of the two alternatives may +itself contain nested subpatterns of any form, including conditional +subpatterns; the restriction to two alternatives applies only at the level of +the condition. This pattern fragment is an example where the alternatives are +complex: +.sp + (?(1) (A|B|C) | (D | (?(2)E|F) | E) ) +.sp +.P +There are four kinds of condition: references to subpatterns, references to +recursion, a pseudo-condition called DEFINE, and assertions. +. 
+.SS "Checking for a used subpattern by number" +.rs +.sp +If the text between the parentheses consists of a sequence of digits, the +condition is true if a capturing subpattern of that number has previously +matched. If there is more than one capturing subpattern with the same number +(see the earlier +.\" +.\" HTML <a href="#recursion"> +.\" </a> +section about duplicate subpattern numbers), +.\" +the condition is true if any of them have matched. An alternative notation is +to precede the digits with a plus or minus sign. In this case, the subpattern +number is relative rather than absolute. The most recently opened parentheses +can be referenced by (?(-1), the next most recent by (?(-2), and so on. Inside +loops it can also make sense to refer to subsequent groups. The next +parentheses to be opened can be referenced as (?(+1), and so on. (The value +zero in any of these forms is not used; it provokes a compile-time error.) +.P +Consider the following pattern, which contains non-significant white space to +make it more readable (assume the PCRE_EXTENDED option) and to divide it into +three parts for ease of discussion: +.sp + ( \e( )? [^()]+ (?(1) \e) ) +.sp +The first part matches an optional opening parenthesis, and if that +character is present, sets it as the first captured substring. The second part +matches one or more characters that are not parentheses. The third part is a +conditional subpattern that tests whether or not the first set of parentheses +matched. If they did, that is, if the subject started with an opening parenthesis, +the condition is true, and so the yes-pattern is executed and a closing +parenthesis is required. Otherwise, since no-pattern is not present, the +subpattern matches nothing. In other words, this pattern matches a sequence of +non-parentheses, optionally enclosed in parentheses. +.P +If you were embedding this pattern in a larger one, you could use a relative +reference: +.sp + ...other stuff... ( \e( )? [^()]+ (?(-1) \e) ) ... 
+.sp +This makes the fragment independent of the parentheses in the larger pattern. +. +.SS "Checking for a used subpattern by name" +.rs +.sp +Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used +subpattern by name. For compatibility with earlier versions of PCRE, which had +this facility before Perl, the syntax (?(name)...) is also recognized. +.P +Rewriting the above example to use a named subpattern gives this: +.sp + (?<OPEN> \e( )? [^()]+ (?(<OPEN>) \e) ) +.sp +If the name used in a condition of this kind is a duplicate, the test is +applied to all subpatterns of the same name, and is true if any one of them has +matched. +. +.SS "Checking for pattern recursion" +.rs +.sp +If the condition is the string (R), and there is no subpattern with the name R, +the condition is true if a recursive call to the whole pattern or any +subpattern has been made. If digits or a name preceded by ampersand follow the +letter R, for example: +.sp + (?(R3)...) or (?(R&name)...) +.sp +the condition is true if the most recent recursion is into a subpattern whose +number or name is given. This condition does not check the entire recursion +stack. If the name used in a condition of this kind is a duplicate, the test is +applied to all subpatterns of the same name, and is true if any one of them is +the most recent recursion. +.P +At "top level", all these recursion test conditions are false. +.\" HTML <a href="#recursion"> +.\" </a> +The syntax for recursive patterns +.\" +is described below. +. +.\" HTML <a name="subdefine"></a> +.SS "Defining subpatterns for use by reference only" +.rs +.sp +If the condition is the string (DEFINE), and there is no subpattern with the +name DEFINE, the condition is always false. In this case, there may be only one +alternative in the subpattern. It is always skipped if control reaches this +point in the pattern; the idea of DEFINE is that it can be used to define +subroutines that can be referenced from elsewhere. 
(The use of +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +subroutines +.\" +is described below.) For example, a pattern to match an IPv4 address such as +"192.168.23.245" could be written like this (ignore white space and line +breaks): +.sp + (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) ) + \eb (?&byte) (\e.(?&byte)){3} \eb +.sp +The first part of the pattern is a DEFINE group inside which another group +named "byte" is defined. This matches an individual component of an IPv4 +address (a number less than 256). When matching takes place, this part of the +pattern is skipped because DEFINE acts like a false condition. The rest of the +pattern uses references to the named group to match the four dot-separated +components of an IPv4 address, insisting on a word boundary at each end. +. +.SS "Assertion conditions" +.rs +.sp +If the condition is not in any of the above formats, it must be an assertion. +This may be a positive or negative lookahead or lookbehind assertion. Consider +this pattern, again containing non-significant white space, and with the two +alternatives on the second line: +.sp + (?(?=[^a-z]*[a-z]) + \ed{2}-[a-z]{3}-\ed{2} | \ed{2}-\ed{2}-\ed{2} ) +.sp +The condition is a positive lookahead assertion that matches an optional +sequence of non-letters followed by a letter. In other words, it tests for the +presence of at least one letter in the subject. If a letter is found, the +subject is matched against the first alternative; otherwise it is matched +against the second. This pattern matches strings in one of the two forms +dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. +. +. +.\" HTML <a name="comments"></a> +.SH COMMENTS +.rs +.sp +There are two ways of including comments in patterns that are processed by +PCRE. In both cases, the start of the comment must not be in a character class, +nor in the middle of any other sequence of related characters such as (?: or a +subpattern name or number. 
The characters that make up a comment play no part +in the pattern matching. +.P +The sequence (?# marks the start of a comment that continues up to the next +closing parenthesis. Nested parentheses are not permitted. If the PCRE_EXTENDED +option is set, an unescaped # character also introduces a comment, which in +this case continues to immediately after the next newline character or +character sequence in the pattern. Which characters are interpreted as newlines +is controlled by the options passed to a compiling function or by a special +sequence at the start of the pattern, as described in the section entitled +.\" HTML <a href="#newlines"> +.\" </a> +"Newline conventions" +.\" +above. Note that the end of this type of comment is a literal newline sequence +in the pattern; escape sequences that happen to represent a newline do not +count. For example, consider this pattern when PCRE_EXTENDED is set, and the +default newline convention is in force: +.sp + abc #comment \en still comment +.sp +On encountering the # character, \fBpcre_compile()\fP skips along, looking for +a newline in the pattern. The sequence \en is still literal at this stage, so +it does not terminate the comment. Only an actual character with the code value +0x0a (the default newline) does so. +. +. +.\" HTML <a name="recursion"></a> +.SH "RECURSIVE PATTERNS" +.rs +.sp +Consider the problem of matching a string in parentheses, allowing for +unlimited nested parentheses. Without the use of recursion, the best that can +be done is to use a pattern that matches up to some fixed depth of nesting. It +is not possible to handle an arbitrary nesting depth. +.P +For some time, Perl has provided a facility that allows regular expressions to +recurse (amongst other things). It does this by interpolating Perl code in the +expression at run time, and the code can refer to the expression itself. 
A Perl +pattern using code interpolation to solve the parentheses problem can be +created like this: +.sp + $re = qr{\e( (?: (?>[^()]+) | (?p{$re}) )* \e)}x; +.sp +The (?p{...}) item interpolates Perl code at run time, and in this case refers +recursively to the pattern in which it appears. +.P +Obviously, PCRE cannot support the interpolation of Perl code. Instead, it +supports special syntax for recursion of the entire pattern, and also for +individual subpattern recursion. After its introduction in PCRE and Python, +this kind of recursion was subsequently introduced into Perl at release 5.10. +.P +A special item that consists of (? followed by a number greater than zero and a +closing parenthesis is a recursive subroutine call of the subpattern of the +given number, provided that it occurs inside that subpattern. (If not, it is a +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +non-recursive subroutine +.\" +call, which is described in the next section.) The special item (?R) or (?0) is +a recursive call of the entire regular expression. +.P +This PCRE pattern solves the nested parentheses problem (assume the +PCRE_EXTENDED option is set so that white space is ignored): +.sp + \e( ( [^()]++ | (?R) )* \e) +.sp +First it matches an opening parenthesis. Then it matches any number of +substrings which can either be a sequence of non-parentheses, or a recursive +match of the pattern itself (that is, a correctly parenthesized substring). +Finally there is a closing parenthesis. Note the use of a possessive quantifier +to avoid backtracking into sequences of non-parentheses. +.P +If this were part of a larger pattern, you would not want to recurse the entire +pattern, so instead you could use this: +.sp + ( \e( ( [^()]++ | (?1) )* \e) ) +.sp +We have put the pattern into parentheses, and caused the recursion to refer to +them instead of the whole pattern. +.P +In a larger pattern, keeping track of parenthesis numbers can be tricky. 
This +is made easier by the use of relative references. Instead of (?1) in the +pattern above you can write (?-2) to refer to the second most recently opened +parentheses preceding the recursion. In other words, a negative number counts +capturing parentheses leftwards from the point at which it is encountered. +.P +It is also possible to refer to subsequently opened parentheses, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +.\" HTML <a href="#subpatternsassubroutines"> +.\" </a> +non-recursive subroutine +.\" +calls, as described in the next section. +.P +An alternative approach is to use named parentheses instead. The Perl syntax +for this is (?&name); PCRE's earlier syntax (?P>name) is also supported. We +could rewrite the above example as follows: +.sp + (?<pn> \e( ( [^()]++ | (?&pn) )* \e) ) +.sp +If there is more than one subpattern with the same name, the earliest one is +used. +.P +This particular example pattern that we have been looking at contains nested +unlimited repeats, and so the use of a possessive quantifier for matching +strings of non-parentheses is important when applying the pattern to strings +that do not match. For example, when this pattern is applied to +.sp + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() +.sp +it yields "no match" quickly. However, if a possessive quantifier is not used, +the match runs for a very long time indeed because there are so many different +ways the + and * repeats can carve up the subject, and all have to be tested +before failure can be reported. +.P +At the end of a match, the values of capturing parentheses are those from +the outermost level. If you want to obtain intermediate values, a callout +function can be used (see below and the +.\" HREF +\fBpcrecallout\fP +.\" +documentation). 
If the pattern above is matched against +.sp + (ab(cd)ef) +.sp +the value for the inner capturing parentheses (numbered 2) is "ef", which is +the last value taken on at the top level. If a capturing subpattern is not +matched at the top level, its final captured value is unset, even if it was +(temporarily) set at a deeper level during the matching process. +.P +If there are more than 15 capturing parentheses in a pattern, PCRE has to +obtain extra memory to store data during a recursion, which it does by using +\fBpcre_malloc\fP, freeing it via \fBpcre_free\fP afterwards. If no memory can +be obtained, the match fails with the PCRE_ERROR_NOMEMORY error. +.P +Do not confuse the (?R) item with the condition (R), which tests for recursion. +Consider this pattern, which matches text in angle brackets, allowing for +arbitrary nesting. Only digits are allowed in nested brackets (that is, when +recursing), whereas any characters are permitted at the outer level. +.sp + < (?: (?(R) \ed++ | [^<>]*+) | (?R)) * > +.sp +In this pattern, (?(R) is the start of a conditional subpattern, with two +different alternatives for the recursive and non-recursive cases. The (?R) item +is the actual recursive call. +. +. +.\" HTML <a name="recursiondifference"></a> +.SS "Differences in recursion processing between PCRE and Perl" +.rs +.sp +Recursion processing in PCRE differs from Perl in two important ways. In PCRE +(like Python, but unlike Perl), a recursive subpattern call is always treated +as an atomic group. That is, once it has matched some of the subject string, it +is never re-entered, even if it contains untried alternatives and there is a +subsequent matching failure. 
This can be illustrated by the following pattern, +which purports to match a palindromic string that contains an odd number of +characters (for example, "a", "aba", "abcba", "abcdcba"): +.sp + ^(.|(.)(?1)\e2)$ +.sp +The idea is that it either matches a single character, or two identical +characters surrounding a sub-palindrome. In Perl, this pattern works; in PCRE +it does not if the subject is longer than three characters. Consider the +subject string "abcba": +.P +At the top level, the first character is matched, but as it is not at the end +of the string, the first alternative fails; the second alternative is taken +and the recursion kicks in. The recursive call to subpattern 1 successfully +matches the next character ("b"). (Note that the beginning and end of line +tests are not part of the recursion). +.P +Back at the top level, the next character ("c") is compared with what +subpattern 2 matched, which was "a". This fails. Because the recursion is +treated as an atomic group, there are now no backtracking points, and so the +entire match fails. (Perl is able, at this point, to re-enter the recursion and +try the second alternative.) However, if the pattern is written with the +alternatives in the other order, things are different: +.sp + ^((.)(?1)\e2|.)$ +.sp +This time, the recursing alternative is tried first, and continues to recurse +until it runs out of characters, at which point the recursion fails. But this +time we do have another alternative to try at the higher level. That is the big +difference: in the previous case the remaining alternative is at a deeper +recursion level, which PCRE cannot use. +.P +To change the pattern so that it matches all palindromic strings, not just +those with an odd number of characters, it is tempting to change the pattern to +this: +.sp + ^((.)(?1)\e2|.?)$ +.sp +Again, this works in Perl, but not in PCRE, and for the same reason. 
When a +deeper recursion has matched a single character, it cannot be entered again in +order to match an empty string. The solution is to separate the two cases, and +write out the odd and even cases as alternatives at the higher level: +.sp + ^(?:((.)(?1)\e2|)|((.)(?3)\e4|.)) +.sp +If you want to match typical palindromic phrases, the pattern has to ignore all +non-word characters, which can be done like this: +.sp + ^\eW*+(?:((.)\eW*+(?1)\eW*+\e2|)|((.)\eW*+(?3)\eW*+\e4|\eW*+.\eW*+))\eW*+$ +.sp +If run with the PCRE_CASELESS option, this pattern matches phrases such as "A +man, a plan, a canal: Panama!" and it works well in both PCRE and Perl. Note +the use of the possessive quantifier *+ to avoid backtracking into sequences of +non-word characters. Without this, PCRE takes a great deal longer (ten times or +more) to match typical phrases, and Perl takes so long that you think it has +gone into a loop. +.P +\fBWARNING\fP: The palindrome-matching patterns above work only if the subject +string does not start with a palindrome that is shorter than the entire string. +For example, although "abcba" is correctly matched, if the subject is "ababa", +PCRE finds the palindrome "aba" at the start, then fails at top level because +the end of the string does not follow. Once again, it cannot jump back into the +recursion to try other alternatives, so the entire match fails. +.P +The second way in which PCRE and Perl differ in their recursion processing is +in the handling of captured values. In Perl, when a subpattern is called +recursively or as a subroutine (see the next section), it has no access to any +values that were captured outside the recursion, whereas in PCRE these values +can be referenced. Consider this pattern: +.sp + ^(.)(\e1|a(?2)) +.sp +In PCRE, this pattern matches "bab". The first capturing parentheses match "b", +then in the second group, when the back reference \e1 fails to match "b", the +second alternative matches "a" and then recurses.
In the recursion, \e1 does +now match "b" and so the whole match succeeds. In Perl, the pattern fails to +match because inside the recursive call \e1 cannot access the externally set +value. +. +. +.\" HTML <a name="subpatternsassubroutines"></a> +.SH "SUBPATTERNS AS SUBROUTINES" +.rs +.sp +If the syntax for a recursive subpattern call (either by number or by +name) is used outside the parentheses to which it refers, it operates like a +subroutine in a programming language. The called subpattern may be defined +before or after the reference. A numbered reference can be absolute or +relative, as in these examples: +.sp + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... +.sp +An earlier example pointed out that the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If instead the pattern +.sp + (sens|respons)e and (?1)ibility +.sp +is used, it does match "sense and responsibility" as well as the other two +strings. Another example is given in the discussion of DEFINE above. +.P +All subroutine calls, whether recursive or not, are always treated as atomic +groups. That is, once a subroutine has matched some of the subject string, it +is never re-entered, even if it contains untried alternatives and there is a +subsequent matching failure. Any capturing parentheses that are set during the +subroutine call revert to their previous values afterwards. +.P +Processing options such as case-independence are fixed when a subpattern is +defined, so if it is used as a subroutine, such options cannot be changed for +different calls. For example, consider this pattern: +.sp + (abc)(?i:(?-1)) +.sp +It matches "abcabc". It does not match "abcABC" because the change of +processing option does not affect the called subpattern. +. +. 
+.\" HTML <a name="onigurumasubroutines"></a> +.SH "ONIGURUMA SUBROUTINE SYNTAX" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for referencing a subpattern as a subroutine, possibly recursively. Here +are two of the examples used above, rewritten using this syntax: +.sp + (?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) ) + (sens|respons)e and \eg'1'ibility +.sp +PCRE supports an extension to Oniguruma: if a number is preceded by a +plus or a minus sign it is taken as a relative reference. For example: +.sp + (abc)(?i:\eg<-1>) +.sp +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. The former is a back reference; the latter is a subroutine call. +. +. +.SH CALLOUTS +.rs +.sp +Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl +code to be obeyed in the middle of matching a regular expression. This makes it +possible, amongst other things, to extract different substrings that match the +same pair of parentheses when there is a repetition. +.P +PCRE provides a similar feature, but of course it cannot obey arbitrary Perl +code. The feature is called "callout". The caller of PCRE provides an external +function by putting its entry point in the global variable \fIpcre_callout\fP +(8-bit library) or \fIpcre[16|32]_callout\fP (16-bit or 32-bit library). +By default, this variable contains NULL, which disables all calling out. +.P +Within a regular expression, (?C) indicates the points at which the external +function is to be called. If you want to identify different callout points, you +can put a number less than 256 after the letter C. The default value is zero. +For example, this pattern has two callout points: +.sp + (?C1)abc(?C2)def +.sp +If the PCRE_AUTO_CALLOUT flag is passed to a compiling function, callouts are +automatically installed before each item in the pattern. 
They are all numbered +255. If there is a conditional group in the pattern whose condition is an +assertion, an additional callout is inserted just before the condition. An +explicit callout may also be set at this position, as in this example: +.sp + (?(?C9)(?=a)abc|def) +.sp +Note that this applies only to assertion conditions, not to other types of +condition. +.P +During matching, when PCRE reaches a callout point, the external function is +called. It is provided with the number of the callout, the position in the +pattern, and, optionally, one item of data originally supplied by the caller of +the matching function. The callout function may cause matching to proceed, to +backtrack, or to fail altogether. +.P +By default, PCRE implements a number of optimizations at compile time and +matching time, and one side-effect is that sometimes callouts are skipped. If +you need all possible callouts to happen, you need to set options that disable +the relevant optimizations. More details, and a complete description of the +interface to the callout function, are given in the +.\" HREF +\fBpcrecallout\fP +.\" +documentation. +. +. +.\" HTML <a name="backtrackcontrol"></a> +.SH "BACKTRACKING CONTROL" +.rs +.sp +Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which +are still described in the Perl documentation as "experimental and subject to +change or removal in a future version of Perl". It goes on to say: "Their usage +in production code should be noted to avoid problems during upgrades." The same +remarks apply to the PCRE features described in this section. +.P +The new verbs make use of what was previously invalid syntax: an opening +parenthesis followed by an asterisk. They are generally of the form +(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving +differently depending on whether or not a name is present. A name is any +sequence of characters that does not include a closing parenthesis. 
The maximum +length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit +libraries. If the name is empty, that is, if the closing parenthesis +immediately follows the colon, the effect is as if the colon were not there. +Any number of these verbs may occur in a pattern. +.P +Since these verbs are specifically related to backtracking, most of them can be +used only when the pattern is to be matched using one of the traditional +matching functions, because these use a backtracking algorithm. With the +exception of (*FAIL), which behaves like a failing negative assertion, the +backtracking control verbs cause an error if encountered by a DFA matching +function. +.P +The behaviour of these verbs in +.\" HTML <a href="#btrepeat"> +.\" </a> +repeated groups, +.\" +.\" HTML <a href="#btassert"> +.\" </a> +assertions, +.\" +and in +.\" HTML <a href="#btsub"> +.\" </a> +subpatterns called as subroutines +.\" +(whether or not recursively) is documented below. +. +. +.\" HTML <a name="nooptimize"></a> +.SS "Optimizations that affect backtracking verbs" +.rs +.sp +PCRE contains some optimizations that are used to speed up matching by running +some checks at the start of each match attempt. For example, it may know the +minimum length of matching subject, or that a particular character must be +present. When one of these optimizations bypasses the running of a match, any +included backtracking verbs will not, of course, be processed. You can suppress +the start-of-match optimizations by setting the PCRE_NO_START_OPTIMIZE option +when calling \fBpcre_compile()\fP or \fBpcre_exec()\fP, or by starting the +pattern with (*NO_START_OPT). There is more discussion of this option in the +section entitled +.\" HTML <a href="pcreapi.html#execoptions"> +.\" </a> +"Option bits for \fBpcre_exec()\fP" +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +.P +Experiments with Perl suggest that it too has similar optimizations, sometimes +leading to anomalous results. 
+. +. +.SS "Verbs that act immediately" +.rs +.sp +The following verbs act as soon as they are encountered. They may not be +followed by a name. +.sp + (*ACCEPT) +.sp +This verb causes the match to end successfully, skipping the remainder of the +pattern. However, when it is inside a subpattern that is called as a +subroutine, only that subpattern is ended successfully. Matching then continues +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the +assertion succeeds; in a negative assertion, the assertion fails. +.P +If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For +example: +.sp + A((?:A|B(*ACCEPT)|C)D) +.sp +This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by +the outer parentheses. +.sp + (*FAIL) or (*F) +.sp +This verb causes a matching failure, forcing backtracking to occur. It is +equivalent to (?!) but easier to read. The Perl documentation notes that it is +probably useful only when combined with (?{}) or (??{}). Those are, of course, +Perl features that are not present in PCRE. The nearest equivalent is the +callout feature, as for example in this pattern: +.sp + a+(?C)(*FAIL) +.sp +A match with the string "aaaa" always fails, but the callout is taken before +each backtrack happens (in this example, 10 times). +. +. +.SS "Recording which path was taken" +.rs +.sp +There is one verb whose main purpose is to track how a match was arrived at, +though it also has a secondary use in conjunction with advancing the match +starting point (see (*SKIP) below). +.sp + (*MARK:NAME) or (*:NAME) +.sp +A name is always required with this verb. There may be as many instances of +(*MARK) as you like in a pattern, and their names do not have to be unique.
+.P +When a match succeeds, the name of the last-encountered (*MARK:NAME), +(*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to the +caller as described in the section entitled +.\" HTML <a href="pcreapi.html#extradata"> +.\" </a> +"Extra data for \fBpcre_exec()\fP" +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. Here is an example of \fBpcretest\fP output, where the /K +modifier requests the retrieval and outputting of (*MARK) data: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/K + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B +.sp +The (*MARK) name is tagged with "MK:" in this output, and in this example it +indicates which of the two alternatives matched. This is a more efficient way +of obtaining this information than putting each alternative in its own +capturing parentheses. +.P +If a verb with a name is encountered in a positive assertion that is true, the +name is recorded and passed back if it is the last-encountered. This does not +happen for negative assertions or failing positive assertions. +.P +After a partial match or a failed match, the last encountered name in the +entire match process is returned. For example: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/K + data> XP + No match, mark = B +.sp +Note that in this unanchored example the mark is retained from the match +attempt that started at the letter "X" in the subject. Subsequent match +attempts starting at "P" and then with an empty string do not get as far as the +(*MARK) item, but nevertheless do not reset it. +.P +If you are interested in (*MARK) values after failed matches, you should +probably set the PCRE_NO_START_OPTIMIZE option +.\" HTML <a href="#nooptimize"> +.\" </a> +(see above) +.\" +to ensure that the match is always attempted. +. +. +.SS "Verbs that act after backtracking" +.rs +.sp +The following verbs do nothing when they are encountered. Matching continues +with what follows, but if there is no subsequent match, causing a backtrack to +the verb, a failure is forced. 
That is, backtracking cannot pass to the left of +the verb. However, when one of these verbs appears inside an atomic group or an +assertion that is true, its effect is confined to that group, because once the +group has been matched, there is never any backtracking into it. In this +situation, backtracking can "jump back" to the left of the entire atomic group +or assertion. (Remember also, as stated above, that this localization also +applies in subroutine calls.) +.P +These verbs differ in exactly what kind of failure occurs when backtracking +reaches them. The behaviour described below is what happens when the verb is +not in a subroutine or an assertion. Subsequent sections cover these special +cases. +.sp + (*COMMIT) +.sp +This verb, which may not be followed by a name, causes the whole match to fail +outright if there is a later matching failure that causes backtracking to reach +it. Even if the pattern is unanchored, no further attempts to find a match by +advancing the starting point take place. If (*COMMIT) is the only backtracking +verb that is encountered, once it has been passed \fBpcre_exec()\fP is +committed to finding a match at the current starting point, or not at all. For +example: +.sp + a+(*COMMIT)b +.sp +This matches "xxaab" but not "aacaab". It can be thought of as a kind of +dynamic anchor, or "I've started, so I must finish." The name of the most +recently passed (*MARK) in the path is passed back when (*COMMIT) forces a +match failure. +.P +If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +match does not always guarantee that a match must be at this starting point. 
+.P +Note that (*COMMIT) at the start of a pattern is not the same as an anchor, +unless PCRE's start-of-match optimizations are turned off, as shown in this +output from \fBpcretest\fP: +.sp + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> xyzabc\eY + No match +.sp +For this pattern, PCRE knows that any match must start with "a", so the +optimization skips along the subject to "a" before applying the pattern to the +first set of data. The match attempt then succeeds. In the second set of data, +the escape sequence \eY is interpreted by the \fBpcretest\fP program. It causes +the PCRE_NO_START_OPTIMIZE option to be set when \fBpcre_exec()\fP is called. +This disables the optimization that skips along to the first character. The +pattern is now applied starting at "x", and so the (*COMMIT) causes the match +to fail without trying any other starting points. +.sp + (*PRUNE) or (*PRUNE:NAME) +.sp +This verb causes the match to fail at the current starting position in the +subject if there is a later matching failure that causes backtracking to reach +it. If the pattern is unanchored, the normal "bumpalong" advance to the next +starting character then happens. Backtracking can occur as usual to the left of +(*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but +if there is no match to the right, backtracking cannot cross (*PRUNE). In +simple cases, the use of (*PRUNE) is just an alternative to an atomic group or +possessive quantifier, but there are some uses of (*PRUNE) that cannot be +expressed in any other way. In an anchored pattern (*PRUNE) has the same effect +as (*COMMIT). +.P +The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). +It is like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK).
+.sp + (*SKIP) +.sp +This verb, when given without a name, is like (*PRUNE), except that if the +pattern is unanchored, the "bumpalong" advance is not to the next character, +but to the position in the subject where (*SKIP) was encountered. (*SKIP) +signifies that whatever text was matched leading up to it cannot be part of a +successful match. Consider: +.sp + a+(*SKIP)b +.sp +If the subject is "aaaac...", after the first match attempt fails (starting at +the first character in the string), the starting point skips on to start the +next attempt at "c". Note that a possessive quantifier does not have the same +effect as this example; although it would suppress backtracking during the +first match attempt, the second attempt would start at the second character +instead of skipping on to "c". +.sp + (*SKIP:NAME) +.sp +When (*SKIP) has an associated name, its behaviour is modified. When it is +triggered, the previous path through the pattern is searched for the most +recent (*MARK) that has the same name. If one is found, the "bumpalong" advance +is to the subject position that corresponds to that (*MARK) instead of to where +(*SKIP) was encountered. If no (*MARK) with a matching name is found, the +(*SKIP) is ignored. +.P +Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +names that are set by (*PRUNE:NAME) or (*THEN:NAME). +.sp + (*THEN) or (*THEN:NAME) +.sp +This verb causes a skip to the next innermost alternative when backtracking +reaches it. That is, it cancels any further backtracking within the current +alternative. Its name comes from the observation that it can be used for a +pattern-based if-then-else block: +.sp + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... +.sp +If the COND1 pattern matches, FOO is tried (and possibly further items after +the end of the group if FOO succeeds); on failure, the matcher skips to the +second alternative and tries COND2, without backtracking into COND1.
If that +succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no +more alternatives, so there is a backtrack to whatever came before the entire +group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). +.P +The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). +It is like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK). +.P +A subpattern that does not contain a | character is just a part of the +enclosing alternative; it is not a nested alternation with only one +alternative. The effect of (*THEN) extends beyond such a subpattern to the +enclosing alternative. Consider this pattern, where A, B, etc. are complex +pattern fragments that do not contain any | characters at this level: +.sp + A (B(*THEN)C) | D +.sp +If A and B are matched, but there is a failure in C, matching does not +backtrack into A; instead it moves to the next alternative, that is, D. +However, if the subpattern containing (*THEN) is given an alternative, it +behaves differently: +.sp + A (B(*THEN)C | (*FAIL)) | D +.sp +The effect of (*THEN) is now confined to the inner subpattern. After a failure +in C, matching moves to (*FAIL), which causes the whole subpattern to fail +because there are no more alternatives to try. In this case, matching does now +backtrack into A. +.P +Note that a conditional subpattern is not considered as having two +alternatives, because only one is ever used. In other words, the | character in +a conditional subpattern has a different meaning. Ignoring white space, +consider: +.sp + ^.*? (?(?=a) a | b(*THEN)c ) +.sp +If the subject is "ba", this pattern does not match. Because .*? is ungreedy, +it initially matches zero characters. The condition (?=a) then fails, the +character "b" is matched, but "c" is not. At this point, matching does not +backtrack to .*?
as might perhaps be expected from the presence of the | +character. The conditional subpattern is part of the single alternative that +comprises the whole pattern, and so the match fails. (If there was a backtrack +into .*?, allowing it to match "b", the match would succeed.) +.P +The verbs just described provide four different "strengths" of control when +subsequent matching fails. (*THEN) is the weakest, carrying on the match at the +next alternative. (*PRUNE) comes next, failing the match at the current +starting position, but allowing an advance to the next character (for an +unanchored pattern). (*SKIP) is similar, except that the advance may be more +than one character. (*COMMIT) is the strongest, causing the entire match to +fail. +. +. +.SS "More than one backtracking verb" +.rs +.sp +If more than one backtracking verb is present in a pattern, the one that is +backtracked onto first acts. For example, consider this pattern, where A, B, +etc. are complex pattern fragments: +.sp + (A(*COMMIT)B(*THEN)C|ABD) +.sp +If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +the next alternative (ABD) to be tried. This behaviour is consistent, but is +not always the same as Perl's. It means that if two or more backtracking verbs +appear in succession, all but the last of them have no effect. Consider this +example: +.sp + ...(*COMMIT)(*PRUNE)... +.sp +If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). +. +. +.\" HTML <a name="btrepeat"></a> +.SS "Backtracking verbs in repeated groups" +.rs +.sp +PCRE differs from Perl in its handling of backtracking verbs in repeated +groups. For example, consider: +.sp + /(a(*COMMIT)b)+ac/ +.sp +If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in +the second repeat of the group acts.
+. +. +.\" HTML <a name="btassert"></a> +.SS "Backtracking verbs in assertions" +.rs +.sp +(*FAIL) in an assertion has its normal effect: it forces an immediate backtrack. +.P +(*ACCEPT) in a positive assertion causes the assertion to succeed without any +further processing. In a negative assertion, (*ACCEPT) causes the assertion to +fail without any further processing. +.P +The other backtracking verbs are not treated specially if they appear in a +positive assertion. In particular, (*THEN) skips to the next alternative in the +innermost enclosing group that has alternations, whether or not this is within +the assertion. +.P +Negative assertions are, however, different, in order to ensure that changing a +positive assertion into a negative assertion changes its result. Backtracking +into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true, +without considering any further alternative branches in the assertion. +Backtracking into (*THEN) causes it to skip to the next enclosing alternative +within the assertion (the normal behaviour), but if the assertion does not have +such an alternative, (*THEN) behaves like (*PRUNE). +. +. +.\" HTML <a name="btsub"></a> +.SS "Backtracking verbs in subroutines" +.rs +.sp +These behaviours occur whether or not the subpattern is called recursively. +Perl's treatment of subroutines is different in some cases. +.P +(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces +an immediate backtrack. +.P +(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the +subroutine call. +.P +(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause +the subroutine match to fail. +.P +(*THEN) skips to the next alternative in the innermost enclosing group within +the subpattern that has alternatives. 
If there is no such group within the +subpattern, (*THEN) causes the subroutine match to fail. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3), +\fBpcresyntax\fP(3), \fBpcre\fP(3), \fBpcre16(3)\fP, \fBpcre32(3)\fP. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 08 January 2014 +Copyright (c) 1997-2014 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcreperform.3 b/usr/share/man/man3/pcreperform.3 new file mode 100755 index 000000000..fb2aa9592 --- /dev/null +++ b/usr/share/man/man3/pcreperform.3 @@ -0,0 +1,177 @@ +.TH PCREPERFORM 3 "09 January 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE PERFORMANCE" +.rs +.sp +Two aspects of performance are discussed below: memory usage and processing +time. The way you express your pattern as a regular expression can affect both +of them. +. +.SH "COMPILED PATTERN MEMORY USAGE" +.rs +.sp +Patterns are compiled by PCRE into a reasonably efficient interpretive code, so +that most simple patterns do not use much memory. However, there is one case +where the memory usage of a compiled pattern can be unexpectedly large. If a +parenthesized subpattern has a quantifier with a minimum greater than 1 and/or +a limited maximum, the whole subpattern is repeated in the compiled code. For +example, the pattern +.sp + (abc|def){2,4} +.sp +is compiled as if it were +.sp + (abc|def)(abc|def)((abc|def)(abc|def)?)? +.sp +(Technical aside: It is done this way so that backtrack points within each of +the repetitions can be independently maintained.) +.P +For regular expressions whose quantifiers use only small numbers, this is not +usually a problem. However, if the numbers are large, and particularly if such +repetitions are nested, the memory usage can become an embarrassment. 
For +example, the very simple pattern +.sp + ((ab){1,1000}c){1,3} +.sp +uses 51K bytes when compiled using the 8-bit library. When PCRE is compiled +with its default internal pointer size of two bytes, the size limit on a +compiled pattern is 64K data units, and this is reached with the above pattern +if the outer repetition is increased from 3 to 4. PCRE can be compiled to use +larger internal pointers and thus handle larger compiled patterns, but it is +better to try to rewrite your pattern to use less memory if you can. +.P +One way of reducing the memory usage for such patterns is to make use of PCRE's +.\" HTML <a href="pcrepattern.html#subpatternsassubroutines"> +.\" </a> +"subroutine" +.\" +facility. Re-writing the above pattern as +.sp + ((ab)(?2){0,999}c)(?1){0,2} +.sp +reduces the memory requirements to 18K, and indeed it remains under 20K even +with the outer repetition increased to 100. However, this pattern is not +exactly equivalent, because the "subroutine" calls are treated as +.\" HTML <a href="pcrepattern.html#atomicgroup"> +.\" </a> +atomic groups +.\" +into which there can be no backtracking if there is a subsequent matching +failure. Therefore, PCRE cannot do this kind of rewriting automatically. +Furthermore, there is a noticeable loss of speed when executing the modified +pattern. Nevertheless, if the atomic grouping is not a problem and the loss of +speed is acceptable, this kind of rewriting will allow you to process patterns +that PCRE cannot otherwise handle. +. +. +.SH "STACK USAGE AT RUN TIME" +.rs +.sp +When \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP is used for matching, certain +kinds of pattern can cause it to use large amounts of the process stack. In +some environments the default process stack is quite small, and if it runs out +the result is often SIGSEGV. This issue is probably the most frequently raised +problem with PCRE. Rewriting your pattern can often help. 
The +.\" HREF +\fBpcrestack\fP +.\" +documentation discusses this issue in detail. +. +. +.SH "PROCESSING TIME" +.rs +.sp +Certain items in regular expression patterns are processed more efficiently +than others. It is more efficient to use a character class like [aeiou] than a +set of single-character alternatives such as (a|e|i|o|u). In general, the +simplest construction that provides the required behaviour is usually the most +efficient. Jeffrey Friedl's book contains a lot of useful general discussion +about optimizing regular expressions for efficient performance. This document +contains a few observations about PCRE. +.P +Using Unicode character properties (the \ep, \eP, and \eX escapes) is slow, +because PCRE has to use a multi-stage table lookup whenever it needs a +character's property. If you can find an alternative pattern that does not use +character properties, it will probably be faster. +.P +By default, the escape sequences \eb, \ed, \es, and \ew, and the POSIX +character classes such as [:alpha:] do not use Unicode properties, partly for +backwards compatibility, and partly for performance reasons. However, you can +set PCRE_UCP if you want Unicode character properties to be used. This can +double the matching time for items such as \ed, when matched with +a traditional matching function; the performance loss is less with +a DFA matching function, and in both cases there is not much difference for +\eb. +.P +When a pattern begins with .* not in parentheses, or in parentheses that are +not the subject of a backreference, and the PCRE_DOTALL option is set, the +pattern is implicitly anchored by PCRE, since it can match only at the start of +a subject string. However, if PCRE_DOTALL is not set, PCRE cannot make this +optimization, because the . metacharacter does not then match a newline, and if +the subject string contains newlines, the pattern may match from the character +immediately following one of them instead of from the very start. 
For example, +the pattern +.sp + .*second +.sp +matches the subject "first\enand second" (where \en stands for a newline +character), with the match starting at the seventh character. In order to do +this, PCRE has to retry the match starting after every newline in the subject. +.P +If you are using such a pattern with subject strings that do not contain +newlines, the best performance is obtained by setting PCRE_DOTALL, or starting +the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE +from having to scan along the subject looking for a newline to restart at. +.P +Beware of patterns that contain nested indefinite repeats. These can take a +long time to run when applied to a string that does not match. Consider the +pattern fragment +.sp + ^(a+)* +.sp +This can match "aaaa" in 16 different ways, and this number increases very +rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 +times, and for each of those cases other than 0 or 4, the + repeats can match +different numbers of times.) When the remainder of the pattern is such that the +entire match is going to fail, PCRE has in principle to try every possible +variation, and this can take an extremely long time, even for relatively short +strings. +.P +An optimization catches some of the more simple cases such as +.sp + (a+)*b +.sp +where a literal character follows. Before embarking on the standard matching +procedure, PCRE checks that there is a "b" later in the subject string, and if +there is not, it fails the match immediately. However, when there is no +following literal this optimization cannot be used. You can see the difference +by comparing the behaviour of +.sp + (a+)*\ed +.sp +with the pattern above. The former gives a failure almost instantly when +applied to a whole line of "a" characters, whereas the latter takes an +appreciable time with strings longer than about 20 characters. 
+.P +In many cases, the solution to this kind of performance issue is to use an +atomic group or a possessive quantifier. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 25 August 2012 +Copyright (c) 1997-2012 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcreposix.3 b/usr/share/man/man3/pcreposix.3 new file mode 100755 index 000000000..77890f36b --- /dev/null +++ b/usr/share/man/man3/pcreposix.3 @@ -0,0 +1,267 @@ +.TH PCREPOSIX 3 "09 January 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions. +.SH "SYNOPSIS" +.rs +.sp +.B #include <pcreposix.h> +.PP +.nf +.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP, +.B " int \fIcflags\fP);" +.sp +.B int regexec(regex_t *\fIpreg\fP, const char *\fIstring\fP, +.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);" +.B " size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP," +.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);" +.sp +.B void regfree(regex_t *\fIpreg\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This set of functions provides a POSIX-style API for the PCRE regular +expression 8-bit library. See the +.\" HREF +\fBpcreapi\fP +.\" +documentation for a description of PCRE's native API, which contains much +additional functionality. There is no POSIX-style wrapper for PCRE's 16-bit +and 32-bit library. +.P +The functions described here are just wrapper functions that ultimately call +the PCRE native API. Their prototypes are defined in the \fBpcreposix.h\fP +header file, and on Unix systems the library itself is called +\fBpcreposix.a\fP, so can be accessed by adding \fB-lpcreposix\fP to the +command for linking an application that uses them. Because the POSIX functions +call the native ones, it is also necessary to add \fB-lpcre\fP. 
+.P +I have implemented only those POSIX option bits that can be reasonably mapped +to PCRE native options. In addition, the option REG_EXTENDED is defined with +the value zero. This has no effect, but since programs that are written to the +POSIX interface often use it, this makes it easier to slot in PCRE as a +replacement library. Other POSIX options are not even defined. +.P +There are also some other options that are not defined by POSIX. These have +been added at the request of users who want to make use of certain +PCRE-specific features via the POSIX calling interface. +.P +When PCRE is called via these functions, it is only the API that is POSIX-like +in style. The syntax and semantics of the regular expressions themselves are +still those of Perl, subject to the setting of various PCRE options, as +described below. "POSIX-like in style" means that the API approximates to the +POSIX definition; it is not fully POSIX-compatible, and in multi-byte encoding +domains it is probably even less compatible. +.P +The header for these functions is supplied as \fBpcreposix.h\fP to avoid any +potential clash with other POSIX libraries. It can, of course, be renamed or +aliased as \fBregex.h\fP, which is the "correct" name. It provides two +structure types, \fIregex_t\fP for compiled internal forms, and +\fIregmatch_t\fP for returning captured substrings. It also defines some +constants whose names start with "REG_"; these are used for setting options and +identifying error codes. +. +. +.SH "COMPILING A PATTERN" +.rs +.sp +The function \fBregcomp()\fP is called to compile a pattern into an +internal form. The pattern is a C string terminated by a binary zero, and +is passed in the argument \fIpattern\fP. The \fIpreg\fP argument is a pointer +to a \fBregex_t\fP structure that is used as a base for storing information +about the compiled regular expression. 
+.P +The argument \fIcflags\fP is either zero, or contains one or more of the bits +defined by the following macros: +.sp + REG_DOTALL +.sp +The PCRE_DOTALL option is set when the regular expression is passed for +compilation to the native function. Note that REG_DOTALL is not part of the +POSIX standard. +.sp + REG_ICASE +.sp +The PCRE_CASELESS option is set when the regular expression is passed for +compilation to the native function. +.sp + REG_NEWLINE +.sp +The PCRE_MULTILINE option is set when the regular expression is passed for +compilation to the native function. Note that this does \fInot\fP mimic the +defined POSIX behaviour for REG_NEWLINE (see the following section). +.sp + REG_NOSUB +.sp +The PCRE_NO_AUTO_CAPTURE option is set when the regular expression is passed +for compilation to the native function. In addition, when a pattern that is +compiled with this flag is passed to \fBregexec()\fP for matching, the +\fInmatch\fP and \fIpmatch\fP arguments are ignored, and no captured strings +are returned. +.sp + REG_UCP +.sp +The PCRE_UCP option is set when the regular expression is passed for +compilation to the native function. This causes PCRE to use Unicode properties +when matching \ed, \ew, etc., instead of just recognizing ASCII values. Note +that REG_UCP is not part of the POSIX standard. +.sp + REG_UNGREEDY +.sp +The PCRE_UNGREEDY option is set when the regular expression is passed for +compilation to the native function. Note that REG_UNGREEDY is not part of the +POSIX standard. +.sp + REG_UTF8 +.sp +The PCRE_UTF8 option is set when the regular expression is passed for +compilation to the native function. This causes the pattern itself and all data +strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF8 +is not part of the POSIX standard. +.P +In the absence of these flags, no options are passed to the native function. +This means that the regex is compiled with PCRE default semantics.
In +particular, the way it handles newline characters in the subject string is the +Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only +\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way +newlines are matched by . (they are not) or by a negative class such as [^a] +(they are). +.P +The yield of \fBregcomp()\fP is zero on success, and non-zero otherwise. The +\fIpreg\fP structure is filled in on success, and one member of the structure +is public: \fIre_nsub\fP contains the number of capturing subpatterns in +the regular expression. Various error codes are defined in the header file. +.P +NOTE: If the yield of \fBregcomp()\fP is non-zero, you must not attempt to +use the contents of the \fIpreg\fP structure. If, for example, you pass it to +\fBregexec()\fP, the result is undefined and your program is likely to crash. +. +. +.SH "MATCHING NEWLINE CHARACTERS" +.rs +.sp +This area is not simple, because POSIX and Perl take different views of things. +It is not possible to get PCRE to obey POSIX semantics, but then PCRE was never +intended to be a POSIX engine. The following table lists the different +possibilities for matching newline characters in PCRE: +.sp + Default Change with +.sp + . matches newline no PCRE_DOTALL + newline matches [^a] yes not changeable + $ matches \en at end yes PCRE_DOLLAR_ENDONLY + $ matches \en in middle no PCRE_MULTILINE + ^ matches \en in middle no PCRE_MULTILINE +.sp +This is the equivalent table for POSIX: +.sp + Default Change with +.sp + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \en at end no REG_NEWLINE + $ matches \en in middle no REG_NEWLINE + ^ matches \en in middle no REG_NEWLINE +.sp +PCRE's behaviour is the same as Perl's, except that there is no equivalent for +PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way to stop +newline from matching [^a].
+.P +The default POSIX newline handling can be obtained by setting PCRE_DOTALL and +PCRE_DOLLAR_ENDONLY, but there is no way to make PCRE behave exactly as for the +REG_NEWLINE action. +. +. +.SH "MATCHING A PATTERN" +.rs +.sp +The function \fBregexec()\fP is called to match a compiled pattern \fIpreg\fP +against a given \fIstring\fP, which is by default terminated by a zero byte +(but see REG_STARTEND below), subject to the options in \fIeflags\fP. These can +be: +.sp + REG_NOTBOL +.sp +The PCRE_NOTBOL option is set when calling the underlying PCRE matching +function. +.sp + REG_NOTEMPTY +.sp +The PCRE_NOTEMPTY option is set when calling the underlying PCRE matching +function. Note that REG_NOTEMPTY is not part of the POSIX standard. However, +setting this option can give more POSIX-like behaviour in some situations. +.sp + REG_NOTEOL +.sp +The PCRE_NOTEOL option is set when calling the underlying PCRE matching +function. +.sp + REG_STARTEND +.sp +The string is considered to start at \fIstring\fP + \fIpmatch[0].rm_so\fP and +to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP +(there need not actually be a NUL at that location), regardless of the value of +\fInmatch\fP. This is a BSD extension, compatible with but not specified by +IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software +intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does +not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not +how it is matched. +.P +If the pattern was compiled with the REG_NOSUB flag, no data about any matched +strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of +\fBregexec()\fP are ignored. +.P +If the value of \fInmatch\fP is zero, or if the value \fIpmatch\fP is NULL, +no data about any matched strings is returned. 
+.P +Otherwise, the portion of the string that was matched, and also any captured +substrings, are returned via the \fIpmatch\fP argument, which points to an +array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the +members \fIrm_so\fP and \fIrm_eo\fP. These contain the offset to the first +character of each substring and the offset to the first character after the end +of each substring, respectively. The 0th element of the vector relates to the +entire portion of \fIstring\fP that was matched; subsequent elements relate to +the capturing subpatterns of the regular expression. Unused entries in the +array have both structure members set to -1. +.P +A successful match yields a zero return; various error codes are defined in the +header file, of which REG_NOMATCH is the "expected" failure code. +. +. +.SH "ERROR MESSAGES" +.rs +.sp +The \fBregerror()\fP function maps a non-zero errorcode from either +\fBregcomp()\fP or \fBregexec()\fP to a printable message. If \fIpreg\fP is not +NULL, the error should have arisen from the use of that structure. A message +terminated by a binary zero is placed in \fIerrbuf\fP. The length of the +message, including the zero, is limited to \fIerrbuf_size\fP. The yield of the +function is the size of buffer needed to hold the whole message. +. +. +.SH MEMORY USAGE +.rs +.sp +Compiling a regular expression causes memory to be allocated and associated +with the \fIpreg\fP structure. The function \fBregfree()\fP frees all such +memory, after which \fIpreg\fP may no longer be used as a compiled expression. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 09 January 2012 +Copyright (c) 1997-2012 University of Cambridge.
+.fi diff --git a/usr/share/man/man3/pcreprecompile.3 b/usr/share/man/man3/pcreprecompile.3 new file mode 100755 index 000000000..40f257a98 --- /dev/null +++ b/usr/share/man/man3/pcreprecompile.3 @@ -0,0 +1,155 @@ +.TH PCREPRECOMPILE 3 "12 November 2013" "PCRE 8.34" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS" +.rs +.sp +If you are running an application that uses a large number of regular +expression patterns, it may be useful to store them in a precompiled form +instead of having to compile them every time the application is run. +If you are not using any private character tables (see the +.\" HREF +\fBpcre_maketables()\fP +.\" +documentation), this is relatively straightforward. If you are using private +tables, it is a little bit more complicated. However, if you are using the +just-in-time optimization feature, it is not possible to save and reload the +JIT data. +.P +If you save compiled patterns to a file, you can copy them to a different host +and run them there. If the two hosts have different endianness (byte order), +you should run the \fBpcre[16|32]_pattern_to_host_byte_order()\fP function on the +new host before trying to match the pattern. The matching functions return +PCRE_ERROR_BADENDIANNESS if they detect a pattern with the wrong endianness. +.P +Compiling regular expressions with one version of PCRE for use with a different +version is not guaranteed to work and may cause crashes, and saving and +restoring a compiled pattern loses any JIT optimization data. +. +. +.SH "SAVING A COMPILED PATTERN" +.rs +.sp +The value returned by \fBpcre[16|32]_compile()\fP points to a single block of +memory that holds the compiled pattern and associated data. You can find the +length of this block in bytes by calling \fBpcre[16|32]_fullinfo()\fP with an +argument of PCRE_INFO_SIZE. You can then save the data in any appropriate +manner. 
Here is sample code for the 8-bit library that compiles a pattern and +writes it to a file. It assumes that the variable \fIfd\fP refers to a file +that is open for output: +.sp + int erroroffset, rc, size; + char *error; + pcre *re; +.sp + re = pcre_compile("my pattern", 0, &error, &erroroffset, NULL); + if (re == NULL) { ... handle errors ... } + rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size); + if (rc < 0) { ... handle errors ... } + rc = fwrite(re, 1, size, fd); + if (rc != size) { ... handle errors ... } +.sp +In this example, the bytes that comprise the compiled pattern are copied +exactly. Note that this is binary data that may contain any of the 256 possible +byte values. On systems that make a distinction between binary and non-binary +data, be sure that the file is opened for binary output. +.P +If you want to write more than one pattern to a file, you will have to devise a +way of separating them. For binary data, preceding each pattern with its length +is probably the most straightforward approach. Another possibility is to write +out the data in hexadecimal instead of binary, one pattern to a line. +.P +Saving compiled patterns in a file is only one possible way of storing them for +later use. They could equally well be saved in a database, or in the memory of +some daemon process that passes them via sockets to the processes that want +them. +.P +If the pattern has been studied, it is also possible to save the normal study +data in a similar way to the compiled pattern itself. However, if the +PCRE_STUDY_JIT_COMPILE was used, the just-in-time data that is created cannot +be saved because it is too dependent on the current environment. When studying +generates additional information, \fBpcre[16|32]_study()\fP returns a pointer to a +\fBpcre[16|32]_extra\fP data block. Its format is defined in the +.\" HTML <a href="pcreapi.html#extradata"> +.\" </a> +section on matching a pattern +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. 
The \fIstudy_data\fP field points to the binary study data, and +this is what you must save (not the \fBpcre[16|32]_extra\fP block itself). The +length of the study data can be obtained by calling \fBpcre[16|32]_fullinfo()\fP +with an argument of PCRE_INFO_STUDYSIZE. Remember to check that +\fBpcre[16|32]_study()\fP did return a non-NULL value before trying to save the +study data. +. +. +.SH "RE-USING A PRECOMPILED PATTERN" +.rs +.sp +Re-using a precompiled pattern is straightforward. Having reloaded it into main +memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary, you +pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in +the usual way. +.P +However, if you passed a pointer to custom character tables when the pattern +was compiled (the \fItableptr\fP argument of \fBpcre[16|32]_compile()\fP), you +must now pass a similar pointer to \fBpcre[16|32]_exec()\fP or +\fBpcre[16|32]_dfa_exec()\fP, because the value saved with the compiled pattern +will obviously be nonsense. A field in a \fBpcre[16|32]_extra()\fP block is used +to pass this data, as described in the +.\" HTML <a href="pcreapi.html#extradata"> +.\" </a> +section on matching a pattern +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +.P +\fBWarning:\fP The tables that \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP use +must be the same as those that were used when the pattern was compiled. If this +is not the case, the behaviour is undefined. +.P +If you did not provide custom character tables when the pattern was compiled, +the pointer in the compiled pattern is NULL, which causes the matching +functions to use PCRE's internal tables. Thus, you do not need to take any +special action at run time in this case. +.P +If you saved study data with the compiled pattern, you need to create your own +\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point +to the reloaded study data. 
You must also set the PCRE_EXTRA_STUDY_DATA bit in +the \fIflags\fP field to indicate that study data is present. Then pass the +\fBpcre[16|32]_extra\fP block to the matching function in the usual way. If the +pattern was studied for just-in-time optimization, that data cannot be saved, +and so is lost by a save/restore cycle. +. +. +.SH "COMPATIBILITY WITH DIFFERENT PCRE RELEASES" +.rs +.sp +In general, it is safest to recompile all saved patterns when you update to a +new PCRE release, though not all updates actually require this. +. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 November 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcresample.3 b/usr/share/man/man3/pcresample.3 new file mode 100755 index 000000000..d7fe7ec54 --- /dev/null +++ b/usr/share/man/man3/pcresample.3 @@ -0,0 +1,99 @@ +.TH PCRESAMPLE 3 "10 January 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE SAMPLE PROGRAM" +.rs +.sp +A simple, complete demonstration program, to get you started with using PCRE, +is supplied in the file \fIpcredemo.c\fP in the PCRE distribution. A listing of +this program is given in the +.\" HREF +\fBpcredemo\fP +.\" +documentation. If you do not have a copy of the PCRE distribution, you can save +this listing to re-create \fIpcredemo.c\fP. +.P +The demonstration program, which uses the original PCRE 8-bit library, compiles +the regular expression that is its first argument, and matches it against the +subject string in its second argument. No PCRE options are set, and default +character tables are used. If matching succeeds, the program outputs the +portion of the subject that matched, together with the contents of any captured +substrings. 
+.P +If the -g option is given on the command line, the program then goes on to +check for further matches of the same regular expression in the same subject +string. The logic is a little bit tricky because of the possibility of matching +an empty string. Comments in the code explain what is going on. +.P +If PCRE is installed in the standard include and library directories for your +operating system, you should be able to compile the demonstration program using +this command: +.sp + gcc -o pcredemo pcredemo.c -lpcre +.sp +If PCRE is installed elsewhere, you may need to add additional options to the +command line. For example, on a Unix-like system that has PCRE installed in +\fI/usr/local\fP, you can compile the demonstration program using a command +like this: +.sp +.\" JOINSH + gcc -o pcredemo -I/usr/local/include pcredemo.c \e + -L/usr/local/lib -lpcre +.sp +In a Windows environment, if you want to statically link the program against a +non-dll \fBpcre.a\fP file, you must uncomment the line that defines PCRE_STATIC +before including \fBpcre.h\fP, because otherwise the \fBpcre_malloc()\fP and +\fBpcre_free()\fP exported functions will be declared +\fB__declspec(dllimport)\fP, with unwanted results. +.P +Once you have compiled and linked the demonstration program, you can run simple +tests like this: +.sp + ./pcredemo 'cat|dog' 'the cat sat on the mat' + ./pcredemo -g 'cat|dog' 'the dog sat on the cat' +.sp +Note that there is a much more comprehensive test program, called +.\" HREF +\fBpcretest\fP, +.\" +which supports many more facilities for testing regular expressions and both +PCRE libraries. The +.\" HREF +\fBpcredemo\fP +.\" +program is provided as a simple coding example. +.P +If you try to run +.\" HREF +\fBpcredemo\fP +.\" +when PCRE is not installed in the standard library directory, you may get an +error like this on some operating systems (e.g. 
Solaris): +.sp + ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory +.sp +This is caused by the way shared library support works on those systems. You +need to add +.sp + -R/usr/local/lib +.sp +(for example) to the compile command to get round this problem. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 10 January 2012 +Copyright (c) 1997-2012 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcrestack.3 b/usr/share/man/man3/pcrestack.3 new file mode 100755 index 000000000..798f0bca6 --- /dev/null +++ b/usr/share/man/man3/pcrestack.3 @@ -0,0 +1,215 @@ +.TH PCRESTACK 3 "24 June 2012" "PCRE 8.30" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE DISCUSSION OF STACK USAGE" +.rs +.sp +When you call \fBpcre[16|32]_exec()\fP, it makes use of an internal function +called \fBmatch()\fP. This calls itself recursively at branch points in the +pattern, in order to remember the state of the match so that it can back up and +try a different alternative if the first one fails. As matching proceeds deeper +and deeper into the tree of possibilities, the recursion depth increases. The +\fBmatch()\fP function is also called in other circumstances, for example, +whenever a parenthesized sub-pattern is entered, and in certain cases of +repetition. +.P +Not all calls of \fBmatch()\fP increase the recursion depth; for an item such +as a* it may be called several times at the same level, after matching +different numbers of a's. Furthermore, in a number of cases where the result of +the recursive call would immediately be passed back as the result of the +current call (a "tail recursion"), the function is just restarted instead. +.P +The above comments apply when \fBpcre[16|32]_exec()\fP is run in its normal +interpretive manner. 
If the pattern was studied with the +PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling was successful, and +the options passed to \fBpcre[16|32]_exec()\fP were not incompatible, the matching +process uses the JIT-compiled code instead of the \fBmatch()\fP function. In +this case, the memory requirements are handled entirely differently. See the +.\" HREF +\fBpcrejit\fP +.\" +documentation for details. +.P +The \fBpcre[16|32]_dfa_exec()\fP function operates in an entirely different way, +and uses recursion only when there is a regular expression recursion or +subroutine call in the pattern. This includes the processing of assertion and +"once-only" subpatterns, which are handled like subroutine calls. Normally, +these are never very deep, and the limit on the complexity of +\fBpcre[16|32]_dfa_exec()\fP is controlled by the amount of workspace it is given. +However, it is possible to write patterns with runaway infinite recursions; +such patterns will cause \fBpcre[16|32]_dfa_exec()\fP to run out of stack. At +present, there is no protection against this. +.P +The comments that follow do NOT apply to \fBpcre[16|32]_dfa_exec()\fP; they are +relevant only for \fBpcre[16|32]_exec()\fP without the JIT optimization. +. +. +.SS "Reducing \fBpcre[16|32]_exec()\fP's stack usage" +.rs +.sp +Each time that \fBmatch()\fP is actually called recursively, it uses memory +from the process stack. For certain kinds of pattern and data, very large +amounts of stack may be needed, despite the recognition of "tail recursion". +You can often reduce the amount of recursion, and therefore the amount of stack +used, by modifying the pattern that is being matched. Consider, for example, +this pattern: +.sp + ([^<]|<(?!inet))+ +.sp +It matches from wherever it starts until it encounters "<inet" or the end of +the data, and is the kind of pattern that might be used when processing an XML +file. 
Each iteration of the outer parentheses matches either one character that +is not "<" or a "<" that is not followed by "inet". However, each time a +parenthesis is processed, a recursion occurs, so this formulation uses a stack +frame for each matched character. For a long string, a lot of stack is +required. Consider now this rewritten pattern, which matches exactly the same +strings: +.sp + ([^<]++|<(?!inet))+ +.sp +This uses very much less stack, because runs of characters that do not contain +"<" are "swallowed" in one item inside the parentheses. Recursion happens only +when a "<" character that is not followed by "inet" is encountered (and we +assume this is relatively rare). A possessive quantifier is used to stop any +backtracking into the runs of non-"<" characters, but that is not related to +stack usage. +.P +This example shows that one way of avoiding stack problems when matching long +subject strings is to write repeated parenthesized subpatterns to match more +than one character whenever possible. +. +. +.SS "Compiling PCRE to use heap instead of stack for \fBpcre[16|32]_exec()\fP" +.rs +.sp +In environments where stack memory is constrained, you might want to compile +PCRE to use heap memory instead of stack for remembering back-up points when +\fBpcre[16|32]_exec()\fP is running. This makes it run a lot more slowly, however. +Details of how to do this are given in the +.\" HREF +\fBpcrebuild\fP +.\" +documentation. When built in this way, instead of using the stack, PCRE obtains +and frees memory by calling the functions that are pointed to by the +\fBpcre[16|32]_stack_malloc\fP and \fBpcre[16|32]_stack_free\fP variables. By +default, these point to \fBmalloc()\fP and \fBfree()\fP, but you can replace +the pointers to cause PCRE to use your own functions. Since the block sizes are +always the same, and are always freed in reverse order, it may be possible to +implement customized memory handlers that are more efficient than the standard +functions. 
+. +. +.SS "Limiting \fBpcre[16|32]_exec()\fP's stack usage" +.rs +.sp +You can set limits on the number of times that \fBmatch()\fP is called, both in +total and recursively. If a limit is exceeded, \fBpcre[16|32]_exec()\fP returns an +error code. Setting suitable limits should prevent it from running out of +stack. The default values of the limits are very large, and unlikely ever to +operate. They can be changed when PCRE is built, and they can also be set when +\fBpcre[16|32]_exec()\fP is called. For details of these interfaces, see the +.\" HREF +\fBpcrebuild\fP +.\" +documentation and the +.\" HTML <a href="pcreapi.html#extradata"> +.\" </a> +section on extra data for \fBpcre[16|32]_exec()\fP +.\" +in the +.\" HREF +\fBpcreapi\fP +.\" +documentation. +.P +As a very rough rule of thumb, you should reckon on about 500 bytes per +recursion. Thus, if you want to limit your stack usage to 8Mb, you should set +the limit at 16000 recursions. A 64Mb stack, on the other hand, can support +around 128000 recursions. +.P +In Unix-like environments, the \fBpcretest\fP test program has a command line +option (\fB-S\fP) that can be used to increase the size of its stack. As long +as the stack is large enough, another option (\fB-M\fP) can be used to find the +smallest limits that allow a particular pattern to match a given subject +string. This is done by calling \fBpcre[16|32]_exec()\fP repeatedly with different +limits. +. +. +.SS "Obtaining an estimate of stack usage" +.rs +.sp +The actual amount of stack used per recursion can vary quite a lot, depending +on the compiler that was used to build PCRE and the optimization or debugging +options that were set for it. The rule of thumb value of 500 bytes mentioned +above may be larger or smaller than what is actually needed. 
A better +approximation can be obtained by running this command: +.sp + pcretest -m -C +.sp +The \fB-C\fP option causes \fBpcretest\fP to output information about the +options with which PCRE was compiled. When \fB-m\fP is also given (before +\fB-C\fP), information about stack use is given in a line like this: +.sp + Match recursion uses stack: approximate frame size = 640 bytes +.sp +The value is approximate because some recursions need a bit more (up to perhaps +16 more bytes). +.P +If the above command is given when PCRE is compiled to use the heap instead of +the stack for recursion, the value that is output is the size of each block +that is obtained from the heap. +. +. +.SS "Changing stack size in Unix-like systems" +.rs +.sp +In Unix-like environments, there is not often a problem with the stack unless +very long strings are involved, though the default limit on stack size varies +from system to system. Values from 8Mb to 64Mb are common. You can find your +default limit by running the command: +.sp + ulimit -s +.sp +Unfortunately, the effect of running out of stack is often SIGSEGV, though +sometimes a more explicit error message is given. You can normally increase the +limit on stack size by code such as this: +.sp + struct rlimit rlim; + getrlimit(RLIMIT_STACK, &rlim); + rlim.rlim_cur = 100*1024*1024; + setrlimit(RLIMIT_STACK, &rlim); +.sp +This reads the current limits (soft and hard) using \fBgetrlimit()\fP, then +attempts to increase the soft limit to 100Mb using \fBsetrlimit()\fP. You must +do this before calling \fBpcre[16|32]_exec()\fP. +. +. +.SS "Changing stack size in Mac OS X" +.rs +.sp +Using \fBsetrlimit()\fP, as described above, should also work on Mac OS X. It +is also possible to set a stack size when linking a program. There is a +discussion about stack sizes in Mac OS X at this web site: +.\" HTML <a href="http://developer.apple.com/qa/qa2005/qa1419.html"> +.\" </a> +http://developer.apple.com/qa/qa2005/qa1419.html. +.\" +. +. 
+.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 24 June 2012 +Copyright (c) 1997-2012 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcresyntax.3 b/usr/share/man/man3/pcresyntax.3 new file mode 100755 index 000000000..fd878da4f --- /dev/null +++ b/usr/share/man/man3/pcresyntax.3 @@ -0,0 +1,517 @@ +.TH PCRESYNTAX 3 "08 January 2014" "PCRE 8.35" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY" +.rs +.sp +The full syntax and semantics of the regular expressions that are supported by +PCRE are described in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. This document contains a quick-reference summary of the syntax. +. +. +.SH "QUOTING" +.rs +.sp + \ex where x is non-alphanumeric is a literal x + \eQ...\eE treat enclosed characters as literal +. +. +.SH "CHARACTERS" +.rs +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is any ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en newline (hex 0A) + \er carriage return (hex 0D) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or backreference + \eo{ddd..} character with octal code ddd.. + \exhh character with hex code hh + \ex{hhh..} character with hex code hhh.. +.sp +Note that \e0dd is always an octal code, and that \e8 and \e9 are the literal +characters "8" and "9". +. +. +.SH "CHARACTER TYPES" +.rs +.sp + . 
any character except newline; + in dotall mode, any character whatsoever + \eC one data unit, even in UTF mode (best avoided) + \ed a decimal digit + \eD a character that is not a decimal digit + \eh a horizontal white space character + \eH a character that is not a horizontal white space character + \eN a character that is not a newline + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eR a newline sequence + \es a white space character + \eS a character that is not a white space character + \ev a vertical white space character + \eV a character that is not a vertical white space character + \ew a "word" character + \eW a "non-word" character + \eX a Unicode extended grapheme cluster +.sp +By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode +or in the 16-bit and 32-bit libraries. However, if locale-specific matching is +happening, \es and \ew may also match characters with code points in the range +128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences +is changed to use Unicode properties and they match many more characters. +. +. +.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + L& Ll, Lu, or Lt +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +. +. 
+.SH "PCRE SPECIAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + Xan Alphanumeric: union of properties L and N + Xps POSIX space: property Z or tab, NL, VT, FF, CR + Xsp Perl space: property Z or tab, NL, VT, FF, CR + Xuc Universally-named character: one that can be + represented by a Universal Character Name + Xwd Perl word: property Xan or underscore +.sp +Perl and POSIX space are now the same. Perl added VT to its space character set +at release 5.18 and PCRE changed at release 8.34. +. +. +.SH "SCRIPT NAMES FOR \ep AND \eP" +.rs +.sp +Arabic, +Armenian, +Avestan, +Balinese, +Bamum, +Batak, +Bengali, +Bopomofo, +Brahmi, +Braille, +Buginese, +Buhid, +Canadian_Aboriginal, +Carian, +Chakma, +Cham, +Cherokee, +Common, +Coptic, +Cuneiform, +Cypriot, +Cyrillic, +Deseret, +Devanagari, +Egyptian_Hieroglyphs, +Ethiopic, +Georgian, +Glagolitic, +Gothic, +Greek, +Gujarati, +Gurmukhi, +Han, +Hangul, +Hanunoo, +Hebrew, +Hiragana, +Imperial_Aramaic, +Inherited, +Inscriptional_Pahlavi, +Inscriptional_Parthian, +Javanese, +Kaithi, +Kannada, +Katakana, +Kayah_Li, +Kharoshthi, +Khmer, +Lao, +Latin, +Lepcha, +Limbu, +Linear_B, +Lisu, +Lycian, +Lydian, +Malayalam, +Mandaic, +Meetei_Mayek, +Meroitic_Cursive, +Meroitic_Hieroglyphs, +Miao, +Mongolian, +Myanmar, +New_Tai_Lue, +Nko, +Ogham, +Old_Italic, +Old_Persian, +Old_South_Arabian, +Old_Turkic, +Ol_Chiki, +Oriya, +Osmanya, +Phags_Pa, +Phoenician, +Rejang, +Runic, +Samaritan, +Saurashtra, +Sharada, +Shavian, +Sinhala, +Sora_Sompeng, +Sundanese, +Syloti_Nagri, +Syriac, +Tagalog, +Tagbanwa, +Tai_Le, +Tai_Tham, +Tai_Viet, +Takri, +Tamil, +Telugu, +Thaana, +Thai, +Tibetan, +Tifinagh, +Ugaritic, +Vai, +Yi. +. +. +.SH "CHARACTER CLASSES" +.rs +.sp + [...] positive character class + [^...] 
negative character class + [x-y] range (can be used for hex characters) + [[:xxx:]] positive POSIX named set + [[:^xxx:]] negative POSIX named set +.sp + alnum alphanumeric + alpha alphabetic + ascii 0-127 + blank space or tab + cntrl control character + digit decimal digit + graph printing, excluding space + lower lower case letter + print printing, including space + punct printing, excluding alphanumeric + space white space + upper upper case letter + word same as \ew + xdigit hexadecimal digit +.sp +In PCRE, POSIX character set names recognize only ASCII characters by default, +but some of them use Unicode properties if PCRE_UCP is set. You can use +\eQ...\eE inside a character class. +. +. +.SH "QUANTIFIERS" +.rs +.sp + ? 0 or 1, greedy + ?+ 0 or 1, possessive + ?? 0 or 1, lazy + * 0 or more, greedy + *+ 0 or more, possessive + *? 0 or more, lazy + + 1 or more, greedy + ++ 1 or more, possessive + +? 1 or more, lazy + {n} exactly n + {n,m} at least n, no more than m, greedy + {n,m}+ at least n, no more than m, possessive + {n,m}? at least n, no more than m, lazy + {n,} n or more, greedy + {n,}+ n or more, possessive + {n,}? n or more, lazy +. +. +.SH "ANCHORS AND SIMPLE ASSERTIONS" +.rs +.sp + \eb word boundary + \eB not a word boundary + ^ start of subject + also after internal newline in multiline mode + \eA start of subject + $ end of subject + also before newline at end of subject + also before internal newline in multiline mode + \eZ end of subject + also before newline at end of subject + \ez end of subject + \eG first matching position in subject +. +. +.SH "MATCH POINT RESET" +.rs +.sp + \eK reset start of match +.sp +\eK is honoured in positive assertions, but ignored in negative ones. +. +. +.SH "ALTERNATION" +.rs +.sp + expr|expr|expr... +. +. +.SH "CAPTURING" +.rs +.sp + (...) capturing group + (?<name>...) named capturing group (Perl) + (?'name'...) named capturing group (Perl) + (?P<name>...) named capturing group (Python) + (?:...) 
non-capturing group + (?|...) non-capturing group; reset group numbers for + capturing groups in each alternative +. +. +.SH "ATOMIC GROUPS" +.rs +.sp + (?>...) atomic, non-capturing group +. +. +. +. +.SH "COMMENT" +.rs +.sp + (?#....) comment (not nestable) +. +. +.SH "OPTION SETTING" +.rs +.sp + (?i) caseless + (?J) allow duplicate names + (?m) multiline + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) extended (ignore white space) + (?-...) unset option(s) +.sp +The following are recognized only at the very start of a pattern or after one +of the newline or \eR options with similar syntax. More than one of them may +appear. +.sp + (*LIMIT_MATCH=d) set the match limit to d (decimal number) + (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) + (*NO_AUTO_POSSESS) no auto-possessification (PCRE_NO_AUTO_POSSESS) + (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE) + (*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8) + (*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16) + (*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32) + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE_UCP (use Unicode properties for \ed etc) +.sp +Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the +limits set by the caller of pcre_exec(), not increase them. +. +. +.SH "NEWLINE CONVENTION" +.rs +.sp +These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +.sp + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +These are recognized only at the very start of the pattern or after option +setting with a similar syntax. +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence +. +. +.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS" +.rs +.sp + (?=...) 
positive look ahead + (?!...) negative look ahead + (?<=...) positive look behind + (?<!...) negative look behind +.sp +Each top-level branch of a look behind must be of a fixed length. +. +. +.SH "BACKREFERENCES" +.rs +.sp + \en reference by number (can be ambiguous) + \egn reference by number + \eg{n} reference by number + \eg{-n} relative reference by number + \ek<name> reference by name (Perl) + \ek'name' reference by name (Perl) + \eg{name} reference by name (Perl) + \ek{name} reference by name (.NET) + (?P=name) reference by name (Python) +. +. +.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)" +.rs +.sp + (?R) recurse whole pattern + (?n) call subpattern by absolute number + (?+n) call subpattern by relative number + (?-n) call subpattern by relative number + (?&name) call subpattern by name (Perl) + (?P>name) call subpattern by name (Python) + \eg<name> call subpattern by name (Oniguruma) + \eg'name' call subpattern by name (Oniguruma) + \eg<n> call subpattern by absolute number (Oniguruma) + \eg'n' call subpattern by absolute number (Oniguruma) + \eg<+n> call subpattern by relative number (PCRE extension) + \eg'+n' call subpattern by relative number (PCRE extension) + \eg<-n> call subpattern by relative number (PCRE extension) + \eg'-n' call subpattern by relative number (PCRE extension) +. +. +.SH "CONDITIONAL PATTERNS" +.rs +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp + (?(n)... absolute reference condition + (?(+n)... relative reference condition + (?(-n)... relative reference condition + (?(<name>)... named reference condition (Perl) + (?('name')... named reference condition (Perl) + (?(name)... named reference condition (PCRE) + (?(R)... overall recursion condition + (?(Rn)... specific group recursion condition + (?(R&name)... specific recursion condition + (?(DEFINE)... define subpattern for reference + (?(assert)... assertion condition +. +. 
+.SH "BACKTRACKING CONTROL" +.rs +.sp +The following act immediately they are reached: +.sp + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) +.sp +The following act only when a subsequent match failure causes a backtrack to +reach them. They all force a match failure, but they differ in what happens +afterwards. Those that advance the start-of-match point do so only if the +pattern is not anchored. +.sp + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE) + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation + (*THEN:NAME) equivalent to (*MARK:NAME)(*THEN) +. +. +.SH "CALLOUTS" +.rs +.sp + (?C) callout + (?Cn) callout with data n +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcrepattern\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3), +\fBpcrematching\fP(3), \fBpcre\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 08 January 2014 +Copyright (c) 1997-2014 University of Cambridge. +.fi diff --git a/usr/share/man/man3/pcreunicode.3 b/usr/share/man/man3/pcreunicode.3 new file mode 100755 index 000000000..cb5e5269a --- /dev/null +++ b/usr/share/man/man3/pcreunicode.3 @@ -0,0 +1,249 @@ +.TH PCREUNICODE 3 "27 February 2013" "PCRE 8.33" +.SH NAME +PCRE - Perl-compatible regular expressions +.SH "UTF-8, UTF-16, UTF-32, AND UNICODE PROPERTY SUPPORT" +.rs +.sp +As well as UTF-8 support, PCRE also supports UTF-16 (from release 8.30) and +UTF-32 (from release 8.32), by means of two additional libraries. They can be +built as well as, or instead of, the 8-bit library. +. +. 
+.SH "UTF-8 SUPPORT" +.rs +.sp +In order to process UTF-8 strings, you must build PCRE's 8-bit library with UTF +support, and, in addition, you must call +.\" HREF +\fBpcre_compile()\fP +.\" +with the PCRE_UTF8 option flag, or the pattern must start with the sequence +(*UTF8) or (*UTF). When either of these is the case, both the pattern and any +subject strings that are matched against it are treated as UTF-8 strings +instead of strings of individual 1-byte characters. +. +. +.SH "UTF-16 AND UTF-32 SUPPORT" +.rs +.sp +In order to process UTF-16 or UTF-32 strings, you must build PCRE's 16-bit or +32-bit library with UTF support, and, in addition, you must call +.\" HREF +\fBpcre16_compile()\fP +.\" +or +.\" HREF +\fBpcre32_compile()\fP +.\" +with the PCRE_UTF16 or PCRE_UTF32 option flag, as appropriate. Alternatively, +the pattern must start with the sequence (*UTF16), (*UTF32), as appropriate, or +(*UTF), which can be used with either library. When UTF mode is set, both the +pattern and any subject strings that are matched against it are treated as +UTF-16 or UTF-32 strings instead of strings of individual 16-bit or 32-bit +characters. +. +. +.SH "UTF SUPPORT OVERHEAD" +.rs +.sp +If you compile PCRE with UTF support, but do not use it at run time, the +library will be a bit bigger, but the additional run time overhead is limited +to testing the PCRE_UTF[8|16|32] flag occasionally, so should not be very big. +. +. +.SH "UNICODE PROPERTY SUPPORT" +.rs +.sp +If PCRE is built with Unicode character property support (which implies UTF +support), the escape sequences \ep{..}, \eP{..}, and \eX can be used. +The available properties that can be tested are limited to the general +category properties such as Lu for an upper case letter or Nd for a decimal +number, the Unicode script names such as Arabic or Han, and the derived +properties Any and L&. Full lists are given in the +.\" HREF +\fBpcrepattern\fP +.\" +and +.\" HREF +\fBpcresyntax\fP +.\" +documentation. 
Only the short names for properties are supported. For example, +\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported. +Furthermore, in Perl, many properties may optionally be prefixed by "Is", for +compatibility with Perl 5.6. PCRE does not support this. +. +. +.\" HTML <a name="utf8strings"></a> +.SS "Validity of UTF-8 strings" +.rs +.sp +When you set the PCRE_UTF8 flag, the byte strings passed as patterns and +subjects are (by default) checked for validity on entry to the relevant +functions. The entire string is checked before any other processing takes +place. From release 7.3 of PCRE, the check is according to the rules of RFC 3629, +which are themselves derived from the Unicode specification. Earlier releases +of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit +values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0 +to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called +"non-character" code points are no longer excluded because Unicode corrigendum +#9 makes it clear that they should not be.) +.P +Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, +where they are used in pairs to encode codepoints with values greater than +0xFFFF. The code points that are encoded by UTF-16 pairs are available +independently in the UTF-8 and UTF-32 encodings. (In other words, the whole +surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and +UTF-32.) +.P +If an invalid UTF-8 string is passed to PCRE, an error return is given. At +compile time, the only additional information is the offset to the first byte +of the failing character. The run-time functions \fBpcre_exec()\fP and +\fBpcre_dfa_exec()\fP also pass back this information, as well as a more +detailed reason code if the caller has provided memory in which to do this. 
+.P +In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance, for +example in the case of a long subject string that is being scanned repeatedly. +If you set the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE +assumes that the pattern or subject it is given (respectively) contains only +valid UTF-8 codes. In this case, it does not diagnose an invalid UTF-8 string. +.P +Note that passing PCRE_NO_UTF8_CHECK to \fBpcre_compile()\fP just disables the +check for the pattern; it does not also apply to subject strings. If you want +to disable the check for a subject string you must pass this option to +\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. +.P +If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, the result +is undefined and your program may crash. +. +. +.\" HTML <a name="utf16strings"></a> +.SS "Validity of UTF-16 strings" +.rs +.sp +When you set the PCRE_UTF16 flag, the strings of 16-bit data units that are +passed as patterns and subjects are (by default) checked for validity on entry +to the relevant functions. Values other than those in the surrogate range +U+D800 to U+DFFF are independent code points. Values in the surrogate range +must be used in pairs in the correct manner. +.P +If an invalid UTF-16 string is passed to PCRE, an error return is given. At +compile time, the only additional information is the offset to the first data +unit of the failing character. The run-time functions \fBpcre16_exec()\fP and +\fBpcre16_dfa_exec()\fP also pass back this information, as well as a more +detailed reason code if the caller has provided memory in which to do this. +.P +In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance. 
If you set +the PCRE_NO_UTF16_CHECK flag at compile time or at run time, PCRE assumes that +the pattern or subject it is given (respectively) contains only valid UTF-16 +sequences. In this case, it does not diagnose an invalid UTF-16 string. +However, if an invalid string is passed, the result is undefined. +. +. +.\" HTML <a name="utf32strings"></a> +.SS "Validity of UTF-32 strings" +.rs +.sp +When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are +passed as patterns and subjects are (by default) checked for validity on entry +to the relevant functions. This check allows only values in the range U+0 +to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF. +.P +If an invalid UTF-32 string is passed to PCRE, an error return is given. At +compile time, the only additional information is the offset to the first data +unit of the failing character. The run-time functions \fBpcre32_exec()\fP and +\fBpcre32_dfa_exec()\fP also pass back this information, as well as a more +detailed reason code if the caller has provided memory in which to do this. +.P +In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance. If you set +the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that +the pattern or subject it is given (respectively) contains only valid UTF-32 +sequences. In this case, it does not diagnose an invalid UTF-32 string. +However, if an invalid string is passed, the result is undefined. +. +. +.SS "General comments about UTF modes" +.rs +.sp +1. Codepoints less than 256 can be specified in patterns by either braced or +unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger +values have to use braced sequences. +.P +2. Octal numbers up to \e777 are recognized, and in UTF-8 mode they match +two-byte characters for values greater than \e177. +.P +3. 
Repeat quantifiers apply to complete UTF characters, not to individual +data units, for example: \ex{100}{3}. +.P +4. The dot metacharacter matches one UTF character instead of a single data +unit. +.P +5. The escape sequence \eC can be used to match a single byte in UTF-8 mode, or +a single 16-bit data unit in UTF-16 mode, or a single 32-bit data unit in +UTF-32 mode, but its use can lead to some strange effects because it breaks up +multi-unit characters (see the description of \eC in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation). The use of \eC is not supported in the alternative matching +function \fBpcre[16|32]_dfa_exec()\fP, nor is it supported in UTF mode by the +JIT optimization of \fBpcre[16|32]_exec()\fP. If JIT optimization is requested +for a UTF pattern that contains \eC, it will not succeed, and so the matching +will be carried out by the normal interpretive function. +.P +6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly +test characters of any code value, but, by default, the characters that PCRE +recognizes as digits, spaces, or word characters remain the same set as in +non-UTF mode, all with values less than 256. This remains true even when PCRE +is built to include Unicode property support, because to do otherwise would +slow down PCRE in many common cases. Note in particular that this applies to +\eb and \eB, because they are defined in terms of \ew and \eW. If you really +want to test for a wider sense of, say, "digit", you can use explicit Unicode +property tests such as \ep{Nd}. Alternatively, if you set the PCRE_UCP option, +the way that the character escapes work is changed so that Unicode properties +are used to determine which characters match. There are more details in the +section on +.\" HTML <a href="pcrepattern.html#genericchartypes"> +.\" </a> +generic character types +.\" +in the +.\" HREF +\fBpcrepattern\fP +.\" +documentation. +.P +7. 
Similarly, characters that match the POSIX named character classes are all +low-valued characters, unless the PCRE_UCP option is set. +.P +8. However, the horizontal and vertical white space matching escapes (\eh, \eH, +\ev, and \eV) do match all the appropriate Unicode characters, whether or not +PCRE_UCP is set. +.P +9. Case-insensitive matching applies only to characters whose values are less +than 128, unless PCRE is built with Unicode property support. A few Unicode +characters such as Greek sigma have more than two codepoints that are +case-equivalent. Up to and including PCRE release 8.31, only one-to-one case +mappings were supported, but later releases (with Unicode property support) do +treat as case-equivalent all versions of characters such as Greek sigma. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +University Computing Service +Cambridge CB2 3QH, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 February 2013 +Copyright (c) 1997-2013 University of Cambridge. +.fi diff --git a/usr/share/man/man3/zlib.3 b/usr/share/man/man3/zlib.3 new file mode 100755 index 000000000..0160e62b6 --- /dev/null +++ b/usr/share/man/man3/zlib.3 @@ -0,0 +1,151 @@ +.TH ZLIB 3 "28 Apr 2013" +.SH NAME +zlib \- compression/decompression library +.SH SYNOPSIS +[see +.I zlib.h +for full description] +.SH DESCRIPTION +The +.I zlib +library is a general purpose data compression library. +The code is thread safe, assuming that the standard library functions +used are thread safe, such as memory allocation routines. +It provides in-memory compression and decompression functions, +including integrity checks of the uncompressed data. +This version of the library supports only one compression method (deflation) +but other algorithms may be added later +with the same stream interface. +.LP +Compression can be done in a single step if the buffers are large enough +or can be done by repeated calls of the compression function. 
+In the latter case, +the application must provide more input and/or consume the output +(providing more output space) before each call. +.LP +The library also supports reading and writing files in +.IR gzip (1) +(.gz) format +with an interface similar to that of stdio. +.LP +The library does not install any signal handler. +The decoder checks the consistency of the compressed data, +so the library should never crash even in the case of corrupted input. +.LP +All functions of the compression library are documented in the file +.IR zlib.h . +The distribution source includes examples of use of the library +in the files +.I test/example.c +and +.IR test/minigzip.c, +as well as other examples in the +.IR examples/ +directory. +.LP +Changes to this version are documented in the file +.I ChangeLog +that accompanies the source. +.LP +.I zlib +is available in Java using the java.util.zip package: +.IP +http://java.sun.com/developer/technicalArticles/Programming/compression/ +.LP +A Perl interface to +.IR zlib , +written by Paul Marquess (pmqs@cpan.org), +is available at CPAN (Comprehensive Perl Archive Network) sites, +including: +.IP +http://search.cpan.org/~pmqs/IO-Compress-Zlib/ +.LP +A Python interface to +.IR zlib , +written by A.M. Kuchling (amk@magnet.com), +is available in Python 1.5 and later versions: +.IP +http://docs.python.org/library/zlib.html +.LP +.I zlib +is built into +.IR tcl: +.IP +http://wiki.tcl.tk/4610 +.LP +An experimental package to read and write files in .zip format, +written on top of +.I zlib +by Gilles Vollant (info@winimage.com), +is available at: +.IP +http://www.winimage.com/zLibDll/minizip.html +and also in the +.I contrib/minizip +directory of the main +.I zlib +source distribution. 
+.SH "SEE ALSO" +The +.I zlib +web site can be found at: +.IP +http://zlib.net/ +.LP +The data format used by the zlib library is described by RFC +(Request for Comments) 1950 to 1952 in the files: +.IP +http://tools.ietf.org/html/rfc1950 (for the zlib header and trailer format) +.br +http://tools.ietf.org/html/rfc1951 (for the deflate compressed data format) +.br +http://tools.ietf.org/html/rfc1952 (for the gzip header and trailer format) +.LP +Mark Nelson wrote an article about +.I zlib +for the Jan. 1997 issue of Dr. Dobb's Journal; +a copy of the article is available at: +.IP +http://marknelson.us/1997/01/01/zlib-engine/ +.SH "REPORTING PROBLEMS" +Before reporting a problem, +please check the +.I zlib +web site to verify that you have the latest version of +.IR zlib ; +otherwise, +obtain the latest version and see if the problem still exists. +Please read the +.I zlib +FAQ at: +.IP +http://zlib.net/zlib_faq.html +.LP +before asking for help. +Send questions and/or comments to zlib@gzip.org, +or (for the Windows DLL version) to Gilles Vollant (info@winimage.com). +.SH AUTHORS +Version 1.2.8 +Copyright (C) 1995-2013 Jean-loup Gailly (jloup@gzip.org) +and Mark Adler (madler@alumni.caltech.edu). +.LP +This software is provided "as-is," +without any express or implied warranty. +In no event will the authors be held liable for any damages +arising from the use of this software. +See the distribution directory with respect to requirements +governing redistribution. +The deflate format used by +.I zlib +was defined by Phil Katz. +The deflate and +.I zlib +specifications were written by L. Peter Deutsch. +Thanks to all the people who reported problems and suggested various +improvements in +.IR zlib ; +who are too numerous to cite here. +.LP +UNIX manual page by R. P. C. Rodgers, +U.S. National Library of Medicine (rodgers@nlm.nih.gov). +.\" end of man page |