summaryrefslogtreecommitdiff
path: root/modules/string/macros/%_strsplit.sci
blob: b8d5f86a296cc26092df619b7319e9b7121084cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
// Copyright (C) DIGITEO - 2009-2010 - Allan CORNET
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution.  The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt

function [strs, matched_separators] = %_strsplit(varargin)
    // Splits a string into a column of sub-strings.
    //
    // Input (types are expected to be validated by the strsplit primitive
    // before this macro is reached):
    //   varargin(1) : the string to split.
    //   varargin(2) : optional separator(s). A single string of the form
    //                 "/.../" is used as a regular expression as-is; any
    //                 other string, and every element of a string matrix,
    //                 is matched literally. "" splits between every character.
    //   varargin(3) : optional limit; when > 0, at most "limit" leading
    //                 pieces are kept and the rest of the input is glued
    //                 back together as one final piece.
    //
    // Output:
    //   strs               : column of pieces.
    //   matched_separators : separators reported by regexp(), or [] when
    //                        nothing matched.

    //====== private macros ==================
    // Escapes the regular expression metacharacters (and the "/" delimiter)
    // of in_str so that it is matched literally.
    function out_str = replace_regexp_char(in_str)
        // "\" must stay first: escaping it later would double the
        // backslashes inserted for the other characters.
        special_chars = ["\", "/", "|", ".", "$", "[", "]", "(", ")", ..
                         "{", "}", "^", "?", "*", "+", "-"];
        out_str = in_str;
        for c = special_chars
            out_str = strsubst(out_str, c, "\" + c);
        end
    endfunction
    //========================================
    // Returns %t when str is longer than one character and is enclosed
    // in "/" delimiters, i.e. looks like "/regexp/".
    function bOK = isPattern(str)
        bOK = %f;
        lenstr = length(str);
        if lenstr > 1 then
            bOK = ((part(str, 1) == "/") & (part(str, lenstr) == "/"));
        end
    endfunction
    //========================================
    // Builds the "/.../" pattern given to regexp() from the separators.
    function regexp_pattern = createPattern(StringsInput)
        if (size(StringsInput, "*") == 1) then
            if isPattern(StringsInput) then
                // already a delimited regular expression: use it untouched
                regexp_pattern = StringsInput;
            else
                regexp_pattern = "/" + replace_regexp_char(StringsInput) + "/";
            end
        else
            // several separators: each one is taken literally and they are
            // combined as alternatives, in linear (column-major) order
            escaped = replace_regexp_char(StringsInput);
            regexp_pattern = "/" + strcat(escaped(:), "|") + "/";
        end
    endfunction
    //========================================
    matched_separators = [];
    strs = [];

    [lhs, rhs] = argn(0);
    text = varargin(1);

    // input types are checked in strsplit primitive
    if (rhs == 1) then
        // no separator: one piece per character
        len = length(text);
        if len == 0 then
            strs = "";
        elseif len > 1 then
            strs = strsplit(text, 1:len - 1);
        else
            strs = text;
        end
        if (lhs == 2) then
            dims_strs = size(strs);
            matched_separators = emptystr(dims_strs(1), dims_strs(2));
        end
    else

        strsplit_limit = -1; // no limit
        if (rhs == 3) then
            strsplit_limit = varargin(3);
        end

        if varargin(2) == "" then
            [strs, matched_separators] = strsplit(text);
        else
            strsplit_pattern = createPattern(varargin(2));

            [start_regexp, end_regexp, match_regexp] = regexp(text, strsplit_pattern);

            // default when nothing matches; a one-character input is also
            // returned unsplit (historical behaviour, kept on purpose)
            strs = text;
            len = length(text);
            if ~isempty(start_regexp) then
                if len > 1 then
                    // The pieces are the spans located between consecutive
                    // matches. They are extracted from the match positions
                    // directly, so a separator of any length is removed
                    // correctly even when it ends the string.
                    nb_matches = size(start_regexp, "*");
                    strs = emptystr(nb_matches + 1, 1);
                    previous_end = 0;
                    for k = 1:nb_matches
                        if start_regexp(k) > previous_end + 1 then
                            strs(k) = part(text, previous_end + 1 : start_regexp(k) - 1);
                        end
                        previous_end = end_regexp(k);
                    end
                    if previous_end < len then
                        strs(nb_matches + 1) = part(text, previous_end + 1 : len);
                    end
                    matched_separators = match_regexp;
                end
            end
        end

        if (strsplit_limit > 0) then
            dim_strs = size(strs, "*");
            if (strsplit_limit < dim_strs) then
                // keep the first "limit" pieces and separators ...
                strsremain = strs(strsplit_limit + 1 : $);
                dim_strsremain = size(strsremain, "*");
                strslimited = strs(1:strsplit_limit);

                // ... and rebuild the tail by re-inserting the separators
                // that were found between the remaining pieces
                foundedremain = matched_separators(strsplit_limit + 1 : $);
                foundedremain($ + 1: dim_strsremain) = "";
                matched_separators = matched_separators(1:strsplit_limit);

                str = strsremain + foundedremain;
                strs = [strslimited ; strcat(str)];
            end
        end
    end
    //========================================

endfunction