// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab // Copyright (C) 2013 - Samuel GOUGEON // Copyright (C) 2009 - Université du Maine - Samuel GOUGEON // // This file must be used under the terms of the CeCILL. // This source file is licensed as described in the file COPYING, which // you should have received as part of this distribution. The terms // are also available at // http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt function [nb, loc] = members(A, S, varargin) // // Looks for how many times in S each element of A occurs // Looks for how many rows of S match each row of A // Can return the position in S of each respective first or last match. // // Calling sequence: // ------------------------- // [nb [,loc]] = members(A, S) // [nb [,loc]] = members(A, S, "last") // [nb [,loc]] = members(A, S, "rows"|"cols") // [nb [,loc]] = members(A, S, "rows"|"cols", "last") // [nb [,loc]] = members(A, S, "rows"|"cols", "shuffle") // [nb [,loc]] = members(A, S, "rows"|"cols", "shuffle", "last") // // Input / Output arguments: // ------------------------- // A : Matrix or hypermatrix of booleans, integers, reals, complexes, polynomials or strings: // entities (needles) to search in S (haystack). // S : Matrix or hypermatrix of same datatype as A // "last": litteral keyword (optional): if it is provided, indices of last // occurrences are returned instead of for the first ones. // "rows": litteral keyword (optional). if it is provided, each row of A // is searched as a whole, among rows of S. // "cols": litteral keyword (optional). if it is provided, each column of A // is searched as a whole, among columns of S. // "rows" and "cols" options are exclusive. If both are specified, // only the last specified one is considered. // "shuffle": litteral keyword (optional), modifying the "rows" or "cols" // option. If it is provided, detection of -- say -- A rows in S is // performed without respect of the order of elements in each row. // This option is ignored when processing polynomials. // // NORMAL mode: neither "rows" nor "cols" is set. Then, // nb : Matrix of reals: same sizes as A // nb(i, j, ...): number of occurrences of A(i, j, ...) in S. // loc : Matrix of reals: same sizes as A // loc(i, j, ...): linear index in S of the first occurrence of A(i, j, ...). // If "last" is set, the index of the last occurrence is returned instead. // loc(i, j, ...) returns zero if A(i, j, ...) is not found. // ROW-WISE mode: // nb : Row of reals with size(A, 1) elements // nb(i): number of occurrences of A(i, :) found as rows of S // loc : Row of reals with size(A, 1) elements // loc(i): index of the first row in S which matches A(i, :). // If "last" is set, the index of the last occurrence is returned instead. // loc(i, j, ...) returns zero if A(i, :) is not found. // COLUMN-WISE mode: // same as above. nb and loc are row vectors with size(A, 2) elements. // // REMARK: %inf, -%inf values are supported in A as well as in S. // // LIMITATION: in normal element-wise mode, %nan are supported only in A. // // Examples: // --------- // a) with reals: // N = [ 7 3 // %inf 0 // %nan 1 ]; // H = [ 5 8 0 4 // 3 4 7 7 // 3 %inf %inf 2 // 7 5 5 8 ]; // [nb, loc] = members(N, H) // [nb, loc] = members(N, H, "last") // // b) with hypermatrices, from previous N and H: // N = matrix(N, [3 1 2]); // H = matrix(H, [4 2 2]); // [nb, loc] = members(N, H, "last") // // c) with integers: // N = int8(grand(3, 2, "uin", -5, 5)); // H = int8(grand(4, 4, "uin", -5, 5)); // [nb, loc] = members(N, H) // // d) with polynomials (complex coefficients are accepted): // z = %z; // N = [z (1-z)^2 ; -4 %i*z ]; // H = [2 %i*z -z 3-z z z^3 z]; // [nb, loc] = members(N, H) // // e) with text: // N = [ "Hi" "Hu" "Allo"]; // H = [ "Hello" "Bonjour" "Allo" // "Holà" "Allo" "Hallo" // "Hi" "Hé" "Salud" ]; // [nb, loc] = members(N, H, "last") // // f) by rows: // H = [ // 3 3 0 // 4 1 0 // 2 0 3 // 0 1 4 // 3 4 3 // 0 1 4 // 3 1 0 ]; // N = [ // 1 2 3 // 0 1 4 // 3 0 3 // 4 1 0 // 2 0 2 ]; // N, H // [nb, loc] = members(N, H, "rows") // [nb, loc] = members(N, H, "rows", "last") // [nb, loc] = members(N, H, "rows", "shuffle") // // g) by columns, from previous N and H: // N = N.', H = H.' // [nb, loc] = members(N, H, "cols", "shuffle") [lhs, rhs] = argn(); nb = []; if rhs == 0 then head_comments("members"); return end if rhs < 2 then error(msprintf(gettext("%s: Wrong number of input argument(s): at least %d expected.\n"), "members", 2)); end if A == [] then if lhs > 1 then loc = []; end return end if S == [] then nb = zeros(A); if lhs > 1 then loc = zeros(A); end return end sA = size(A); type_A = type(A(:)); type_S = type(S(:)); if type_A ~= type_S then error(msprintf(gettext("%s: Wrong type for input argument #%d: expected same type as first argument.\n"), "members", 2)); end if and(type_A ~= [1 2 4 8 10]) then error(msprintf(gettext("%s: Wrong type for input argument #%d: Matrix of integers, reals, complexes, booleans, polynomials or strings expected.\n"), "members", 1)); end if and(type_S ~= [1 2 4 8 10]) then error(msprintf(gettext("%s: Wrong type for input argument #%d: Matrix of integers, reals, complexes, booleans, polynomials or strings expected.\n"), "members", 2)); end if or(isnan(S)) then error(msprintf(gettext("%s: Wrong value for argument #%d: Must not contain NaN.\n"), "members", 2)); end // Checking optional flags (if any): last = %f; direction = ""; shuffle = %f; shuffle_temp = %f; if rhs > 2 then i = 2; for option=varargin i = i + 1; if typeof(option) ~= "string" | ~isscalar(option) then msg = _("%s: Wrong type for argument #%d: Scalar string expected => option ignored.\n"); warning(msprintf(msg, "members", i)) continue end o = convstr(option, "l"); if o == "last" then last = %t; elseif o == "rows" direction = "rows"; elseif o == "cols" direction = "cols"; elseif o == "shuffle" shuffle_temp = %t; else msg = _("%s: Unknown option ""%s"" => ignored.\n"); warning(msprintf(msg, "members", o)) end end if shuffle_temp == %t then if direction == "" then msg = _("%s: Option ""%s"" only relevant for ""%s"" and ""%s"" modes => ignored.\n"); warning(msprintf(msg, "members", "shuffle", "rows", "cols")) else shuffle = %t; end end end // ------------------------------------------------------------------------ // Usual processing: searching for matching individual components // -------------------------------------------------------------- if direction == "" then // Reals: special faster processing with dsearch(). // %inf, -%inf are supported in A and S. %nan are only supported in A. // Processing integers and strings requires bugs 6305 & 12778 to be fixed. if type_A == 8 then // Convert integers into reals in order to use dsearch A = double(A); S = double(S); elseif type_A == 4 then // Convert booleans into reals A = bool2s(A); S = bool2s(S); end if type_A == 1 & isreal(A) & isreal(S) then S = S(:); if last then S = S($:-1:1); end [Su, kS] = unique(S); [i, nbS] = dsearch(S, Su, "d"); I = dsearch(A(:), Su, "d"); k = find(I~=0); nb = I; nb(k) = nbS(I(k)); nb = matrix(nb, size(A)); if lhs > 1 then loc = I; loc(k) = kS(I(k)); if last then loc(k) = length(S)-loc(k)+1; end loc = matrix(loc, size(A)); end // ------------------------------------------------------------------------ else // Other cases : polynomials, text, complexes // ========================================== LA = size(A, "*"); LS = size(S, "*"); A = A(:); if ~last then S = S($:-1:1); end // Slices may be needed in case of memory overflow // // Function returning the memory space in [bytes] occupied by each // querried variable. If a variable exists in several contexts, // only the last created one is considered function varargout = varspace(varargin) [Ln, Ls] = who("local"); vs = list(); for vn = varargin i = find(Ln==vn); if i ~= [] then vs($+1) = Ls(i(1))*8; else vs($+1) = 0; end end varargout = vs; endfunction // ----------------------------------- // Memory space needed to process a i:j slice of A: // nb : j doubles (occupied at the end) // loc : j doubles (occupied at the end) // tmp : LS * (j-i+1) doubles needed // A2 : LS * mem(A(i:j)) needed // S2 : mem(S)*(j-i+1) needed // Function setting the thickness of the slice, according to the // available and needed memory function j = set_j(i, LS, memS, rA) // rA : remaining unprocessed part of A [memA2, memS2, memtmp] = varspace("A2", "S2", "tmp"); // Available memory [bytes]: m = stacksize(); avail = (m(1) - m(2))*8 + memA2 + memS2 + memtmp - 16*(i-1); // [bytes] avail = 0.15*avail; // A lot of memory is used as Intermediate memory // Diadic loop on e = j-i+1 (slice's thickness): // Init e = size(rA, "*"); Vtest = rA(1:e); memrA = varspace("Vtest"); // [bytes] // Loop fitting the slice's thickness e for the available memory while e*(memS+8*LS+16)+LS*memrA > avail & e > 1 e = ceil(e/2); // Remaining thickness => divided by 2 Vtest = rA(1:e); memrA = varspace("Vtest"); end if (e*(memS+8*LS+16)+LS*memrA > avail) then msg = _("%s: not enough memory to proceed\n"); error(sprintf(msg, "members")); end j = i+e-1; endfunction memS = varspace("S"); // [bytes] // Starting and ending indices in A for the current slice i = 1; // Starting index: initialization j = 0; // Ending index: initialization while j thickness of the slice = j-i+1) j = set_j(i, LS, memS, A(i:$)) // Progression bar if j < LA then if isdef("waitH") then waitbar(j/LA, waitH); elseif j/LA < 0.4 // Else: too few steps => not worthwhile waitH = waitbar(j/LA); end end A2 = repmat(A(i:j), 1, LS); S2 = repmat(S(:).', j-i+1, 1); tmp = bool2s(A2==S2); nb(i:j) = sum(tmp, "c"); if lhs > 1 then tmp = tmp.* ( (1:LS) .*. ones(j-i+1, 1) ); loc(i:j) = max(tmp, "c"); end i = j+1; // End of slice processing end // Deleting the waitbar (if any) and clearing transient variables clear A2 S2 tmp if isdef("waitH") then delete(waitH); end // Final operations on the overall result(s) nb = matrix(nb, sA); if lhs > 1 if ~last k = loc~=0; loc(k) = LS - loc(k) + 1; end loc = matrix(loc, sA); end end // ======================================================================== else // Row-wise processing: searching for matching rows // ------------------------------------------------ // Additional input checking: A = squeeze(A); if ~ismatrix(A) then msg = _("%s: Wrong type for argument #%d: Matrix expected.\n"); // error #209 error(msprintf(msg, "members", 1)) end S = squeeze(S); if ~ismatrix(S) then msg = _("%s: Wrong type for argument #%d: Matrix expected.\n"); // error #209 error(msprintf(msg, "members", 2)) end if direction == "rows" & size(A, 2) ~= size(S, 2) then msg = _("%s: Incompatible input arguments #%d and #%d: Same number of columns expected.\n"); error(msprintf(msg, "members", 1, 2)) elseif direction == "cols" & size(A, 1) ~= size(S, 1) then msg = _("%s: Incompatible input arguments #%d and #%d: Same number of rows expected.\n"); error(msprintf(msg, "members", 1, 2)) end // Column-wise = Row-wise after transposition if direction == "cols" then A = A.'; S = S.'; end // If "shuffle" is set, we first sort elements along each row of A and S if type_A ~= 2 & shuffle then // Polynomials are not sortable A = gsort(A, "c", "i"); S = gsort(S, "c", "i"); end // Pre-Processing: Preparing initial chart of double indices nLA = size(A, 1); // Number of rows of A nLS = size(S, 1); // Number of rows of S I1 = (1:nLA)' .*. ones(nLS, 1); // indices of A rows, grouped by S rows I2 = ones(nLA, 1) .*. (1:nLS)'; // set of S rows indices, replicated by the number of A rows IND = [I1 I2]; // Column #1 gives index in A. column #2 gives index in S // Processing: loop over columns for k = 1:size(S, 2) cA = A(IND(:, 1), k); cS = S(IND(:, 2), k); IND = IND(find(cA==cS), :); if IND == [] then break end end // Post-processing if last then [nb, loc] = members((1:nLA)', IND(:, 1), "last"); else [nb, loc] = members((1:nLA)', IND(:, 1)); end k = find(loc~=0); loc(k) = IND(loc(k), 2); nb = nb'; loc = loc'; end endfunction