/* Library file: MakeXX.GL                                      */
/* Created: 14th July 1995 by Felix                             */
/*                                                              */
/* Last modified:                                               */
/*  06 Jun 96 FJR   Exported info from MakeXX instead of        */
/*                  only allowing saving to a file              */
/*                  Used size rather than type check for        */
/*                  file name                                   */
/*  18 Apr 97 FJR   Added colNums to MakeXX to stop it          */
/*                  deleting rows due to unimportant data       */
/*   4 May 97 FJR   MakeXX only returns matrix of colNums       */
/*  18 Jun 97 FJR   Added code to make lags/leads/diffs         */
/*  31 Jul 97 FJR   MakeXX returns unmomented matrix            */
/*                                                              */
/* Routines to convert a normal X-matrix into an X'X matrix     */
/* suitable for XPReg.                                          */
/* DiffCol, SeasCol and LagCol are defined in Constant.GL       */
/*                                                              */
/* Exported:                                                    */
/*                                                              */

#DEFINECS ICol      1
#DEFINECS TCol      2
#DEFINECS XDataCol  3

PROC (1) = MakeInfo (infoName, data);
/* Make an information matrix and optionally save it            */
/* In:                                                          */
/*  infoName    Name of information matrix ("" = do not save)   */
/*  data        Row vector of names, XDataCol..COLS             */
/* Out:                                                         */
/*  info        Information matrix: names ~ ones ~ names, with  */
/*              a leading "Constant" row                        */
/* File on disk: "infoName" if non-null                         */

    LOCAL info;

    /* Drop the identifier names and put "Constant" on top      */
    info = "Constant"|TRIMR(data', XDataCol-1, 0);
    info = info ~ ONES(ROWS(info), 1) ~ info;
    IF infoName $/= "";
        SAVE ^infoName = info;
    ENDIF;
    RETP (info);
ENDP;   /* MakeInfo */

PROC (4) = CalcTs (tVec, subset);
/* Calculate T from max and min values of period indicator      */
/* and check consistency of subset.                             */
/* In:                                                          */
/*  tVec        Vector of periodic indicators                   */
/*  subset      Vector of periods to use (all zero = use all)   */
/* Out:                                                         */
/*  nPeriods    Number of data periods to save                  */
/*  offset      Adjustment to make tVec to make it 0..T-1       */
/*  subSet      2 x max no of periods; first row is flag        */
/*              for acceptable, second row is offset in         */
/*              terms of output vector.                         */
/*  balanced    Dataset is balanced ie T(i)=T for all i         */

    LOCAL tMax;
    LOCAL nPeriods;
    LOCAL i;
    LOCAL offset;
    LOCAL balanced;
    LOCAL location;
    LOCAL temp;

    tMax = MAXC(tVec);
    offset = MINC(tVec);
    nPeriods = tMax - offset + 1;

    /* Balanced if every period occurs the same number of times */
    temp = SEQA(offset, 1, nPeriods);
    temp = COUNTS(tVec, temp);
    balanced = temp==(ONES(nPeriods, 1)*temp[1]);

    IF subSet == 0;
        subset = SEQA(1, 1, nPeriods);
    ENDIF;

    /* Row 1: period wanted?  Row 2: its slot in the output     */
    temp = ZEROS(2, nPeriods);
    location = 0;
    i = 1;
    DO WHILE i <= nPeriods;
        IF NOT SCALMISS(INDNV(i, subset));
            temp[1,i] = 1;
            temp[2, i] = location;
            location = location + 1;
        ENDIF;
        i = i + 1;
    ENDO;
    subset = temp;
    nPeriods = SUMC(subSet[1,.]');
    RETP (nPeriods, offset, subset, balanced);
ENDP;   /* CalcTs */

PROC (1) = GetLLD (data, colNums, errCode);
/* Calculate leads/lags/diffs for one person                    */
/* In:                                                          */
/*  data        Raw data for an individual                      */
/*  colNums     columns to use with only leads/lags             */
/*  errCode     Error string - duff entries converted to it     */
/* Out:                                                         */
/*  data with levels replaced by appropriate values; rows for   */
/*  which a requested value cannot be formed get a missing      */
/*  value in TCol                                               */
/* NB data needs to be in ascending order for lags to work;     */
/* Set XSorted in Options.GL if data is already sorted.         */
/* NOTE(review): colNums[j,1] and colNums[j,ItemCol] are both   */
/* used for the target column - presumably ItemCol is 1;        */
/* confirm against Constant.GL.                                 */

    LOCAL temp;
    LOCAL loc;
    LOCAL tempCol;
    LOCAL i;
    LOCAL j;

    IF NOT XSorted;         /* Options to be found in Options.gl */
        data = SORTC(data,TCol);
    ENDIF;

    /* Read from data, write to temp, so that later rows never  */
    /* see already-transformed values                           */
    temp = data;
    i = ROWS(data);
    DO WHILE i >0;
        j = ROWS(colNums);
        DO WHILE j>0;
            IF colNums[j,DiffCol]/=0;           /* diff */
                IF i-colNums[j,DiffCol] > 0;    /* enough obs */
                    /* periods must be consecutive */
                    IF (data[i,TCol]-colNums[j,DiffCol]) ==
                        (data[i-colNums[j,DiffCol],TCol]);
                        tempCol = data[i-colNums[j,DiffCol]:i,colNums[j,ItemCol]];
                        IF NOT ISMISS(MISS(tempCol, errCode));
                            temp[i,colNums[j,1]] =
                                tempCol'*PTriang(colNums[j,DiffCol], NOT False);
                        ELSE;
                            temp[i,TCol] = MISS(0,0);
                        ENDIF;
                    ELSE;
                        temp[i,TCol] = MISS(0,0);
                    ENDIF;
                ELSE;
                    temp[i,TCol] = MISS(0,0);
                ENDIF;
            ELSEIF colNums[j,SeasCol]/=0;       /* seasonal diff */
                loc = INDNV(data[i,TCol]-ABS(colNums[j,SeasCol]),data[.,TCol]);
                IF SCALMISS(loc);
                    temp[i,TCol] = MISS(0,0);
                /* non-dot $/= holds only if BOTH rows differ   */
                /* from errCode                                 */
                ELSEIF data[i loc,colNums[j,1]] $/=errCode;
                    temp[i,colNums[j,1]] =
                        data[i,colNums[j,1]]-data[loc,colNums[j,1]];
                ELSE;
                    temp[i,TCol] = MISS(0,0);
                ENDIF;
            ELSEIF colNums[j, LagCol] /=0;      /* lag/lead */
                loc = INDNV(data[i,TCol]+colNums[j,LagCol],data[.,TCol]);
                IF SCALMISS(loc);
                    temp[i,TCol] = MISS(0,0);
                ELSEIF data[loc,colNums[j,1]] $/=errCode;
                    temp[i,colNums[j,1]] = data[loc,colNums[j,1]];
                ELSE;
                    temp[i,TCol] = MISS(0,0);
                ENDIF;
            ENDIF;
            /* Row already rejected: skip remaining columns     */
            IF SCALMISS(temp[i, TCol]);
                j = 0;
            ENDIF;
            j = j - 1;
        ENDO;
        i = i - 1;
    ENDO;
    RETP (temp);
ENDP;   /* GetLLD */

PROC (3) = MakeXX (data, outName, infoName, subSetT, colNums,
                    calcMean, errCode, balOnly, keepRaw);
/* Procedure to make cross-product matrix.  Data should         */
/* be in columnar form with the INDIVIDUAL IDENTIFIER i in      */
/* the ICol column and the PERIODIC IDENTIFIER t in column      */
/* TCol, followed by K columns of data.  Data need not          */
/* be balanced.  A constant column will be added for each       */
/* period.  If calcMean is non-zero and the data are not        */
/* balanced, each individual's moment is divided by its         */
/* number of usable periods.  Output is a TKxTK matrix and an   */
/* info matrix will be saved if "infoName" is not a null        */
/* string.  Names are taken from the top row, which is then     */
/* discarded.  Files kept on disk to save memory.  The          */
/* periodic identifier need not go from 1 to T, but is          */
/* assumed to increment by one each period.  Individual         */
/* identifier assumed to be character data.                     */
/* Lags, leads, diffs, calculated before conversion to          */
/* moment ie missing values in lags etc deleted as usual.       */
/* In:                                                          */
/*  data        Input matrix or name of file on disk to be      */
/*              used (assumed valid); top row is var names      */
/*  outName     Name for output matrix; if null, matrix         */
/*              is returned.  See below                         */
/*  infoName    Name of information matrix or null              */
/*  subSetT     Years to use when creating matrix, numbered     */
/*              1..T.  Zero value means use all years.          */
/*  colNums     columns to use (0 use all)                      */
/*              Rows naming columns <= TCol are dropped and     */
/*              ICol, TCol are always put first                 */
/*              Columns DiffCol, SeasCol, LagCol hold the       */
/*              diff, seasonal diff and lag lengths             */
/*              (lag: + for leads)                              */
/*  calcMean    Calculate means matrix (if not balanced)        */
/*  errCode     Error string - drop these obs unless its ""     */
/*  balOnly     Create a balanced matrix only                   */
/*  keepRaw     Keep raw data ie unmomented                     */
/* Out:                                                         */
/*  xx          No. of rows of XX (==no of cols) if outname     */
/*              non-null; otherwise complete XX matrix OR       */
/*              X matrix with appropriate data if keepRaw       */
/*              Constant term in col 1                          */
/*  infoName    Information matrix                              */
/*  balanced    Data is balanced or not                         */
/* Files on disk: "outName"=X'X created suitable for            */
/* XPReg.  "infoName" also created if non-null.                 */

    LOCAL i;
    LOCAL balanced;
    LOCAL nObs;
    LOCAL offset;
    LOCAL tOut;
    LOCAL nOut;
    LOCAL outLoc;
    LOCAL k;
    LOCAL kPlus;
    LOCAL tMean;
    LOCAL newItem;
    LOCAL currName;
    LOCAL xx;

    i = ZEROS(1,LagCol);
    IF ROWS(data) == 1;     /* file name */
        LOAD data = ^data;
    ENDIF;
    IF colNums==0;
        colNums = i;
    ELSE;
        colNums = i | i | DeleteR(colNums,colNums[.,ItemCol].<=TCol);
        colNums[1:2,ItemCol] = ICol | TCol;
    ENDIF;
    data=data[.,colNums[.,ItemCol]];
    data[.,ICol] = UPPER(data[.,ICol]);

    {tOut, offset, subSetT, balanced} =
        CalcTs (data[2:ROWS(data), TCol], subSetT);

    /* Calculate leads/lags/diffs */
    IF SUMC(SUMC(ABS(colNums[.,DiffCol SeasCol LagCol]))) > 0;
        nObs = ROWS(data);
        currName = 2;
        newItem = colNums;
        newItem[.,ItemCol] = SEQA(1,1,ROWS(colNums));
        /* ABS, as in the test above, so that entries of        */
        /* opposite sign cannot cancel and hide a request       */
        newItem = SelectR(newItem,
            (SUMC((ABS(newItem[.,DiffCol SeasCol LagCol]))')./=0));
        i = 2;
        DO WHILE i <= nObs;
            IF data[i, ICol] $/=data[currName, ICol];
                IF (i-1)>currName;
                    data[currName:i-1,.] =
                        GetLLD (data[currName:i-1,.], newItem, errCode);
                ELSE;   /* single observation: nothing to lag */
                    data[currName, TCol] = MISS(0,0);
                ENDIF;
                currName = i;
            ENDIF;
            i = i + 1;
        ENDO;
        IF (i-1)>currName;
            data[currName:i-1,.] =
                GetLLD (data[currName:i-1,.], newItem, errCode);
        ELSE;
            data[currName, TCol] = MISS(0,0);
        ENDIF;
    ENDIF;

    IF errCode$/="";
        /* Remove missing values */
        xx = data .$== errCode;
        xx=sumc(xx');       /* get non-false */
        data = DelNoPR(data, xx);
    ENDIF;

    nObs = ROWS(data);
    infoName = MakeInfo (infoName, data[1,.]);
    data = data[2:nObs,.];
    nObs = nObs-1;
    data[.,TCol] = data[.,TCol]-offset;     /* change to 0..T-1 */
    k = COLS(data);
    kPlus = k - XDataCol + 2;               /* data cols + constant */

    IF keepRaw;
        xx = ZEROS (ROWS(data), tOut*kPlus);
        nOut = 1;                           /* next free output row */
    ELSE;
        xx = ZEROS (tOut*kPlus, tOut*kPlus);
    ENDIF;
    currName = UPPER(data[1,ICol]);
    newItem = ZEROS(1, tOut*kPlus);
    tMean = 0;
    i = 1;
    DO WHILE i <= nObs;
        IF UPPER(data[i, ICol]) $/=currName;
            /* Update matrix with last individual */
            IF (NOT balOnly) OR (tMean==tOut);
                IF keepRaw;
                    xx[nOut,.] = newItem;
                    nOut = nOut+1;
                ELSEIF tMean > 0;
                    /* tMean==0 means an all-zero item: adding  */
                    /* it is a no-op, and dividing by tMean     */
                    /* would put 0/0 into every cell of xx      */
                    IF calcMean AND NOT balanced;
                        xx = xx + MOMENT(newItem, 0)/tMean;
                    ELSE;
                        xx = xx + MOMENT(newItem, 0);
                    ENDIF;
                ENDIF;
            ENDIF;
            currName = UPPER(data[i,ICol]);
            newItem = ZEROS(1, tOut*kPlus);
            tMean = 0;
        ENDIF;

        IF NOT SCALMISS(data[i, TCol]);
            IF subsetT[1,data[i, TCol]+1];  /* period ok */
                outLoc = (subsetT[2,data[i, TCol]+1]*kPlus) + 1;
                newItem[outLoc:outLoc+kPlus-1] = 1~data[i,(XDataCol):k];
                tMean = tMean + 1;
            ENDIF;
        ENDIF;
        i = i + 1;
    ENDO;

    /* Add in last one */
    IF (NOT balOnly) OR (tMean==tOut);
        IF keepRaw;
            xx[nOut,.] = newItem;
            nOut = nOut+1;
        ELSEIF tMean > 0;
            IF calcMean AND NOT balanced;
                xx = xx + MOMENT(newItem, 0)/tMean;
            ELSE;
                xx = xx + MOMENT(newItem, 0);
            ENDIF;
        ENDIF;
    ENDIF;

    /* Trim unused rows whether or not the last individual was  */
    /* accepted.  If nothing at all was accepted the all-zero   */
    /* matrix is left untouched.                                */
    IF keepRaw;
        IF nOut > 1;
            xx = xx[1:nOut-1,.];
        ENDIF;
    ENDIF;

    IF outName $/= "";
        SAVE ^outName = xx;
        xx = ROWS(xx);
    ENDIF;

    RETP (xx, infoName, balanced);
ENDP;   /* MakeXX */

PROC (1) = MakeFmt (inName, outName, colI, colT, nCols, errCode);
/* Procedure to turn ASCII text (eg QPW output) into .FMT       */
/* suitable for MakeXX.                                         */
/* In:                                                          */
/*  inName      Name of text file (inc ext; assumed valid)      */
/*  outName     Name for output matrix; if null, matrix         */
/*              is returned.                                    */
/*  colI        Individual identifier column                    */
/*  colT        Periodic identifier column                      */
/*  nCols       Number of columns of data                       */
/*  errCode     Error string - rows containing it are dropped   */
/* Out:                                                         */
/*  data        0 or data matrix if outname null                */
/* Files on disk: "outName"=X matrix suitable for MakeXX        */

    LOCAL nRows;
    LOCAL temp;
    LOCAL data;

    LOAD data[] = ^inName;
    nRows = ROWS(data)/nCols;
    IF nRows /= TRUNC(nRows);
        PRINT "Number of items: " (nRows*nCols) " Expected rows,cols: " ;;
        PRINT nRows nCols;
        Warn ("Insufficient data items");
        data = 0;
    ELSE;
        data = RESHAPE (data, nRows, nCols);

        /* Remove missing values */
        temp = data .$== errCode;
        data = MISSEX(data, temp);
        data = PACKR(data);

        /* Now rearrange to get colI, colT first */
        /* NB If ICol and TCol change this won't work */
        IF colI /= ICol;
            temp = data[.,colI];
            data[.,(ICol+1):colI] = data[.,(ICol):colI-1];
            data[.,ICol] = temp;
            /* Only columns left of colI moved one place right; */
            /* a period column right of colI stays where it is  */
            IF colT < colI;
                colT = colT + 1;
            ENDIF;
        ENDIF;
        IF colT /= TCol;
            temp = data[.,colT];
            data[.,(TCol+1):colT] = data[.,(TCol):colT-1];
            data[.,TCol] = temp;
        ENDIF;
    ENDIF;

    IF outName$/="";
        SAVE ^outName = data;
        data = 0;
    ENDIF;
    RETP (data);
ENDP;   /* MakeFmt */

/* END MakeXX.GL */