
e8ec3f70f93f5c8f26acb09e83537ce2.ppt
- Количество слайдов: 23
Lab #1: - spatial autocorrelation games - constructing Moran scatterplots - constructing semivariogram plots Spatial statistics in practice Center for Tropical Ecology and Biodiversity, Tunghai University & Fushan Botanical Garden
SASIM game: web & R versions SASIM http: //www. nku. edu/~longa/cgi-bin/cgi-tcl-examples/generic/SA/SA. cgi ####################### # THE AUTOCORRELATION GAME # ####################### # to start: run the code below # ####################### rm(list = ls()) library(spdep) s<-function(i, j){ a<-dat[i] b<-dat[j] dat[i]<-b dat[j]<-a return(dat) } plot. new<-function(dat){ plot(0: 1, , xaxt="n", yaxt="n", bty="n", xlab="", ylab= "", type="n") segments(seq(0, 1, by=0. 25), rep(0, 5), seq(0, 1, by=0. 25), r ep(1, 5)) segments(rep(0, 5), seq(0, 1, by=0. 25), rep(1, 5), seq(0, 1, b y=0. 25)) text(x, y, round(dat, 2), cex=2) text(x+0. 10, y+0. 10, 1: 16, col="grey", cex=0. 8) title(main=paste("Moran I=", round(moran. test(dat, listw)$estimate[1], 3))) } x<-rep(seq(0. 25/2, 0. 75+0. 25/2, by=. 25), 4) y<-rep(seq(0. 25/2, 0. 75+0. 25/2, by=. 25), each=4) dat<-runif(16, 0, 10) plot(0: 1, , xaxt="n", yaxt="n", bty="n", xlab="", ylab=" ", type="n") segments(seq(0, 1, by=0. 25), rep(0, 5), seq(0, 1, by=0. 25), re p(1, 5)) segments(rep(0, 5), seq(0, 1, by=0. 25), rep(1, 5), seq(0, 1, by =0. 25)) text(x, y, round(dat, 2), cex=2) text(x+0. 10, y+0. 10, 1: 16, col="grey", cex=0. 8) neigh<-cell 2 nb(4, 4, type="rook", torus=FALSE) listw<-nb 2 listw(neigh, style="B") title(main=paste("Moran I=", round(moran. test(dat, listw)$estimate[1], 3))) ########################### # THE AUTOCORRELATION GAME # ########################### # how to play: # # 1. to start: run the code above a 4 -by-4 lattice # # will appear the values in black are simulate # # from a uniform distribution with range (0, 10) # # and rounded up to 1/100 cells are labelled by # # the grey number on the top-right corner # # Moran I gives the value of the autocorrelation # # index for the current data on the lattice # # 2. to switch the values in cells i and j: # # type "dat<-s(i, j)" # # 3. to see the result: type "plot. new(dat)" # # 4. have fun # ########################### dat<-s(2, 15) plot. new(dat)
Arc. View extension: Map. Stat 3 • Generate neighbors matrix – Rook’s adjacency – Queen’s adjacency • Calculate MC
Basic changes when customizing any of the SAS files • • • file paths INPUT statements (e. g. , list of variables) n, n-1 (Puerto Rico: 73, 72) variables in KEEP= statements variables in outfile (DATA _NULL_) PUT statements (e. g. , list of variables) • TRY definition
Calculating eigenfunctions: MCmax FILENAME CONN 'D: JYU-SUMMERSCHOOL 2006LAB#1PR-CON. TXT'; FILENAME OUTFILE 1 'D: JYU-SUMMERSCHOOL 2006LAB#1PR-EIGENVECTORS. TXT'; FILENAME OUTFILE 2 'D: JYU-SUMMERSCHOOL 2006LAB#1PR-MC-EIG. TXT'; FILENAME OUTFILE 3 'D: JYU-SUMMERSCHOOL 2006LAB#1PR-EIG. TXT'; ********************** * CHANGES NEED TO BE MADE FOR N AND (N-1) * *********************; TITLE 'EIGENFUNCTIONS FOR THE PR DATA'; ********************************************************* * READ IN THE N-BY-N GEOGRAPHIC CONNECTIVITY MATRIX; CODE THE DIAGONAL WITH 1 S, MAKE SURE NOT TO MISREAD THE ID * *********************************************************; DATA STEP 1; INFILE CONN; INPUT IDC C 1 -C 73; DROP IDC; RUN; DATA STEP 2; ARRAY M{73} M 1 -M 73; DO I=1 TO 73; DO J=1 TO 73; M{J} = -1/73; IF I=J THEN M{J}=1 -1/73; END; OUTPUT; END; DROP I J; RUN; PROC IML; USE STEP 1; READ ALL INTO C; USE STEP 2; READ ALL INTO M; IDEN=I(73); CM=C*M; MCM=M*CM; ONE=J(73, 1); CSUM=C*ONE; CALL EIGEN(EVALS, EVECS, MCM); CREATE STEP 3 FROM EVALS; APPEND FROM EVALS; CREATE STEP 4 FROM EVECS; APPEND FROM EVECS; CREATE STEP 5 FROM CSUM; APPEND FROM CSUM; QUIT;
DATA STEP 3(REPLACE=YES); SET STEP 3; EVALS=COL 1; DROP COL 1; RUN; DATA STEP 4(REPLACE=YES); SET STEP 4; ARRAY EVECS{73} E 1 -E 73; ARRAY IMLOUT{73} COL 1 -COL 73; DO I=1 TO 73; EVECS{I} = IMLOUT{I}; END; DROP COL 1 -COL 73 I; X 0=1; RUN; DATA STEP 5(REPLACE=YES); SET STEP 5; CSUM=COL 1; DROP COL 1; RUN; DATA COMBINE; SET STEP 3; SET STEP 4; SET STEP 5; RUN; PROC MEANS DATA=COMBINE NOPRINT; VAR CSUM X 0; OUTPUT OUT=CSUM SUM=CSUM N; RUN; PROC MEANS DATA=COMBINE NOPRINT; VAR EVALS; OUTPUT OUT=LAM 1 MAX=LAM 1; RUN; DATA MAXMC; SET CSUM(KEEP=CSUM N); SET LAM 1(KEEP=LAM 1); MCMAX = N*LAM 1/CSUM; RUN; PROC PRINT; RUN; DATA _NULL_; SET STEP 4; writing to FILE OUTFILE 1 LRECL=2048; output files ID=_N_; PUT ID E 1 -E 73; RUN; DATA _NULL_; SET STEP 3; IF _N_=1 THEN SET CSUM; IF _N_=1 THEN SET LAM 1; FILE OUTFILE 2; ID=_N_; MCO=N*EVALS/CSUM; MCADJ=EVALS/LAM 1; PUT ID EVALS MCO MCADJ; RUN;
DATA STEP 1(REPLACE=YES); SET STEP 1; ARRAY CONN{73} C 1 -C 73; CSUM=0; DO I=1 TO 73; IF _N_=I THEN CONN{I} = 1; CSUM=CSUM + CONN{I}; END; IDC=_N_; X 0=1; CSUM=CSUM-1; DROP I; RUN; PROC MEANS DATA=STEP 1 NOPRINT; VAR X 0 CSUM; OUTPUT OUT=APP SUM=N CSUM; RUN; PROC PRINCOMP DATA=STEP 1(TYPE=CORR) OUTSTAT=EVECS NOPRINT; VAR C 1 -C 73; RUN; DATA EVECSTMP; SET EVECS; IF _N_=1 THEN SET APP(KEEP=CSUM N); IF _TYPE_='EIGENVAL' THEN IKEEP=1; ELSE IKEEP=0; IF IKEEP=0 THEN DELETE; EVAL 1=C 2 -1; MCMAXAPP = N*EVAL 1/CSUM; DROP _TYPE_ _NAME_ IKEEP C 1 -C 73; RUN; PROC PRINT; RUN; DATA EVALS; SET EVECS; IF _TYPE_="EIGENVAL" THEN IKEEP=1; ELSE IKEEP=0; IF IKEEP=0 THEN DELETE; RUN; PROC TRANSPOSE DATA=EVALS OUT=APPEVALS PREFIX=EVALS; VAR C 1 -C 73; RUN; DATA EIGEN; SET APPEVALS(KEEP=EVALS 1); LAMBDAC=EVALS 1 -1; DROP EVALS 1; RUN; DATA APPEVALS(REPLACE=YES); SET APPEVALS; EVALS=EVALS 1 -1; IF _NAME_="C 1" THEN DELETE; DROP _NAME_ EVALS 1; RUN; PROC TRANSPOSE DATA=STEP 1 OUT=TCSUM PREFIX=TCSUM; VAR CSUM; RUN; DATA MATRIXW; SET STEP 1; IF _N_=1 THEN SET TCSUM; _TYPE_="CORR"; ARRAY TCSUM{73} TCSUM 1 -TCSUM 73; ARRAY CONN{73} C 1 -C 73; DO I=1 TO 73; CONN{I}=CONN{I}/(SQRT(TCSUM{I})*SQRT(CSUM)); IF _N_=I THEN CONN{I} = 1; END; IDC=_N_; DROP TCSUM 1 -TCSUM 73 I CSUM _NAME_; RUN;
PROC PRINCOMP DATA=MATRIXW(TYPE=CORR) OUTSTAT=WEVECS NOPRINT; VAR C 1 -C 73; RUN; DATA WEVALSTMP; SET WEVECS; IF _N_=1 THEN SET APP(KEEP=CSUM N); IF _TYPE_='EIGENVAL' THEN IKEEP=1; ELSE IKEEP=0; IF IKEEP=0 THEN DELETE; DROP _TYPE_ _NAME_ IKEEP; RUN; PROC TRANSPOSE DATA=WEVALSTMP OUT=WEVALS PREFIX=WEVALS; VAR C 1 -C 73; RUN; DATA EIGEN(REPLACE=YES); SET EIGEN; SET WEVALS(KEEP=WEVALS 1); LAMBDAW=WEVALS 1 -1; DROP WEVALS 1; RUN; PROC MEANS; VAR LAMBDAW; RUN; DATA _NULL_; SET EIGEN; FILE OUTFILE 3; ID=_N_; PUT ID LAMBDAC LAMBDAW; RUN; ********************* * ADJUSTING THE APPROXIMATE EIGENVECTORS * *********************; DATA EVECS(REPLACE=YES); SET EVECS; IF _TYPE_='SCORE' THEN IKEEP=1; ELSE IKEEP=0; IF IKEEP=0 THEN DELETE; RUN; PROC TRANSPOSE OUT=STEP 6 PREFIX=C; VAR C 1 -C 73; RUN; writing to output file for SAR DATA STEP 6(REPLACE=YES); SET STEP 6; SET STEP 1(KEEP=IDC); RUN; PROC FACTOR DATA=STEP 6 N=72 ROTATE=QUARTIMAX OUT=FINAL OUTSTAT=FINALE NOPRINT; VAR C 2 -C 73; RUN; /* DATA _NULL_; SET FINAL; FILE OUTFILE 1 LRECL=2048; ID=_N_; PUT ID FACTOR 1 -FACTOR 72; RUN; DATA _NULL_; SET APPEVALS; IF _N_=1 THEN SET CSUM; IF _N_=1 THEN SET EVECSTMP; FILE OUTFILE 2; ID=_N_; MCO=N*EVALS/CSUM; MCADJ=EVALS/EVAL 1; PUT ID EVALS MCO MCADJ; RUN; */ writing approximations to output files
MC as a linear regression solution From OLS theory: b = (XTX)-1 XTY • Convert the attribute variable in question to zscores • Let X = z. Y and Y = Cz. Y • • Regress Cz. Y on z. Y, with a no-intercept option • Let X = 1 and Y = C 1 • • Regress C 1 on 1, with a no-intercept option • MC = bnumerator/bdenominator
GR from MC squared deviations row sums of matrix C The GR contains all of the locational information captured by the MC, but emphasizes any marked deviations (e. g. , outliers).
SAS code to calculate MC & GR FILENAME INDATA ‘D: JYU-SUMMERSCHOOL 2006LAB#1PR-AREAS-COMPETITION. TXT'; FILENAME CONN 'D: JYU-SUMMERSCHOOL 2006LAB#1PR-CON. TXT'; TITLE 'MC AND GR FOR THE PR AREA DATA'; ******************************* * READ IN THE N-BY-N GEOGRAPHIC CONNECTIVITY MATRIX; * * CODE THE DIAGONAL WITH 1 S, MAKE SURE NOT TO MISREAD THE ID * *******************************; DATA STEP 1; INFILE CONN; INPUT IDC C 1 -C 73; ARRAY CONN{73} C 1 -C 73; CSUM = 0; DO I=1 TO 73; CSUM = CSUM + CONN{I}; END; RUN; PROC MEANS DATA=STEP 1 NOPRINT; VAR CSUM; OUTPUT OUT=CSUM SUM=SUMCSUM; RUN; ************************ * READ IN GEOREFERENCED DATA; * * THEN CENTER THE SELEDTED ATTRIBUTE VARIABLE * ************************; DATA STEP 2; INFILE INDATA LRECL=1024; INPUT ID AREAM AREACT MCT_RATIO TRMCT_RATIO AREAPT MPT_RATIO TRMPT_RATIO NAME$; Y = LOG(AREAM + 0. 001); * Y = TRMPT_RATIO; X 0=1; RUN; PROC STANDARD MEAN=0 STD=1 OUT=STEP 2(REPLACE=YES); VAR Y; RUN;
PROC MEANS NOPRINT; VAR X 0; OUTPUT OUT=STEP 0 SUM=N; RUN; DATA STEP 0(REPLACE=YES); SET STEP 0; SET CSUM; APPSEMC = SQRT(2/SUMCSUM); RUN; DATA STEP 2(REPLACE=YES); SET STEP 2; SET STEP 1; ARRAY CONN{73} C 1 -C 73; ARRAY YCONN{73} CY 1 -CY 73; DO I=1 TO 73; YCONN{I} = Y*CONN{I}; END; Y 2 CSUM=(Y**2)*CSUM; RUN; PROC MEANS NOPRINT; VAR CY 1 -CY 73; OUTPUT OUT=CYOUT 1 SUM=CY 1 -CY 73; RUN; PROC TRANSPOSE DATA=CYOUT 1 PREFIX=CY OUT=CYOUT 2; VAR CY 1 -CY 73; RUN; DATA STEP 3; SET STEP 2(KEEP=X 0 Y 2 CSUM Y CSUM); SET CYOUT 2(KEEP=CY 1); CZY=CY 1; ZY=Y; DROP CY 1; X 0 GR=X 0; RUN; PROC REG OUTEST=OUTNMCB; MODEL CZY=ZY/NOINT; OUTPUT OUT=MCPRED P=CZYHAT; RUN;
AND construct a Moran scatterplot ***************** * CONSTRUCTING A MC SCATTERPLOT * *****************; PROC GPLOT; PLOT CZY*ZY CZYHAT*ZY/OVERLAY; RUN; PROC REG DATA=STEP 3 OUTEST=OUTDMCB; MODEL CSUM=X 0/NOINT; RUN; PROC REG DATA=STEP 3 OUTEST=OUTNGRB NOPRINT; MODEL Y 2 CSUM=X 0 GR/NOINT; RUN; DATA STEP 0(REPLACE=YES); SET STEP 0; SET OUTNMCB(KEEP=ZY); SET OUTDMCB(KEEP=X 0); SET OUTNGRB(KEEP=X 0 GR); MC = ZY/X 0; EMC = -1/(N-1); GR = X 0 GR/X 0 - ((N-1)/N)*MC; IF MC> 0 THEN PROBMC=1 - PROBNORM((MC-EMC)/APPSEMC); ELSE PROBMC= PROBNORM((MC-EMC)/APPSEMC); DROP ZY X 0; RUN; PROC PRINT; VAR N GR MC EMC APPSEMC PROBMC; RUN;
SAS code for constructing a semivariogram plot FILENAME INDATA 1 ‘D: JYU-SUMMERSCHOOL 2006LAB#1PR-AREAS-COMPETITION. TXT'; FILENAME INDATA 2 ‘D: JYU-SUMMERSCHOOL 2006LAB#1PR-DEM&QUAD-DATA. TXT'; TITLE 'SEMIVARIOGRAM CONSTRUCTION FOR THE PR DATA'; ************************ * READ IN GEOREFERENCED DATA; * * THEN CENTER THE SELEDTED ATTRIBUTE VARIABLE * ************************; DATA STEP 1; INFILE INDATA 1 LRECL=1024; INPUT ID AREAM AREACT MCT_RATIO TRMCT_RATIO AREAPT MPT_RATIO TRMPT_RATIO NAME$; * Y = LOG(AREAM + 0. 001); * Y = TRMPT_RATIO; RUN; PROC SORT DATA=STEP 1 OUT=STEP 1(REPLACE=YES); BY NAME; RUN; DATA STEP 2; INFILE INDATA 2 LRECL=1024; INPUT IDDEM MELEV SELEV U V QUAD NAME$; U=U/1000; V=V/1000; * Y=LOG(MELEV+17. 5); Y=(SELEV-25)**0. 5; RUN; PROC SORT DATA=STEP 2 OUT=STEP 2(REPLACE=YES); BY NAME; RUN; DATA STEP 3; MERGE STEP 1 STEP 2; BY NAME; RUN;
PROC VARIOGRAM DATA=STEP 3 OUTP=PAIRS; COMPUTE NOVARIOGRAM; COORDINATES XC=U YC=V; VAR Y; RUN; DATA PAIRS(REPLACE=YES); SET PAIRS; GAMMA=(V 1 -V 2)**2; DROP X 1 Y 1 X 2 Y 2 COS VARNAME; RUN; PROC MEANS MAX; VAR DISTANCE; RUN; PROC VARIOGRAM DATA=STEP 3 OUTVAR=GAMMA; COMPUTE LAGD=3 MAXLAGS=50; COORDINATES XC=U YC=V; VAR Y; RUN; DATA GAMMA(REPLACE=YES); SET GAMMA; IF LAG =0 THEN DELETE; IF LAG=-1 THEN LAG=0; IF LAG=0 THEN DISTANCE=0; IF LAG=0 THEN VARIOG=0; RUN; PROC PRINT; RUN; PROC GPLOT; PLOT VARIOG*DISTANCE; RUN;
SAS code for semivariogram trend line FILENAME INDATA 1 ‘D: JYU-SUMMERSCHOOL 2006LAB#1PR-AREAS-COMPETITION. TXT'; FILENAME INDATA 2 ‘D: JYU-SUMMERSCHOOL 2006LAB#1PR-DEM&QUAD-DATA. TXT'; TITLE 'CONSTRUCT A SEMIVARIOGRAM FOR THE PR AREA DATA'; DATA STEP 0; MAXDIST=80; RUN; ************************ * READ IN GEOREFERENCED DATA; * * THEN CENTER THE SELEDTED ATTRIBUTE VARIABLE * ************************; DATA STEP 1 A; INFILE INDATA 1 LRECL=1024; INPUT ID AREAM AREACT MCT_RATIO TRMCT_RATIO AREAPT MPT_RATIO TRMPT_RATIO NAME$; * Y = LOG(AREAM + 0. 001); * Y = TRMPT_RATIO; X 0=1; RUN; PROC SORT DATA=STEP 1 A OUT=STEP 1 A(REPLACE=YES); BY NAME; RUN; PROC MEANS NOPRINT; VAR X 0; OUTPUT OUT=OUTN SUM=N; RUN; DATA STEP 1 B; INFILE INDATA 2 LRECL=1024; INPUT IDDEM MELEV SELEV U V QUAD NAME$; coordinates U=U/1000; V=V/1000; are labeled u, v * Y=LOG(MELEV+17. 5); Y=(SELEV-25)**0. 5; RUN; PROC SORT DATA=STEP 1 B OUT=STEP 1 B(REPLACE=YES); BY NAME; RUN; DATA STEP 1; MERGE STEP 1 A STEP 1 B; BY NAME; RUN;
PROC VARIOGRAM DATA=STEP 1 OUTP=PAIRS; COMPUTE NOVARIOGRAM; COORDINATES XC=U YC=V; VAR Y; RUN; DATA PAIRS(REPLACE=YES); SET PAIRS; GAMMA=(V 1 -V 2)**2; DROP X 1 Y 1 X 2 Y 2 COS VARNAME; STDDIST=DISTANCE; COUNT=1; RUN; PROC MEANS SUM; VAR COUNT; RUN; PROC MEANS MAX; VAR DISTANCE; RUN; PROC STANDARD OUT=STEP 1(REPLACE=YES) MEAN=0 STD=1; /* CREATES STANDARDIZED DISTANCE INDEX */ VAR STDDIST; RUN; DATA PAIRS(REPLACE=YES); SET PAIRS; STDDIST=ROUND(STDDIST, . 001); /* ROUNDS STANDARDIZED DISTANCE INDEX */ RUN; PROC SORT DATA=PAIRS(REPLACE=YES); BY STDDIST; RUN; PROC MEANS MEAN NOPRINT; BY STDDIST; VAR DIST GAMMA; /* INITIAL CLUSTERING USING STANDARDIZED DISTANCES INDEX */ OUTPUT OUT=TMEAN=MDIST MGAMMA; RUN; DATA STEP 2; SET TMEAN; SEQID=_N_; DIST=MDIST; GAMMA=MGAMMA; DROP MDIST MGAMMA; RUN; PROC SORT OUT=STEP 2(REPLACE=YES); BY DIST; RUN; DATA STEP 2(REPLACE=YES); SET STEP 2; SEQID = _N_; MERGEVAL=1; RUN;
PROC CLUSTER METHOD=WARD NOPRINT OUTTREE=TREE; ID SEQID; VAR DIST; COPY GAMMA; FREQ _FREQ_; RUN; ************************* * THE NUMBER OF GROUPS (N = ? ? ? ) NEEDS TO BE SET * *************************; PROC TREE DATA=TREE NOPRINT N=75 OUT=TEMP 1; ID SEQID; RUN; PROC SORT DATA=TEMP 1 OUT=TEMP 1(REPLACE=YES); BY SEQID; RUN; DATA STEP 2 (REPLACE=YES); MERGE STEP 2(DROP=MERGEVAL) TEMP 1; BY SEQID; RUN; DATA OUTN(REPLACE=YES); SET OUTN; DIST=0. 0001; STDDIST=DIST; GAMMA=0; _TYPE_=0; _FREQ_=N; SEQID=0; CLUSTER=0; CLUSNAME='CL 000'; RUN; DATA STEP 2(REPLACE=YES); SET STEP 2 OUTN; RUN; PROC SORT DATA=STEP 2 OUT=STEP 1(REPLACE=YES); BY CLUSTER; RUN; PROC MEANS DATA=STEP 1 NOPRINT; BY CLUSTER; VAR DIST GAMMA; FREQ _FREQ_; OUTPUT OUT=RESULTS MIN=DISTMIN GAMMAMIN MAX=DISTMAX GAMMAMAX MEAN=DISTM GAMMAM; RUN; PROC SORT OUT=STEP 1(REPLACE=YES); BY DISTM; RUN; PROC PRINT; VAR GAMMAM DISTM _FREQ_; RUN; PROC MEANS SUM; VAR _FREQ_; RUN; PROC GPLOT DATA=RESULTS; PLOT GAMMAM*DISTM; RUN; DATA RESULTS(REPLACE=YES); SET RESULTS; IF _N_=1 THEN SET STEP 0; IF DISTM > MAXDIST THEN DELETE; RUN;
********************** * TREND LINES FOR THE SEMIVARIOGRAM PLOTS * **********************; /* SPHERICAL */ TITLE 'SPHERICAL SEMIVARIOGRAM MODEL'; PROC NLIN DATA=RESULTS NOITPRINT MAXITER=500 METHOD=MARQUARDT; PARMS C 0=0. 01 C 1=1 R=10; BOUNDS C 0>0, C 1>0, O
Arc. GIS 9. 1: semivariograms & MC Semivariogram – Open Arc. Map – Add (+) *. shp file – TOOLS -> EXTENSIONS -> Geostatistical analyst – VIEW -> TOOLBARS -> Geostatistical analyst – Geostatistical wizard
• Select variable (e. g. , STD_E) select “kriging” option select “prediction map” option select “model” • Inspect “semivariogram” inspect “anisotropy”
Arc. GIS 9. 1: semivariograms & MC MC – TOOLS -> SPATIAL STATISTICS TOOLS -> ANALYZING PATTERN -> Spatial Autocorrelation • Select “Display Output Graphically” • Select “inverse distance squared”
What you learned in today’s lab: 1. Change path names, etc. (Slide #4—print for a ready reference) 2. Compute eigenfunctions 3. Compute MC and GR 4. Construct Moran scatterplots and semivariogram plots 5. Fit trend lines to Moran scatterplots and semi-variogram plots 6. Implement kriging in Arc. GIS