@inproceedings{Mahinthakumar_SC99_19991113,
  author    = {Mahinthakumar, Gnanamanika and Hoffman, Forrest M. and Hargrove, William W. and Karonis, Nicolas T.},
  title     = {Multivariate Geographic Clustering in a Metacomputing Environment Using {Globus}},
  booktitle = {Supercomputing '99: Proceedings of the 1999 {ACM/IEEE} Conference on Supercomputing ({CDROM})},
  series    = {Supercomputing '99},
  isbn      = {1-58113-091-0},
  doi       = {10.1145/331532.331537},
  dates     = {13--19 November 1999},
  location  = {Portland, Oregon, United States},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  day       = {13},
  month     = nov,
  year      = {1999},
  abstract  = {The authors present a metacomputing application of multivariate, nonhierarchical statistical clustering to geographic environmental data from the 48 conterminous United States in order to produce maps of regions of ecological similarity, called \textit{ecoregions}. These maps represent finer scale regionalizations than do those generated by the traditional technique: an expert with a marker pen. Several variables (e.g., temperature, organic matter, rainfall etc.) thought to affect the growth of vegetation are clustered at resolutions as fine as one square kilometer (1~km$^2$). These data can represent over 7.8 million map cells in an $n$-dimensional ($n = 9$ to $25$) data space. A parallel version of the iterative statistical clustering algorithm is developed by the authors using the MPI (Message Passing Interface) message passing routines. The parallel algorithm uses a classical, self-scheduling, single-program, multiple data (SPMD) organization; performs dynamic load balancing for reasonable performance in heterogeneous metacomputing environments; and provides fault tolerance by saving intermediate results for easy restarts in case of hardware failure. The parallel algorithm was tested on various geographically distributed heterogeneous metacomputing configurations involving an IBM SP3, an IBM SP2, and two SGI Origin 2000's. The tests were performed with minimal code modification, and were made possible by Globus (a metacomputing software toolkit) and the Globus-enabled version of MPI (MPICH-G). Our performance tests indicate that while the algorithm works reasonably well under the metacomputing environment for a moderate number of processors, the communication overhead can become prohibitive for large processor configurations.},
}