Bib Christian-1991
From ResiliNetsWiki
@comment{Flaviu Cristian, Communications of the ACM 34(2), 1991. The DOI is taken from the RIS record for the same paper on this page. The abstract field holds only the abstract proper; scraped introduction text that had been appended to it was removed.}
@article{cristian1991,
  author    = {Cristian, Flaviu},
  title     = {Understanding Fault-Tolerant Distributed Systems},
  journal   = {Communications of the ACM},
  volume    = {34},
  number    = {2},
  pages     = {56--78},
  year      = {1991},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  doi       = {10.1145/102792.102801},
  abstract  = {We propose a small number of basic concepts that can be used to explain the architecture of fault-tolerant distributed systems and we discuss a list of architectural issues that we find useful to consider when designing or examining such systems. For each issue we present known solutions and design alternatives, we discuss their relative merits and we give examples of systems which adopt one approach or the other. The aim is to introduce some order in the complex discipline of designing and understanding fault-tolerant distributed systems.},
}
TY  - JOUR
TI  - Understanding Fault-Tolerant Distributed Systems
AU  - Cristian, Flaviu
JO  - Communications of the ACM
VL  - 34
IS  - 2
SP  - 56
EP  - 78
PY  - 1991
AB  - We propose a small number of basic concepts that can be used to explain the architecture of fault-tolerant distributed systems and we discuss a list of architectural issues that we find useful to consider when designing or examining such systems. For each issue we present known solutions and design alternatives, we discuss their relative merits and we give examples of systems which adopt one approach or the other. The aim is to introduce some order in the complex discipline of designing and understanding fault-tolerant distributed systems. 1 1 Introduction Computing systems consist of a multitude of hardware and software components that are bound to fail eventually. In many systems, such component failures can lead to unanticipated, potentially disruptive failure behavior and to service unavailability. Some systems are designed to be fault-tolerant: they either exhibit a well-defined failure behavior when components fail or mask component failures to users, that is, continue t...
ID  - 10.1145/102792.102801
WP  - http://portal.acm.org/citation.cfm?doid=102792.102801
ER  - 