@InProceedings{vaish04using,
  author    = {Vaish, Vaibhav and Wilburn, Bennett and Joshi, Neel and
               Levoy, Marc},
  title     = {Using Plane + Parallax for Calibrating Dense Camera Arrays},
  booktitle = {Proc. CVPR},
  year      = 2004,
}

@Book{krotkov,
  author    = {Krotkov, Eric Paul},
  title     = {Active Computer Vision by Cooperative Focus and Stereo},
  publisher = {Springer-Verlag},
  year      = {1989},
}

@InProceedings{wexler02bayesian,
  author    = {Wexler, Yoni and Fitzgibbon, Andrew and Zisserman, Andrew},
  title     = {{Bayesian} Estimation of Layers From Multiple Images},
  booktitle = {Proc. ECCV},
  year      = {2002},
  OPTannote = {By registering the images to the foreground, they
  effectively produce a series of images where the foreground is constant
  and the background changes. When the background is uniform, the problem
  becomes poorly constrained. To solve this, they formulate the problem as
  a maximum a posteriori estimation and introduce three new priors to
  regularize it: clamping foreground and alpha values, spatial continuity
  of alpha values while respecting gradients, and a learned probability
  distribution for alpha.}
}

@InProceedings{apostoloff04bayesian,
  author    = {Apostoloff, Nicholas and Fitzgibbon, Andrew},
  title     = {{Bayesian} Video Matting Using Learnt Image Priors},
  booktitle = {Proc. CVPR},
  year      = {2004},
  OPTannote = {They extend Wexler's 02 paper to exploit spatial AND
  temporal smoothness.}
}

@InProceedings{tomasi98bilateral,
  author    = {Tomasi, Carlo and Manduchi, Roberto},
  title     = {Bilateral Filtering for Gray and Color Images},
  booktitle = {Proc. ICCV},
  year      = {1998},
  OPTannote = {Performs domain (pixel space) and range (color space)
  filtering together -- hence called Bilateral filtering. Preserves edges
  but smooths out noise in same material regions of the image. For pixels
  near an edge, only those pixels (in the domain) which are similar in
  color to the pixel will be used for the filtering -- hence preserving the
  edge. This technique is similar to performing filtering in a combined
  domain+range space. The range (color) space can be in CIE-Lab color
  space, to provide for a Euclidean distance for similar colors.}
}

@InProceedings{swaminathan03perspective,
  author    = {Swaminathan, Rahul and Grossberg, Michael D. and
               Nayar, Shree K.},
  title     = {A Perspective on Distortions},
  booktitle = {Proc. CVPR},
  year      = {2003},
  OPTannote = { Provides 3 things: 1) a metric to quantify distortions in
  a view 2) a method to compute minimally distorted views from an MVI
  (Multi-Viewpoint image) and 3) an approximation to 2, which morphs an
  entire MVI to a quasi-single viewpoint perspective. Quantifying the
  caustic distortions from a view (of an MVI) is done by finding the
  parameters of the closest perspective camera which minimizes the
  disparity in corresponding points in the view and the image of the
  perspective camera. Estimating the view from a perspective camera
  assumes knowledge of the scene, so the MVI is backprojected to the
  scene, and a perspective camera is rendered from the backprojected
  scene. They also talk about modelling the perspective camera as a 3x4
  matrix, so only solve for 11 parameters (linearly). In part 2, they seek
  to find the minimally distorted views from an MVI. In other words, they
  seek an "optimal" viewmap, mapping from MVI to view. To do this, they
  need some estimate of the scene. They model the scene as a primitive
  (like plane, or sphere) with a parameter vector s. s is distributed over
  some range according to a probability density function. They optimize
  over s to find the scene which (when projecting the MVI upon) produces
  the least distorted image. They then show synthetic images with a sphere
  ball inside a cube, and approximate the scene with a sphere. But this
  optimization is a slow procedure, so they seek to warp the MVI to a
  quasi-perspective (think warping a sphere MVI so the lines are
  straight). This is done with an angle-based distortion metric based on
  the actual scene point and the estimated scene point (didn't understand
  this section fully). Then, at every pixel of the MVI, we estimate a
  scene depth d which minimizes the angular distortion.}
}


@Article{ramamoorthi02analytic,
  author    = {Ramamoorthi, Ravi},
  title     = {Analytic {PCA} Construction for Theoretical Analysis of
               Lighting Variability in Images of a {Lambertian} Object},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {24},
  number    = {10},
  pages     = {1322--1333},
  year      = {2002},
  OPTannote = {shows relationship between PCA and the spherical harmonic
  representation of incident illumination and reflection. In PCA, first 5
  principal components explain most of the image variation. Computes the
  covariance (correlation?) matrix of a vector of images under different
  illumination and shows that a related matrix (orthogonality matrix
  for spherical harmonics) has eigenvectors which correspond to the
  principal components of the image (from PCA). For pixels distributed
  over the hemisphere (ie: from a single viewpoint and not from all
  directions from a surface), he shows that the first 5 principal
  components suffice (cover 95 percent of data). By shifting by the mean
  (as in PCA), this removes the "DC" component of the spherical harmonics.}
}


@InProceedings{baker98theory,
  author    = {Baker, Simon and Nayar, Shree K.},
  title     = {A Theory of Catadioptric Image Formation},
  booktitle = {Proc. ICCV},
  year      = 1998,
  OPTannote = {catadioptric sensor == combination lens and mirrors to
  capture a wider FOV. argue that cata. sensor should form a single
  effective viewpoint. paper does 3 things: 1) derives complete class of
  single-lens single-mirror and single effective viewpoint (which are
  conic sections) 2) computes spatial resolution of cata. sensor in terms
  of camera resolution (defined as diff Area/diff solid angle) 3) analysis
  of defocus blur due to use of a curved mirror},
}

@InProceedings{boykov99fast,
  author    = {Boykov, Yuri and Veksler, Olga and Zabih, Ramin},
  title     = {Fast Approximate Energy Minimization via Graph Cuts},
  booktitle = {Proc. ICCV},
  year      = 1999,
  OPTannote = {A minimization technique for energy functions of the form
  Edata + Esmooth. The energy function takes in a mapping. A mapping is a
  function from pixels to labels (ie: pixels to disparities, pixels to
  colors, etc.). Edata represents difference between observed label and
  the assigned label. Esmooth ensures some continuity/smoothness in the
  labeling. Note: assumes that the E function is smooth, which may not be
  the case at object boundaries, shadows, depth discontinuities,
  etc. Energy functions can be metric (preserves triangle inequality) or
  semi-metric.  The two types of minimizing moves are alpha-beta swaps
  (swaps pixels of either label alpha or beta), or alpha-expansions
  (expands a label to more pixels). These methods produce local minimums
  even when large moves are allowed. Basic minimization algorithm
  (alpha-beta swap): pick 2 labels, find the optimal number of pixels to
  swap labels, compute the new energy function, if the new energy function
  is lower than previous, set the new one, and continue with new pair of
  labels. If the energy is higher, then we're done (at local min). Even if
  local min is found, they state (didn't prove in paper) that the local
  min is within some bound of the global minimum. To find the optimal
  number of pixels to swap (and to which label), they use graph cuts. They
  map a particular labeling (and alpha-beta) to a graph, and show that a
  graph cut corresponds to a new labeling where the new label is 1
  alpha-beta swap from the old labeling. By assigning certain weights to
  each edge, they show that the weight of the graph cut is exactly the
  energy of that labeling. Then, to find the labeling with smallest
  energy, one needs to find the min graph cut. The same procedure is
  presented for alpha-expansions, but now an auxiliary node is also
  presented -- to disambiguate cases where 2 pixels have different labels
  during an alpha expansion, and when they have the same labels during an
  alpha expansion. Their results don't really show how good/bad their
  algorithm is -- they compare their work with simulated annealing and
  normalized correlation, but who's to say that one is better than the
  other? Vaibhav stated a good point: vision people don't do segmentation
  for the sake of segmentation -- they use it for something. This paper
  compares its algorithm with other segmentations, but doesn't show whether
  it's useful for the actual application. Another question is: what types
  of energy functions is this good for? There's another paper on this --
  need to get reference from Vaibhav.},
}

@TechReport{szeliski1993Recovering3D,
  author      = {Szeliski, Richard and Kang, Sing Bing},
  title       = {Recovering {3D} Shape and Motion from Image Streams using
                 Non-Linear Least Squares},
  institution = {Digital Equipment Corporation, Cambridge Research Lab},
  year        = {1993},
  OPTannote   = {Non-linear least squares to solve for 3d structure,
  motion and camera parameters simultaneously, smart parameterization of
  unknowns and formulation of Hessian and gradient vectors.}
}

@InProceedings{faugeras98,
  author    = {Faugeras, Olivier and Keriven, Renaud},
  title     = {Complete Dense Stereovision using Level Set Methods},
  booktitle = {Proc. ECCV},
  year      = 1998,
  OPTannote = {scene is represented by a level set, and level sets are
  evolved to find the final scene function. A PDE describes how level sets
  go from one to another, the metric is a correlation that is correct to
  an affine transformation.},
}


@TechReport{kiriakos98,
  author      = {Kutulakos, Kiriakos N. and Seitz, Steven M.},
  title       = {A Theory of Shape by Space Carving},
  institution = {U. Rochester C.S. Dept},
  year        = {1998},
  number      = {692},
  month       = may,
  OPTannote   = {scene is represented by voxels, initially a rough volume is
  provided, then voxels which are not "photo-consistent" are
  removed. "Photo-consistent" voxels are voxels that are consistent with a
  given lighting model (ie: lambertian, etc.)}
}

@Book{trucco98,
  author    = {Trucco, Emanuele and Verri, Alessandro},
  title     = {Introductory Techniques for {3-D} Computer Vision},
  publisher = {Prentice-Hall},
  year      = {1998},
}

@Book{hartley2000,
  author    = {Hartley, Richard and Zisserman, Andrew},
  title     = {Multiple View Geometry in Computer Vision},
  publisher = {Cambridge University Press},
  year      = {2000},
}