@article{JSFS_2006__147_1_5_0, author = {Lugosi, G\'abor}, title = {Pr\'ediction randomis\'ee de suites individuelles}, journal = {Journal de la soci\'et\'e fran\c caise de statistique}, pages = {5--37}, publisher = {Soci\'et\'e fran\c caise de statistique}, volume = {147}, number = {1}, year = {2006}, language = {fr}, url = {http://www.numdam.org/item/JSFS_2006__147_1_5_0/} }
Lugosi, Gábor. Prédiction randomisée de suites individuelles. Journal de la société française de statistique, Tome 147 (2006) no. 1, pp. 5-37. http://www.numdam.org/item/JSFS_2006__147_1_5_0/
[1] Using confidence bounds for exploitation-exploration trade-offs. Journal of Machine Learning Research, 3 :397-422. | MR 1984023 | Zbl 1084.68543
(2002).[2] The nonstochastic multiarmed bandit problem. SIAM Journal on Computing, 32 :48-77. | MR 1954855 | Zbl 1029.68087
, , et (2002).[3] Structural results for online learning models with and without queries. Machine Learning, 36 :147-181. | Zbl 0947.68080
et (1999).[4] Adaptive routing with end-to-end feedback : distributed learning and geometric approaches. In Proceedings of the 36th ACM Symposium on the Theory of Computing, pages 45-53. ACM Press. | MR 2121584 | Zbl 1192.68020
et (2004).[5] Weighted sums of certain dependent random variables. Tohoku Mathematical Journal, 68 :357-367. | MR 221571 | Zbl 0178.21103
(1967).[6] On pseudo-games. Annals of Mathematical Statistics, 39 :1932-1945. | MR 234560 | Zbl 0222.90056
(1968).[7] Bandit Problems. Chapman and Hall. | MR 813698
et (1985).[8] An analog of the minimax theorem for vector payoffs. Pacific Journal of Mathematics, 6 :1-8. | MR 81804 | Zbl 0074.34403
(1956).[9] Controlled random walks. In Proceedings of the International Congress of Mathematicians, 1954, volume III, pages 336-338. North-Holland. | MR 85141 | Zbl 0073.13204
(1956).[10] Tracking a small set of experts by mixing past posteriors. Journal of Machine Learning Research, 3 :363-396. | MR 1984022 | Zbl 1084.68552
et (2002).[11] Analysis of two gradient-based algorithms for on-line regression. Journal of Computer and System Sciences, 59(3) :392-411. | MR 1726769 | Zbl 0961.68148
(1999).[12] How to use expert advice. Journal of the ACM, 44(3) :427-485. | MR 1470152 | Zbl 0890.68066
, , , , et (1997).[13] On prediction of individual sequences. Annals of Statistics, 27 :1865-1895. | MR 1765620 | Zbl 0961.62081
et (1999).[14] Potential-based algorithms in on-line prediction and game theory. Machine Learning, 51 :239-261. | Zbl 1026.68152
et (2003).[15] Prediction, Learning, and Games. Cambridge University Press, New York. | MR 2409394 | Zbl 1114.91001
et (2006).[16] Minimizing regret with label efficient prediction. IEEE Transactions on Information Theory, 51 :2152-2162. | MR 2235288 | Zbl 1078.68693
, et (2004).[17] Regret minimization under partial monitoring. Prépublication DMA-04-18, Ecole normale supérieure, Paris.
, et (2004).[18] Behavior of sequential predictors of binary sequences. In Proceedings of the 4th Prague Conference on Information Theory, Statistical Decision Functions, Random Processes, pages 263-272. Maison d'édition de l'Académie des sciences de Tchécoslovaquie, Prague. | MR 217944
(1965).[19] Universal prediction of individual sequences. IEEE Transactions on Information Theory, 38 :1258-127. | MR 1168747 | Zbl 0775.94076
, et (1992).[20] Prediction in the worst-case. Annals of Statistics, 19 :1084-1090. | MR 1105864 | Zbl 0725.62085
(1991).[21] Asymptotic calibration. Biometrika, 85 :379-390. | MR 1649119 | Zbl 0947.62059
et (1998).[22] Regret in the on-line decision problem. Games and Economic Behavior, 29 :7-36. | MR 1729308 | Zbl 0984.91025
et (1999).[23] On tail probabilities for martingales. Annals of Probability, 3 :100-118. | MR 380971 | Zbl 0313.60037
(1975).[24] The Theory of Learning in Games. MIT Press. | MR 1629477 | Zbl 0939.91004
et (1998).[25] Multi-Armed Bandit Allocation Indices. Wiley-lnterscience series in Systems and Optimization. Wiley. | MR 996417 | Zbl 0699.90068
(1989).[26] Efficient algorithms and minimax bounds for zero-delay lossy source coding. IEEE Transactions on Signal Processing, 52 :2337-2347.
, et (2004).[27] A "follow the perturbed leader"-type algorithm for zero-delay quantization of individual sequences. In Data Compression Conference, pages 342-351.
, et (2004).[28] Tracking the best of many experts. In Proceedings of the 18th Annual Conference on Learning Theory, pages 204-216. | Zbl 1137.68540
, et (2005).[29] The on-line shortest path bandit problem. Manuscrit.
, , et (2005).[30] Approximation to Bayes risk in repeated play. Contributions to the theory of games, 3 :97-139. | Zbl 0078.32804
(1957).[31] A simple adaptive procedure leading to correlated equilibrium. Econometrica, 68 :1127-1150. | MR 1779146 | Zbl 1020.91003
et (2000).[32] A general class of adaptive strategies. Journal of Economic Theory, 98 :26-54. | MR 1832647 | Zbl 0994.91007
et (2001)[33] A reinforcement procedure leading to correlated equilibrium. In Economic Essays : A Festschrift for Werner Hildenbrand, pages 181-200. Springer. | MR 1940643 | Zbl 1023.91004
et (2002).[34] Sequential prediction of individual sequences under general loss functions. IEEE Transactions on Information Theory, 44 :1906-1925, 1998. | MR 1664051 | Zbl 1026.68579
, et (1998).[35] Apple tasting. Information and Computation, 161(2) :85-139. | MR 1781564 | Zbl 1045.68573
, et (2000).[36] Some label efficient learning results. In Proceedings of the 10th Annual Conference on Computational Learning Theory, pages 218-230. ACM Press.
et (1997).[37] Predicting nearly as well as the best pruning of a decision tree. Machine Learning, 27(1) :51-68.
et (1997).[38] Tracking the best expert. Machine Learning, 32(2) :151-178. | Zbl 0912.68165
et (1998).[39] Probability inequalities for sums of bounded random variables. Journal ofthe American Statistical Association, 58 :13-30. | MR 144363 | Zbl 0127.10602
(1963).[40] Prediction with expert advice by following the perturbed and penalized leader. Prépublication IDSIA-20-04, Istituto Dalle Molle di Studi sull'Tntelligenza Artificiale, Suisse.
et (2004).[41] Efficient algorithms for the online decision problem. In Proceedings of the 16th Annual Conference on Learning Theory, pages 26-40. Springer.
et (2003).[42] Asymptotically efficient adaptive allocation rules. Advances in Applied Mathematics, 6 :4-22. | MR 776826 | Zbl 0568.62074
et (1985).[43] On the complexity of an individual sequence. IEEE Transactions on Information Theory, 22 :75-81. | MR 389403 | Zbl 0337.94013
et (1976).[44] The weighted majority algorithm. Information and Computation, 108 :212-261. | MR 1265851 | Zbl 0804.68121
et (1994).[45] On-line learning with imperfect monitoring. In Proceedings of the 16th Annual Conference on Learning Theory, pages 552-567. Springer. | Zbl 05686321
et (2003).[46] Online geometric optimization in the bandit setting against an adaptive adversary. In Proceedings of the 17th Annual Conference on Learning Theory, pages 109-123. Springer, 2004. | MR 2177904 | Zbl 1078.68128
et (2004).[47] On repeated games with incomplete information played by non-Bayesian players. International Journal of Game Theory, 9 :157-167. | MR 595967 | Zbl 0441.90118
(1980).[48] Universal schemes for sequential decision from individual data sequences. IEEE Transactions on Information Theory, 39.1280-1292. | MR 1267158 | Zbl 0801.94012
et (1993).[49] Universal prediction. IEEE Transactions on Information Theory, 44 :2124-2147. | MR 1658815 | Zbl 0933.94008
et (1998).[50] Repeated games. core Discussion paper, numéros 9420, 9421, 9422, Louvain-la-Neuve, Belgique.
, et (1994).[51] Discrete Parameter Martingales. North-Holland. | MR 402915
(1975).[52] Discrete prediction games with arbitrary feedback and loss. In Proceedings of the 14th Annual Conference on Computational Learning Theory, pages 208-223. | MR 2042037 | Zbl 0992.68506
et (2001).[53] Some aspects of the sequential design of experiments. Bullettin of the American Mathematical Society, 55 :527-535. | MR 50246 | Zbl 0049.37009
(1952).[54] Minimizing regret : The general case. Games and Economic Behavior, 29 :224-243. | MR 1729318 | Zbl 0954.91011
(1999).[55] Information incomplète et regret interne en prédiction de suites individuelles. Thèse de doctorat, Université Paris-Sud, Orsay.
(2005).[56] Path kernels and multiplicative updates. Journal of Machine Learning Research, 4 :773-818. | MR 2075997 | Zbl 1083.68592
et (2004).[57] Aggregating strategies. In Proceedings of the 3rd Annual Workshop on Computational Learning Theory, pages 372-383.
(1990).[58] A game of prediction with expert advice. Journal of Computer and System Sciences, 56 :153-173. | MR 1629690 | Zbl 0945.68528
(1998).[59] Derandomizing stochastic prediction strategies. Machine Learning, 35 :247-282. | Zbl 0941.68128
(1999).[60] Competitive on-line statistics. International Statistical Review, 69 :213-248. | Zbl 1211.62200
(2001).[61] Universal prediction of binary individual sequences in the presence of noise. IEEE Transactions on Information Theory, 47 :2151-2173. | MR 1873194 | Zbl 1016.94017
et (2001).[62] Twofold universal prediction schemes for achieving the finite state predictability of a noisy individual binary sequence. IEEE Transactions on Information Theory, 47 :1849-1866. | MR 1842523 | Zbl 1051.62523
, et (2001).[63] Coding theorems for individual sequences. IEEE Transactions on Information Theory, 24 :405-412. | MR 504371 | Zbl 0416.94006
(1978).[64] Distortion-rate theory for individual sequences. IEEE Transactions on Information Theory, 26 :137-143. | MR 570318 | Zbl 0431.94030
(1980).[65] A universal algorithm for sequential data-compression. IEEE Transactions on Information Theory, 23 :337-343. | MR 530215 | Zbl 0379.94010
et (1977).