indexing - 在 RavenDB 中使用 More Like This(相似内容推荐)

标签 indexing mapreduce ravendb morelikethis

我的领域模型中有这些文档:

/// <summary>
/// An article document; it references its keywords by document id.
/// </summary>
public class Article
{
    /// <summary>Document id of the article.</summary>
    public string Id { get; set; }

    // some other properties

    /// <summary>Ids of the <c>Keyword</c> documents attached to this article.</summary>
    public IList<string> KeywordIds { get; set; }
}

/// <summary>
/// A keyword document that articles refer to by id.
/// </summary>
public class Keyword
{
    /// <summary>Document id of the keyword.</summary>
    public string Id { get; set; }

    /// <summary>Name of the keyword as used in URLs (presumably a slug; verify against callers).</summary>
    public string UrlName { get; set; }

    /// <summary>Title of the keyword.</summary>
    public string Title { get; set; }

    /// <summary>Tooltip text for the keyword.</summary>
    public string Tooltip { get; set; }

    /// <summary>Description of the keyword.</summary>
    public string Description { get; set; }
}

我有这种情况:

  • 文章 A1 有关键字 K1
  • 文章 A2 有关键字 K1
  • 一位用户阅读文章 A1
  • 我想建议用户阅读文章A2

我知道可以使用 More Like This 组件包(bundle),也阅读了相关文档,但还是不清楚具体该怎么做。你能帮帮我吗?

最佳答案

看看这个例子,示例中的“类型”(Genre)就相当于你的“关键字”(Keyword):

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Linq.Expressions;
using Lucene.Net.Analysis;
using Raven.Abstractions.Data;
using Raven.Abstractions.Indexing;
using Raven.Client;
using Raven.Client.Bundles.MoreLikeThis;
using Raven.Client.Indexes;
using Raven.Tests.Helpers;
using Xunit;

namespace RavenDBEval
{
    /// <summary>
    /// Demonstrates the RavenDB "More Like This" query: given one movie, find other
    /// movies that share cast members or genre ids with it.
    /// </summary>
    public class MoreLikeThisEvaluation : RavenTestBase
    {
        private readonly IDocumentStore _store;

        /// <summary>
        /// Creates the document store used by the test via <c>NewDocumentStore()</c>
        /// from the test base class.
        /// </summary>
        public MoreLikeThisEvaluation()
        {
            _store = (IDocumentStore)NewDocumentStore();
            // NOTE(review): NewDocumentStore() may already return an initialized store;
            // the call is kept because the original sample made it.
            _store.Initialize();
        }

        /// <summary>
        /// Stores the sample genres and movies, then runs two More Like This queries
        /// against the first movie (one over the "Cast" field, one over "Genres")
        /// and asserts that each query returns at least one related movie.
        /// </summary>
        [Fact]
        public void ShouldMatchTwoMoviesWithSameCast()
        {
            // Arrange: both indexes must exist before they can be queried.
            new MoviesByCastIndex().Execute(_store);
            new MoviesByGenreIndex().Execute(_store);

            string id;
            using (var session = _store.OpenSession())
            {
                GetGenreList().ForEach(session.Store);
                var movies = GetMovieList();
                movies.ForEach(session.Store);
                session.SaveChanges();

                // The first movie in the list is the one we look for "similar" movies to.
                id = session.Advanced.GetDocumentId(movies.First());
            }

            // Indexing happens in the background; wait so the queries below see the documents.
            WaitForIndexing(_store);

            using (var session = _store.OpenSession())
            {
                // Act
                var moreLikeThisByCast = session
                    .Advanced
                    .MoreLikeThis<Movie, MoviesByCastIndex>(BuildQuery(id, "Cast"));
                var moreLikeThisByGenre = session
                    .Advanced
                    .MoreLikeThis<Movie, MoviesByGenreIndex>(BuildQuery(id, "Genres"));

                // Assert (results are also printed for manual inspection)
                foreach (var movie in moreLikeThisByCast)
                {
                    Debug.Print("{0}, Cast={1}", movie.Title, string.Join(",", movie.Cast));
                }

                Assert.NotEmpty(moreLikeThisByCast);

                foreach (var movie in moreLikeThisByGenre)
                {
                    Debug.Print("{0}", movie.Title);
                    foreach (var genreId in movie.Genres)
                    {
                        var genre = session.Load<Genre>(genreId);
                        // Fail with a clear assertion instead of a NullReferenceException
                        // if a movie references a genre id that was never stored.
                        Assert.NotNull(genre);
                        Debug.Print("\t\t{0}", genre.Name);
                    }
                }

                Assert.NotEmpty(moreLikeThisByGenre);
            }
        }

        /// <summary>
        /// Builds the More Like This query shared by both lookups.
        /// </summary>
        /// <param name="documentId">Id of the document to find similar documents for.</param>
        /// <param name="field">Name of the indexed field to compare on.</param>
        /// <returns>A query restricted to <paramref name="field"/> with both frequency thresholds lowered.</returns>
        private static MoreLikeThisQuery BuildQuery(string documentId, string field)
        {
            return new MoreLikeThisQuery
            {
                DocumentId = documentId,
                Fields = new[] { field },
                // Thresholds kept from the original sample. Presumably a term must occur
                // at least once in the source document and in at least two documents
                // overall to be considered - confirm against the More Like This docs.
                MinimumTermFrequency = 1,
                MinimumDocumentFrequency = 2
            };
        }

        /// <summary>
        /// Returns the four sample genres, with explicit ids "genres/1" to "genres/4".
        /// </summary>
        private static List<Genre> GetGenreList()
        {
            return new List<Genre>
            {
                new Genre { Id = "genres/1", Name = "Comedy" },
                new Genre { Id = "genres/2", Name = "Drama" },
                new Genre { Id = "genres/3", Name = "Action" },
                new Genre { Id = "genres/4", Name = "Sci Fi" },
            };
        }

        /// <summary>
        /// Returns the six sample movies. The first entry is the one the test queries
        /// against; the others overlap with it to varying degrees in cast and genres.
        /// </summary>
        private static List<Movie> GetMovieList()
        {
            return new List<Movie>
            {
                new Movie
                {
                    Title = "Star Wars Episode IV: A New Hope",
                    Genres = new[] { "genres/3", "genres/4" },
                    Cast = new[]
                    {
                        "Mark Hamill",
                        "Harrison Ford",
                        "Carrie Fisher"
                    }
                },
                new Movie
                {
                    Title = "Star Wars Episode V: The Empire Strikes Back",
                    Genres = new[] { "genres/3", "genres/4" },
                    Cast = new[]
                    {
                        "Mark Hamill",
                        "Harrison Ford",
                        "Carrie Fisher"
                    }
                },
                new Movie
                {
                    Title = "Some Fake Movie",
                    Genres = new[] { "genres/2" },
                    Cast = new[]
                    {
                        "James Franco",
                        "Sting",
                        "Carrie Fisher"
                    }
                },
                new Movie
                {
                    Title = "The Conversation",
                    Genres = new[] { "genres/2" },
                    Cast = new[]
                    {
                        "Gene Hackman",
                        "John Cazale",
                        "Allen Garfield",
                        "Harrison Ford"
                    }
                },
                new Movie
                {
                    Title = "Animal House",
                    Genres = new[] { "genres/1" },
                    Cast = new[]
                    {
                        "John Belushi",
                        "Karen Allen",
                        "Tom Hulce"
                    }
                },
                new Movie
                {
                    Title = "Superman",
                    Genres = new[] { "genres/3", "genres/4" },
                    Cast = new[]
                    {
                        "Christopher Reeve",
                        "Margot Kidder",
                        "Gene Hackman",
                        "Glen Ford"
                    }
                }
            };
        }
    }
}

/// <summary>
/// A movie document. Cast members are stored inline by name; genres are
/// referenced by the ids of <see cref="Genre"/> documents.
/// </summary>
public class Movie
{
    /// <summary>Document id of the movie.</summary>
    public string Id { get; set; }

    /// <summary>Title of the movie.</summary>
    public string Title { get; set; }

    /// <summary>Names of the cast members.</summary>
    public string[] Cast { get; set; }

    /// <summary>Ids of the genres this movie belongs to (e.g. "genres/3").</summary>
    public string[] Genres { get; set; }
}

/// <summary>
/// A genre document that movies refer to by id.
/// </summary>
public class Genre
{
    /// <summary>Document id of the genre (e.g. "genres/1").</summary>
    public string Id { get; set; }

    /// <summary>Display name of the genre.</summary>
    public string Name { get; set; }
}

/// <summary>
/// Index over <see cref="Movie"/> documents that exposes only the <c>Genres</c> field.
/// The field is analyzed with Lucene's <c>KeywordAnalyzer</c> and stored in the index.
/// </summary>
public class MoviesByGenreIndex : AbstractIndexCreationTask<Movie>
{
    /// <summary>
    /// Defines the map function plus the analyzer and storage settings for <c>Genres</c>.
    /// </summary>
    public MoviesByGenreIndex()
    {
        // Project each movie onto the single field this index is about.
        Map = docs => docs.Select(doc => new { doc.Genres });

        // KeywordAnalyzer is registered by its full type name.
        var analyzerByField = new Dictionary<Expression<Func<Movie, object>>, string>();
        analyzerByField.Add(movie => movie.Genres, typeof(KeywordAnalyzer).FullName);
        Analyzers = analyzerByField;

        // Keep the field value in the index itself.
        var storageByField = new Dictionary<Expression<Func<Movie, object>>, FieldStorage>();
        storageByField.Add(movie => movie.Genres, FieldStorage.Yes);
        Stores = storageByField;
    }
}

/// <summary>
/// Index over <see cref="Movie"/> documents that exposes only the <c>Cast</c> field.
/// The field is analyzed with Lucene's <c>KeywordAnalyzer</c> and stored in the index.
/// </summary>
public class MoviesByCastIndex : AbstractIndexCreationTask<Movie>
{
    /// <summary>
    /// Defines the map function plus the analyzer and storage settings for <c>Cast</c>.
    /// </summary>
    public MoviesByCastIndex()
    {
        // Project each movie onto the single field this index is about.
        Map = docs => docs.Select(doc => new { doc.Cast });

        // KeywordAnalyzer is registered by its full type name.
        var analyzerByField = new Dictionary<Expression<Func<Movie, object>>, string>();
        analyzerByField.Add(movie => movie.Cast, typeof(KeywordAnalyzer).FullName);
        Analyzers = analyzerByField;

        // Keep the field value in the index itself.
        var storageByField = new Dictionary<Expression<Func<Movie, object>>, FieldStorage>();
        storageByField.Add(movie => movie.Cast, FieldStorage.Yes);
        Stores = storageByField;
    }
}

输出:

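- Star Wars Episode V: The Empire Strikes Back, Cast=Mark Hamill,Harrison Ford,Carrie Fisher
- Some Fake Movie, Cast=James Franco,Sting,Carrie Fisher
- The Conversation, Cast=Gene Hackman,John Cazale,Allen Garfield,Harrison Ford

按类型:
- Star Wars Episode V: The Empire Strikes Back
        Action
        Sci Fi
- Superman
        Action
        Sci Fi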

注意 nuget 包:

<?xml version="1.0" encoding="utf-8"?>
<!-- NuGet packages.config for the sample above: Lucene.Net 3.0.3 (plus Contrib),
     the RavenDB 2.0.2261 client / database / embedded / test-helper packages,
     SharpZipLib 0.86.0 and xUnit 1.9.1, all targeting net40. -->
<packages>
  <package id="Lucene.Net" version="3.0.3" targetFramework="net40" />
  <package id="Lucene.Net.Contrib" version="3.0.3" targetFramework="net40" />
  <package id="RavenDB.Client" version="2.0.2261" targetFramework="net40" />
  <package id="RavenDB.Database" version="2.0.2261" targetFramework="net40" />
  <package id="RavenDB.Embedded" version="2.0.2261" targetFramework="net40" />
  <package id="RavenDB.Tests.Helpers" version="2.0.2261" targetFramework="net40" />
  <package id="SharpZipLib" version="0.86.0" targetFramework="net40" />
  <package id="xunit" version="1.9.1" targetFramework="net40" />
</packages>

关于 indexing - 在 RavenDB 中使用 More Like This(相似内容推荐),我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/12428215/

相关文章:

unit-testing - 单元测试 RavenDB

ravendb - 面向对象数据库和文档数据库有什么区别?

c# - RavenDb 检查索引是否存在

MySQL 索引 : inverted or forward

java - Lucene 中多值字段的性能问题

javascript - 查找字符串中指定字符的所有索引

javascript - ES6 Map Reduce 数组通过 Redux 状态的映射和填充变平

mapreduce - Cloudera CDH4 如何与 Avro 配合使用?

eclipse - 当hadoop不在同一主机中时,从Eclipse执行MapReduce时出错

sql - 在图中索引和查询路径的最有效方法