Import English Wikipedia dump into SQL Server

I’ve downloaded the latest English Wikipedia dump (enwiki-latest-pages-articles-multistream.xml) from here, and I’m trying to import it to SQL Server 2018.

I can’t see the XML file because it weighs over 75 GB, and thus I don’t know what kind of tables I should create before I’m going to work with Bulk XML.

How can I do this? I can write some script on Python or C#. Thanks in advance!

Add Comment
2 Answer(s)

Use following

SQL Query to create database

Create Database Feed ; GO  USE [Feed] drop table Doc drop table Links ; GO  CREATE TABLE [dbo].[Doc](    DocID int primary key,    Title [varchar](50) NULL,    URL [varchar](50) NULL,    Abstract [varchar](50) NULL )  CREATE TABLE Links(    DocID int,     LinkType [varchar](10) NULL,    Anchor [varchar](50) NULL,     Link [varchar](50) NULL    CONSTRAINT FK_DocID FOREIGN KEY (DocID)         REFERENCES dbo.Doc (DocID) ) 

c# code to fill database from xml

using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Xml; using System.Xml.Linq; using System.Data; using System.Data.SqlClient;  namespace ConsoleApplication1 {     class Program     {         const string FILENAME = @"c:\temp\test.xml";         const string CONNECTION_STRING = @"Server=.\SQLEXPRESS;Database=Feed;Trusted_Connection=True;";         const string INSERT_DOC =             "INSERT INTO [Feed].[dbo].[Doc] (" +             "DocID, Title , URL, Abstract)" +             " VALUES " +             "(@DocID, @Title, @URL, @Abstract)";         const string INSERT_LINK =             "INSERT INTO [Feed].[dbo].[Links] (" +             "DocID, LinkType , Anchor, Link)" +             " VALUES " +             "(@DocID, @Linktype, @Anchor, @Link)";          static void Main(string[] args)         {             SqlConnection conn = new SqlConnection(CONNECTION_STRING);             conn.Open();              SqlCommand docCmd = new SqlCommand(INSERT_DOC, conn);              docCmd.Parameters.Add("@DocID", SqlDbType.Int);             docCmd.Parameters.Add("@Title", SqlDbType.VarChar, 50);             docCmd.Parameters.Add("@URL", SqlDbType.VarChar, 50);             docCmd.Parameters.Add("@Abstract", SqlDbType.VarChar, 50);              SqlCommand linksCmd = new SqlCommand(INSERT_LINK, conn);              linksCmd.Parameters.Add("@DocID", SqlDbType.Int);             linksCmd.Parameters.Add("@LinkType", SqlDbType.VarChar, 10);             linksCmd.Parameters.Add("@Anchor", SqlDbType.VarChar, 50);             linksCmd.Parameters.Add("@Link", SqlDbType.VarChar, 50);              XmlReader reader = XmlReader.Create(FILENAME);             int id = 0;             while (!reader.EOF)             {                 if (reader.Name != "doc")                 {                     reader.ReadToFollowing("doc");                 }                 if (!reader.EOF)                 {                     XElement doc = (XElement)XElement.ReadFrom(reader);                     id++;                      docCmd.Parameters["@DocID"].Value = id;                     docCmd.Parameters["@Title"].Value = (string)doc.Element("title");                     docCmd.Parameters["@URL"].Value = (string)doc.Element("url");                     docCmd.Parameters["@Abstract"].Value = (string)doc.Element("abstract");                     int docRowsChanged = docCmd.ExecuteNonQuery();                      foreach (XElement sublink in doc.Descendants("sublink"))                     {                         linksCmd.Parameters["@DocID"].Value = id;                         linksCmd.Parameters["@LinkType"].Value = (string)sublink.Attribute("linktype");                         linksCmd.Parameters["@Anchor"].Value = (string)sublink.Element("anchor");                         linksCmd.Parameters["@Link"].Value = (string)sublink.Element("link");                         int linksRowsChanged = linksCmd.ExecuteNonQuery();                     }                  }             }         }     } } 
Answered on July 17, 2020.
Add Comment

The best way to handle this would be to create a MediaWiki instance and import the XML dump into that instance, which would then make MediaWiki to create a SQL database you can then use for other purposes.

You can do this by downloading MediaWiki from If you really need to use Microsoft SQL server, I recommend to download MediaWiki 1.31.8 (latest LTS), because MSSQL server support was dropped in 1.34 (see more at

Once your MediaWiki server is up and running, you need to run the import script. The best way is to copy the XML file to the server where the wiki is running, and run php maintenance/importDump.php /path/to/dumpfile from the MediaWiki root directory (if your MediaWiki is running in /var/www/html/mw, then cd to that directory and then run the script), see

Hope this helps!

Answered on July 17, 2020.
Add Comment

Your Answer

By posting your answer, you agree to the privacy policy and terms of service.