Privacy Policy Cookie Policy Terms and Conditions Utente:Leonard Vertighel/temp - Wikipedia

Utente:Leonard Vertighel/temp

Da Wikipedia, l'enciclopedia libera.

Unfinished anchor link check script.

Released under GPL.

(Contains code from MediaWiki 1.5)

BUG: Add boilerplate text... ;)


Tables:

create table if not exists sl_links (
        sll_from varchar(255),
        sll_to varchar(255),
        sll_sec varchar(255)
);

create table if not exists sl_targets (
        slt_title varchar(255),
        slt_sec varchar(255),
        index(slt_title, slt_sec)
);

<?php //config.php

$config['mysql_host'] = '127.0.0.1';
$config['mysql_user'] = 'root';
$config['mysql_pass'] = '';
$config['mysql_db'] = 'wikipedia';

?>

<?php //functions.php

// database

function db_connect($new = false) {
        global $config;
        $db = mysql_connect($config['mysql_host'], $config['mysql_user'], $config['mysql_pass'], $new);
        mysql_select_db($config['mysql_db'], $db);
        return $db;
}

// functions from MediaWiki 1.5

function MW_decodeCharReferences( $text ) {
        return preg_replace_callback(
                '/&([A-Za-z0-9]+);|&\#([0-9]+);|&\#x([0-9A-Za-z]+);|&\#X([0-9A-Za-z]+);|(&)/x',
                'MW_decodeCharReferencesCallback',
                $text );
}

function MW_decodeCharReferencesCallback( $matches ) {
        if( $matches[1] != '' ) {
                return MW_decodeEntity( $matches[1] );
        } elseif( $matches[2] != '' ) {
                return  MW_decodeChar( intval( $matches[2] ) );
        } elseif( $matches[3] != ''  ) {
                return  MW_decodeChar( hexdec( $matches[3] ) );
        } elseif( $matches[4] != '' ) {
                return  MW_decodeChar( hexdec( $matches[4] ) );
        }
        # Last case should be an ampersand by itself
        return $matches[0];
}

function MW_decodeEntity( $name ) {
        $wgHtmlEntities = array(
        'Aacute'   => 193,
        'aacute'   => 225,
        'Acirc'    => 194,
        'acirc'    => 226,
        'acute'    => 180,
        'AElig'    => 198,
        'aelig'    => 230,
        'Agrave'   => 192,
        'agrave'   => 224,
        'alefsym'  => 8501,
        'Alpha'    => 913,
        'alpha'    => 945,
        'amp'      => 38,
        'and'      => 8743,
        'ang'      => 8736,
        'Aring'    => 197,
        'aring'    => 229,
        'asymp'    => 8776,
        'Atilde'   => 195,
        'atilde'   => 227,
        'Auml'     => 196,
        'auml'     => 228,
        'bdquo'    => 8222,
        'Beta'     => 914,
        'beta'     => 946,
        'brvbar'   => 166,
        'bull'     => 8226,
        'cap'      => 8745,
        'Ccedil'   => 199,
        'ccedil'   => 231,
        'cedil'    => 184,
        'cent'     => 162,
        'Chi'      => 935,
        'chi'      => 967,
        'circ'     => 710,
        'clubs'    => 9827,
        'cong'     => 8773,
        'copy'     => 169,
        'crarr'    => 8629,
        'cup'      => 8746,
        'curren'   => 164,
        'dagger'   => 8224,
        'Dagger'   => 8225,
        'darr'     => 8595,
        'dArr'     => 8659,
        'deg'      => 176,
        'Delta'    => 916,
        'delta'    => 948,
        'diams'    => 9830,
        'divide'   => 247,
        'Eacute'   => 201,
        'eacute'   => 233,
        'Ecirc'    => 202,
        'ecirc'    => 234,
        'Egrave'   => 200,
        'egrave'   => 232,
        'empty'    => 8709,
        'emsp'     => 8195,
        'ensp'     => 8194,
        'Epsilon'  => 917,
        'epsilon'  => 949,
        'equiv'    => 8801,
        'Eta'      => 919,
        'eta'      => 951,
        'ETH'      => 208,
        'eth'      => 240,
        'Euml'     => 203,
        'euml'     => 235,
        'euro'     => 8364,
        'exist'    => 8707,
        'fnof'     => 402,
        'forall'   => 8704,
        'frac12'   => 189,
        'frac14'   => 188,
        'frac34'   => 190,
        'frasl'    => 8260,
        'Gamma'    => 915,
        'gamma'    => 947,
        'ge'       => 8805,
        'gt'       => 62,
        'harr'     => 8596,
        'hArr'     => 8660,
        'hearts'   => 9829,
        'hellip'   => 8230,
        'Iacute'   => 205,
        'iacute'   => 237,
        'Icirc'    => 206,
        'icirc'    => 238,
        'iexcl'    => 161,
        'Igrave'   => 204,
        'igrave'   => 236,
        'image'    => 8465,
        'infin'    => 8734,
        'int'      => 8747,
        'Iota'     => 921,
        'iota'     => 953,
        'iquest'   => 191,
        'isin'     => 8712,
        'Iuml'     => 207,
        'iuml'     => 239,
        'Kappa'    => 922,
        'kappa'    => 954,
        'Lambda'   => 923,
        'lambda'   => 955,
        'lang'     => 9001,
        'laquo'    => 171,
        'larr'     => 8592,
        'lArr'     => 8656,
        'lceil'    => 8968,
        'ldquo'    => 8220,
        'le'       => 8804,
        'lfloor'   => 8970,
        'lowast'   => 8727,
        'loz'      => 9674,
        'lrm'      => 8206,
        'lsaquo'   => 8249,
        'lsquo'    => 8216,
        'lt'       => 60,
        'macr'     => 175,
        'mdash'    => 8212,
        'micro'    => 181,
        'middot'   => 183,
        'minus'    => 8722,
        'Mu'       => 924,
        'mu'       => 956,
        'nabla'    => 8711,
        'nbsp'     => 160,
        'ndash'    => 8211,
        'ne'       => 8800,
        'ni'       => 8715,
        'not'      => 172,
        'notin'    => 8713,
        'nsub'     => 8836,
        'Ntilde'   => 209,
        'ntilde'   => 241,
        'Nu'       => 925,
        'nu'       => 957,
        'Oacute'   => 211,
        'oacute'   => 243,
        'Ocirc'    => 212,
        'ocirc'    => 244,
        'OElig'    => 338,
        'oelig'    => 339,
        'Ograve'   => 210,
        'ograve'   => 242,
        'oline'    => 8254,
        'Omega'    => 937,
        'omega'    => 969,
        'Omicron'  => 927,
        'omicron'  => 959,
        'oplus'    => 8853,
        'or'       => 8744,
        'ordf'     => 170,
        'ordm'     => 186,
        'Oslash'   => 216,
        'oslash'   => 248,
        'Otilde'   => 213,
        'otilde'   => 245,
        'otimes'   => 8855,
        'Ouml'     => 214,
        'ouml'     => 246,
        'para'     => 182,
        'part'     => 8706,
        'permil'   => 8240,
        'perp'     => 8869,
        'Phi'      => 934,
        'phi'      => 966,
        'Pi'       => 928,
        'pi'       => 960,
        'piv'      => 982,
        'plusmn'   => 177,
        'pound'    => 163,
        'prime'    => 8242,
        'Prime'    => 8243,
        'prod'     => 8719,
        'prop'     => 8733,
        'Psi'      => 936,
        'psi'      => 968,
        'quot'     => 34,
        'radic'    => 8730,
        'rang'     => 9002,
        'raquo'    => 187,
        'rarr'     => 8594,
        'rArr'     => 8658,
        'rceil'    => 8969,
        'rdquo'    => 8221,
        'real'     => 8476,
        'reg'      => 174,
        'rfloor'   => 8971,
        'Rho'      => 929,
        'rho'      => 961,
        'rlm'      => 8207,
        'rsaquo'   => 8250,
        'rsquo'    => 8217,
        'sbquo'    => 8218,
        'Scaron'   => 352,
        'scaron'   => 353,
        'sdot'     => 8901,
        'sect'     => 167,
        'shy'      => 173,
        'Sigma'    => 931,
        'sigma'    => 963,
        'sigmaf'   => 962,
        'sim'      => 8764,
        'spades'   => 9824,
        'sub'      => 8834,
        'sube'     => 8838,
        'sum'      => 8721,
        'sup'      => 8835,
        'sup1'     => 185,
        'sup2'     => 178,
        'sup3'     => 179,
        'supe'     => 8839,
        'szlig'    => 223,
        'Tau'      => 932,
        'tau'      => 964,
        'there4'   => 8756,
        'Theta'    => 920,
        'theta'    => 952,
        'thetasym' => 977,
        'thinsp'   => 8201,
        'THORN'    => 222,
        'thorn'    => 254,
        'tilde'    => 732,
        'times'    => 215,
        'trade'    => 8482,
        'Uacute'   => 218,
        'uacute'   => 250,
        'uarr'     => 8593,
        'uArr'     => 8657,
        'Ucirc'    => 219,
        'ucirc'    => 251,
        'Ugrave'   => 217,
        'ugrave'   => 249,
        'uml'      => 168,
        'upsih'    => 978,
        'Upsilon'  => 933,
        'upsilon'  => 965,
        'Uuml'     => 220,
        'uuml'     => 252,
        'weierp'   => 8472,
        'Xi'       => 926,
        'xi'       => 958,
        'Yacute'   => 221,
        'yacute'   => 253,
        'yen'      => 165,
        'Yuml'     => 376,
        'yuml'     => 255,
        'Zeta'     => 918,
        'zeta'     => 950,
        'zwj'      => 8205,
        'zwnj'     => 8204 );
        if( isset( $wgHtmlEntities[$name] ) ) {
                return MW_codepointToUtf8( $wgHtmlEntities[$name] );
        } else {
                return "&$name;";
        }
}

function MW_decodeChar( $codepoint ) {
        if( MW_validateCodepoint( $codepoint ) ) {
                return MW_codepointToUtf8( $codepoint );
        } else {
                return ''; # ok forget about UTF8_REPLACEMENT; for now
        }
}
        
function MW_validateCodepoint( $codepoint ) {
        return ($codepoint ==    0x09)
                || ($codepoint ==    0x0a)
                || ($codepoint ==    0x0d)
                || ($codepoint >=    0x20 && $codepoint <=   0xd7ff)
                || ($codepoint >=  0xe000 && $codepoint <=   0xfffd)
                || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
}
        
function MW_codepointToUtf8( $codepoint ) {
        if($codepoint <              0x80) return chr($codepoint);
        if($codepoint <    0x800) return chr($codepoint >>     6 & 0x3f | 0xc0) .
                                                                         chr($codepoint           & 0x3f | 0x80);
        if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
                                                                         chr($codepoint >>        6 & 0x3f | 0x80) .
                                                                         chr($codepoint           & 0x3f | 0x80);
        if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
                                                                         chr($codepoint >> 12 & 0x3f | 0x80) .
                                                                         chr($codepoint >>        6 & 0x3f | 0x80) .
                                                                         chr($codepoint           & 0x3f | 0x80);

        die("Asked for code outside of range ($codepoint)\n");
}

// end functions from MediaWiki

?>

<?php

//Warning: truncate the sl_* tables first

//BUG: will output mysql error if query has no values
//BUG: urlencode()d links should be decoded
//BUG: nedd to take multiple equal section titles into account
//NOTE: Redirs are excluded in theory, but some are not marked as such in dump
//      (might be useful to take them into account as well,
//        so they will be correct if they will be supported in future)

//We are not taking into account:
//links in templates
//sections in templates
//templates in section titles

//Other problems:
//Markup in section titles, in particular math
//(simple markup like links, italics etc. is ok)

require_once('config.php');
require_once('functions.php');

$dbr = db_connect();
$dbw = db_connect(true);

// for now, we are just concerned with main namespace,
// so we will ignore all links with namespace or interwiki prefix
// prefixes are NOT case sensitive
$prefixes = array(
        'media',
        'speciale',
        'special',
        'discussione',
        'talk',
        'utente',
        'user',
        'discussioni_utente',
        'user_talk',
        'wikipedia',
        'discussioni_wikipedia',
        'wikipedia_talk',
        'immagine',
        'image',
        'discussioni_immagine',
        'image_talk',
        'mediawiki',
        'discussioni_mediawiki',
        'mediawiki_talk',
        'template',
        'discussioni_template',
        'template_talk',
        'aiuto',
        'help',
        'discussioni_aiuto',
        'help_talk',
        'categoria',
        'category',
        'discussioni_categoria',
        'category_talk',
        'portale',
        'portal',
        'discussioni_portale',
        'portal_talk');
$query = 'select iw_prefix from interwiki';
$result = mysql_query ($query, $dbr);
while ( $row = mysql_fetch_array($result) ) {
        $prefixes[] = $row['iw_prefix'];
}
echo count($prefixes), " prefixes found.\n";

echo "Retrieving pages from db...\n";
$query = 'select page_title, old_text from page inner join text on page_latest = old_id where page_namespace = 0 and page_is_redirect = 0';
$result = mysql_unbuffered_query($query, $dbr);
$s_l = ' ';
$s_t = ' ';
$query_l = "insert into sl_links (sll_from, sll_to, sll_sec) values";
$query_t = "insert into sl_targets (slt_title, slt_sec) values";
$counter = 0;
echo "Starting section title/anchor and section link extraction...\n";
while ( $row = mysql_fetch_assoc($result) ) {
        $title = mysql_escape_string($row['page_title']);
        // remove html comments (what about nested comments?)
        // decode references now to avoid trouble with &#num; entities
        $tmp = explode('<!--', MW_decodeCharReferences($row['old_text']));
        $text = $tmp[0];
        unset($tmp[0]);
        foreach ( $tmp as $tmp2 ) {
                list($dummy, $tmp3) = explode('-->', $tmp2, 2);
                $text .= $tmp3;
        }
        // sections (may occur at very beginning of page!)
        preg_match_all('/(^|\n)={2,6}([^=\n]([^\n]*[^=\n])?)={2,6}/u', $text, $matches, PREG_SET_ORDER);
        foreach ( $matches as $match ) {
                // do NOT collapse whitespace (mimick behaviour of MediaWiki 1.6alpha)
                $sec = strtr(trim($match[2]), ' ', '_');
                // quick and dirty cleanup of most frequent wiki-markup etc.
                // will produce incorrect results with math, templates, and some other cases
                $sec = preg_replace("/'{2,3}/u", '', $sec); // italics, bold (might kill some apostrophe)
                $sec = preg_replace('/\[\[[^]|\n]+\|([^]\n]+)\]\]/u', '$1', $sec); // wikilink w/ text
                $sec = preg_replace('/\[\[|\]\]/u', '', $sec); // any further double square brackets (even non-matched)
                $sec = preg_replace('|<[^>\n]+>|u', '', $sec); // anything that vaguely resembles an HTML tag
                // this should be more or less the anchor name:
                // (note that we cannot recover original text from this)
                $sec = strtr(urlencode($sec), '%', '.');
                $query_t .= $s_t . "('$title', '$sec')";
                $s_t = ', ';
        }
        // anchors ('id' attributes of html elements - 'name' seems not accepted as of MW1.6alpha)
        // (should probably be restricted to allowed elements)
        // (maybe should check for uniqueness?)
        // very rough:
        preg_match_all('/<[a-z]+[^<>\n]*[\s^\n]+id[\s^\n]*=[\s^\n]*([^<>\n]*)>/u', $text, $matches, PREG_SET_ORDER);
        foreach ( $matches as $match ) {
                $tmp = $match[1];
                $anc = '';
                if ( strpos($tmp, '"') === 0 ) {
                        $tmp = mb_substr($tmp, 1);
                        if ( strpos($tmp, '"') ) {
                                list($anc, $dummy) = explode('"', $tmp, 2);
                        }
                } elseif ( strpos($tmp, "'") === 0 ) {
                        $tmp = mb_substr($tmp, 1);
                        if ( strpos($tmp, "'") ) {
                                list($anc, $dummy) = explode("'", $tmp, 2);
                        }
                } else {
                        preg_match('/^[A-Za-z0-9]+/u', $tmp, $a_match);
                        $anc = $a_match[0];
                }
                // do NOT collapse NOR trim whitespace (mimick behaviour of MediaWiki 1.6alpha)
                $anc = strtr($anc, ' ', '_');
                // this should be more or less the anchor name:
                // (note that we cannot recover original text from this)
                $anc = strtr(urlencode($anc), '%', '.');
                if ( $anc ) {
                        $query_t .= $s_t . "('$title', '$anc')";
                        $s_t = ', ';
                }
        }
        // links to sections (note that prefix must NOT contain any ':')
        preg_match_all('/\[\[:?(([^]|\n:]+):)?([^]|\n]*)#([^]|\n]+)(\||\]\])/u', $text, $matches, PREG_SET_ORDER);
        foreach ( $matches as $match ) {
                // we are concerned only with internal links to main namespace:
                $l_prefix = strtolower(preg_replace('/[\s_]+/u', '_', trim($match[2])));
                if ( in_array($l_prefix, $prefixes) ) {
                        continue;
                }
                // not a valid prefix, so add it back to title:
                $tmp = preg_replace('/[\s_]+/u', '_', trim($match[1] . $match[3])); // collapse whitespace and _
                $l_title = mysql_escape_string(mb_strtoupper(mb_substr($tmp, 0, 1)) . mb_substr($tmp, 1));
                if ( $l_title == '' ) {
                        $l_title = $title;
                }
                $l_sec = preg_replace('/[\s_]+/u', '_', trim($match[4])); // collapse whitespace and _
                // this is just a guess, MediaWiki source should be analyzed to confirm/correct
                if ( $l_sec != urldecode($l_sec) ) {
                        $l_sec = strtr($l_sec, '%', '.');
                } else {
                        $l_sec = strtr(urlencode($l_sec), '%', '.');
                }
                $query_l .= $s_l . "('$title', '$l_title', '$l_sec')";
                $s_l = ', ';
        }
        $counter++;
        if ( ($counter%1000) == 0 ) {
                echo $counter, "\n";
                mysql_query($query_l, $dbw);
                if ( mysql_errno($dbw) ) {
                        echo mysql_error($dbw), "\n";
                }
                mysql_query($query_t, $dbw);
                if ( mysql_errno($dbw) ) {
                        echo mysql_error($dbw), "\n";
                }
                $s_l = ' ';
                $s_t = ' ';
                $query_l = "insert into sl_links (sll_from, sll_to, sll_sec) values";
                $query_t = "insert into sl_targets (slt_title, slt_sec) values";
        }
}

// leftover query (not an elegant solution...)
mysql_query($query_l, $dbw);
if ( mysql_errno($dbw) ) {
        echo mysql_error($dbw), "\n";
}
mysql_query($query_t, $dbw);
if ( mysql_errno($dbw) ) {
        echo mysql_error($dbw), "\n";
}

// and now for the redirects
echo "Loading redirects from db...\n";
$query = 'select page_title, old_text from page inner join text on page_latest = old_id where page_namespace = 0 and page_is_redirect = 1';
$result = mysql_unbuffered_query($query, $dbr);
$counter = 0;
echo "Creating redirect targets...\n";
while ( $row = mysql_fetch_assoc($result) ) {
        $title = mysql_escape_string($row['page_title']);
        $text = MW_decodeCharReferences($row['old_text']);
        // we use essentially the same regexp as above, 
        //since prefixes and anchors etc. might exist here as well 
        //(but anchor is optional (and meaningless as of MW1.6alpha))
        preg_match('/#\s*redirect\s*\[\[:?(([^]|\n:]+):)?([^]|\n]*)(#([^]|\n]+))?(\||\]\])/ui', $text, $match);
        // we are concerned only with internal links to main namespace:
        $l_prefix = strtolower(preg_replace('/[\s_]+/u', '_', trim($match[2])));
        if ( in_array($l_prefix, $prefixes) ) {
                continue;
        }
        // not a valid prefix, so add it back to title:
        $tmp = preg_replace('/[\s_]+/u', '_', trim($match[1] . $match[3])); // collapse whitespace and _
        $l_title = mysql_escape_string(mb_strtoupper(mb_substr($tmp, 0, 1)) . mb_substr($tmp, 1));
        $query = "insert into sl_targets (slt_title, slt_sec) select '$title', slt_sec from sl_targets where slt_title = '$l_title'";
        mysql_query($query, $dbw);
        if ( mysql_errno($dbw) ) {
                echo mysql_error($dbw), "\n";
        }
        $counter++;
        if ( ($counter%1000) == 0 ) {
                echo $counter, "\n";
        }
}
?>

Queries:

General list:
        Page-containing-link: link (number of occurrences in page)
select concat('#[[', sll_from, ']]: [[', sll_to, '#', sll_sec, ']] (', count(*), ')') from sl_links left join sl_targets on sll_to = slt_title and sll_sec = slt_sec where slt_sec is null group by sll_from, sll_to, sll_sec;

Star Wars planets (and analogous cases):
select concat('#[[Pianeti_di_Guerre_Stellari#', sll_sec, ']]: ', group_concat(concat('[[', sll_from, ']]') separator ', ')) from sl_links left join sl_targets on sll_to = slt_title and sll_sec = slt_sec where sll_to = 'Pianeti_di_Guerre_Stellari' and slt_sec is null group by sll_sec;

THIS WEB:

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - be - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - closed_zh_tw - co - cr - cs - csb - cu - cv - cy - da - de - diq - dv - dz - ee - el - eml - en - eo - es - et - eu - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gd - gl - glk - gn - got - gu - gv - ha - haw - he - hi - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mg - mh - mi - mk - ml - mn - mo - mr - ms - mt - mus - my - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - rm - rmy - rn - ro - roa_rup - roa_tara - ru - ru_sib - rw - sa - sc - scn - sco - sd - se - searchcom - sg - sh - si - simple - sk - sl - sm - sn - so - sq - sr - ss - st - su - sv - sw - ta - te - test - tet - tg - th - ti - tk - tl - tlh - tn - to - tokipona - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu

Static Wikipedia 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2007:

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - be - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - closed_zh_tw - co - cr - cs - csb - cu - cv - cy - da - de - diq - dv - dz - ee - el - eml - en - eo - es - et - eu - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gd - gl - glk - gn - got - gu - gv - ha - haw - he - hi - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mg - mh - mi - mk - ml - mn - mo - mr - ms - mt - mus - my - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - rm - rmy - rn - ro - roa_rup - roa_tara - ru - ru_sib - rw - sa - sc - scn - sco - sd - se - searchcom - sg - sh - si - simple - sk - sl - sm - sn - so - sq - sr - ss - st - su - sv - sw - ta - te - test - tet - tg - th - ti - tk - tl - tlh - tn - to - tokipona - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu

Static Wikipedia 2006:

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - be - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - closed_zh_tw - co - cr - cs - csb - cu - cv - cy - da - de - diq - dv - dz - ee - el - eml - en - eo - es - et - eu - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gd - gl - glk - gn - got - gu - gv - ha - haw - he - hi - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mg - mh - mi - mk - ml - mn - mo - mr - ms - mt - mus - my - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - rm - rmy - rn - ro - roa_rup - roa_tara - ru - ru_sib - rw - sa - sc - scn - sco - sd - se - searchcom - sg - sh - si - simple - sk - sl - sm - sn - so - sq - sr - ss - st - su - sv - sw - ta - te - test - tet - tg - th - ti - tk - tl - tlh - tn - to - tokipona - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu