GOV/Webservice/Python-suds: Unterschied zwischen den Versionen
< GOV • Webservice
Zur Navigation springen
Zur Suche springen
(Python-suds) |
(weitere Spalten) |
||
(6 dazwischenliegende Versionen desselben Benutzers werden nicht angezeigt) | |||
Zeile 1: | Zeile 1: | ||
== GOV-Typen nach SQLite Importieren == | == GOV-Typen nach SQLite Importieren == | ||
=== Create Table f_gov_type === | === Create Table f_gov_type === | ||
< | <syntaxhighlight lang="SQL"> | ||
-- Objekttypen-Tabelle in SQLite anlegen | -- Objekttypen-Tabelle in SQLite anlegen | ||
DROP TABLE IF EXISTS f_gov_type; | DROP TABLE IF EXISTS f_gov_type; | ||
Zeile 10: | Zeile 10: | ||
type_desc_2 text | type_desc_2 text | ||
); | ); | ||
</ | </syntaxhighlight> | ||
=== Import nach f_gov_type === | === Import nach f_gov_type === | ||
Dieses importiert alle | Dieses importiert alle Objektypen-Texte in eine SQLite-Tabelle. | ||
< | <syntaxhighlight lang="python">from SimpleServiceImpl_services import * | ||
# Datenbank mit Pfad | # Datenbank mit Pfad | ||
sql_db = r'C:\temp\Demo\data\gov_demo.db' | sql_db = r'C:\temp\Demo\data\gov_demo.db' | ||
Zeile 52: | Zeile 52: | ||
continue | continue | ||
print ('Anzahl Objekt-Typen: ' + str(j)) | print ('Anzahl Objekt-Typen: ' + str(j)) | ||
</ | </syntaxhighlight> | ||
== GOV-Einträge über Koordinaten nach SQLite als json importieren == | == GOV-Einträge über Koordinaten nach SQLite als json importieren == | ||
=== Create Table === | === Create Table === | ||
< | <syntaxhighlight lang="SQL"> | ||
DROP TABLE IF EXISTS f_gov_data; | DROP TABLE IF EXISTS f_gov_data; | ||
Zeile 65: | Zeile 65: | ||
item_childs text -- Liste der Kind-Objekte als Json | item_childs text -- Liste der Kind-Objekte als Json | ||
); | ); | ||
</ | </syntaxhighlight> | ||
=== Import über Koordinaten === | === Import über Koordinaten === | ||
< | <syntaxhighlight lang="python"> | ||
# | # Umrechnung laufende Tage pro Jahr | ||
import juliandate | import juliandate | ||
Zeile 89: | Zeile 89: | ||
out = {} | out = {} | ||
for k, v in asdict(d).items(): | for k, v in asdict(d).items(): | ||
# | # Zeitumrechnung | ||
if k == '_jd': | if k == '_jd': | ||
dt = juliandate.to_gregorian(v) | dt = juliandate.to_gregorian(v) | ||
Zeile 108: | Zeile 108: | ||
out[k] = v | out[k] = v | ||
return out | return out | ||
sql_db = r'C:\temp\Demo\data\gov_demo.db' | sql_db = r'C:\temp\Demo\data\gov_demo.db' | ||
conn = sqlite3.connect(sql_db, isolation_level=None) | conn = sqlite3.connect(sql_db, isolation_level=None) | ||
# Da nur die ersten 500 Treffer geliefert werden, muss der Abfrage-Bereich klein genug gewählt werden. | # Da nur die ersten 500 Treffer geliefert werden, muss der Abfrage-Bereich klein genug gewählt werden. | ||
Zeile 145: | Zeile 137: | ||
"'" + gov_item + "','" + gov_eintrag_txt + "','" + gov_eintrag_child_txt + "')" | "'" + gov_item + "','" + gov_eintrag_txt + "','" + gov_eintrag_child_txt + "')" | ||
rc = c.execute(sql_ins) | rc = c.execute(sql_ins) | ||
</ | </syntaxhighlight> | ||
== Json- Views == | == Json- Views == | ||
Zeile 151: | Zeile 143: | ||
Es können alle Arrays per SQL-Selektion abgefragt werden. Hier ein Beispiel. | Es können alle Arrays per SQL-Selektion abgefragt werden. Hier ein Beispiel. | ||
< | <syntaxhighlight lang="SQL"> | ||
drop view if exists v_gov_part_of_govid_1_3; | drop view if exists v_gov_part_of_govid_1_3; | ||
create view v_gov_part_of_govid_1_3 as | create view v_gov_part_of_govid_1_3 as | ||
select gov_id, breite, laenge, name_1, sprache_1, name_2, sprache_2, | select gov_id, item_data, | ||
breite, laenge, hoehe, | |||
plz_1, plz_1_ab, plz_1_bis, | |||
plz_2, plz_2_ab, plz_2_bis, | |||
name_1, sprache_1, name_2, sprache_2, | |||
part_of_govid_1, part_of_govid_2, part_of_govid_3, text, | part_of_govid_1, part_of_govid_2, part_of_govid_3, text, | ||
case when (type_desc_2="") then f_gov_type.type_desc_1 else type_desc_1 || ' (' || type_desc_2 || ')' end as typ | case when (type_desc_2="") then f_gov_type.type_desc_1 else type_desc_1 || ' (' || type_desc_2 || ')' end as typ | ||
from | from | ||
( | ( | ||
select gov_id, | select gov_id, item_data, | ||
json_extract(item_data,'$.position._lat') as breite, | |||
json_extract(item_data,'$.position._lon') as laenge, | |||
json_extract(item_data,'$.position._height') as hoehe, | |||
-- | |||
json_extract(item_data,'$.postal-code[0]._value') as plz_1, | |||
json_extract(item_data,'$.postal-code[0].timespan.begin._jd') as plz_1_ab, | |||
json_extract(item_data,'$.postal-code[0].timespan.end._jd') as plz_1_bis, | |||
-- | |||
json_extract(item_data,'$.postal-code[1]._value') as plz_2, | |||
json_extract(item_data,'$.postal-code[1].timespan.begin._jd') as plz_2_ab, | |||
json_extract(item_data,'$.postal-code[1].timespan.end._jd') as plz_2_bis, | |||
-- | -- | ||
json_extract(item_data,'$.name[0]._value') as name_1, | |||
json_extract(item_data,'$.name[0]._lang') as sprache_1, | |||
json_extract(item_data,'$.name[1]._value') as name_2, | |||
json_extract(item_data,'$.name[1]._lang') as sprache_2, | |||
-- | |||
json_extract(item_data,'$.part-of[0]._ref') AS part_of_govid_1, | |||
json_extract(item_data,'$.part-of[1]._ref') as part_of_govid_2, | |||
json_extract(item_data,'$.part-of[2]._ref') as part_of_govid_3, | |||
json_extract(item_data,'$.note[0].text') as text, | |||
json_extract(item_data,'$.type[0]._value') as t_1 | |||
from f_gov_data | from f_gov_data | ||
) a | ) a | ||
Zeile 180: | Zeile 184: | ||
on a.t_1 = f_gov_type.type_id | on a.t_1 = f_gov_type.type_id | ||
; | ; | ||
</ | </syntaxhighlight> | ||
=== Abfrage auf Externe Kennungen === | === Abfrage auf Externe Kennungen === | ||
Auf diese Weise können Dubletten ermittelt werden. | Auf diese Weise können Dubletten ermittelt werden. | ||
< | <syntaxhighlight lang="SQL"> | ||
drop view if exists v_gov_external_ref_1_6; | drop view if exists v_gov_external_ref_1_6; | ||
Zeile 219: | Zeile 223: | ||
order by gov_id, extref | order by gov_id, extref | ||
; | ; | ||
</ | </syntaxhighlight> | ||
== Weblinks == | |||
* https://www.sqlite.org/lang_corefunc.html | |||
* https://www.sqlite.org/json1.html | |||
* https://pypi.org/project/suds/ | |||
* https://stackoverflow.com/questions/2412486/serializing-a-suds-object-in-python | |||
[[Kategorie:GOV-Intern]] | [[Kategorie:GOV-Intern]] |
Aktuelle Version vom 28. August 2023, 05:11 Uhr
GOV-Typen nach SQLite Importieren
Create Table f_gov_type
-- Objekttypen-Tabelle in SQLite anlegen
DROP TABLE IF EXISTS f_gov_type;
CREATE TABLE f_gov_type (
type_id integer,
type_desc_1 text,
type_desc_2 text
);
Import nach f_gov_type
Dieses importiert alle Objektypen-Texte in eine SQLite-Tabelle.
from SimpleServiceImpl_services import *
# Datenbank mit Pfad
sql_db = r'C:\temp\Demo\data\gov_demo.db'
from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)
# Zugriff auf SQLlite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html
# Zugriff auf SQLite-DB mit Autocommit
conn = sqlite3.connect(sql_db, isolation_level=None)
# Tabelle leeren
c = conn.cursor()
c.execute('DELETE FROM f_gov_type')
# Anzahl der Typen eintragen (Wert schätzen)
j = 0
for i in range(1, 290):
try:
gov_eintrag = read_client.service.getTypeDescription(i, 'deu')
j = j + 1
(desc_1, desc_2) = (gov_eintrag[0])
if (desc_2 == None):
desc_2 = ''
ins_sql = 'INSERT INTO f_gov_type(type_id, type_desc_1, type_desc_2) ' + \
'VALUES (' + str(i) + \
",'" + desc_1 + \
"','" + desc_2 + "')"
# print('SQL:' + ins_sql)
c.execute(ins_sql)
# wenn es die Nummer nicht gibt - überspringen
except:
continue
print ('Anzahl Objekt-Typen: ' + str(j))
GOV-Einträge über Koordinaten nach SQLite als json importieren
Create Table
DROP TABLE IF EXISTS f_gov_data;
CREATE TABLE f_gov_data (
lfd_id integer primary key autoincrement, -- Schlüssel in der Tabelle
gov_id text, -- GOV-Kennung
item_data text, -- GOV-Eintrag als Json
item_childs text -- Liste der Kind-Objekte als Json
);
Import über Koordinaten
# Umrechnung laufende Tage pro Jahr
import juliandate
from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)
# Datenspeicherung als json
import json
# Zugriff auf SQLlite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html
# Serialisieren WSDL-Suds-Objekt als Dictionary
from suds.sudsobject import asdict
def recursive_asdict(d):
out = {}
for k, v in asdict(d).items():
# Zeitumrechnung
if k == '_jd':
dt = juliandate.to_gregorian(v)
vc = str(10000 * dt[0] + 100 * dt[1] + dt[2])
v = vc[0:4] + '-' + vc[4:6] + '-' + vc[6:8]
if k == '_last-modification':
v = str(v)
if hasattr(v, '__keylist__'):
out[k] = recursive_asdict(v)
elif isinstance(v, list):
out[k] = []
for item in v:
if hasattr(item, '__keylist__'):
out[k].append(recursive_asdict(item))
else:
out[k].append(item)
else:
out[k] = v
return out
sql_db = r'C:\temp\Demo\data\gov_demo.db'
conn = sqlite3.connect(sql_db, isolation_level=None)
# Da nur die ersten 500 Treffer geliefert werden, muss der Abfrage-Bereich klein genug gewählt werden.
# Ein Raster von 0.1 Dezimal-Grad ist geeignet.
latitude0 = 50.65
latitude1 = 50.75
longitude0 = 5.80
longitude1 = 6.90
# Liste der GOV-IDs über Koordinaten (max. 500)
item_list = read_client.service.searchByBoundingBox(latitude0, latitude1, longitude0, longitude1)
for i, gov_item in enumerate (item_list.item):
gov_eintrag = read_client.service.getObject(gov_item)
gov_eintrag_dict = recursive_asdict(gov_eintrag)
gov_eintrag_json = json.dumps (gov_eintrag_dict, sort_keys=True, default=str, ensure_ascii=False)
gov_eintrag_txt = gov_eintrag_json.replace("'", "''")
gov_eintrag_child = read_client.service.getChildObjects(gov_item)
gov_eintrag_child_dict = recursive_asdict(gov_eintrag_child)
gov_eintrag_child_json = json.dumps(gov_eintrag_child_dict, sort_keys=True, default=str, ensure_ascii=False)
gov_eintrag_child_txt = gov_eintrag_child_json.replace("'", "''")
# print ('GOV-ID: ' + gov_item )
sql_ins = 'insert into f_gov_data (gov_id, item_data, item_childs) values (' + \
"'" + gov_item + "','" + gov_eintrag_txt + "','" + gov_eintrag_child_txt + "')"
rc = c.execute(sql_ins)
Json- Views
Abfrage auf Arrays
Es können alle Arrays per SQL-Selektion abgefragt werden. Hier ein Beispiel.
drop view if exists v_gov_part_of_govid_1_3;
create view v_gov_part_of_govid_1_3 as
select gov_id, item_data,
breite, laenge, hoehe,
plz_1, plz_1_ab, plz_1_bis,
plz_2, plz_2_ab, plz_2_bis,
name_1, sprache_1, name_2, sprache_2,
part_of_govid_1, part_of_govid_2, part_of_govid_3, text,
case when (type_desc_2="") then f_gov_type.type_desc_1 else type_desc_1 || ' (' || type_desc_2 || ')' end as typ
from
(
select gov_id, item_data,
json_extract(item_data,'$.position._lat') as breite,
json_extract(item_data,'$.position._lon') as laenge,
json_extract(item_data,'$.position._height') as hoehe,
--
json_extract(item_data,'$.postal-code[0]._value') as plz_1,
json_extract(item_data,'$.postal-code[0].timespan.begin._jd') as plz_1_ab,
json_extract(item_data,'$.postal-code[0].timespan.end._jd') as plz_1_bis,
--
json_extract(item_data,'$.postal-code[1]._value') as plz_2,
json_extract(item_data,'$.postal-code[1].timespan.begin._jd') as plz_2_ab,
json_extract(item_data,'$.postal-code[1].timespan.end._jd') as plz_2_bis,
--
json_extract(item_data,'$.name[0]._value') as name_1,
json_extract(item_data,'$.name[0]._lang') as sprache_1,
json_extract(item_data,'$.name[1]._value') as name_2,
json_extract(item_data,'$.name[1]._lang') as sprache_2,
--
json_extract(item_data,'$.part-of[0]._ref') AS part_of_govid_1,
json_extract(item_data,'$.part-of[1]._ref') as part_of_govid_2,
json_extract(item_data,'$.part-of[2]._ref') as part_of_govid_3,
json_extract(item_data,'$.note[0].text') as text,
json_extract(item_data,'$.type[0]._value') as t_1
from f_gov_data
) a
left join f_gov_type
on a.t_1 = f_gov_type.type_id
;
Abfrage auf Externe Kennungen
Auf diese Weise können Dubletten ermittelt werden.
drop view if exists v_gov_external_ref_1_6;
create view v_gov_external_ref_1_6 as
select gov_id,
json_extract(item_data,'$.external-reference[0]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[0]') IS NOT NULL
union all
select gov_id,
json_extract(item_data,'$.external-reference[1]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[1]') IS NOT NULL
union all
select gov_id,
json_extract(item_data,'$.external-reference[2]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[2]') IS NOT NULL
union all
select gov_id,
json_extract(item_data,'$.external-reference[3]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[3]') IS NOT NULL
union all
select gov_id,
json_extract(item_data,'$.external-reference[4]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[4]') IS NOT NULL
union all
select gov_id,
json_extract(item_data,'$.external-reference[5]._value') AS extref
from f_gov_data
where json_extract(item_data,'$.external-reference[5]') IS NOT NULL
order by gov_id, extref
;